refactor: 整理代码格式并修复部分类型和依赖问题
1. 调整 task.py 的 TypeVar 导入和默认值;
2. 格式化多处列表和参数写法,统一括号风格;
3. 为 pdftool.py 添加 pyrefly 忽略注释以消除类型警告;
4. 为 emlmanager.py 添加数据库连接断言和检查;
5. 将 hfdownload.py 的 depends_on 参数修正为元组格式。
This commit is contained in:
@@ -88,6 +88,8 @@ class EmailDatabase:
|
||||
|
||||
def insert_email(self, email_data: dict[str, Any]) -> bool:
|
||||
"""插入邮件数据."""
|
||||
assert self.conn, "数据库连接未初始化"
|
||||
|
||||
try:
|
||||
with self._lock:
|
||||
cursor = self.conn.cursor()
|
||||
@@ -123,6 +125,8 @@ class EmailDatabase:
|
||||
self, keyword: str = "", field: str = "all", limit: int = 100, offset: int = 0
|
||||
) -> list[dict[str, Any]]:
|
||||
"""搜索邮件."""
|
||||
assert self.conn, "数据库连接未初始化"
|
||||
|
||||
with self._lock:
|
||||
cursor = self.conn.cursor()
|
||||
|
||||
@@ -154,6 +158,8 @@ class EmailDatabase:
|
||||
|
||||
def get_grouped_emails(self) -> dict[str, list[dict[str, Any]]]:
|
||||
"""获取按主题分组的邮件."""
|
||||
assert self.conn, "数据库连接未初始化"
|
||||
|
||||
with self._lock:
|
||||
cursor = self.conn.cursor()
|
||||
cursor.execute(f"SELECT * FROM {TABLE_NAME} ORDER BY subject, date_parsed DESC")
|
||||
@@ -183,6 +189,8 @@ class EmailDatabase:
|
||||
|
||||
def get_email_count(self) -> int:
|
||||
"""获取邮件总数."""
|
||||
assert self.conn, "数据库连接未初始化"
|
||||
|
||||
with self._lock:
|
||||
cursor = self.conn.cursor()
|
||||
cursor.execute(f"SELECT COUNT(*) FROM {TABLE_NAME}")
|
||||
@@ -190,6 +198,8 @@ class EmailDatabase:
|
||||
|
||||
def clear_all(self) -> None:
|
||||
"""清空所有邮件数据."""
|
||||
assert self.conn, "数据库连接未初始化"
|
||||
|
||||
with self._lock:
|
||||
cursor = self.conn.cursor()
|
||||
cursor.execute(f"DELETE FROM {TABLE_NAME}")
|
||||
@@ -557,15 +567,13 @@ class EmlManagerHandler(BaseHTTPRequestHandler):
|
||||
|
||||
emails = self.db.search_emails(keyword, field, limit, offset)
|
||||
total_count = self.db.get_email_count()
|
||||
self._send_json_response(
|
||||
{
|
||||
"emails": emails,
|
||||
"count": len(emails),
|
||||
"total": total_count,
|
||||
"limit": limit,
|
||||
"offset": offset,
|
||||
}
|
||||
)
|
||||
self._send_json_response({
|
||||
"emails": emails,
|
||||
"count": len(emails),
|
||||
"total": total_count,
|
||||
"limit": limit,
|
||||
"offset": offset,
|
||||
})
|
||||
|
||||
def _api_get_email(self, query_params: dict[str, list[str]]) -> None:
|
||||
"""API: 获取单个邮件详情."""
|
||||
@@ -578,6 +586,10 @@ class EmlManagerHandler(BaseHTTPRequestHandler):
|
||||
self._send_json_response({"error": "缺少邮件ID"}, 400)
|
||||
return
|
||||
|
||||
if not self.db.conn:
|
||||
self._send_json_response({"error": "数据库连接未初始化"}, 500)
|
||||
return
|
||||
|
||||
with self.db._lock:
|
||||
cursor = self.db.conn.cursor()
|
||||
cursor.execute(f"SELECT * FROM {TABLE_NAME} WHERE id = ?", (int(email_id),))
|
||||
@@ -630,6 +642,10 @@ class EmlManagerHandler(BaseHTTPRequestHandler):
|
||||
if not eml_files:
|
||||
return
|
||||
|
||||
if not self.db.conn:
|
||||
self._send_json_response({"error": "数据库连接未初始化"}, 500)
|
||||
return
|
||||
|
||||
# 先批量查询所有已存在的文件
|
||||
with self.db._lock:
|
||||
cursor = self.db.conn.cursor()
|
||||
@@ -1268,6 +1284,10 @@ def main() -> None:
|
||||
if eml_files:
|
||||
print(f"发现 {len(eml_files)} 个 EML 文件,开始导入...")
|
||||
|
||||
if not EmlManagerHandler.db.conn:
|
||||
print("数据库连接未初始化,无法导入邮件")
|
||||
return
|
||||
|
||||
# 先批量查询所有已存在的文件
|
||||
with EmlManagerHandler.db._lock:
|
||||
cursor = EmlManagerHandler.db.conn.cursor()
|
||||
|
||||
@@ -39,7 +39,7 @@ RUSTUP_MIRRORS: dict[str, dict[str, str]] = {
|
||||
UsableRustVersion = Literal["stable", "nightly", "beta"]
|
||||
UsableMirror = Literal["aliyun", "ustc", "tsinghua"]
|
||||
|
||||
DEFAULT_RUST_VERSION: str = "stable"
|
||||
DEFAULT_RUST_VERSION: UsableRustVersion = "stable"
|
||||
DEFAULT_MIRROR: UsableMirror = "tsinghua"
|
||||
|
||||
|
||||
@@ -136,13 +136,13 @@ def main() -> None:
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.command == "mirror":
|
||||
graph = px.Graph.from_specs(
|
||||
[px.TaskSpec("set_rust_mirror", fn=set_rust_mirror, args=(args.name,), verbose=True)]
|
||||
)
|
||||
graph = px.Graph.from_specs([
|
||||
px.TaskSpec("set_rust_mirror", fn=set_rust_mirror, args=(args.name,), verbose=True)
|
||||
])
|
||||
elif args.command == "install":
|
||||
graph = px.Graph.from_specs(
|
||||
[px.TaskSpec("install_rust", cmd=["rustup", "toolchain", "install", args.version], verbose=True)]
|
||||
)
|
||||
graph = px.Graph.from_specs([
|
||||
px.TaskSpec("install_rust", cmd=["rustup", "toolchain", "install", args.version], verbose=True)
|
||||
])
|
||||
else:
|
||||
parser.print_help()
|
||||
return
|
||||
|
||||
+25
-35
@@ -33,20 +33,16 @@ def init_sub_dirs() -> None:
|
||||
sub_dirs = [subdir for subdir in Path.cwd().iterdir() if subdir.is_dir()]
|
||||
for subdir in sub_dirs:
|
||||
px.run(
|
||||
px.Graph.from_specs(
|
||||
[
|
||||
px.TaskSpec(
|
||||
"init",
|
||||
cmd=["git", "init"],
|
||||
conditions=[not_has_git_repo],
|
||||
cwd=str(subdir),
|
||||
),
|
||||
px.TaskSpec("add", cmd=["git", "add", "."], depends_on=["init"], cwd=str(subdir)),
|
||||
px.TaskSpec(
|
||||
"commit", cmd=["git", "commit", "-m", "init commit"], depends_on=["add"], cwd=str(subdir)
|
||||
),
|
||||
]
|
||||
),
|
||||
px.Graph.from_specs([
|
||||
px.TaskSpec(
|
||||
"init",
|
||||
cmd=["git", "init"],
|
||||
conditions=(not_has_git_repo,),
|
||||
cwd=subdir,
|
||||
),
|
||||
px.TaskSpec("add", cmd=["git", "add", "."], depends_on=("init",)),
|
||||
px.TaskSpec("commit", cmd=["git", "commit", "-m", "init commit"], depends_on=("add",)),
|
||||
]),
|
||||
)
|
||||
|
||||
|
||||
@@ -73,29 +69,23 @@ def main() -> None:
|
||||
description="Gittool - Git 执行工具.",
|
||||
graphs={
|
||||
# 添加并提交
|
||||
"a": px.Graph.from_specs(
|
||||
[
|
||||
px.TaskSpec("add", cmd=["git", "add", "."], conditions=[has_files]),
|
||||
px.TaskSpec("commit", cmd=["git", "commit", "-m", "chore: update"], depends_on=["add"]),
|
||||
]
|
||||
),
|
||||
"a": px.Graph.from_specs([
|
||||
px.TaskSpec("add", cmd=["git", "add", "."], conditions=(has_files,)),
|
||||
px.TaskSpec("commit", cmd=["git", "commit", "-m", "chore: update"], depends_on=("add",)),
|
||||
]),
|
||||
# 清理
|
||||
"c": px.Graph.from_specs(
|
||||
[
|
||||
px.TaskSpec("clean", cmd=["git", "clean", "-xfd", *EXCLUDE_CMDS]),
|
||||
px.TaskSpec("status", cmd=["git", "status", "--porcelain"], depends_on=["clean"]),
|
||||
]
|
||||
),
|
||||
"c": px.Graph.from_specs([
|
||||
px.TaskSpec("clean", cmd=["git", "clean", "-xfd", *EXCLUDE_CMDS]),
|
||||
px.TaskSpec("status", cmd=["git", "status", "--porcelain"], depends_on=("clean",)),
|
||||
]),
|
||||
# 初始化、添加并提交
|
||||
"i": px.Graph.from_specs(
|
||||
[
|
||||
px.TaskSpec("init", cmd=["git", "init"], conditions=[not_has_git_repo]),
|
||||
px.TaskSpec("add", cmd=["git", "add", "."], depends_on=["init"], conditions=[has_files]),
|
||||
px.TaskSpec(
|
||||
"commit", cmd=["git", "commit", "-m", "init commit"], depends_on=["add"], conditions=[has_files]
|
||||
),
|
||||
]
|
||||
),
|
||||
"i": px.Graph.from_specs([
|
||||
px.TaskSpec("init", cmd=["git", "init"], conditions=(not_has_git_repo,)),
|
||||
px.TaskSpec("add", cmd=["git", "add", "."], depends_on=("init",), conditions=(has_files,)),
|
||||
px.TaskSpec(
|
||||
"commit", cmd=["git", "commit", "-m", "init commit"], depends_on=("add",), conditions=(has_files,)
|
||||
),
|
||||
]),
|
||||
# 初始化子目录
|
||||
"isub": px.Graph.from_specs([isub]),
|
||||
# 推送
|
||||
|
||||
@@ -37,50 +37,46 @@ def main():
|
||||
download_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
if args.use_hfd:
|
||||
graph = px.Graph.from_specs(
|
||||
[
|
||||
px.TaskSpec(name="setenvs", fn=setenvs, verbose=True),
|
||||
px.TaskSpec(
|
||||
name="download_hfd",
|
||||
cmd=["wget", "https://hf-mirror.com/hfd/hfd.sh"],
|
||||
depends_on=["setenvs"],
|
||||
verbose=True,
|
||||
),
|
||||
px.TaskSpec(
|
||||
name="chmod_hfd",
|
||||
cmd=["chmod", "a+x", "hfd.sh"],
|
||||
depends_on=["download_hfd"],
|
||||
verbose=True,
|
||||
),
|
||||
px.TaskSpec(
|
||||
name="run_hfd",
|
||||
cmd=["./hfd.sh", dataset_name, args.type],
|
||||
depends_on=["chmod_hfd"],
|
||||
verbose=True,
|
||||
),
|
||||
]
|
||||
)
|
||||
graph = px.Graph.from_specs([
|
||||
px.TaskSpec(name="setenvs", fn=setenvs, verbose=True),
|
||||
px.TaskSpec(
|
||||
name="download_hfd",
|
||||
cmd=["wget", "https://hf-mirror.com/hfd/hfd.sh"],
|
||||
depends_on=("setenvs",),
|
||||
verbose=True,
|
||||
),
|
||||
px.TaskSpec(
|
||||
name="chmod_hfd",
|
||||
cmd=["chmod", "a+x", "hfd.sh"],
|
||||
depends_on=("download_hfd",),
|
||||
verbose=True,
|
||||
),
|
||||
px.TaskSpec(
|
||||
name="run_hfd",
|
||||
cmd=["./hfd.sh", dataset_name, args.type],
|
||||
depends_on=("chmod_hfd",),
|
||||
verbose=True,
|
||||
),
|
||||
])
|
||||
else:
|
||||
graph = px.Graph.from_specs(
|
||||
[
|
||||
px.TaskSpec(name="setenvs", fn=setenvs, verbose=True),
|
||||
px.TaskSpec(
|
||||
name="download",
|
||||
cmd=[
|
||||
"uvx",
|
||||
"hf",
|
||||
"download",
|
||||
"--repo-type",
|
||||
args.type,
|
||||
"--force-download",
|
||||
dataset_name,
|
||||
"--local-dir",
|
||||
str(Path.cwd() / dataset_name),
|
||||
],
|
||||
depends_on=["setenvs"],
|
||||
verbose=True,
|
||||
),
|
||||
]
|
||||
)
|
||||
graph = px.Graph.from_specs([
|
||||
px.TaskSpec(name="setenvs", fn=setenvs, verbose=True),
|
||||
px.TaskSpec(
|
||||
name="download",
|
||||
cmd=[
|
||||
"uvx",
|
||||
"hf",
|
||||
"download",
|
||||
"--repo-type",
|
||||
args.type,
|
||||
"--force-download",
|
||||
dataset_name,
|
||||
"--local-dir",
|
||||
str(Path.cwd() / dataset_name),
|
||||
],
|
||||
depends_on=("setenvs",),
|
||||
verbose=True,
|
||||
),
|
||||
])
|
||||
|
||||
px.run(graph, strategy="thread", verbose=True)
|
||||
|
||||
@@ -146,7 +146,7 @@ def pdf_extract_text(input_path: Path, output_path: Path) -> None:
|
||||
doc = fitz.open(str(input_path))
|
||||
text = ""
|
||||
for page in doc:
|
||||
text += page.get_text() + "\n\n"
|
||||
text += str(page.get_text()) + "\n\n"
|
||||
doc.close()
|
||||
|
||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
@@ -164,6 +164,7 @@ def pdf_extract_images(input_path: Path, output_dir: Path) -> None:
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
image_count = 0
|
||||
# pyrefly: ignore [bad-argument-type]
|
||||
for page_num, page in enumerate(doc):
|
||||
images = page.get_images(full=True)
|
||||
for img_idx, img in enumerate(images):
|
||||
@@ -249,9 +250,13 @@ def pdf_info(input_path: Path) -> None:
|
||||
doc = fitz.open(str(input_path))
|
||||
print(f"文件: {input_path}")
|
||||
print(f"页数: {doc.page_count}")
|
||||
# pyrefly: ignore [missing-attribute]
|
||||
print(f"标题: {doc.metadata.get('title', 'N/A')}")
|
||||
# pyrefly: ignore [missing-attribute]
|
||||
print(f"作者: {doc.metadata.get('author', 'N/A')}")
|
||||
# pyrefly: ignore [missing-attribute]
|
||||
print(f"创建日期: {doc.metadata.get('creationDate', 'N/A')}")
|
||||
# pyrefly: ignore [missing-attribute]
|
||||
print(f"修改日期: {doc.metadata.get('modDate', 'N/A')}")
|
||||
print(f"文件大小: {input_path.stat().st_size / 1024:.1f} KB")
|
||||
doc.close()
|
||||
@@ -281,6 +286,7 @@ def pdf_ocr(input_path: Path, output_path: Path, lang: str = "chi_sim+eng") -> N
|
||||
new_page = new_doc.new_page(width=page.rect.width, height=page.rect.height)
|
||||
new_page.insert_image(new_page.rect, pixmap=pix)
|
||||
text_rect = fitz.Rect(0, 0, page.rect.width, page.rect.height)
|
||||
# pyrefly: ignore [bad-argument-type]
|
||||
new_page.insert_textbox(text_rect, ocr_text)
|
||||
|
||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
@@ -319,6 +325,7 @@ def pdf_to_images(input_path: Path, output_dir: Path, dpi: int = 300) -> None:
|
||||
doc = fitz.open(str(input_path))
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# pyrefly: ignore [bad-argument-type]
|
||||
for page_num, page in enumerate(doc):
|
||||
pix = page.get_pixmap(dpi=dpi)
|
||||
image_path = output_dir / f"{input_path.stem}_page_{page_num + 1}.png"
|
||||
|
||||
+3
-2
@@ -28,12 +28,13 @@ from typing import (
|
||||
Mapping,
|
||||
Optional,
|
||||
Tuple,
|
||||
TypeVar,
|
||||
Union,
|
||||
cast,
|
||||
)
|
||||
|
||||
T = TypeVar("T")
|
||||
from typing_extensions import TypeVar
|
||||
|
||||
T = TypeVar("T", default=Any)
|
||||
|
||||
# 任务可调用对象可以是同步或异步的。显式保留联合类型,让 mypy 理解两种形态。
|
||||
TaskFn = Union[
|
||||
|
||||
Reference in New Issue
Block a user