refactor: 整理代码格式并修复部分类型和依赖问题

1. 调整 task.py 的 TypeVar 导入和默认值
2. 格式化多处列表和参数写法，统一括号风格
3. 为 pdftool.py 添加 pyrefly 忽略注释修复类型警告
4. 为 emlmanager.py 添加数据库连接断言和检查
5. 修正 hfdownload.py 的 depends_on 参数为元组格式
2026-06-26 21:52:44 +08:00
parent fd282db28f
commit 7ded8df05e
6 changed files with 112 additions and 98 deletions
@@ -88,6 +88,8 @@ class EmailDatabase:
    def insert_email(self, email_data: dict[str, Any]) -> bool:
        """插入邮件数据."""
        assert self.conn, "数据库连接未初始化"
        try:
            with self._lock:
                cursor = self.conn.cursor()
@@ -123,6 +125,8 @@ class EmailDatabase:
        self, keyword: str = "", field: str = "all", limit: int = 100, offset: int = 0
    ) -> list[dict[str, Any]]:
        """搜索邮件."""
        assert self.conn, "数据库连接未初始化"
        with self._lock:
            cursor = self.conn.cursor()
@@ -154,6 +158,8 @@ class EmailDatabase:
    def get_grouped_emails(self) -> dict[str, list[dict[str, Any]]]:
        """获取按主题分组的邮件."""
        assert self.conn, "数据库连接未初始化"
        with self._lock:
            cursor = self.conn.cursor()
            cursor.execute(f"SELECT * FROM {TABLE_NAME} ORDER BY subject, date_parsed DESC")
@@ -183,6 +189,8 @@ class EmailDatabase:
    def get_email_count(self) -> int:
        """获取邮件总数."""
        assert self.conn, "数据库连接未初始化"
        with self._lock:
            cursor = self.conn.cursor()
            cursor.execute(f"SELECT COUNT(*) FROM {TABLE_NAME}")
@@ -190,6 +198,8 @@ class EmailDatabase:
    def clear_all(self) -> None:
        """清空所有邮件数据."""
        assert self.conn, "数据库连接未初始化"
        with self._lock:
            cursor = self.conn.cursor()
            cursor.execute(f"DELETE FROM {TABLE_NAME}")
@@ -557,15 +567,13 @@ class EmlManagerHandler(BaseHTTPRequestHandler):
        emails = self.db.search_emails(keyword, field, limit, offset)
        total_count = self.db.get_email_count()
-        self._send_json_response(
+        self._send_json_response({
-            {
+            "emails": emails,
-                "emails": emails,
+            "count": len(emails),
-                "count": len(emails),
+            "total": total_count,
-                "total": total_count,
+            "limit": limit,
-                "limit": limit,
+            "offset": offset,
-                "offset": offset,
+        })
            }
        )
    def _api_get_email(self, query_params: dict[str, list[str]]) -> None:
        """API: 获取单个邮件详情."""
@@ -578,6 +586,10 @@ class EmlManagerHandler(BaseHTTPRequestHandler):
            self._send_json_response({"error": "缺少邮件ID"}, 400)
            return
        if not self.db.conn:
            self._send_json_response({"error": "数据库连接未初始化"}, 500)
            return
        with self.db._lock:
            cursor = self.db.conn.cursor()
            cursor.execute(f"SELECT * FROM {TABLE_NAME} WHERE id = ?", (int(email_id),))
@@ -630,6 +642,10 @@ class EmlManagerHandler(BaseHTTPRequestHandler):
            if not eml_files:
                return
            if not self.db.conn:
                self._send_json_response({"error": "数据库连接未初始化"}, 500)
                return
            # 先批量查询所有已存在的文件
            with self.db._lock:
                cursor = self.db.conn.cursor()
@@ -1268,6 +1284,10 @@ def main() -> None:
            if eml_files:
                print(f"发现 {len(eml_files)} 个 EML 文件，开始导入...")
                if not EmlManagerHandler.db.conn:
                    print("数据库连接未初始化，无法导入邮件")
                    return
                # 先批量查询所有已存在的文件
                with EmlManagerHandler.db._lock:
                    cursor = EmlManagerHandler.db.conn.cursor()
@@ -39,7 +39,7 @@ RUSTUP_MIRRORS: dict[str, dict[str, str]] = {
 UsableRustVersion = Literal["stable", "nightly", "beta"]
 UsableMirror = Literal["aliyun", "ustc", "tsinghua"]
-DEFAULT_RUST_VERSION: str = "stable"
+DEFAULT_RUST_VERSION: UsableRustVersion = "stable"
 DEFAULT_MIRROR: UsableMirror = "tsinghua"
@@ -136,13 +136,13 @@ def main() -> None:
    args = parser.parse_args()
    if args.command == "mirror":
-        graph = px.Graph.from_specs(
+        graph = px.Graph.from_specs([
-            [px.TaskSpec("set_rust_mirror", fn=set_rust_mirror, args=(args.name,), verbose=True)]
+            px.TaskSpec("set_rust_mirror", fn=set_rust_mirror, args=(args.name,), verbose=True)
-        )
+        ])
    elif args.command == "install":
-        graph = px.Graph.from_specs(
+        graph = px.Graph.from_specs([
-            [px.TaskSpec("install_rust", cmd=["rustup", "toolchain", "install", args.version], verbose=True)]
+            px.TaskSpec("install_rust", cmd=["rustup", "toolchain", "install", args.version], verbose=True)
-        )
+        ])
    else:
        parser.print_help()
        return
@@ -33,20 +33,16 @@ def init_sub_dirs() -> None:
    sub_dirs = [subdir for subdir in Path.cwd().iterdir() if subdir.is_dir()]
    for subdir in sub_dirs:
        px.run(
-            px.Graph.from_specs(
+            px.Graph.from_specs([
-                [
+                px.TaskSpec(
-                    px.TaskSpec(
+                    "init",
-                        "init",
+                    cmd=["git", "init"],
-                        cmd=["git", "init"],
+                    conditions=(not_has_git_repo,),
-                        conditions=[not_has_git_repo],
+                    cwd=subdir,
-                        cwd=str(subdir),
+                ),
-                    ),
+                px.TaskSpec("add", cmd=["git", "add", "."], depends_on=("init",)),
-                    px.TaskSpec("add", cmd=["git", "add", "."], depends_on=["init"], cwd=str(subdir)),
+                px.TaskSpec("commit", cmd=["git", "commit", "-m", "init commit"], depends_on=("add",)),
-                    px.TaskSpec(
+            ]),
                        "commit", cmd=["git", "commit", "-m", "init commit"], depends_on=["add"], cwd=str(subdir)
                    ),
                ]
            ),
        )
@@ -73,29 +69,23 @@ def main() -> None:
        description="Gittool - Git 执行工具.",
        graphs={
            # 添加并提交
-            "a": px.Graph.from_specs(
+            "a": px.Graph.from_specs([
-                [
+                px.TaskSpec("add", cmd=["git", "add", "."], conditions=(has_files,)),
-                    px.TaskSpec("add", cmd=["git", "add", "."], conditions=[has_files]),
+                px.TaskSpec("commit", cmd=["git", "commit", "-m", "chore: update"], depends_on=("add",)),
-                    px.TaskSpec("commit", cmd=["git", "commit", "-m", "chore: update"], depends_on=["add"]),
+            ]),
                ]
            ),
            # 清理
-            "c": px.Graph.from_specs(
+            "c": px.Graph.from_specs([
-                [
+                px.TaskSpec("clean", cmd=["git", "clean", "-xfd", *EXCLUDE_CMDS]),
-                    px.TaskSpec("clean", cmd=["git", "clean", "-xfd", *EXCLUDE_CMDS]),
+                px.TaskSpec("status", cmd=["git", "status", "--porcelain"], depends_on=("clean",)),
-                    px.TaskSpec("status", cmd=["git", "status", "--porcelain"], depends_on=["clean"]),
+            ]),
                ]
            ),
            # 初始化、添加并提交
-            "i": px.Graph.from_specs(
+            "i": px.Graph.from_specs([
-                [
+                px.TaskSpec("init", cmd=["git", "init"], conditions=(not_has_git_repo,)),
-                    px.TaskSpec("init", cmd=["git", "init"], conditions=[not_has_git_repo]),
+                px.TaskSpec("add", cmd=["git", "add", "."], depends_on=("init",), conditions=(has_files,)),
-                    px.TaskSpec("add", cmd=["git", "add", "."], depends_on=["init"], conditions=[has_files]),
+                px.TaskSpec(
-                    px.TaskSpec(
+                    "commit", cmd=["git", "commit", "-m", "init commit"], depends_on=("add",), conditions=(has_files,)
-                        "commit", cmd=["git", "commit", "-m", "init commit"], depends_on=["add"], conditions=[has_files]
+                ),
-                    ),
+            ]),
                ]
            ),
            # 初始化子目录
            "isub": px.Graph.from_specs([isub]),
            # 推送
@@ -37,50 +37,46 @@ def main():
    download_dir.mkdir(parents=True, exist_ok=True)
    if args.use_hfd:
-        graph = px.Graph.from_specs(
+        graph = px.Graph.from_specs([
-            [
+            px.TaskSpec(name="setenvs", fn=setenvs, verbose=True),
-                px.TaskSpec(name="setenvs", fn=setenvs, verbose=True),
+            px.TaskSpec(
-                px.TaskSpec(
+                name="download_hfd",
-                    name="download_hfd",
+                cmd=["wget", "https://hf-mirror.com/hfd/hfd.sh"],
-                    cmd=["wget", "https://hf-mirror.com/hfd/hfd.sh"],
+                depends_on=("setenvs",),
-                    depends_on=["setenvs"],
+                verbose=True,
-                    verbose=True,
+            ),
-                ),
+            px.TaskSpec(
-                px.TaskSpec(
+                name="chmod_hfd",
-                    name="chmod_hfd",
+                cmd=["chmod", "a+x", "hfd.sh"],
-                    cmd=["chmod", "a+x", "hfd.sh"],
+                depends_on=("download_hfd",),
-                    depends_on=["download_hfd"],
+                verbose=True,
-                    verbose=True,
+            ),
-                ),
+            px.TaskSpec(
-                px.TaskSpec(
+                name="run_hfd",
-                    name="run_hfd",
+                cmd=["./hfd.sh", dataset_name, args.type],
-                    cmd=["./hfd.sh", dataset_name, args.type],
+                depends_on=("chmod_hfd",),
-                    depends_on=["chmod_hfd"],
+                verbose=True,
-                    verbose=True,
+            ),
-                ),
+        ])
            ]
        )
    else:
-        graph = px.Graph.from_specs(
+        graph = px.Graph.from_specs([
-            [
+            px.TaskSpec(name="setenvs", fn=setenvs, verbose=True),
-                px.TaskSpec(name="setenvs", fn=setenvs, verbose=True),
+            px.TaskSpec(
-                px.TaskSpec(
+                name="download",
-                    name="download",
+                cmd=[
-                    cmd=[
+                    "uvx",
-                        "uvx",
+                    "hf",
-                        "hf",
+                    "download",
-                        "download",
+                    "--repo-type",
-                        "--repo-type",
+                    args.type,
-                        args.type,
+                    "--force-download",
-                        "--force-download",
+                    dataset_name,
-                        dataset_name,
+                    "--local-dir",
-                        "--local-dir",
+                    str(Path.cwd() / dataset_name),
-                        str(Path.cwd() / dataset_name),
+                ],
-                    ],
+                depends_on=("setenvs",),
-                    depends_on=["setenvs"],
+                verbose=True,
-                    verbose=True,
+            ),
-                ),
+        ])
            ]
        )
    px.run(graph, strategy="thread", verbose=True)
@@ -146,7 +146,7 @@ def pdf_extract_text(input_path: Path, output_path: Path) -> None:
    doc = fitz.open(str(input_path))
    text = ""
    for page in doc:
-        text += page.get_text() + "\n\n"
+        text += str(page.get_text()) + "\n\n"
    doc.close()
    output_path.parent.mkdir(parents=True, exist_ok=True)
@@ -164,6 +164,7 @@ def pdf_extract_images(input_path: Path, output_dir: Path) -> None:
    output_dir.mkdir(parents=True, exist_ok=True)
    image_count = 0
    # pyrefly: ignore [bad-argument-type]
    for page_num, page in enumerate(doc):
        images = page.get_images(full=True)
        for img_idx, img in enumerate(images):
@@ -249,9 +250,13 @@ def pdf_info(input_path: Path) -> None:
    doc = fitz.open(str(input_path))
    print(f"文件: {input_path}")
    print(f"页数: {doc.page_count}")
    # pyrefly: ignore [missing-attribute]
    print(f"标题: {doc.metadata.get('title', 'N/A')}")
    # pyrefly: ignore [missing-attribute]
    print(f"作者: {doc.metadata.get('author', 'N/A')}")
    # pyrefly: ignore [missing-attribute]
    print(f"创建日期: {doc.metadata.get('creationDate', 'N/A')}")
    # pyrefly: ignore [missing-attribute]
    print(f"修改日期: {doc.metadata.get('modDate', 'N/A')}")
    print(f"文件大小: {input_path.stat().st_size / 1024:.1f} KB")
    doc.close()
@@ -281,6 +286,7 @@ def pdf_ocr(input_path: Path, output_path: Path, lang: str = "chi_sim+eng") -> N
        new_page = new_doc.new_page(width=page.rect.width, height=page.rect.height)
        new_page.insert_image(new_page.rect, pixmap=pix)
        text_rect = fitz.Rect(0, 0, page.rect.width, page.rect.height)
        # pyrefly: ignore [bad-argument-type]
        new_page.insert_textbox(text_rect, ocr_text)
    output_path.parent.mkdir(parents=True, exist_ok=True)
@@ -319,6 +325,7 @@ def pdf_to_images(input_path: Path, output_dir: Path, dpi: int = 300) -> None:
    doc = fitz.open(str(input_path))
    output_dir.mkdir(parents=True, exist_ok=True)
    # pyrefly: ignore [bad-argument-type]
    for page_num, page in enumerate(doc):
        pix = page.get_pixmap(dpi=dpi)
        image_path = output_dir / f"{input_path.stem}_page_{page_num + 1}.png"
@@ -28,12 +28,13 @@ from typing import (
    Mapping,
    Optional,
    Tuple,
    TypeVar,
    Union,
    cast,
 )
-T = TypeVar("T")
+from typing_extensions import TypeVar
 T = TypeVar("T", default=Any)
 # 任务可调用对象可以是同步或异步的。显式保留联合类型，让 mypy 理解两种形态。
 TaskFn = Union[