diff --git a/src/pyflowx/cli/emlmanager.py b/src/pyflowx/cli/emlmanager.py index e1a2b9a..0c60cdf 100644 --- a/src/pyflowx/cli/emlmanager.py +++ b/src/pyflowx/cli/emlmanager.py @@ -88,6 +88,8 @@ class EmailDatabase: def insert_email(self, email_data: dict[str, Any]) -> bool: """插入邮件数据.""" + assert self.conn, "数据库连接未初始化" + try: with self._lock: cursor = self.conn.cursor() @@ -123,6 +125,8 @@ class EmailDatabase: self, keyword: str = "", field: str = "all", limit: int = 100, offset: int = 0 ) -> list[dict[str, Any]]: """搜索邮件.""" + assert self.conn, "数据库连接未初始化" + with self._lock: cursor = self.conn.cursor() @@ -154,6 +158,8 @@ class EmailDatabase: def get_grouped_emails(self) -> dict[str, list[dict[str, Any]]]: """获取按主题分组的邮件.""" + assert self.conn, "数据库连接未初始化" + with self._lock: cursor = self.conn.cursor() cursor.execute(f"SELECT * FROM {TABLE_NAME} ORDER BY subject, date_parsed DESC") @@ -183,6 +189,8 @@ class EmailDatabase: def get_email_count(self) -> int: """获取邮件总数.""" + assert self.conn, "数据库连接未初始化" + with self._lock: cursor = self.conn.cursor() cursor.execute(f"SELECT COUNT(*) FROM {TABLE_NAME}") @@ -190,6 +198,8 @@ class EmailDatabase: def clear_all(self) -> None: """清空所有邮件数据.""" + assert self.conn, "数据库连接未初始化" + with self._lock: cursor = self.conn.cursor() cursor.execute(f"DELETE FROM {TABLE_NAME}") @@ -557,15 +567,13 @@ class EmlManagerHandler(BaseHTTPRequestHandler): emails = self.db.search_emails(keyword, field, limit, offset) total_count = self.db.get_email_count() - self._send_json_response( - { - "emails": emails, - "count": len(emails), - "total": total_count, - "limit": limit, - "offset": offset, - } - ) + self._send_json_response({ + "emails": emails, + "count": len(emails), + "total": total_count, + "limit": limit, + "offset": offset, + }) def _api_get_email(self, query_params: dict[str, list[str]]) -> None: """API: 获取单个邮件详情.""" @@ -578,6 +586,10 @@ class EmlManagerHandler(BaseHTTPRequestHandler): self._send_json_response({"error": "缺少邮件ID"}, 400) return + if not self.db.conn: + self._send_json_response({"error": "数据库连接未初始化"}, 500) + return + with self.db._lock: cursor = self.db.conn.cursor() cursor.execute(f"SELECT * FROM {TABLE_NAME} WHERE id = ?", (int(email_id),)) @@ -630,6 +642,10 @@ class EmlManagerHandler(BaseHTTPRequestHandler): if not eml_files: return + if not self.db.conn: + self._send_json_response({"error": "数据库连接未初始化"}, 500) + return + # 先批量查询所有已存在的文件 with self.db._lock: cursor = self.db.conn.cursor() @@ -1268,6 +1284,10 @@ def main() -> None: if eml_files: print(f"发现 {len(eml_files)} 个 EML 文件,开始导入...") + if not EmlManagerHandler.db.conn: + print("数据库连接未初始化,无法导入邮件") + return + # 先批量查询所有已存在的文件 with EmlManagerHandler.db._lock: cursor = EmlManagerHandler.db.conn.cursor() diff --git a/src/pyflowx/cli/envrs.py b/src/pyflowx/cli/envrs.py index 4e8ecab..3a45566 100644 --- a/src/pyflowx/cli/envrs.py +++ b/src/pyflowx/cli/envrs.py @@ -39,7 +39,7 @@ RUSTUP_MIRRORS: dict[str, dict[str, str]] = { UsableRustVersion = Literal["stable", "nightly", "beta"] UsableMirror = Literal["aliyun", "ustc", "tsinghua"] -DEFAULT_RUST_VERSION: str = "stable" +DEFAULT_RUST_VERSION: UsableRustVersion = "stable" DEFAULT_MIRROR: UsableMirror = "tsinghua" @@ -136,13 +136,13 @@ def main() -> None: args = parser.parse_args() if args.command == "mirror": - graph = px.Graph.from_specs( - [px.TaskSpec("set_rust_mirror", fn=set_rust_mirror, args=(args.name,), verbose=True)] - ) + graph = px.Graph.from_specs([ + px.TaskSpec("set_rust_mirror", fn=set_rust_mirror, args=(args.name,), verbose=True) + ]) elif args.command == "install": - graph = px.Graph.from_specs( - [px.TaskSpec("install_rust", cmd=["rustup", "toolchain", "install", args.version], verbose=True)] - ) + graph = px.Graph.from_specs([ + px.TaskSpec("install_rust", cmd=["rustup", "toolchain", "install", args.version], verbose=True) + ]) else: parser.print_help() return diff --git a/src/pyflowx/cli/gittool.py b/src/pyflowx/cli/gittool.py index 066301a..bb45a57 100644 --- a/src/pyflowx/cli/gittool.py +++ b/src/pyflowx/cli/gittool.py @@ -33,20 +33,16 @@ def init_sub_dirs() -> None: sub_dirs = [subdir for subdir in Path.cwd().iterdir() if subdir.is_dir()] for subdir in sub_dirs: px.run( - px.Graph.from_specs( - [ - px.TaskSpec( - "init", - cmd=["git", "init"], - conditions=[not_has_git_repo], - cwd=str(subdir), - ), - px.TaskSpec("add", cmd=["git", "add", "."], depends_on=["init"], cwd=str(subdir)), - px.TaskSpec( - "commit", cmd=["git", "commit", "-m", "init commit"], depends_on=["add"], cwd=str(subdir) - ), - ] - ), + px.Graph.from_specs([ + px.TaskSpec( + "init", + cmd=["git", "init"], + conditions=(not_has_git_repo,), + cwd=subdir, + ), + px.TaskSpec("add", cmd=["git", "add", "."], depends_on=("init",)), + px.TaskSpec("commit", cmd=["git", "commit", "-m", "init commit"], depends_on=("add",)), + ]), ) @@ -73,29 +69,23 @@ def main() -> None: description="Gittool - Git 执行工具.", graphs={ # 添加并提交 - "a": px.Graph.from_specs( - [ - px.TaskSpec("add", cmd=["git", "add", "."], conditions=[has_files]), - px.TaskSpec("commit", cmd=["git", "commit", "-m", "chore: update"], depends_on=["add"]), - ] - ), + "a": px.Graph.from_specs([ + px.TaskSpec("add", cmd=["git", "add", "."], conditions=(has_files,)), + px.TaskSpec("commit", cmd=["git", "commit", "-m", "chore: update"], depends_on=("add",)), + ]), # 清理 - "c": px.Graph.from_specs( - [ - px.TaskSpec("clean", cmd=["git", "clean", "-xfd", *EXCLUDE_CMDS]), - px.TaskSpec("status", cmd=["git", "status", "--porcelain"], depends_on=["clean"]), - ] - ), + "c": px.Graph.from_specs([ + px.TaskSpec("clean", cmd=["git", "clean", "-xfd", *EXCLUDE_CMDS]), + px.TaskSpec("status", cmd=["git", "status", "--porcelain"], depends_on=("clean",)), + ]), # 初始化、添加并提交 - "i": px.Graph.from_specs( - [ - px.TaskSpec("init", cmd=["git", "init"], conditions=[not_has_git_repo]), - px.TaskSpec("add", cmd=["git", "add", "."], depends_on=["init"], conditions=[has_files]), - px.TaskSpec( - "commit", cmd=["git", "commit", "-m", "init commit"], depends_on=["add"], conditions=[has_files] - ), - ] - ), + "i": px.Graph.from_specs([ + px.TaskSpec("init", cmd=["git", "init"], conditions=(not_has_git_repo,)), + px.TaskSpec("add", cmd=["git", "add", "."], depends_on=("init",), conditions=(has_files,)), + px.TaskSpec( + "commit", cmd=["git", "commit", "-m", "init commit"], depends_on=("add",), conditions=(has_files,) + ), + ]), # 初始化子目录 "isub": px.Graph.from_specs([isub]), # 推送 diff --git a/src/pyflowx/cli/hfdownload.py b/src/pyflowx/cli/hfdownload.py index a3dc7bf..c5ecc70 100644 --- a/src/pyflowx/cli/hfdownload.py +++ b/src/pyflowx/cli/hfdownload.py @@ -37,50 +37,46 @@ def main(): download_dir.mkdir(parents=True, exist_ok=True) if args.use_hfd: - graph = px.Graph.from_specs( - [ - px.TaskSpec(name="setenvs", fn=setenvs, verbose=True), - px.TaskSpec( - name="download_hfd", - cmd=["wget", "https://hf-mirror.com/hfd/hfd.sh"], - depends_on=["setenvs"], - verbose=True, - ), - px.TaskSpec( - name="chmod_hfd", - cmd=["chmod", "a+x", "hfd.sh"], - depends_on=["download_hfd"], - verbose=True, - ), - px.TaskSpec( - name="run_hfd", - cmd=["./hfd.sh", dataset_name, args.type], - depends_on=["chmod_hfd"], - verbose=True, - ), - ] - ) + graph = px.Graph.from_specs([ + px.TaskSpec(name="setenvs", fn=setenvs, verbose=True), + px.TaskSpec( + name="download_hfd", + cmd=["wget", "https://hf-mirror.com/hfd/hfd.sh"], + depends_on=("setenvs",), + verbose=True, + ), + px.TaskSpec( + name="chmod_hfd", + cmd=["chmod", "a+x", "hfd.sh"], + depends_on=("download_hfd",), + verbose=True, + ), + px.TaskSpec( + name="run_hfd", + cmd=["./hfd.sh", dataset_name, args.type], + depends_on=("chmod_hfd",), + verbose=True, + ), + ]) else: - graph = px.Graph.from_specs( - [ - px.TaskSpec(name="setenvs", fn=setenvs, verbose=True), - px.TaskSpec( - name="download", - cmd=[ - "uvx", - "hf", - "download", - "--repo-type", - args.type, - "--force-download", - dataset_name, - "--local-dir", - str(Path.cwd() / dataset_name), - ], - depends_on=["setenvs"], - verbose=True, - ), - ] - ) + graph = px.Graph.from_specs([ + px.TaskSpec(name="setenvs", fn=setenvs, verbose=True), + px.TaskSpec( + name="download", + cmd=[ + "uvx", + "hf", + "download", + "--repo-type", + args.type, + "--force-download", + dataset_name, + "--local-dir", + str(Path.cwd() / dataset_name), + ], + depends_on=("setenvs",), + verbose=True, + ), + ]) px.run(graph, strategy="thread", verbose=True) diff --git a/src/pyflowx/cli/pdftool.py b/src/pyflowx/cli/pdftool.py index e1bdadb..b950f51 100644 --- a/src/pyflowx/cli/pdftool.py +++ b/src/pyflowx/cli/pdftool.py @@ -146,7 +146,7 @@ def pdf_extract_text(input_path: Path, output_path: Path) -> None: doc = fitz.open(str(input_path)) text = "" for page in doc: - text += page.get_text() + "\n\n" + text += str(page.get_text()) + "\n\n" doc.close() output_path.parent.mkdir(parents=True, exist_ok=True) @@ -164,6 +164,7 @@ def pdf_extract_images(input_path: Path, output_dir: Path) -> None: output_dir.mkdir(parents=True, exist_ok=True) image_count = 0 + # pyrefly: ignore [bad-argument-type] for page_num, page in enumerate(doc): images = page.get_images(full=True) for img_idx, img in enumerate(images): @@ -249,9 +250,13 @@ def pdf_info(input_path: Path) -> None: doc = fitz.open(str(input_path)) print(f"文件: {input_path}") print(f"页数: {doc.page_count}") + # pyrefly: ignore [missing-attribute] print(f"标题: {doc.metadata.get('title', 'N/A')}") + # pyrefly: ignore [missing-attribute] print(f"作者: {doc.metadata.get('author', 'N/A')}") + # pyrefly: ignore [missing-attribute] print(f"创建日期: {doc.metadata.get('creationDate', 'N/A')}") + # pyrefly: ignore [missing-attribute] print(f"修改日期: {doc.metadata.get('modDate', 'N/A')}") print(f"文件大小: {input_path.stat().st_size / 1024:.1f} KB") doc.close() @@ -281,6 +286,7 @@ def pdf_ocr(input_path: Path, output_path: Path, lang: str = "chi_sim+eng") -> N new_page = new_doc.new_page(width=page.rect.width, height=page.rect.height) new_page.insert_image(new_page.rect, pixmap=pix) text_rect = fitz.Rect(0, 0, page.rect.width, page.rect.height) + # pyrefly: ignore [bad-argument-type] new_page.insert_textbox(text_rect, ocr_text) output_path.parent.mkdir(parents=True, exist_ok=True) @@ -319,6 +325,7 @@ def pdf_to_images(input_path: Path, output_dir: Path, dpi: int = 300) -> None: doc = fitz.open(str(input_path)) output_dir.mkdir(parents=True, exist_ok=True) + # pyrefly: ignore [bad-argument-type] for page_num, page in enumerate(doc): pix = page.get_pixmap(dpi=dpi) image_path = output_dir / f"{input_path.stem}_page_{page_num + 1}.png" diff --git a/src/pyflowx/task.py b/src/pyflowx/task.py index 8508f2e..1d1a4a5 100644 --- a/src/pyflowx/task.py +++ b/src/pyflowx/task.py @@ -28,12 +28,13 @@ from typing import ( Mapping, Optional, Tuple, - TypeVar, Union, cast, ) -T = TypeVar("T") +from typing_extensions import TypeVar + +T = TypeVar("T", default=Any) # 任务可调用对象可以是同步或异步的。显式保留联合类型,让 mypy 理解两种形态。 TaskFn = Union[