refactor: 整理代码格式并修复部分类型和依赖问题
1. 调整 task.py 的 TypeVar 导入和默认值
2. 格式化多处列表和参数写法，统一括号风格
3. 为 pdftool.py 添加 pyrefly 忽略注释以修复类型警告
4. 为 emlmanager.py 添加数据库连接断言和检查
5. 修正 hfdownload.py 的 depends_on 参数为元组格式
This commit is contained in:
@@ -88,6 +88,8 @@ class EmailDatabase:
|
|||||||
|
|
||||||
def insert_email(self, email_data: dict[str, Any]) -> bool:
|
def insert_email(self, email_data: dict[str, Any]) -> bool:
|
||||||
"""插入邮件数据."""
|
"""插入邮件数据."""
|
||||||
|
assert self.conn, "数据库连接未初始化"
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with self._lock:
|
with self._lock:
|
||||||
cursor = self.conn.cursor()
|
cursor = self.conn.cursor()
|
||||||
@@ -123,6 +125,8 @@ class EmailDatabase:
|
|||||||
self, keyword: str = "", field: str = "all", limit: int = 100, offset: int = 0
|
self, keyword: str = "", field: str = "all", limit: int = 100, offset: int = 0
|
||||||
) -> list[dict[str, Any]]:
|
) -> list[dict[str, Any]]:
|
||||||
"""搜索邮件."""
|
"""搜索邮件."""
|
||||||
|
assert self.conn, "数据库连接未初始化"
|
||||||
|
|
||||||
with self._lock:
|
with self._lock:
|
||||||
cursor = self.conn.cursor()
|
cursor = self.conn.cursor()
|
||||||
|
|
||||||
@@ -154,6 +158,8 @@ class EmailDatabase:
|
|||||||
|
|
||||||
def get_grouped_emails(self) -> dict[str, list[dict[str, Any]]]:
|
def get_grouped_emails(self) -> dict[str, list[dict[str, Any]]]:
|
||||||
"""获取按主题分组的邮件."""
|
"""获取按主题分组的邮件."""
|
||||||
|
assert self.conn, "数据库连接未初始化"
|
||||||
|
|
||||||
with self._lock:
|
with self._lock:
|
||||||
cursor = self.conn.cursor()
|
cursor = self.conn.cursor()
|
||||||
cursor.execute(f"SELECT * FROM {TABLE_NAME} ORDER BY subject, date_parsed DESC")
|
cursor.execute(f"SELECT * FROM {TABLE_NAME} ORDER BY subject, date_parsed DESC")
|
||||||
@@ -183,6 +189,8 @@ class EmailDatabase:
|
|||||||
|
|
||||||
def get_email_count(self) -> int:
|
def get_email_count(self) -> int:
|
||||||
"""获取邮件总数."""
|
"""获取邮件总数."""
|
||||||
|
assert self.conn, "数据库连接未初始化"
|
||||||
|
|
||||||
with self._lock:
|
with self._lock:
|
||||||
cursor = self.conn.cursor()
|
cursor = self.conn.cursor()
|
||||||
cursor.execute(f"SELECT COUNT(*) FROM {TABLE_NAME}")
|
cursor.execute(f"SELECT COUNT(*) FROM {TABLE_NAME}")
|
||||||
@@ -190,6 +198,8 @@ class EmailDatabase:
|
|||||||
|
|
||||||
def clear_all(self) -> None:
|
def clear_all(self) -> None:
|
||||||
"""清空所有邮件数据."""
|
"""清空所有邮件数据."""
|
||||||
|
assert self.conn, "数据库连接未初始化"
|
||||||
|
|
||||||
with self._lock:
|
with self._lock:
|
||||||
cursor = self.conn.cursor()
|
cursor = self.conn.cursor()
|
||||||
cursor.execute(f"DELETE FROM {TABLE_NAME}")
|
cursor.execute(f"DELETE FROM {TABLE_NAME}")
|
||||||
@@ -557,15 +567,13 @@ class EmlManagerHandler(BaseHTTPRequestHandler):
|
|||||||
|
|
||||||
emails = self.db.search_emails(keyword, field, limit, offset)
|
emails = self.db.search_emails(keyword, field, limit, offset)
|
||||||
total_count = self.db.get_email_count()
|
total_count = self.db.get_email_count()
|
||||||
self._send_json_response(
|
self._send_json_response({
|
||||||
{
|
"emails": emails,
|
||||||
"emails": emails,
|
"count": len(emails),
|
||||||
"count": len(emails),
|
"total": total_count,
|
||||||
"total": total_count,
|
"limit": limit,
|
||||||
"limit": limit,
|
"offset": offset,
|
||||||
"offset": offset,
|
})
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
def _api_get_email(self, query_params: dict[str, list[str]]) -> None:
|
def _api_get_email(self, query_params: dict[str, list[str]]) -> None:
|
||||||
"""API: 获取单个邮件详情."""
|
"""API: 获取单个邮件详情."""
|
||||||
@@ -578,6 +586,10 @@ class EmlManagerHandler(BaseHTTPRequestHandler):
|
|||||||
self._send_json_response({"error": "缺少邮件ID"}, 400)
|
self._send_json_response({"error": "缺少邮件ID"}, 400)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
if not self.db.conn:
|
||||||
|
self._send_json_response({"error": "数据库连接未初始化"}, 500)
|
||||||
|
return
|
||||||
|
|
||||||
with self.db._lock:
|
with self.db._lock:
|
||||||
cursor = self.db.conn.cursor()
|
cursor = self.db.conn.cursor()
|
||||||
cursor.execute(f"SELECT * FROM {TABLE_NAME} WHERE id = ?", (int(email_id),))
|
cursor.execute(f"SELECT * FROM {TABLE_NAME} WHERE id = ?", (int(email_id),))
|
||||||
@@ -630,6 +642,10 @@ class EmlManagerHandler(BaseHTTPRequestHandler):
|
|||||||
if not eml_files:
|
if not eml_files:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
if not self.db.conn:
|
||||||
|
self._send_json_response({"error": "数据库连接未初始化"}, 500)
|
||||||
|
return
|
||||||
|
|
||||||
# 先批量查询所有已存在的文件
|
# 先批量查询所有已存在的文件
|
||||||
with self.db._lock:
|
with self.db._lock:
|
||||||
cursor = self.db.conn.cursor()
|
cursor = self.db.conn.cursor()
|
||||||
@@ -1268,6 +1284,10 @@ def main() -> None:
|
|||||||
if eml_files:
|
if eml_files:
|
||||||
print(f"发现 {len(eml_files)} 个 EML 文件,开始导入...")
|
print(f"发现 {len(eml_files)} 个 EML 文件,开始导入...")
|
||||||
|
|
||||||
|
if not EmlManagerHandler.db.conn:
|
||||||
|
print("数据库连接未初始化,无法导入邮件")
|
||||||
|
return
|
||||||
|
|
||||||
# 先批量查询所有已存在的文件
|
# 先批量查询所有已存在的文件
|
||||||
with EmlManagerHandler.db._lock:
|
with EmlManagerHandler.db._lock:
|
||||||
cursor = EmlManagerHandler.db.conn.cursor()
|
cursor = EmlManagerHandler.db.conn.cursor()
|
||||||
|
|||||||
@@ -39,7 +39,7 @@ RUSTUP_MIRRORS: dict[str, dict[str, str]] = {
|
|||||||
UsableRustVersion = Literal["stable", "nightly", "beta"]
|
UsableRustVersion = Literal["stable", "nightly", "beta"]
|
||||||
UsableMirror = Literal["aliyun", "ustc", "tsinghua"]
|
UsableMirror = Literal["aliyun", "ustc", "tsinghua"]
|
||||||
|
|
||||||
DEFAULT_RUST_VERSION: str = "stable"
|
DEFAULT_RUST_VERSION: UsableRustVersion = "stable"
|
||||||
DEFAULT_MIRROR: UsableMirror = "tsinghua"
|
DEFAULT_MIRROR: UsableMirror = "tsinghua"
|
||||||
|
|
||||||
|
|
||||||
@@ -136,13 +136,13 @@ def main() -> None:
|
|||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
if args.command == "mirror":
|
if args.command == "mirror":
|
||||||
graph = px.Graph.from_specs(
|
graph = px.Graph.from_specs([
|
||||||
[px.TaskSpec("set_rust_mirror", fn=set_rust_mirror, args=(args.name,), verbose=True)]
|
px.TaskSpec("set_rust_mirror", fn=set_rust_mirror, args=(args.name,), verbose=True)
|
||||||
)
|
])
|
||||||
elif args.command == "install":
|
elif args.command == "install":
|
||||||
graph = px.Graph.from_specs(
|
graph = px.Graph.from_specs([
|
||||||
[px.TaskSpec("install_rust", cmd=["rustup", "toolchain", "install", args.version], verbose=True)]
|
px.TaskSpec("install_rust", cmd=["rustup", "toolchain", "install", args.version], verbose=True)
|
||||||
)
|
])
|
||||||
else:
|
else:
|
||||||
parser.print_help()
|
parser.print_help()
|
||||||
return
|
return
|
||||||
|
|||||||
+25
-35
@@ -33,20 +33,16 @@ def init_sub_dirs() -> None:
|
|||||||
sub_dirs = [subdir for subdir in Path.cwd().iterdir() if subdir.is_dir()]
|
sub_dirs = [subdir for subdir in Path.cwd().iterdir() if subdir.is_dir()]
|
||||||
for subdir in sub_dirs:
|
for subdir in sub_dirs:
|
||||||
px.run(
|
px.run(
|
||||||
px.Graph.from_specs(
|
px.Graph.from_specs([
|
||||||
[
|
px.TaskSpec(
|
||||||
px.TaskSpec(
|
"init",
|
||||||
"init",
|
cmd=["git", "init"],
|
||||||
cmd=["git", "init"],
|
conditions=(not_has_git_repo,),
|
||||||
conditions=[not_has_git_repo],
|
cwd=subdir,
|
||||||
cwd=str(subdir),
|
),
|
||||||
),
|
px.TaskSpec("add", cmd=["git", "add", "."], depends_on=("init",)),
|
||||||
px.TaskSpec("add", cmd=["git", "add", "."], depends_on=["init"], cwd=str(subdir)),
|
px.TaskSpec("commit", cmd=["git", "commit", "-m", "init commit"], depends_on=("add",)),
|
||||||
px.TaskSpec(
|
]),
|
||||||
"commit", cmd=["git", "commit", "-m", "init commit"], depends_on=["add"], cwd=str(subdir)
|
|
||||||
),
|
|
||||||
]
|
|
||||||
),
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -73,29 +69,23 @@ def main() -> None:
|
|||||||
description="Gittool - Git 执行工具.",
|
description="Gittool - Git 执行工具.",
|
||||||
graphs={
|
graphs={
|
||||||
# 添加并提交
|
# 添加并提交
|
||||||
"a": px.Graph.from_specs(
|
"a": px.Graph.from_specs([
|
||||||
[
|
px.TaskSpec("add", cmd=["git", "add", "."], conditions=(has_files,)),
|
||||||
px.TaskSpec("add", cmd=["git", "add", "."], conditions=[has_files]),
|
px.TaskSpec("commit", cmd=["git", "commit", "-m", "chore: update"], depends_on=("add",)),
|
||||||
px.TaskSpec("commit", cmd=["git", "commit", "-m", "chore: update"], depends_on=["add"]),
|
]),
|
||||||
]
|
|
||||||
),
|
|
||||||
# 清理
|
# 清理
|
||||||
"c": px.Graph.from_specs(
|
"c": px.Graph.from_specs([
|
||||||
[
|
px.TaskSpec("clean", cmd=["git", "clean", "-xfd", *EXCLUDE_CMDS]),
|
||||||
px.TaskSpec("clean", cmd=["git", "clean", "-xfd", *EXCLUDE_CMDS]),
|
px.TaskSpec("status", cmd=["git", "status", "--porcelain"], depends_on=("clean",)),
|
||||||
px.TaskSpec("status", cmd=["git", "status", "--porcelain"], depends_on=["clean"]),
|
]),
|
||||||
]
|
|
||||||
),
|
|
||||||
# 初始化、添加并提交
|
# 初始化、添加并提交
|
||||||
"i": px.Graph.from_specs(
|
"i": px.Graph.from_specs([
|
||||||
[
|
px.TaskSpec("init", cmd=["git", "init"], conditions=(not_has_git_repo,)),
|
||||||
px.TaskSpec("init", cmd=["git", "init"], conditions=[not_has_git_repo]),
|
px.TaskSpec("add", cmd=["git", "add", "."], depends_on=("init",), conditions=(has_files,)),
|
||||||
px.TaskSpec("add", cmd=["git", "add", "."], depends_on=["init"], conditions=[has_files]),
|
px.TaskSpec(
|
||||||
px.TaskSpec(
|
"commit", cmd=["git", "commit", "-m", "init commit"], depends_on=("add",), conditions=(has_files,)
|
||||||
"commit", cmd=["git", "commit", "-m", "init commit"], depends_on=["add"], conditions=[has_files]
|
),
|
||||||
),
|
]),
|
||||||
]
|
|
||||||
),
|
|
||||||
# 初始化子目录
|
# 初始化子目录
|
||||||
"isub": px.Graph.from_specs([isub]),
|
"isub": px.Graph.from_specs([isub]),
|
||||||
# 推送
|
# 推送
|
||||||
|
|||||||
@@ -37,50 +37,46 @@ def main():
|
|||||||
download_dir.mkdir(parents=True, exist_ok=True)
|
download_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
if args.use_hfd:
|
if args.use_hfd:
|
||||||
graph = px.Graph.from_specs(
|
graph = px.Graph.from_specs([
|
||||||
[
|
px.TaskSpec(name="setenvs", fn=setenvs, verbose=True),
|
||||||
px.TaskSpec(name="setenvs", fn=setenvs, verbose=True),
|
px.TaskSpec(
|
||||||
px.TaskSpec(
|
name="download_hfd",
|
||||||
name="download_hfd",
|
cmd=["wget", "https://hf-mirror.com/hfd/hfd.sh"],
|
||||||
cmd=["wget", "https://hf-mirror.com/hfd/hfd.sh"],
|
depends_on=("setenvs",),
|
||||||
depends_on=["setenvs"],
|
verbose=True,
|
||||||
verbose=True,
|
),
|
||||||
),
|
px.TaskSpec(
|
||||||
px.TaskSpec(
|
name="chmod_hfd",
|
||||||
name="chmod_hfd",
|
cmd=["chmod", "a+x", "hfd.sh"],
|
||||||
cmd=["chmod", "a+x", "hfd.sh"],
|
depends_on=("download_hfd",),
|
||||||
depends_on=["download_hfd"],
|
verbose=True,
|
||||||
verbose=True,
|
),
|
||||||
),
|
px.TaskSpec(
|
||||||
px.TaskSpec(
|
name="run_hfd",
|
||||||
name="run_hfd",
|
cmd=["./hfd.sh", dataset_name, args.type],
|
||||||
cmd=["./hfd.sh", dataset_name, args.type],
|
depends_on=("chmod_hfd",),
|
||||||
depends_on=["chmod_hfd"],
|
verbose=True,
|
||||||
verbose=True,
|
),
|
||||||
),
|
])
|
||||||
]
|
|
||||||
)
|
|
||||||
else:
|
else:
|
||||||
graph = px.Graph.from_specs(
|
graph = px.Graph.from_specs([
|
||||||
[
|
px.TaskSpec(name="setenvs", fn=setenvs, verbose=True),
|
||||||
px.TaskSpec(name="setenvs", fn=setenvs, verbose=True),
|
px.TaskSpec(
|
||||||
px.TaskSpec(
|
name="download",
|
||||||
name="download",
|
cmd=[
|
||||||
cmd=[
|
"uvx",
|
||||||
"uvx",
|
"hf",
|
||||||
"hf",
|
"download",
|
||||||
"download",
|
"--repo-type",
|
||||||
"--repo-type",
|
args.type,
|
||||||
args.type,
|
"--force-download",
|
||||||
"--force-download",
|
dataset_name,
|
||||||
dataset_name,
|
"--local-dir",
|
||||||
"--local-dir",
|
str(Path.cwd() / dataset_name),
|
||||||
str(Path.cwd() / dataset_name),
|
],
|
||||||
],
|
depends_on=("setenvs",),
|
||||||
depends_on=["setenvs"],
|
verbose=True,
|
||||||
verbose=True,
|
),
|
||||||
),
|
])
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
px.run(graph, strategy="thread", verbose=True)
|
px.run(graph, strategy="thread", verbose=True)
|
||||||
|
|||||||
@@ -146,7 +146,7 @@ def pdf_extract_text(input_path: Path, output_path: Path) -> None:
|
|||||||
doc = fitz.open(str(input_path))
|
doc = fitz.open(str(input_path))
|
||||||
text = ""
|
text = ""
|
||||||
for page in doc:
|
for page in doc:
|
||||||
text += page.get_text() + "\n\n"
|
text += str(page.get_text()) + "\n\n"
|
||||||
doc.close()
|
doc.close()
|
||||||
|
|
||||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
@@ -164,6 +164,7 @@ def pdf_extract_images(input_path: Path, output_dir: Path) -> None:
|
|||||||
output_dir.mkdir(parents=True, exist_ok=True)
|
output_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
image_count = 0
|
image_count = 0
|
||||||
|
# pyrefly: ignore [bad-argument-type]
|
||||||
for page_num, page in enumerate(doc):
|
for page_num, page in enumerate(doc):
|
||||||
images = page.get_images(full=True)
|
images = page.get_images(full=True)
|
||||||
for img_idx, img in enumerate(images):
|
for img_idx, img in enumerate(images):
|
||||||
@@ -249,9 +250,13 @@ def pdf_info(input_path: Path) -> None:
|
|||||||
doc = fitz.open(str(input_path))
|
doc = fitz.open(str(input_path))
|
||||||
print(f"文件: {input_path}")
|
print(f"文件: {input_path}")
|
||||||
print(f"页数: {doc.page_count}")
|
print(f"页数: {doc.page_count}")
|
||||||
|
# pyrefly: ignore [missing-attribute]
|
||||||
print(f"标题: {doc.metadata.get('title', 'N/A')}")
|
print(f"标题: {doc.metadata.get('title', 'N/A')}")
|
||||||
|
# pyrefly: ignore [missing-attribute]
|
||||||
print(f"作者: {doc.metadata.get('author', 'N/A')}")
|
print(f"作者: {doc.metadata.get('author', 'N/A')}")
|
||||||
|
# pyrefly: ignore [missing-attribute]
|
||||||
print(f"创建日期: {doc.metadata.get('creationDate', 'N/A')}")
|
print(f"创建日期: {doc.metadata.get('creationDate', 'N/A')}")
|
||||||
|
# pyrefly: ignore [missing-attribute]
|
||||||
print(f"修改日期: {doc.metadata.get('modDate', 'N/A')}")
|
print(f"修改日期: {doc.metadata.get('modDate', 'N/A')}")
|
||||||
print(f"文件大小: {input_path.stat().st_size / 1024:.1f} KB")
|
print(f"文件大小: {input_path.stat().st_size / 1024:.1f} KB")
|
||||||
doc.close()
|
doc.close()
|
||||||
@@ -281,6 +286,7 @@ def pdf_ocr(input_path: Path, output_path: Path, lang: str = "chi_sim+eng") -> N
|
|||||||
new_page = new_doc.new_page(width=page.rect.width, height=page.rect.height)
|
new_page = new_doc.new_page(width=page.rect.width, height=page.rect.height)
|
||||||
new_page.insert_image(new_page.rect, pixmap=pix)
|
new_page.insert_image(new_page.rect, pixmap=pix)
|
||||||
text_rect = fitz.Rect(0, 0, page.rect.width, page.rect.height)
|
text_rect = fitz.Rect(0, 0, page.rect.width, page.rect.height)
|
||||||
|
# pyrefly: ignore [bad-argument-type]
|
||||||
new_page.insert_textbox(text_rect, ocr_text)
|
new_page.insert_textbox(text_rect, ocr_text)
|
||||||
|
|
||||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
@@ -319,6 +325,7 @@ def pdf_to_images(input_path: Path, output_dir: Path, dpi: int = 300) -> None:
|
|||||||
doc = fitz.open(str(input_path))
|
doc = fitz.open(str(input_path))
|
||||||
output_dir.mkdir(parents=True, exist_ok=True)
|
output_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
# pyrefly: ignore [bad-argument-type]
|
||||||
for page_num, page in enumerate(doc):
|
for page_num, page in enumerate(doc):
|
||||||
pix = page.get_pixmap(dpi=dpi)
|
pix = page.get_pixmap(dpi=dpi)
|
||||||
image_path = output_dir / f"{input_path.stem}_page_{page_num + 1}.png"
|
image_path = output_dir / f"{input_path.stem}_page_{page_num + 1}.png"
|
||||||
|
|||||||
+3
-2
@@ -28,12 +28,13 @@ from typing import (
|
|||||||
Mapping,
|
Mapping,
|
||||||
Optional,
|
Optional,
|
||||||
Tuple,
|
Tuple,
|
||||||
TypeVar,
|
|
||||||
Union,
|
Union,
|
||||||
cast,
|
cast,
|
||||||
)
|
)
|
||||||
|
|
||||||
T = TypeVar("T")
|
from typing_extensions import TypeVar
|
||||||
|
|
||||||
|
T = TypeVar("T", default=Any)
|
||||||
|
|
||||||
# 任务可调用对象可以是同步或异步的。显式保留联合类型,让 mypy 理解两种形态。
|
# 任务可调用对象可以是同步或异步的。显式保留联合类型,让 mypy 理解两种形态。
|
||||||
TaskFn = Union[
|
TaskFn = Union[
|
||||||
|
|||||||
Reference in New Issue
Block a user