refactor(executors): 重构任务跳过逻辑,提取公共函数并格式化代码

1.  提取上游任务跳过检查和条件检查为公共工具函数
2.  重构同步和异步执行器的跳过判断逻辑,减少代码重复
3.  格式化gittool.py和测试文件的列表语法,提升可读性
This commit is contained in:
2026-06-21 21:55:18 +08:00
parent 9cc91d1153
commit 983d47bd2e
3 changed files with 235 additions and 155 deletions
+35 -25
View File
@@ -31,16 +31,20 @@ def init_sub_dirs() -> None:
sub_dirs = [subdir for subdir in Path.cwd().iterdir() if subdir.is_dir()] sub_dirs = [subdir for subdir in Path.cwd().iterdir() if subdir.is_dir()]
for subdir in sub_dirs: for subdir in sub_dirs:
px.run( px.run(
px.Graph.from_specs([ px.Graph.from_specs(
px.TaskSpec( [
"init", px.TaskSpec(
cmd=["git", "init"], "init",
conditions=[not_has_git_repo], cmd=["git", "init"],
cwd=str(subdir), conditions=[not_has_git_repo],
), cwd=str(subdir),
px.TaskSpec("add", cmd=["git", "add", "."], depends_on=["init"], cwd=str(subdir)), ),
px.TaskSpec("commit", cmd=["git", "commit", "-m", "init commit"], depends_on=["add"], cwd=str(subdir)), px.TaskSpec("add", cmd=["git", "add", "."], depends_on=["init"], cwd=str(subdir)),
]), px.TaskSpec(
"commit", cmd=["git", "commit", "-m", "init commit"], depends_on=["add"], cwd=str(subdir)
),
]
),
) )
@@ -67,23 +71,29 @@ def main() -> None:
description="Gittool - Git 执行工具.", description="Gittool - Git 执行工具.",
graphs={ graphs={
# 添加并提交 # 添加并提交
"a": px.Graph.from_specs([ "a": px.Graph.from_specs(
px.TaskSpec("add", cmd=["git", "add", "."], conditions=[has_files]), [
px.TaskSpec("commit", cmd=["git", "commit", "-m", "chore: update"], depends_on=["add"]), px.TaskSpec("add", cmd=["git", "add", "."], conditions=[has_files]),
]), px.TaskSpec("commit", cmd=["git", "commit", "-m", "chore: update"], depends_on=["add"]),
]
),
# 清理 # 清理
"c": px.Graph.from_specs([ "c": px.Graph.from_specs(
px.TaskSpec("clean", cmd=["git", "clean", "-xfd", *EXCLUDE_CMDS]), [
px.TaskSpec("status", cmd=["git", "status", "--porcelain"], depends_on=["clean"]), px.TaskSpec("clean", cmd=["git", "clean", "-xfd", *EXCLUDE_CMDS]),
]), px.TaskSpec("status", cmd=["git", "status", "--porcelain"], depends_on=["clean"]),
]
),
# 初始化、添加并提交 # 初始化、添加并提交
"i": px.Graph.from_specs([ "i": px.Graph.from_specs(
px.TaskSpec("init", cmd=["git", "init"], conditions=[not_has_git_repo]), [
px.TaskSpec("add", cmd=["git", "add", "."], depends_on=["init"], conditions=[has_files]), px.TaskSpec("init", cmd=["git", "init"], conditions=[not_has_git_repo]),
px.TaskSpec( px.TaskSpec("add", cmd=["git", "add", "."], depends_on=["init"], conditions=[has_files]),
"commit", cmd=["git", "commit", "-m", "init commit"], depends_on=["add"], conditions=[has_files] px.TaskSpec(
), "commit", cmd=["git", "commit", "-m", "init commit"], depends_on=["add"], conditions=[has_files]
]), ),
]
),
# 初始化子目录 # 初始化子目录
"isub": px.Graph.from_specs([isub]), "isub": px.Graph.from_specs([isub]),
# 推送 # 推送
+105 -65
View File
@@ -87,6 +87,56 @@ def _finalize_failure(
) )
def _check_upstream_skipped(
spec: TaskSpec[Any],
report: RunReport | None,
) -> tuple[bool, str | None]:
"""检查上游任务是否被 SKIPPED。
Returns
-------
tuple[bool, str | None]
(是否应该跳过, 跳过原因)
"""
if report is None:
return False, None
for dep in spec.depends_on:
if dep in report.results and report.results[dep].status == TaskStatus.SKIPPED:
return True, f"上游任务 '{dep}' 被跳过"
return False, None
def _check_conditions_for_skip(
spec: TaskSpec[Any],
) -> str | None:
"""检查任务条件是否满足,返回跳过原因(如果不满足)。
Returns
-------
str | None
跳过原因,如果条件满足则返回 None
"""
if spec.should_execute():
return None
# 检查是哪个条件不满足
failed_conditions = []
for condition in spec.conditions:
try:
if not condition():
failed_conditions.append(condition.__name__ or "匿名条件")
except Exception:
failed_conditions.append(condition.__name__ or "匿名条件(执行错误)")
if failed_conditions:
return f"条件不满足: {', '.join(failed_conditions)}"
elif spec.skip_if_missing and not spec._is_cmd_available():
return f"命令不存在: {spec.cmd[0] if spec.cmd else 'unknown'}"
else:
return "条件不满足"
def _run_sync_with_retry( def _run_sync_with_retry(
spec: TaskSpec[Any], spec: TaskSpec[Any],
context: Mapping[str, Any], context: Mapping[str, Any],
@@ -98,33 +148,20 @@ def _run_sync_with_retry(
result: TaskResult[Any] = TaskResult(spec=spec) result: TaskResult[Any] = TaskResult(spec=spec)
# 检查上游任务是否被 SKIPPED # 检查上游任务是否被 SKIPPED
if report is not None: should_skip, skip_reason = _check_upstream_skipped(spec, report)
for dep in spec.depends_on: if should_skip:
if dep in report.results and report.results[dep].status == TaskStatus.SKIPPED:
result.status = TaskStatus.SKIPPED
result.finished_at = datetime.now()
result.reason = f"上游任务 '{dep}' 被跳过"
logger.info("task %r skipped (上游任务 %r 被跳过)", spec.name, dep)
return result
# 检查条件是否满足
if not spec.should_execute():
result.status = TaskStatus.SKIPPED result.status = TaskStatus.SKIPPED
result.finished_at = datetime.now() result.finished_at = datetime.now()
# 检查是哪个条件不满足 result.reason = skip_reason
failed_conditions = [] logger.info("task %r skipped (上游任务被跳过)", spec.name)
for condition in spec.conditions: return result
try:
if not condition(): # 检查条件是否满足
failed_conditions.append(condition.__name__ or "匿名条件") skip_reason = _check_conditions_for_skip(spec)
except Exception: if skip_reason is not None:
failed_conditions.append(condition.__name__ or "匿名条件(执行错误)") result.status = TaskStatus.SKIPPED
if failed_conditions: result.finished_at = datetime.now()
result.reason = f"条件不满足: {', '.join(failed_conditions)}" result.reason = skip_reason
elif spec.skip_if_missing and not spec._is_cmd_available():
result.reason = f"命令不存在: {spec.cmd[0] if spec.cmd else 'unknown'}"
else:
result.reason = "条件不满足"
logger.info("task %r skipped (条件不满足)", spec.name) logger.info("task %r skipped (条件不满足)", spec.name)
return result return result
@@ -147,6 +184,36 @@ def _run_sync_with_retry(
raise AssertionError("unreachable") # pragma: no cover raise AssertionError("unreachable") # pragma: no cover
async def _execute_async_task(
spec: TaskSpec[Any],
args: tuple[Any, ...],
kwargs: dict[str, Any],
loop: asyncio.AbstractEventLoop,
) -> Any:
"""执行异步或同步任务(带超时处理)。
Returns
-------
Any
任务返回值
"""
if _is_async_fn(spec):
coro = cast(Awaitable[Any], spec.effective_fn(*args, **kwargs))
if spec.timeout is not None:
return await asyncio.wait_for(coro, timeout=spec.timeout)
else:
return await coro
else:
# 将同步工作卸载到线程,保持事件循环存活。
def fn_call() -> Any:
return spec.effective_fn(*args, **kwargs)
if spec.timeout is not None:
return await asyncio.wait_for(loop.run_in_executor(None, fn_call), timeout=spec.timeout)
else:
return await loop.run_in_executor(None, fn_call)
async def _run_async_with_retry( async def _run_async_with_retry(
spec: TaskSpec[Any], spec: TaskSpec[Any],
context: Mapping[str, Any], context: Mapping[str, Any],
@@ -158,33 +225,20 @@ async def _run_async_with_retry(
result: TaskResult[Any] = TaskResult[Any](spec=spec) result: TaskResult[Any] = TaskResult[Any](spec=spec)
# 检查上游任务是否被 SKIPPED # 检查上游任务是否被 SKIPPED
if report is not None: should_skip, skip_reason = _check_upstream_skipped(spec, report)
for dep in spec.depends_on: if should_skip:
if dep in report.results and report.results[dep].status == TaskStatus.SKIPPED:
result.status = TaskStatus.SKIPPED
result.finished_at = datetime.now()
result.reason = f"上游任务 '{dep}' 被跳过"
logger.info("task %r skipped (上游任务 %r 被跳过)", spec.name, dep)
return result
# 检查条件是否满足
if not spec.should_execute():
result.status = TaskStatus.SKIPPED result.status = TaskStatus.SKIPPED
result.finished_at = datetime.now() result.finished_at = datetime.now()
# 检查是哪个条件不满足 result.reason = skip_reason
failed_conditions = [] logger.info("task %r skipped (上游任务被跳过)", spec.name)
for condition in spec.conditions: return result
try:
if not condition(): # 检查条件是否满足
failed_conditions.append(condition.__name__ or "匿名条件") skip_reason = _check_conditions_for_skip(spec)
except Exception: if skip_reason is not None:
failed_conditions.append(condition.__name__ or "匿名条件(执行错误)") result.status = TaskStatus.SKIPPED
if failed_conditions: result.finished_at = datetime.now()
result.reason = f"条件不满足: {', '.join(failed_conditions)}" result.reason = skip_reason
elif spec.skip_if_missing and not spec._is_cmd_available():
result.reason = f"命令不存在: {spec.cmd[0] if spec.cmd else 'unknown'}"
else:
result.reason = "条件不满足"
logger.info("task %r skipped (条件不满足)", spec.name) logger.info("task %r skipped (条件不满足)", spec.name)
return result return result
@@ -196,21 +250,7 @@ async def _run_async_with_retry(
while True: while True:
result.attempts += 1 result.attempts += 1
try: try:
if _is_async_fn(spec): result.value = await _execute_async_task(spec, args, kwargs, loop)
coro = cast(Awaitable[Any], spec.effective_fn(*args, **kwargs))
if spec.timeout is not None:
result.value = await asyncio.wait_for(coro, timeout=spec.timeout)
else:
result.value = await coro
else:
# 将同步工作卸载到线程,保持事件循环存活。
def fn_call() -> Any:
return spec.effective_fn(*args, **kwargs)
if spec.timeout is not None:
result.value = await asyncio.wait_for(loop.run_in_executor(None, fn_call), timeout=spec.timeout)
else:
result.value = await loop.run_in_executor(None, fn_call)
result.status = TaskStatus.SUCCESS result.status = TaskStatus.SUCCESS
result.finished_at = datetime.now() result.finished_at = datetime.now()
return result return result
+95 -65
View File
@@ -26,10 +26,12 @@ def test_sequential_basic() -> None:
def double(extract: list[int]) -> list[int]: def double(extract: list[int]) -> list[int]:
return [x * 2 for x in extract] return [x * 2 for x in extract]
graph = px.Graph.from_specs([ graph = px.Graph.from_specs(
px.TaskSpec("extract", extract), [
px.TaskSpec("double", double, depends_on=("extract",)), px.TaskSpec("extract", extract),
]) px.TaskSpec("double", double, depends_on=("extract",)),
]
)
report = px.run(graph, strategy="sequential") report = px.run(graph, strategy="sequential")
assert report.success assert report.success
assert report["extract"] == [1, 2, 3] assert report["extract"] == [1, 2, 3]
@@ -46,12 +48,14 @@ def test_sequential_diamond() -> None:
return fn return fn
graph = px.Graph.from_specs([ graph = px.Graph.from_specs(
px.TaskSpec("a", make("a")), [
px.TaskSpec("b", make("b"), depends_on=("a",)), px.TaskSpec("a", make("a")),
px.TaskSpec("c", make("c"), depends_on=("a",)), px.TaskSpec("b", make("b"), depends_on=("a",)),
px.TaskSpec("d", make("d"), depends_on=("b", "c")), px.TaskSpec("c", make("c"), depends_on=("a",)),
]) px.TaskSpec("d", make("d"), depends_on=("b", "c")),
]
)
report = px.run(graph, strategy="sequential") report = px.run(graph, strategy="sequential")
assert report.success assert report.success
assert report["d"] == "d" assert report["d"] == "d"
@@ -65,10 +69,12 @@ def test_failure_propagates() -> None:
def downstream(_boom: None) -> int: def downstream(_boom: None) -> int:
return 1 return 1
graph = px.Graph.from_specs([ graph = px.Graph.from_specs(
px.TaskSpec("boom", boom), [
px.TaskSpec("downstream", downstream, depends_on=("boom",)), px.TaskSpec("boom", boom),
]) px.TaskSpec("downstream", downstream, depends_on=("boom",)),
]
)
with pytest.raises(TaskFailedError) as exc_info: with pytest.raises(TaskFailedError) as exc_info:
_ = px.run(graph, strategy="sequential") _ = px.run(graph, strategy="sequential")
assert exc_info.value.task == "boom" assert exc_info.value.task == "boom"
@@ -110,11 +116,13 @@ def test_threaded_parallelism() -> None:
time.sleep(0.3) time.sleep(0.3)
return "done" return "done"
graph = px.Graph.from_specs([ graph = px.Graph.from_specs(
px.TaskSpec("a", slow), [
px.TaskSpec("b", slow), px.TaskSpec("a", slow),
px.TaskSpec("c", slow), px.TaskSpec("b", slow),
]) px.TaskSpec("c", slow),
]
)
start = time.time() start = time.time()
report = px.run(graph, strategy="thread", max_workers=3) report = px.run(graph, strategy="thread", max_workers=3)
elapsed = time.time() - start elapsed = time.time() - start
@@ -137,11 +145,13 @@ def test_threaded_layer_barrier() -> None:
return fn return fn
graph = px.Graph.from_specs([ graph = px.Graph.from_specs(
px.TaskSpec("a", make("a")), [
px.TaskSpec("b", make("b")), px.TaskSpec("a", make("a")),
px.TaskSpec("c", make("c"), depends_on=("a", "b")), px.TaskSpec("b", make("b")),
]) px.TaskSpec("c", make("c"), depends_on=("a", "b")),
]
)
report = px.run(graph, strategy="thread", max_workers=2) report = px.run(graph, strategy="thread", max_workers=2)
assert report.success assert report.success
# c must finish after both a and b. # c must finish after both a and b.
@@ -160,10 +170,12 @@ def test_async_basic() -> None:
async def transform(fetch: int) -> int: async def transform(fetch: int) -> int:
return fetch * 2 return fetch * 2
graph = px.Graph.from_specs([ graph = px.Graph.from_specs(
px.TaskSpec("fetch", fetch), [
px.TaskSpec("transform", transform, depends_on=("fetch",)), px.TaskSpec("fetch", fetch),
]) px.TaskSpec("transform", transform, depends_on=("fetch",)),
]
)
report = px.run(graph, strategy="async") report = px.run(graph, strategy="async")
assert report.success assert report.success
assert report["transform"] == 84 assert report["transform"] == 84
@@ -175,11 +187,13 @@ def test_async_parallelism() -> None:
await asyncio.sleep(0.3) await asyncio.sleep(0.3)
return "done" return "done"
graph = px.Graph.from_specs([ graph = px.Graph.from_specs(
px.TaskSpec("a", slow), [
px.TaskSpec("b", slow), px.TaskSpec("a", slow),
px.TaskSpec("c", slow), px.TaskSpec("b", slow),
]) px.TaskSpec("c", slow),
]
)
start = time.time() start = time.time()
report = px.run(graph, strategy="async") report = px.run(graph, strategy="async")
elapsed = time.time() - start elapsed = time.time() - start
@@ -195,10 +209,12 @@ def test_async_mixed_sync_and_async() -> None:
await asyncio.sleep(0.01) await asyncio.sleep(0.01)
return sync_task + 5 return sync_task + 5
graph = px.Graph.from_specs([ graph = px.Graph.from_specs(
px.TaskSpec("sync_task", sync_task), [
px.TaskSpec("async_task", async_task, depends_on=("sync_task",)), px.TaskSpec("sync_task", sync_task),
]) px.TaskSpec("async_task", async_task, depends_on=("sync_task",)),
]
)
report = px.run(graph, strategy="async") report = px.run(graph, strategy="async")
assert report.success assert report.success
assert report["async_task"] == 15 assert report["async_task"] == 15
@@ -246,10 +262,12 @@ def test_memory_backend_resume() -> None:
return fn return fn
graph = px.Graph.from_specs([ graph = px.Graph.from_specs(
px.TaskSpec("a", make("a")), [
px.TaskSpec("b", make("b"), depends_on=("a",)), px.TaskSpec("a", make("a")),
]) px.TaskSpec("b", make("b"), depends_on=("a",)),
]
)
backend = MemoryBackend() backend = MemoryBackend()
_ = px.run(graph, strategy="sequential", state=backend) _ = px.run(graph, strategy="sequential", state=backend)
assert runs == ["a", "b"] assert runs == ["a", "b"]
@@ -375,10 +393,12 @@ def test_threaded_skips_cached_tasks() -> None:
return fn return fn
graph = px.Graph.from_specs([ graph = px.Graph.from_specs(
px.TaskSpec("a", make("a")), [
px.TaskSpec("b", make("b"), depends_on=("a",)), px.TaskSpec("a", make("a")),
]) px.TaskSpec("b", make("b"), depends_on=("a",)),
]
)
backend = px.MemoryBackend() backend = px.MemoryBackend()
# 第一次运行填充缓存 # 第一次运行填充缓存
_ = px.run(graph, strategy="thread", max_workers=2, state=backend) _ = px.run(graph, strategy="thread", max_workers=2, state=backend)
@@ -418,10 +438,12 @@ def test_async_skips_cached_tasks() -> None:
runs.append("b") runs.append("b")
return a + "b" return a + "b"
graph = px.Graph.from_specs([ graph = px.Graph.from_specs(
px.TaskSpec("a", a), [
px.TaskSpec("b", b, depends_on=("a",)), px.TaskSpec("a", a),
]) px.TaskSpec("b", b, depends_on=("a",)),
]
)
backend = px.MemoryBackend() backend = px.MemoryBackend()
_ = px.run(graph, strategy="async", state=backend) _ = px.run(graph, strategy="async", state=backend)
assert runs == ["a", "b"] assert runs == ["a", "b"]
@@ -497,10 +519,12 @@ def test_downstream_skipped_when_upstream_skipped_sequential() -> None:
def downstream(upstream: str) -> str: def downstream(upstream: str) -> str:
return upstream + "_processed" return upstream + "_processed"
graph = px.Graph.from_specs([ graph = px.Graph.from_specs(
px.TaskSpec("upstream", cmd=["echo", "hello"], conditions=(never_true,)), [
px.TaskSpec("downstream", downstream, depends_on=("upstream",)), px.TaskSpec("upstream", cmd=["echo", "hello"], conditions=(never_true,)),
]) px.TaskSpec("downstream", downstream, depends_on=("upstream",)),
]
)
report = px.run(graph, strategy="sequential") report = px.run(graph, strategy="sequential")
assert report.success assert report.success
assert report.result_of("upstream").status == px.TaskStatus.SKIPPED assert report.result_of("upstream").status == px.TaskStatus.SKIPPED
@@ -514,10 +538,12 @@ def test_downstream_skipped_when_upstream_skipped_thread() -> None:
def downstream(upstream: str) -> str: def downstream(upstream: str) -> str:
return upstream + "_processed" return upstream + "_processed"
graph = px.Graph.from_specs([ graph = px.Graph.from_specs(
px.TaskSpec("upstream", cmd=["echo", "hello"], conditions=(never_true,)), [
px.TaskSpec("downstream", downstream, depends_on=("upstream",)), px.TaskSpec("upstream", cmd=["echo", "hello"], conditions=(never_true,)),
]) px.TaskSpec("downstream", downstream, depends_on=("upstream",)),
]
)
report = px.run(graph, strategy="thread", max_workers=2) report = px.run(graph, strategy="thread", max_workers=2)
assert report.success assert report.success
assert report.result_of("upstream").status == px.TaskStatus.SKIPPED assert report.result_of("upstream").status == px.TaskStatus.SKIPPED
@@ -535,10 +561,12 @@ def test_downstream_skipped_when_upstream_skipped_async() -> None:
never_true = lambda: False # noqa: E731 never_true = lambda: False # noqa: E731
graph = px.Graph.from_specs([ graph = px.Graph.from_specs(
px.TaskSpec("upstream", upstream, conditions=(never_true,)), [
px.TaskSpec("downstream", downstream, depends_on=("upstream",)), px.TaskSpec("upstream", upstream, conditions=(never_true,)),
]) px.TaskSpec("downstream", downstream, depends_on=("upstream",)),
]
)
report = px.run(graph, strategy="async") report = px.run(graph, strategy="async")
assert report.success assert report.success
assert report.result_of("upstream").status == px.TaskStatus.SKIPPED assert report.result_of("upstream").status == px.TaskStatus.SKIPPED
@@ -555,10 +583,12 @@ def test_downstream_executes_when_upstream_succeeds() -> None:
def downstream(upstream: str) -> str: def downstream(upstream: str) -> str:
return upstream + "_processed" return upstream + "_processed"
graph = px.Graph.from_specs([ graph = px.Graph.from_specs(
px.TaskSpec("upstream", upstream, conditions=(always_true,)), [
px.TaskSpec("downstream", downstream, depends_on=("upstream",)), px.TaskSpec("upstream", upstream, conditions=(always_true,)),
]) px.TaskSpec("downstream", downstream, depends_on=("upstream",)),
]
)
report = px.run(graph, strategy="sequential") report = px.run(graph, strategy="sequential")
assert report.success assert report.success
assert report.result_of("upstream").status == px.TaskStatus.SUCCESS assert report.result_of("upstream").status == px.TaskStatus.SUCCESS