feat: 新增多项核心功能并优化默认执行策略

1.  将CliRunner默认执行策略从sequential改为dependency
2.  新增RunReport的任务状态查询和时长统计方法
3.  实现task装饰器并补充executor参数文档
4.  新增进程池执行器支持CPU密集型任务
5.  新增Graph.chain链式构建和add_subgraph子图合并功能
6.  新增流式任务传递、进程池执行、命名空间等多类测试用例
7.  补充tests目录路径导入配置
This commit is contained in:
2026-06-28 15:10:15 +08:00
parent 232e7293d9
commit 40f641611b
15 changed files with 907 additions and 15 deletions
+26
View File
@@ -0,0 +1,26 @@
"""进程池测试辅助:模块级函数(须可 pickle)。"""
from __future__ import annotations
import time
def cpu_heavy(n: int) -> int:
    """Return the sum of squares of the integers in ``range(n)``.

    Used as a CPU-bound workload for the process-pool tests; it is defined at
    module level so that it can be pickled.
    """
    total = 0
    for value in range(n):
        total += value * value
    return total
def add(a: int, b: int) -> int:
    """Return ``a + b``; a trivial module-level helper for pool tests."""
    result = a + b
    return result
def sub(a: int, b: int) -> int:
    """Return ``a - b``; a trivial module-level helper for pool tests."""
    difference = a - b
    return difference
def slow_sleep(seconds: float) -> int:
    """Block for ``seconds`` and return that duration truncated to an int.

    Exists so the timeout tests have a task that reliably runs too long.
    """
    time.sleep(seconds)
    whole_seconds = int(seconds)
    return whole_seconds
+7
View File
@@ -1,9 +1,16 @@
from __future__ import annotations
import sys
from pathlib import Path
import pytest
# Put the tests directory on sys.path so that the process-pool tests can
# import the module-level helpers from _proc_helper.
# Process-pool pickling requires the called functions to live at module level;
# per the original author's note, conftest.py is also executed in xdist workers.
_TESTS_DIR = str(Path(__file__).resolve().parent)
if sys.path.count(_TESTS_DIR) == 0:
    sys.path.insert(0, _TESTS_DIR)
@pytest.fixture(autouse=True)
def packtool_tmp_workdir(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
+101
View File
@@ -0,0 +1,101 @@
"""Tests for Graph.chain DSL."""
from __future__ import annotations
import pyflowx as px
from pyflowx.task import TaskSpec
def _fn() -> None:
    """No-op callable used as a placeholder task body."""
def test_chain_basic_linkage() -> None:
    """chain(a, b, c) must wire the dependencies a -> b -> c."""
    first, second, third = (TaskSpec(name, _fn) for name in ("a", "b", "c"))
    graph = px.Graph().chain(first, second, third)

    # Each task depends only on its immediate predecessor; the head has none.
    expected_deps = {"b": ("a",), "c": ("b",), "a": ()}
    for task_name, deps in expected_deps.items():
        assert graph.all_specs()[task_name].depends_on == deps
def test_chain_single_spec() -> None:
    """chain(a) registers only ``a`` and gives it no dependencies."""
    graph = px.Graph().chain(TaskSpec("a", _fn))

    assert "a" in graph
    registered = graph.all_specs()["a"]
    assert registered.depends_on == ()
def test_chain_preserves_existing_deps() -> None:
    """chain keeps a spec's pre-declared ``depends_on`` intact."""
    head = TaskSpec("a", _fn)
    middle = TaskSpec("b", _fn)
    tail = TaskSpec("c", _fn, depends_on=("b",))

    graph = px.Graph().chain(head, middle, tail)

    # c already declares depends_on=("b",) and its chain predecessor is b,
    # so the dependency must not be added a second time.
    tail_deps = graph.all_specs()["c"].depends_on
    assert tail_deps == ("b",)
def test_chain_merges_existing_deps() -> None:
    """chain adds the chain predecessor to existing dependencies only if missing.

    Two scenarios are covered:

    1. The predecessor is already declared in ``depends_on`` -> no duplicate.
    2. The predecessor is NOT declared -> it is merged in alongside the
       dependency the spec already had.
    """
    # Scenario 1: c's predecessor is x and c already depends on x.
    a = TaskSpec("a", _fn)
    x = TaskSpec("x", _fn)
    c = TaskSpec("c", _fn, depends_on=("x",))
    graph = px.Graph().chain(a, x, c)
    assert graph.all_specs()["c"].depends_on == ("x",)

    # Scenario 2: c's predecessor is a, which c does not declare yet.
    # Fresh specs are used so this graph is independent of the one above.
    merged_graph = px.Graph().chain(
        TaskSpec("x", _fn),
        TaskSpec("a", _fn),
        TaskSpec("c", _fn, depends_on=("x",)),
    )
    merged_deps = merged_graph.all_specs()["c"].depends_on
    # Compare order-insensitively: the contract under test is "both present,
    # neither duplicated", not the exact position of the predecessor.
    assert sorted(merged_deps) == ["a", "x"]
def test_chain_returns_self() -> None:
    """chain returns the very Graph it was called on, enabling fluent calls."""
    graph = px.Graph()
    returned = graph.chain(TaskSpec("a", _fn))
    assert returned is graph
def test_chain_execution_order() -> None:
    """Tasks linked with chain must execute in the order they were chained."""
    executed: list[str] = []

    def make(name: str):
        # Build a task body that records its own name when it runs.
        def fn() -> str:
            executed.append(name)
            return name

        return fn

    specs = [TaskSpec(label, make(label)) for label in ("a", "b", "c")]
    report = px.run(px.Graph().chain(*specs))

    assert report.success
    assert executed == ["a", "b", "c"]
def test_chain_with_decorator_specs() -> None:
    """chain accepts the objects produced by the @task decorator."""

    @px.task
    def extract() -> int:
        return 1

    @px.task
    def transform(extract: int) -> int:
        return extract + 10

    @px.task
    def load(transform: int) -> int:
        return transform + 100

    pipeline = px.Graph().chain(extract, transform, load)
    report = px.run(pipeline)

    assert report.success
    # 1 -> +10 -> +100
    assert report["load"] == 111
+62
View File
@@ -0,0 +1,62 @@
"""Tests for process executor (spec.executor='process')."""
from __future__ import annotations
import pytest
# pyrefly: ignore[missing-import]
from _proc_helper import add, cpu_heavy, slow_sleep, sub
import pyflowx as px
from pyflowx.errors import TaskFailedError
def test_process_executor_runs_cpu_task() -> None:
    """A task declared with executor='process' runs and returns its result."""
    graph = px.Graph.from_specs(
        [px.TaskSpec("cpu", fn=cpu_heavy, args=(1000,), executor="process")]
    )

    report = px.run(graph)

    assert report.success
    expected = sum(i * i for i in range(1000))
    assert report["cpu"] == expected
def test_process_executor_with_dependency() -> None:
    """A process-pool task may depend on another process-pool task."""
    # NOTE(review): the assertion below only checks the result of the fixed
    # positional args (3 + 4); the upstream result of "a" is never consumed.
    # Confirm whether dependency *injection* should be asserted here as well.
    upstream = px.TaskSpec("a", fn=cpu_heavy, args=(100,), executor="process")
    downstream = px.TaskSpec(
        "b",
        fn=add,
        args=(3, 4),
        executor="process",
        depends_on=("a",),
    )

    report = px.run(px.Graph.from_specs([upstream, downstream]))

    assert report.success
    assert report["b"] == 7
def test_process_executor_default_is_thread() -> None:
    """TaskSpec.executor falls back to 'thread' when not specified."""
    default_executor = px.TaskSpec("x", fn=lambda: None).executor
    assert default_executor == "thread"
def test_inline_executor_runs_in_event_loop() -> None:
    """A task declared with executor='inline' runs and returns its result.

    Per the original intent, 'inline' means the callable is invoked directly
    on the event-loop thread; this test only observes the returned value.
    """
    inline_spec = px.TaskSpec("inline", fn=add, args=(10, 20), executor="inline")

    report = px.run(px.Graph.from_specs([inline_spec]))

    assert report.success
    assert report["inline"] == 30
def test_process_executor_with_kwargs() -> None:
    """Process-pool tasks receive both positional args and kwargs."""
    kw_spec = px.TaskSpec(
        "kw",
        fn=sub,
        args=(10,),
        kwargs={"b": 3},
        executor="process",
    )

    report = px.run(px.Graph.from_specs([kw_spec]))

    assert report.success
    # sub(10, b=3)
    assert report["kw"] == 7
def test_process_executor_timeout() -> None:
    """A process-pool task that exceeds its timeout raises TaskFailedError."""
    # NOTE(review): the worker sleeps for 10 s while the timeout is 0.1 s.
    # If the pool waits for the worker on shutdown this test could take the
    # full 10 s — confirm against the executor's cancellation behaviour.
    slow_spec = px.TaskSpec(
        "slow",
        fn=slow_sleep,
        args=(10.0,),
        executor="process",
        timeout=0.1,
    )
    graph = px.Graph.from_specs([slow_spec])

    with pytest.raises(TaskFailedError):
        px.run(graph)
+152
View File
@@ -0,0 +1,152 @@
"""Tests for Graph namespace and add_subgraph."""
from __future__ import annotations
import pytest
import pyflowx as px
def _fn() -> None:
    """No-op callable used as a placeholder task body."""
def test_graph_namespace_field_default_none() -> None:
    """A freshly constructed Graph has ``namespace`` set to None."""
    assert px.Graph().namespace is None
def test_graph_from_specs_with_namespace() -> None:
    """from_specs(namespace=...) stores the namespace on the graph."""
    specs = [px.TaskSpec("a", _fn)]
    namespaced = px.Graph.from_specs(specs, namespace="ns1")
    assert namespaced.namespace == "ns1"
def test_add_subgraph_prefixes_task_names() -> None:
    """add_subgraph prefixes the subgraph's task names with its namespace."""
    child_specs = [
        px.TaskSpec("extract", _fn),
        px.TaskSpec("build", _fn, depends_on=("extract",)),
    ]
    child = px.Graph.from_specs(child_specs, namespace="build")
    parent = px.Graph.from_specs([px.TaskSpec("start", _fn)])

    parent.add_subgraph(child)

    # The parent's own task keeps its name; merged tasks become "<ns>:<name>".
    for expected_name in ("start", "build:extract", "build:build"):
        assert expected_name in parent
def test_add_subgraph_renames_internal_deps() -> None:
    """add_subgraph also prefixes dependency names inside the subgraph."""
    child = px.Graph.from_specs(
        [
            px.TaskSpec("a", _fn),
            px.TaskSpec("b", _fn, depends_on=("a",)),
        ],
        namespace="ns",
    )
    parent = px.Graph()

    parent.add_subgraph(child)

    merged_b = parent.all_specs()["ns:b"]
    assert merged_b.depends_on == ("ns:a",)
def test_add_subgraph_all_internal_deps_prefixed() -> None:
    """Every subgraph task, including depended-upon ones, gets the prefix."""
    dependency = px.TaskSpec("ext", _fn)
    dependent = px.TaskSpec("b", _fn, depends_on=("ext",))
    child = px.Graph.from_specs([dependency, dependent], namespace="ns")
    parent = px.Graph()

    parent.add_subgraph(child)

    merged_b = parent.all_specs()["ns:b"]
    assert merged_b.depends_on == ("ns:ext",)
    assert "ns:ext" in parent
def test_add_subgraph_requires_namespace() -> None:
    """add_subgraph raises ValueError when no namespace is available."""
    unnamed = px.Graph.from_specs([px.TaskSpec("a", _fn)])  # deliberately no namespace
    target = px.Graph()

    with pytest.raises(ValueError, match="namespace"):
        target.add_subgraph(unnamed)
def test_add_subgraph_explicit_namespace_overrides() -> None:
    """add_subgraph(namespace=...) wins over the subgraph's own namespace."""
    child = px.Graph.from_specs([px.TaskSpec("a", _fn)], namespace="original")
    parent = px.Graph()

    parent.add_subgraph(child, namespace="override")

    assert "override:a" in parent
    assert "original:a" not in parent
def test_add_subgraph_internal_injection_works() -> None:
    """Dependency injection between subgraph tasks still works after merging.

    The downstream lambda names its parameter ``extract`` (the un-prefixed
    upstream name); per the original note this is handled by a wrapper.
    """
    producer = px.TaskSpec("extract", lambda: [1, 2, 3])
    doubler = px.TaskSpec(
        "build",
        lambda extract: [x * 2 for x in extract],
        depends_on=("extract",),
    )
    child = px.Graph.from_specs([producer, doubler], namespace="build")
    parent = px.Graph()
    parent.add_subgraph(child)

    report = px.run(parent)

    assert report.success
    assert report["build:build"] == [2, 4, 6]
def test_add_subgraph_cross_namespace_ref_via_context() -> None:
    """A task outside the namespace reads a namespaced result via Context."""

    def consumer(ctx: px.Context) -> str:
        # The prefixed name is not a valid parameter name, so it is looked up
        # through the Context mapping instead.
        return f"got {ctx['ns:data']}"

    child = px.Graph.from_specs(
        [px.TaskSpec("data", lambda: "data_value")],
        namespace="ns",
    )
    parent = px.Graph()
    parent.add_subgraph(child)
    parent.add(px.TaskSpec("consumer", consumer, depends_on=("ns:data",)))

    report = px.run(parent)

    assert report.success
    assert report["consumer"] == "got data_value"
def test_add_subgraph_context_annotation_in_subgraph() -> None:
    """A Context-annotated task inside a subgraph sees un-prefixed names."""

    def sink(ctx: px.Context) -> int:
        # Looks up the sibling by its original (un-prefixed) name.
        return ctx["src"]

    source_spec = px.TaskSpec("src", lambda: 42)
    sink_spec = px.TaskSpec("sink", sink, depends_on=("src",))
    child = px.Graph.from_specs([source_spec, sink_spec], namespace="ns")
    parent = px.Graph()
    parent.add_subgraph(child)

    report = px.run(parent)

    assert report.success
    assert report["ns:sink"] == 42
def test_add_subgraph_chained() -> None:
    """add_subgraph calls can be chained to merge several subgraphs."""
    first_child = px.Graph.from_specs([px.TaskSpec("a", _fn)], namespace="nsA")
    second_child = px.Graph.from_specs([px.TaskSpec("b", _fn)], namespace="nsB")
    parent = px.Graph()

    parent.add_subgraph(first_child).add_subgraph(second_child)

    for merged_name in ("nsA:a", "nsB:b"):
        assert merged_name in parent
+47
View File
@@ -126,3 +126,50 @@ class TestRunReportDescribe:
report.results["a"] = TaskResult[Any](spec=spec, status=TaskStatus.PENDING)
desc = report.describe()
assert "-" in desc # duration 显示为 "-"
class TestRunReportQueries:
    """Exercise the query helpers added to RunReport."""

    def test_succeeded_tasks(self) -> None:
        """succeeded_tasks lists the names of tasks whose status is SUCCESS."""
        report = px.RunReport()
        statuses = {
            "a": TaskStatus.SUCCESS,
            "b": TaskStatus.FAILED,
            "c": TaskStatus.SUCCESS,
        }
        for name, status in statuses.items():
            report.results[name] = _make_result(name, status=status)

        assert report.succeeded_tasks() == ["a", "c"]

    def test_skipped_tasks(self) -> None:
        """skipped_tasks lists the names of tasks whose status is SKIPPED."""
        report = px.RunReport()
        for name, status in (("a", TaskStatus.SKIPPED), ("b", TaskStatus.SUCCESS)):
            report.results[name] = _make_result(name, status=status)

        assert report.skipped_tasks() == ["a"]

    def test_tasks_by_status(self) -> None:
        """tasks_by_status filters task names by the requested status."""
        report = px.RunReport()
        statuses = {
            "a": TaskStatus.FAILED,
            "b": TaskStatus.FAILED,
            "c": TaskStatus.SUCCESS,
        }
        for name, status in statuses.items():
            report.results[name] = _make_result(name, status=status)

        assert report.tasks_by_status(TaskStatus.FAILED) == ["a", "b"]
        assert report.tasks_by_status(TaskStatus.SUCCESS) == ["c"]
        # A status nobody has yields an empty list.
        assert report.tasks_by_status(TaskStatus.SKIPPED) == []

    def test_durations(self) -> None:
        """durations maps each task name to its recorded duration."""
        report = px.RunReport()
        for name, seconds in (("a", 1.5), ("b", 2.0)):
            report.results[name] = _make_result(name, duration=seconds)

        measured = report.durations()

        assert measured["a"] == 1.5
        assert measured["b"] == 2.0

    def test_durations_no_duration(self) -> None:
        """A task without a recorded duration is reported as 0.0."""
        report = px.RunReport()
        spec: TaskSpec[Any] = TaskSpec[Any]("a", _fn)  # type: ignore[arg-type]
        pending = TaskResult[Any](spec=spec, status=TaskStatus.PENDING)
        report.results["a"] = pending

        measured = report.durations()

        assert measured["a"] == 0.0
+3 -3
View File
@@ -72,10 +72,10 @@ class TestCliRunnerConstruction:
)
assert runner.commands == ["clean", "build", "test"]
def test_default_strategy_is_sequential(self) -> None:
"""默认策略应为 Strategy.SEQUENTIAL."""
def test_default_strategy_is_dependency(self) -> None:
"""默认策略应为 dependency(依赖驱动,最大并行度)."""
runner = px.CliRunner({"clean": _echo_graph()})
assert runner.strategy == "sequential"
assert runner.strategy == "dependency"
def test_custom_strategy_string(self) -> None:
"""应支持通过字符串指定策略."""
+63
View File
@@ -0,0 +1,63 @@
"""Tests for streaming result passing (iterators between tasks)."""
from __future__ import annotations
from typing import Iterator
import pyflowx as px
def test_generator_passed_as_iterator() -> None:
    """A generator returned upstream can be consumed lazily downstream."""

    @px.task
    def source() -> Iterator[int]:
        for value in range(5):
            yield value

    @px.task(depends_on=("source",))
    def consume(source: Iterator[int]) -> int:
        return sum(source)

    report = px.run(px.Graph.from_specs([source, consume]))

    assert report.success
    # 0 + 1 + 2 + 3 + 4
    assert report["consume"] == 10
def test_large_range_streaming() -> None:
    """A larger iterator is streamed between tasks without a list in between."""

    @px.task
    def numbers() -> Iterator[int]:
        for value in range(1000):
            yield value

    @px.task(depends_on=("numbers",))
    def total(numbers: Iterator[int]) -> int:
        return sum(numbers)

    report = px.run(px.Graph.from_specs([numbers, total]))

    assert report.success
    expected_total = sum(range(1000))
    assert report["total"] == expected_total
def test_chain_multiple_streams() -> None:
    """Several streaming tasks can be linked one after another."""

    @px.task
    def gen() -> Iterator[int]:
        for value in range(10):
            yield value

    @px.task(depends_on=("gen",))
    def doubled(gen: Iterator[int]) -> Iterator[int]:
        # Stays lazy: each item is doubled only when the consumer pulls it.
        yield from (x * 2 for x in gen)

    @px.task(depends_on=("doubled",))
    def collect(doubled: Iterator[int]) -> list[int]:
        return [item for item in doubled]

    report = px.run(px.Graph.from_specs([gen, doubled, collect]))

    assert report.success
    assert report["collect"] == [x * 2 for x in range(10)]
+136
View File
@@ -0,0 +1,136 @@
"""Tests for the @task decorator API."""
from __future__ import annotations
from pathlib import Path
from typing import Any, Mapping
import pyflowx as px
from pyflowx.task import RetryPolicy, TaskHooks, TaskSpec
def test_task_decorator_plain() -> None:
    """Bare @task yields a TaskSpec named after the decorated function."""

    @px.task
    def extract() -> list[int]:
        return [1, 2, 3]

    spec = extract
    assert isinstance(spec, TaskSpec)
    assert spec.name == "extract"
    assert spec.fn is not None
    assert spec.depends_on == ()
def test_task_decorator_with_params() -> None:
    """@task(...) forwards dependency and retry settings to the TaskSpec."""
    retry_policy = RetryPolicy(max_attempts=3)

    @px.task(depends_on=("extract",), retry=retry_policy)
    def double(extract: list[int]) -> list[int]:
        return [x * 2 for x in extract]

    spec = double
    assert isinstance(spec, TaskSpec)
    assert spec.name == "double"
    assert spec.depends_on == ("extract",)
    assert spec.retry.max_attempts == 3
def test_task_decorator_explicit_name() -> None:
    """An explicit name= takes precedence over the function's own name."""

    @px.task(name="custom_name")
    def my_func() -> None:
        return None

    resolved_name = my_func.name
    assert resolved_name == "custom_name"
def test_task_decorator_cmd_form() -> None:
    """task(cmd=...) builds a command-style TaskSpec without a function."""
    command = ["ls", "-la"]
    spec = px.task(cmd=command, name="list_files")

    assert isinstance(spec, TaskSpec)
    assert spec.name == "list_files"
    assert spec.cmd == ["ls", "-la"]
def test_task_decorator_full_options() -> None:
    """@task forwards every TaskSpec field exercised here onto the spec."""

    @px.task(
        depends_on=("a",),
        soft_depends_on=("b",),
        defaults={"b": 0},
        args=(1,),
        kwargs={"x": 2},
        retry=RetryPolicy(max_attempts=5),
        timeout=10.0,
        tags=("t1",),
        conditions=(px.BuiltinConditions.IS_WINDOWS,),  # type: ignore[arg-type]
        cwd="/tmp",
        env={"K": "v"},
        verbose=True,
        skip_if_missing=True,
        allow_upstream_skip=True,
        strategy="thread",
        priority=3,
        concurrency_key="db",
        continue_on_error=True,
    )
    def f(a: int) -> int:
        return a

    # Fields that are expected to come back equal to what was passed in.
    expected_values = {
        "depends_on": ("a",),
        "soft_depends_on": ("b",),
        "defaults": {"b": 0},
        "args": (1,),
        "kwargs": {"x": 2},
        "timeout": 10.0,
        "tags": ("t1",),
        "env": {"K": "v"},
        "strategy": "thread",
        "priority": 3,
        "concurrency_key": "db",
    }
    for field_name, expected in expected_values.items():
        assert getattr(f, field_name) == expected

    assert f.retry.max_attempts == 5
    assert len(f.conditions) == 1

    # cwd was given as a str and must come back as a Path.
    assert isinstance(f.cwd, Path)
    assert f.cwd == Path("/tmp")

    # Boolean flags must be the literal True, not merely truthy.
    for flag_name in (
        "verbose",
        "skip_if_missing",
        "allow_upstream_skip",
        "continue_on_error",
    ):
        assert getattr(f, flag_name) is True
def test_task_decorator_runs_in_graph() -> None:
    """Decorator-built TaskSpecs can be put in a Graph and run directly."""

    @px.task
    def extract() -> list[int]:
        return [1, 2, 3]

    @px.task(depends_on=("extract",))
    def double(extract: list[int]) -> list[int]:
        return [x * 2 for x in extract]

    report = px.run(px.Graph.from_specs([extract, double]))

    assert report.success
    assert report["double"] == [2, 4, 6]
def test_task_decorator_hooks_passthrough() -> None:
    """task(hooks=...) stores the very TaskHooks instance it was given."""
    given_hooks = TaskHooks(pre_run=lambda _spec: None)

    spec = px.task(fn=lambda: None, hooks=given_hooks, name="h")

    # Identity, not equality: the instance must not be copied.
    assert spec.hooks is given_hooks
def test_task_decorator_cache_key_passthrough() -> None:
    """task(cache_key=...) stores the very key function it was given."""

    def fixed_key(ctx: Mapping[str, Any]) -> str:
        return "k"

    spec = px.task(fn=lambda: None, cache_key=fixed_key, name="c")

    # Identity, not equality: the callable must be passed through untouched.
    assert spec.cache_key is fixed_key