feat: 初始化PyFlowX轻量级DAG任务调度库

实现完整的DAG任务调度核心功能,包括:
1.  支持同步/异步/线程三种执行策略
2.  自动上下文注入,无需手动绑定任务依赖
3.  内置状态后端,支持断点续跑
4.  提供完整的测试用例与示例代码
5.  添加CI/CD配置与发布流程
This commit is contained in:
2026-06-20 10:41:33 +08:00
parent 70f3c03986
commit 8b7777d936
21 changed files with 6003 additions and 3 deletions
View File
+89
View File
@@ -0,0 +1,89 @@
"""Tests for context injection rules."""
from __future__ import annotations
from typing import Any
import pytest
import pyflowx as px
from pyflowx.context import build_call_args, describe_injection
from pyflowx.errors import InjectionError
def test_inject_by_parameter_name() -> None:
    """Dependency results are passed as keyword arguments matching parameter names."""

    def fn(a: int, b: str) -> str:
        return f"{a}{b}"

    task = px.TaskSpec("c", fn, ("a", "b"))
    upstream = {"a": 1, "b": "x"}

    positional, keyword = build_call_args(task, upstream)

    # Nothing is passed positionally; both dependencies arrive by name.
    assert positional == ()
    assert keyword == {"a": 1, "b": "x"}
def test_inject_context_annotation() -> None:
    """A ``px.Context``-annotated parameter receives only the declared dependencies."""

    def fn(ctx: px.Context) -> int:
        return len(ctx)

    task = px.TaskSpec("agg", fn, ("a", "b"))
    available = {"a": 1, "b": 2, "c": 99}

    _, keyword = build_call_args(task, available)

    # "c" is available but is not one of the task's dependencies, so it is left out.
    assert keyword == {"ctx": {"a": 1, "b": 2}}
def test_inject_var_keyword() -> None:
    """A ``**kwargs`` parameter receives every dependency result by name."""

    def fn(**kwargs: Any) -> int:
        return sum(kwargs.values())

    task = px.TaskSpec("agg", fn, ("a", "b"))
    upstream = {"a": 1, "b": 2}

    _, keyword = build_call_args(task, upstream)

    assert keyword == {"a": 1, "b": 2}
def test_static_args_and_kwargs() -> None:
    """Static ``args``/``kwargs`` on the spec are forwarded unchanged when there are no deps."""

    def fn(uid: int, source: str) -> str:
        return f"{source}:{uid}"

    task = px.TaskSpec("fetch", fn, args=(42,), kwargs={"source": "api"})

    positional, keyword = build_call_args(task, {})

    assert positional == (42,)
    assert keyword == {"source": "api"}
def test_default_param_not_required() -> None:
    """A parameter with a default value does not need an injected value."""

    def fn(a: int, flag: bool = True) -> int:
        return a if flag else 0

    task = px.TaskSpec("t", fn, ("a",))

    _, keyword = build_call_args(task, {"a": 5})

    # ``flag`` is absent from the call kwargs, so its default applies.
    assert keyword == {"a": 5}
def test_unresolved_required_param_raises() -> None:
    """A required parameter with no source raises ``InjectionError`` naming the parameter."""

    def fn(a: int, missing: str) -> None:
        return None

    task = px.TaskSpec("t", fn, ("a",))

    with pytest.raises(InjectionError) as caught:
        build_call_args(task, {"a": 1})

    message = str(caught.value)
    assert "missing" in message
def test_static_kwargs_collide_with_dependency() -> None:
    """A static kwarg with the same name as a dependency raises ``InjectionError``."""

    def fn(a: int) -> int:
        return a

    # "a" is supplied twice: once as a dependency and once as a static kwarg.
    task = px.TaskSpec("t", fn, ("a",), kwargs={"a": 99})

    with pytest.raises(InjectionError):
        build_call_args(task, {"a": 1})
def test_describe_injection() -> None:
    """``describe_injection`` reports the source of every parameter of the task function."""

    def fn(a: int, ctx: px.Context, flag: bool = False) -> None:
        return None

    task = px.TaskSpec("t", fn, ("a",))
    description = describe_injection(task)

    # One fragment per parameter: dependency result, context, and default.
    expected_fragments = ("a=<result:a>", "ctx=<Context>", "flag=<default>")
    for fragment in expected_fragments:
        assert fragment in description
+322
View File
@@ -0,0 +1,322 @@
"""Tests for execution: sequential, thread, async, retries, timeout, resume."""
from __future__ import annotations
import asyncio
import os
import tempfile
import threading
import time
from typing import Any, List
import pytest
import pyflowx as px
from pyflowx.errors import TaskFailedError, TaskTimeoutError
from pyflowx.storage import JSONBackend, MemoryBackend
# ---------------------------------------------------------------------- #
# Sequential
# ---------------------------------------------------------------------- #
def test_sequential_basic() -> None:
    """A two-task chain runs sequentially and the report exposes both results."""

    def extract() -> list[int]:
        return [1, 2, 3]

    def double(extract: list[int]) -> list[int]:
        return [value * 2 for value in extract]

    specs = [
        px.TaskSpec("extract", extract),
        px.TaskSpec("double", double, ("extract",)),
    ]
    dag = px.Graph.from_specs(specs)

    outcome = px.run(dag, strategy="sequential")

    assert outcome.success
    assert outcome["extract"] == [1, 2, 3]
    assert outcome["double"] == [2, 4, 6]
def test_sequential_diamond() -> None:
    """A diamond-shaped graph (a -> b, c -> d) executes in the expected order."""
    executed: List[str] = []

    def tracked(name: str) -> Any:
        # Build a task that records its own name when it runs.
        def fn() -> str:
            executed.append(name)
            return name

        return fn

    specs = [
        px.TaskSpec("a", tracked("a")),
        px.TaskSpec("b", tracked("b"), ("a",)),
        px.TaskSpec("c", tracked("c"), ("a",)),
        px.TaskSpec("d", tracked("d"), ("b", "c")),
    ]
    dag = px.Graph.from_specs(specs)

    outcome = px.run(dag, strategy="sequential")

    assert outcome.success
    assert outcome["d"] == "d"
    assert executed == ["a", "b", "c", "d"]
def test_failure_propagates() -> None:
    """A raising task surfaces as ``TaskFailedError`` carrying the task name and cause."""

    def boom() -> None:
        raise ValueError("kaboom")

    def downstream(boom: None) -> int:
        return 1

    specs = [
        px.TaskSpec("boom", boom),
        px.TaskSpec("downstream", downstream, ("boom",)),
    ]
    dag = px.Graph.from_specs(specs)

    with pytest.raises(TaskFailedError) as caught:
        px.run(dag, strategy="sequential")

    failure = caught.value
    assert failure.task == "boom"
    assert isinstance(failure.cause, ValueError)
def test_retries_then_succeeds() -> None:
    """A task that fails twice succeeds on its third attempt when ``retries=2``."""
    calls = 0

    def flaky() -> str:
        nonlocal calls
        calls += 1
        # The first two calls fail; the third one returns.
        if calls < 3:
            raise RuntimeError("not yet")
        return "ok"

    dag = px.Graph.from_specs([px.TaskSpec("flaky", flaky, retries=2)])

    outcome = px.run(dag, strategy="sequential")

    assert outcome.success
    assert outcome["flaky"] == "ok"
    assert calls == 3
def test_retries_exhausted() -> None:
    """With ``retries=2`` an always-failing task reports three attempts in the error."""

    def always_fail() -> None:
        raise RuntimeError("nope")

    spec = px.TaskSpec("f", always_fail, retries=2)
    dag = px.Graph.from_specs([spec])

    with pytest.raises(TaskFailedError) as caught:
        px.run(dag, strategy="sequential")

    assert caught.value.attempts == 3
# ---------------------------------------------------------------------- #
# Threaded
# ---------------------------------------------------------------------- #
def test_threaded_parallelism() -> None:
    """Three independent 0.3s tasks run with the thread strategy finish in under 0.8s.

    Elapsed time is measured with ``time.perf_counter()`` rather than
    ``time.time()``: the wall clock can be adjusted while the test runs
    (NTP, manual changes), which would skew the timing assertion.
    """

    def slow() -> str:
        time.sleep(0.3)
        return "done"

    graph = px.Graph.from_specs(
        [
            px.TaskSpec("a", slow),
            px.TaskSpec("b", slow),
            px.TaskSpec("c", slow),
        ]
    )

    start = time.perf_counter()
    report = px.run(graph, strategy="thread", max_workers=3)
    elapsed = time.perf_counter() - start

    assert report.success
    # Three 0.3s tasks in parallel should be well under 0.8s
    # (running them one after another would take about 0.9s).
    assert elapsed < 0.8
def test_threaded_layer_barrier() -> None:
    """With the thread strategy, a task completes only after all of its dependencies."""
    completed: List[str] = []
    guard = threading.Lock()

    def tracked(name: str) -> Any:
        # Build a task that sleeps briefly, then records its completion under the lock.
        def fn() -> str:
            time.sleep(0.1)
            with guard:
                completed.append(name)
            return name

        return fn

    specs = [
        px.TaskSpec("a", tracked("a")),
        px.TaskSpec("b", tracked("b")),
        px.TaskSpec("c", tracked("c"), ("a", "b")),
    ]
    dag = px.Graph.from_specs(specs)

    outcome = px.run(dag, strategy="thread", max_workers=2)

    assert outcome.success
    # "c" depends on both "a" and "b", so it must be recorded after each of them.
    c_position = completed.index("c")
    assert c_position > completed.index("a")
    assert c_position > completed.index("b")
# ---------------------------------------------------------------------- #
# Async
# ---------------------------------------------------------------------- #
def test_async_basic() -> None:
    """Two chained coroutine tasks run under the async strategy and pass results along."""

    async def fetch() -> int:
        await asyncio.sleep(0.01)
        return 42

    async def transform(fetch: int) -> int:
        return fetch * 2

    specs = [
        px.TaskSpec("fetch", fetch),
        px.TaskSpec("transform", transform, ("fetch",)),
    ]
    dag = px.Graph.from_specs(specs)

    outcome = px.run(dag, strategy="async")

    assert outcome.success
    assert outcome["transform"] == 84
def test_async_parallelism() -> None:
    """Three independent 0.3s coroutines run with the async strategy finish in under 0.8s.

    Elapsed time is measured with ``time.perf_counter()`` rather than
    ``time.time()``: the wall clock can be adjusted while the test runs
    (NTP, manual changes), which would skew the timing assertion.
    """

    async def slow() -> str:
        await asyncio.sleep(0.3)
        return "done"

    graph = px.Graph.from_specs(
        [
            px.TaskSpec("a", slow),
            px.TaskSpec("b", slow),
            px.TaskSpec("c", slow),
        ]
    )

    start = time.perf_counter()
    report = px.run(graph, strategy="async")
    elapsed = time.perf_counter() - start

    assert report.success
    # Running the three sleeps one after another would take about 0.9s.
    assert elapsed < 0.8
def test_async_mixed_sync_and_async() -> None:
    """A plain function and a coroutine can be combined in one async-strategy run."""

    def sync_task() -> int:
        return 10

    async def async_task(sync_task: int) -> int:
        await asyncio.sleep(0.01)
        return sync_task + 5

    specs = [
        px.TaskSpec("sync_task", sync_task),
        px.TaskSpec("async_task", async_task, ("sync_task",)),
    ]
    dag = px.Graph.from_specs(specs)

    outcome = px.run(dag, strategy="async")

    assert outcome.success
    assert outcome["async_task"] == 15
def test_async_timeout() -> None:
    """A coroutine exceeding its ``timeout`` fails with a ``TaskTimeoutError`` cause."""

    async def slow() -> None:
        await asyncio.sleep(10)

    # The task sleeps for 10s but is only allowed 0.05s.
    spec = px.TaskSpec("slow", slow, timeout=0.05)
    dag = px.Graph.from_specs([spec])

    with pytest.raises(TaskFailedError) as caught:
        px.run(dag, strategy="async")

    assert isinstance(caught.value.cause, TaskTimeoutError)
# ---------------------------------------------------------------------- #
# Dry run
# ---------------------------------------------------------------------- #
def test_dry_run_does_not_execute(capsys: pytest.CaptureFixture[str]) -> None:
    """``dry_run=True`` prints the plan, runs no task, and returns an empty report."""
    invocations: List[str] = []

    def fn() -> str:
        invocations.append("x")
        return "should-not-run"

    dag = px.Graph.from_specs([px.TaskSpec("a", fn)])

    outcome = px.run(dag, strategy="sequential", dry_run=True)

    assert invocations == []
    assert len(outcome) == 0

    # The plan is written to stdout.
    printed = capsys.readouterr().out
    for marker in ("Dry run", "Layer 1"):
        assert marker in printed
# ---------------------------------------------------------------------- #
# State / resume
# ---------------------------------------------------------------------- #
def test_memory_backend_resume() -> None:
    """A second run sharing the same ``MemoryBackend`` does not re-execute any task."""
    executed: List[str] = []

    def tracked(name: str) -> Any:
        # Build a task that records its own name each time it actually runs.
        def fn() -> str:
            executed.append(name)
            return name

        return fn

    specs = [
        px.TaskSpec("a", tracked("a")),
        px.TaskSpec("b", tracked("b"), ("a",)),
    ]
    dag = px.Graph.from_specs(specs)
    shared_state = MemoryBackend()

    # First run executes both tasks.
    px.run(dag, strategy="sequential", state=shared_state)
    assert executed == ["a", "b"]

    # Second run with the same backend: the execution log is unchanged.
    px.run(dag, strategy="sequential", state=shared_state)
    assert executed == ["a", "b"]
def test_json_backend_persistence() -> None:
    """State written by one ``JSONBackend`` is reused by a new backend on the same file."""
    with tempfile.TemporaryDirectory() as workdir:
        state_file = os.path.join(workdir, "state.json")

        def fn() -> int:
            return 7

        first_dag = px.Graph.from_specs([px.TaskSpec("a", fn)])
        px.run(first_dag, strategy="sequential", state=JSONBackend(state_file))

        # A fresh backend pointing at the same file: task "a" should be skipped.
        executed: List[str] = []

        def fn2() -> int:
            executed.append("ran")
            return 8

        second_dag = px.Graph.from_specs([px.TaskSpec("a", fn2)])
        outcome = px.run(
            second_dag, strategy="sequential", state=JSONBackend(state_file)
        )

        assert executed == []
        # The stored value from the first run is reported, not fn2's 8.
        assert outcome["a"] == 7
# ---------------------------------------------------------------------- #
# Events
# ---------------------------------------------------------------------- #
def test_on_event_callback() -> None:
    """The ``on_event`` callback receives events for the task, including a SUCCESS one."""
    received: List[px.TaskEvent] = []

    def fn() -> int:
        return 1

    dag = px.Graph.from_specs([px.TaskSpec("a", fn)])

    px.run(dag, strategy="sequential", on_event=received.append)

    seen_statuses = [event.status for event in received]
    assert px.TaskStatus.SUCCESS in seen_statuses
    # Every event belongs to the only task in the graph.
    assert all(event.task == "a" for event in received)
# ---------------------------------------------------------------------- #
# Invalid strategy
# ---------------------------------------------------------------------- #
def test_invalid_strategy() -> None:
    """An unknown strategy name makes ``px.run`` raise ``ValueError``."""
    spec = px.TaskSpec("a", lambda: None)  # type: ignore[arg-type]
    dag = px.Graph.from_specs([spec])

    with pytest.raises(ValueError):
        px.run(dag, strategy="bogus")  # type: ignore[arg-type]
+131
View File
@@ -0,0 +1,131 @@
"""Tests for Graph construction, validation, layering and subgraphs."""
from __future__ import annotations
import pytest
import pyflowx as px
from pyflowx.errors import CycleError, DuplicateTaskError, MissingDependencyError
def _fn() -> None:
    """No-op callable used as the task function in the graph tests below."""
    return
def test_from_specs_builds_graph() -> None:
    """``Graph.from_specs`` exposes names, dependencies, length and membership."""
    specs = [
        px.TaskSpec("a", _fn),
        px.TaskSpec("b", _fn, ("a",)),
        px.TaskSpec("c", _fn, ("a", "b")),
    ]
    dag = px.Graph.from_specs(specs)

    assert set(dag.names) == {"a", "b", "c"}
    assert dag.dependencies("c") == ("a", "b")
    assert len(dag) == 3
    assert "a" in dag
def test_from_specs_allows_forward_references() -> None:
    """Declaration order is irrelevant: a task may depend on one declared after it."""
    # "b" is listed before the "a" it depends on.
    specs = [
        px.TaskSpec("b", _fn, ("a",)),
        px.TaskSpec("a", _fn),
    ]
    dag = px.Graph.from_specs(specs)

    assert dag.layers() == [["a"], ["b"]]
def test_duplicate_task_raises() -> None:
    """Two specs sharing one name raise ``DuplicateTaskError``."""
    with pytest.raises(DuplicateTaskError):
        duplicated = [px.TaskSpec("a", _fn), px.TaskSpec("a", _fn)]
        px.Graph.from_specs(duplicated)
def test_missing_dependency_raises() -> None:
    """An undeclared dependency raises ``MissingDependencyError`` naming both ends."""
    with pytest.raises(MissingDependencyError) as caught:
        # "b" depends on "a", which is never declared.
        px.Graph.from_specs([px.TaskSpec("b", _fn, ("a",))])

    error = caught.value
    assert error.task == "b"
    assert error.dependency == "a"
def test_cycle_detection() -> None:
    """A dependency cycle (a -> b -> c -> a) raises ``CycleError``."""
    with pytest.raises(CycleError):
        cyclic = [
            px.TaskSpec("a", _fn, ("c",)),
            px.TaskSpec("b", _fn, ("a",)),
            px.TaskSpec("c", _fn, ("b",)),
        ]
        px.Graph.from_specs(cyclic)
def test_layers_grouping() -> None:
    """``layers()`` places independent tasks together and dependents in later layers."""
    specs = [
        px.TaskSpec("a", _fn),
        px.TaskSpec("b", _fn),
        px.TaskSpec("c", _fn, ("a", "b")),
        px.TaskSpec("d", _fn, ("c",)),
    ]
    dag = px.Graph.from_specs(specs)

    expected = [["a", "b"], ["c"], ["d"]]
    assert dag.layers() == expected
def test_self_dependency_rejected() -> None:
    """Constructing a ``TaskSpec`` that depends on itself raises ``ValueError``."""
    own_name = "a"
    with pytest.raises(ValueError):
        px.TaskSpec(own_name, _fn, (own_name,))
def test_to_mermaid() -> None:
    """``to_mermaid()`` emits a ``graph TD`` header, node labels and edges."""
    specs = [
        px.TaskSpec("a", _fn),
        px.TaskSpec("b", _fn, ("a",)),
    ]
    rendered = px.Graph.from_specs(specs).to_mermaid()

    assert rendered.startswith("graph TD")
    for fragment in ('a["a"]', "a --> b"):
        assert fragment in rendered
def test_to_mermaid_invalid_orientation() -> None:
    """Passing the orientation ``"XX"`` to ``to_mermaid`` raises ``ValueError``."""
    dag = px.Graph.from_specs([px.TaskSpec("a", _fn)])

    with pytest.raises(ValueError):
        dag.to_mermaid("XX")
def test_subgraph_by_tags() -> None:
    """``subgraph`` keeps only the tasks carrying one of the requested tags."""
    specs = [
        px.TaskSpec("a", _fn, tags=("ingest",)),
        px.TaskSpec("b", _fn, ("a",), tags=("ingest",)),
        px.TaskSpec("c", _fn, ("b",), tags=("report",)),
    ]
    full_graph = px.Graph.from_specs(specs)

    ingest_only = full_graph.subgraph(["ingest"])

    # "c" is tagged "report", so it is dropped; "a" and "b" remain.
    assert set(ingest_only.names) == {"a", "b"}
    # The a -> b edge lies entirely inside the selection and is kept.
    assert ingest_only.dependencies("b") == ("a",)
def test_subgraph_by_names() -> None:
    """``subgraph_by_names`` keeps the named tasks and the edges between them."""
    specs = [
        px.TaskSpec("a", _fn),
        px.TaskSpec("b", _fn, ("a",)),
        px.TaskSpec("c", _fn, ("b",)),
    ]
    full_graph = px.Graph.from_specs(specs)

    selected = full_graph.subgraph_by_names(["a", "b"])

    # "c" was not requested, so it is dropped.
    assert set(selected.names) == {"a", "b"}
    # The a -> b edge connects two kept tasks and is preserved.
    assert selected.dependencies("b") == ("a",)
def test_subgraph_by_names_unknown() -> None:
    """Requesting a task name that is not in the graph raises ``KeyError``."""
    dag = px.Graph.from_specs([px.TaskSpec("a", _fn)])

    with pytest.raises(KeyError):
        dag.subgraph_by_names(["nope"])
def test_describe() -> None:
    """``describe()`` mentions one layer per level of a two-level graph."""
    specs = [
        px.TaskSpec("a", _fn),
        px.TaskSpec("b", _fn, ("a",)),
    ]
    summary = px.Graph.from_specs(specs).describe()

    for heading in ("Layer 1", "Layer 2"):
        assert heading in summary