feat: 初始化PyFlowX轻量级DAG任务调度库
实现完整的 DAG 任务调度核心功能,包括:
1. 支持同步/异步/线程三种执行策略
2. 自动上下文注入,无需手动绑定任务依赖
3. 内置状态后端,支持断点续跑
4. 提供完整的测试用例与示例代码
5. 添加 CI/CD 配置与发布流程
This commit is contained in:
@@ -0,0 +1,89 @@
|
||||
"""Tests for context injection rules."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
|
||||
import pyflowx as px
|
||||
from pyflowx.context import build_call_args, describe_injection
|
||||
from pyflowx.errors import InjectionError
|
||||
|
||||
|
||||
def test_inject_by_parameter_name() -> None:
    """Dependency results are passed as keywords matching the parameter names."""

    def fn(a: int, b: str) -> str:
        return f"{a}{b}"

    task = px.TaskSpec("c", fn, ("a", "b"))
    upstream = {"a": 1, "b": "x"}
    positional, keyword = build_call_args(task, upstream)

    # Nothing is injected positionally; both deps arrive by name.
    assert positional == ()
    assert keyword == {"a": 1, "b": "x"}
|
||||
|
||||
|
||||
def test_inject_context_annotation() -> None:
    """A ``px.Context``-annotated parameter receives only the declared deps."""

    def fn(ctx: px.Context) -> int:
        return len(ctx)

    task = px.TaskSpec("agg", fn, ("a", "b"))
    available = {"a": 1, "b": 2, "c": 99}
    _, keyword = build_call_args(task, available)

    # "c" is available but is not one of the task's deps, so it is left out.
    assert keyword == {"ctx": {"a": 1, "b": 2}}
|
||||
|
||||
|
||||
def test_inject_var_keyword() -> None:
    """A ``**kwargs`` parameter collects every declared dependency by name."""

    def fn(**kwargs: Any) -> int:
        return sum(kwargs.values())

    task = px.TaskSpec("agg", fn, ("a", "b"))
    _, keyword = build_call_args(task, {"a": 1, "b": 2})

    assert keyword == {"a": 1, "b": 2}
|
||||
|
||||
|
||||
def test_static_args_and_kwargs() -> None:
    """Static ``args``/``kwargs`` on the spec are forwarded unchanged."""

    def fn(uid: int, source: str) -> str:
        return f"{source}:{uid}"

    task = px.TaskSpec("fetch", fn, args=(42,), kwargs={"source": "api"})
    # No dependency results are supplied: everything comes from the spec.
    positional, keyword = build_call_args(task, {})

    assert positional == (42,)
    assert keyword == {"source": "api"}
|
||||
|
||||
|
||||
def test_default_param_not_required() -> None:
    """A parameter with a default is omitted from the injected keywords."""

    def fn(a: int, flag: bool = True) -> int:
        return a if flag else 0

    task = px.TaskSpec("t", fn, ("a",))
    _, keyword = build_call_args(task, {"a": 5})

    # Only the dependency shows up; ``flag`` keeps its default.
    assert keyword == {"a": 5}
|
||||
|
||||
|
||||
def test_unresolved_required_param_raises() -> None:
    """A required parameter with no source raises ``InjectionError`` naming it."""

    def fn(a: int, missing: str) -> None:
        return None

    task = px.TaskSpec("t", fn, ("a",))
    with pytest.raises(InjectionError) as caught:
        build_call_args(task, {"a": 1})

    # The error text must mention the parameter that could not be resolved.
    message = str(caught.value)
    assert "missing" in message
|
||||
|
||||
|
||||
def test_static_kwargs_collide_with_dependency() -> None:
    """A static kwarg with the same name as a dependency raises ``InjectionError``."""

    def fn(a: int) -> int:
        return a

    # "a" is both a declared dependency and a static keyword argument.
    task = px.TaskSpec("t", fn, ("a",), kwargs={"a": 99})
    with pytest.raises(InjectionError):
        build_call_args(task, {"a": 1})
|
||||
|
||||
|
||||
def test_describe_injection() -> None:
    """``describe_injection`` reports the source of each parameter."""

    def fn(a: int, ctx: px.Context, flag: bool = False) -> None:
        return None

    description = describe_injection(px.TaskSpec("t", fn, ("a",)))

    # One fragment per parameter: dependency result, context, default value.
    for fragment in ("a=<result:a>", "ctx=<Context>", "flag=<default>"):
        assert fragment in description
|
||||
@@ -0,0 +1,322 @@
|
||||
"""Tests for execution: sequential, thread, async, retries, timeout, resume."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import tempfile
|
||||
import threading
|
||||
import time
|
||||
from typing import Any, List
|
||||
|
||||
import pytest
|
||||
|
||||
import pyflowx as px
|
||||
from pyflowx.errors import TaskFailedError, TaskTimeoutError
|
||||
from pyflowx.storage import JSONBackend, MemoryBackend
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# Sequential
|
||||
# ---------------------------------------------------------------------- #
|
||||
def test_sequential_basic() -> None:
    """A two-task chain run sequentially exposes both results on the report."""

    def extract() -> list[int]:
        return [1, 2, 3]

    def double(extract: list[int]) -> list[int]:
        return [x * 2 for x in extract]

    specs = [
        px.TaskSpec("extract", extract),
        px.TaskSpec("double", double, ("extract",)),
    ]
    outcome = px.run(px.Graph.from_specs(specs), strategy="sequential")

    assert outcome.success
    assert outcome["extract"] == [1, 2, 3]
    assert outcome["double"] == [2, 4, 6]
|
||||
|
||||
|
||||
def test_sequential_diamond() -> None:
    """A diamond graph (a -> b, c -> d) runs sequentially in the order a, b, c, d."""
    executed: List[str] = []

    def make(name: str) -> Any:
        # Build a task body that records its own name when it runs.
        def fn() -> str:
            executed.append(name)
            return name

        return fn

    specs = [
        px.TaskSpec("a", make("a")),
        px.TaskSpec("b", make("b"), ("a",)),
        px.TaskSpec("c", make("c"), ("a",)),
        px.TaskSpec("d", make("d"), ("b", "c")),
    ]
    outcome = px.run(px.Graph.from_specs(specs), strategy="sequential")

    assert outcome.success
    assert outcome["d"] == "d"
    assert executed == ["a", "b", "c", "d"]
|
||||
|
||||
|
||||
def test_failure_propagates() -> None:
    """A task that raises surfaces as ``TaskFailedError`` with task name and cause."""

    def boom() -> None:
        raise ValueError("kaboom")

    def downstream(boom: None) -> int:
        return 1

    specs = [
        px.TaskSpec("boom", boom),
        px.TaskSpec("downstream", downstream, ("boom",)),
    ]
    dag = px.Graph.from_specs(specs)

    with pytest.raises(TaskFailedError) as caught:
        px.run(dag, strategy="sequential")

    failure = caught.value
    assert failure.task == "boom"
    assert isinstance(failure.cause, ValueError)
|
||||
|
||||
|
||||
def test_retries_then_succeeds() -> None:
    """With ``retries=2`` a task failing twice succeeds on its third call."""
    calls = 0

    def flaky() -> str:
        nonlocal calls
        calls += 1
        if calls < 3:
            raise RuntimeError("not yet")
        return "ok"

    dag = px.Graph.from_specs([px.TaskSpec("flaky", flaky, retries=2)])
    outcome = px.run(dag, strategy="sequential")

    assert outcome.success
    assert outcome["flaky"] == "ok"
    # One initial call plus two retries.
    assert calls == 3
|
||||
|
||||
|
||||
def test_retries_exhausted() -> None:
    """A task that always fails with ``retries=2`` reports three attempts."""

    def always_fail() -> None:
        raise RuntimeError("nope")

    dag = px.Graph.from_specs([px.TaskSpec("f", always_fail, retries=2)])

    with pytest.raises(TaskFailedError) as caught:
        px.run(dag, strategy="sequential")

    assert caught.value.attempts == 3
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# Threaded
|
||||
# ---------------------------------------------------------------------- #
|
||||
def test_threaded_parallelism() -> None:
    """Three independent 0.3s tasks on three worker threads finish in under 0.8s.

    Running them one after another would take about 0.9s, so staying below
    the bound shows that the tasks overlapped.
    """

    def slow() -> str:
        time.sleep(0.3)
        return "done"

    graph = px.Graph.from_specs(
        [
            px.TaskSpec("a", slow),
            px.TaskSpec("b", slow),
            px.TaskSpec("c", slow),
        ]
    )

    # perf_counter() is monotonic; time.time() follows the wall clock and can
    # jump if the system clock is adjusted while the test is running.
    start = time.perf_counter()
    report = px.run(graph, strategy="thread", max_workers=3)
    elapsed = time.perf_counter() - start

    assert report.success
    # Three 0.3s tasks in parallel should be well under 0.8s.
    assert elapsed < 0.8
|
||||
|
||||
|
||||
def test_threaded_layer_barrier() -> None:
    """With the thread strategy, a task finishes after both of its dependencies."""
    completed: List[str] = []
    guard = threading.Lock()

    def make(name: str) -> Any:
        def fn() -> str:
            time.sleep(0.1)
            # Appends come from worker threads, so serialize them.
            with guard:
                completed.append(name)
            return name

        return fn

    specs = [
        px.TaskSpec("a", make("a")),
        px.TaskSpec("b", make("b")),
        px.TaskSpec("c", make("c"), ("a", "b")),
    ]
    outcome = px.run(px.Graph.from_specs(specs), strategy="thread", max_workers=2)

    assert outcome.success
    # "c" must be recorded later than both "a" and "b".
    c_at = completed.index("c")
    assert c_at > completed.index("a")
    assert c_at > completed.index("b")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# Async
|
||||
# ---------------------------------------------------------------------- #
|
||||
def test_async_basic() -> None:
    """Two chained coroutine tasks run under the async strategy."""

    async def fetch() -> int:
        await asyncio.sleep(0.01)
        return 42

    async def transform(fetch: int) -> int:
        return fetch * 2

    specs = [
        px.TaskSpec("fetch", fetch),
        px.TaskSpec("transform", transform, ("fetch",)),
    ]
    outcome = px.run(px.Graph.from_specs(specs), strategy="async")

    assert outcome.success
    assert outcome["transform"] == 84
|
||||
|
||||
|
||||
def test_async_parallelism() -> None:
    """Three independent 0.3s coroutines finish in under 0.8s with the async strategy.

    Awaiting them one after another would take about 0.9s, so staying below
    the bound shows that they ran concurrently.
    """

    async def slow() -> str:
        await asyncio.sleep(0.3)
        return "done"

    graph = px.Graph.from_specs(
        [
            px.TaskSpec("a", slow),
            px.TaskSpec("b", slow),
            px.TaskSpec("c", slow),
        ]
    )

    # perf_counter() is monotonic; time.time() follows the wall clock and can
    # jump if the system clock is adjusted while the test is running.
    start = time.perf_counter()
    report = px.run(graph, strategy="async")
    elapsed = time.perf_counter() - start

    assert report.success
    assert elapsed < 0.8
|
||||
|
||||
|
||||
def test_async_mixed_sync_and_async() -> None:
    """The async strategy accepts a plain function feeding a coroutine task."""

    def sync_task() -> int:
        return 10

    async def async_task(sync_task: int) -> int:
        await asyncio.sleep(0.01)
        return sync_task + 5

    specs = [
        px.TaskSpec("sync_task", sync_task),
        px.TaskSpec("async_task", async_task, ("sync_task",)),
    ]
    outcome = px.run(px.Graph.from_specs(specs), strategy="async")

    assert outcome.success
    assert outcome["async_task"] == 15
|
||||
|
||||
|
||||
def test_async_timeout() -> None:
    """A coroutine exceeding its ``timeout`` fails with a ``TaskTimeoutError`` cause."""

    async def slow() -> None:
        await asyncio.sleep(10)

    # The 10s sleep is far longer than the 0.05s timeout on the spec.
    dag = px.Graph.from_specs([px.TaskSpec("slow", slow, timeout=0.05)])

    with pytest.raises(TaskFailedError) as caught:
        px.run(dag, strategy="async")

    assert isinstance(caught.value.cause, TaskTimeoutError)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# Dry run
|
||||
# ---------------------------------------------------------------------- #
|
||||
def test_dry_run_does_not_execute(capsys: pytest.CaptureFixture[str]) -> None:
    """``dry_run=True`` prints a plan to stdout without calling any task."""
    invoked: List[str] = []

    def fn() -> str:
        invoked.append("x")
        return "should-not-run"

    dag = px.Graph.from_specs([px.TaskSpec("a", fn)])
    outcome = px.run(dag, strategy="sequential", dry_run=True)

    # The task body never ran and the report holds no results.
    assert invoked == []
    assert len(outcome) == 0

    printed = capsys.readouterr().out
    assert "Dry run" in printed
    assert "Layer 1" in printed
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# State / resume
|
||||
# ---------------------------------------------------------------------- #
|
||||
def test_memory_backend_resume() -> None:
    """Re-running a graph with the same ``MemoryBackend`` re-executes nothing."""
    executed: List[str] = []

    def make(name: str) -> Any:
        def fn() -> str:
            executed.append(name)
            return name

        return fn

    specs = [
        px.TaskSpec("a", make("a")),
        px.TaskSpec("b", make("b"), ("a",)),
    ]
    dag = px.Graph.from_specs(specs)
    store = MemoryBackend()

    # First run executes both tasks.
    px.run(dag, strategy="sequential", state=store)
    assert executed == ["a", "b"]

    # Second run against the same backend: the execution log must not grow.
    px.run(dag, strategy="sequential", state=store)
    assert executed == ["a", "b"]
|
||||
|
||||
|
||||
def test_json_backend_persistence() -> None:
    """State written by one ``JSONBackend`` is reused by a new one on the same file."""
    with tempfile.TemporaryDirectory() as workdir:
        state_file = os.path.join(workdir, "state.json")

        def fn() -> int:
            return 7

        first = px.Graph.from_specs([px.TaskSpec("a", fn)])
        px.run(first, strategy="sequential", state=JSONBackend(state_file))

        # A fresh backend on the same file: task "a" should not run again.
        invoked: List[str] = []

        def fn2() -> int:
            invoked.append("ran")
            return 8

        second = px.Graph.from_specs([px.TaskSpec("a", fn2)])
        outcome = px.run(second, strategy="sequential", state=JSONBackend(state_file))

        assert invoked == []
        assert outcome["a"] == 7  # value stored by the first run, not fn2's 8
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# Events
|
||||
# ---------------------------------------------------------------------- #
|
||||
def test_on_event_callback() -> None:
    """``on_event`` receives events for the task, including a SUCCESS status."""
    received: List[px.TaskEvent] = []

    def fn() -> int:
        return 1

    dag = px.Graph.from_specs([px.TaskSpec("a", fn)])
    px.run(dag, strategy="sequential", on_event=received.append)

    seen_statuses = [event.status for event in received]
    assert px.TaskStatus.SUCCESS in seen_statuses
    # Every event must refer to the only task in the graph.
    assert all(event.task == "a" for event in received)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# Invalid strategy
|
||||
# ---------------------------------------------------------------------- #
|
||||
def test_invalid_strategy() -> None:
    """An unknown strategy name makes ``px.run`` raise ``ValueError``."""
    specs = [px.TaskSpec("a", lambda: None)]  # type: ignore[arg-type]
    dag = px.Graph.from_specs(specs)

    with pytest.raises(ValueError):
        px.run(dag, strategy="bogus")  # type: ignore[arg-type]
|
||||
@@ -0,0 +1,131 @@
|
||||
"""Tests for Graph construction, validation, layering and subgraphs."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
import pyflowx as px
|
||||
from pyflowx.errors import CycleError, DuplicateTaskError, MissingDependencyError
|
||||
|
||||
|
||||
def _fn() -> None:
|
||||
return None
|
||||
|
||||
|
||||
def test_from_specs_builds_graph() -> None:
    """``Graph.from_specs`` exposes names, dependencies, length and membership."""
    specs = [
        px.TaskSpec("a", _fn),
        px.TaskSpec("b", _fn, ("a",)),
        px.TaskSpec("c", _fn, ("a", "b")),
    ]
    dag = px.Graph.from_specs(specs)

    assert set(dag.names) == {"a", "b", "c"}
    assert dag.dependencies("c") == ("a", "b")
    assert len(dag) == 3
    assert "a" in dag
|
||||
|
||||
|
||||
def test_from_specs_allows_forward_references() -> None:
    """A task may be declared before the dependency it refers to."""
    # "b" is listed first although it depends on "a", which is declared later.
    specs = [
        px.TaskSpec("b", _fn, ("a",)),
        px.TaskSpec("a", _fn),
    ]
    dag = px.Graph.from_specs(specs)

    assert dag.layers() == [["a"], ["b"]]
|
||||
|
||||
|
||||
def test_duplicate_task_raises() -> None:
    """Two specs sharing a name make ``Graph.from_specs`` raise ``DuplicateTaskError``."""
    clashing = [
        px.TaskSpec("a", _fn),
        px.TaskSpec("a", _fn),
    ]
    with pytest.raises(DuplicateTaskError):
        px.Graph.from_specs(clashing)
|
||||
|
||||
|
||||
def test_missing_dependency_raises() -> None:
    """An undeclared dependency raises ``MissingDependencyError`` naming both ends."""
    orphan = px.TaskSpec("b", _fn, ("a",))

    with pytest.raises(MissingDependencyError) as caught:
        px.Graph.from_specs([orphan])

    error = caught.value
    assert error.task == "b"
    assert error.dependency == "a"
|
||||
|
||||
|
||||
def test_cycle_detection() -> None:
    """A three-task dependency loop makes ``Graph.from_specs`` raise ``CycleError``."""
    # a depends on c, b on a, c on b: a closed loop.
    looped = [
        px.TaskSpec("a", _fn, ("c",)),
        px.TaskSpec("b", _fn, ("a",)),
        px.TaskSpec("c", _fn, ("b",)),
    ]
    with pytest.raises(CycleError):
        px.Graph.from_specs(looped)
|
||||
|
||||
|
||||
def test_layers_grouping() -> None:
    """``layers()`` groups independent tasks together and orders layers by dependency."""
    specs = [
        px.TaskSpec("a", _fn),
        px.TaskSpec("b", _fn),
        px.TaskSpec("c", _fn, ("a", "b")),
        px.TaskSpec("d", _fn, ("c",)),
    ]
    dag = px.Graph.from_specs(specs)

    expected = [["a", "b"], ["c"], ["d"]]
    assert dag.layers() == expected
|
||||
|
||||
|
||||
def test_self_dependency_rejected() -> None:
    """Constructing a ``TaskSpec`` that depends on itself raises ``ValueError``."""
    own_name = "a"
    with pytest.raises(ValueError):
        px.TaskSpec(own_name, _fn, (own_name,))
|
||||
|
||||
|
||||
def test_to_mermaid() -> None:
    """``to_mermaid()`` emits a ``graph TD`` header, node labels and edges."""
    specs = [
        px.TaskSpec("a", _fn),
        px.TaskSpec("b", _fn, ("a",)),
    ]
    diagram = px.Graph.from_specs(specs).to_mermaid()

    assert diagram.startswith("graph TD")
    # Node declaration and the a -> b edge must both be present.
    assert 'a["a"]' in diagram
    assert "a --> b" in diagram
|
||||
|
||||
|
||||
def test_to_mermaid_invalid_orientation() -> None:
    """An unrecognised orientation string makes ``to_mermaid`` raise ``ValueError``."""
    dag = px.Graph.from_specs([px.TaskSpec("a", _fn)])

    with pytest.raises(ValueError):
        dag.to_mermaid("XX")
|
||||
|
||||
|
||||
def test_subgraph_by_tags() -> None:
    """``subgraph`` keeps only tasks carrying one of the requested tags."""
    specs = [
        px.TaskSpec("a", _fn, tags=("ingest",)),
        px.TaskSpec("b", _fn, ("a",), tags=("ingest",)),
        px.TaskSpec("c", _fn, ("b",), tags=("report",)),
    ]
    ingest_only = px.Graph.from_specs(specs).subgraph(["ingest"])

    # "c" is tagged "report" only, so it is excluded from the subgraph.
    assert set(ingest_only.names) == {"a", "b"}
    # Both ends of the a -> b edge are kept, so the edge is kept too.
    assert ingest_only.dependencies("b") == ("a",)
|
||||
|
||||
|
||||
def test_subgraph_by_names() -> None:
    """``subgraph_by_names`` keeps exactly the named tasks and the edges between them."""
    specs = [
        px.TaskSpec("a", _fn),
        px.TaskSpec("b", _fn, ("a",)),
        px.TaskSpec("c", _fn, ("b",)),
    ]
    selected = px.Graph.from_specs(specs).subgraph_by_names(["a", "b"])

    # "c" was not requested, so it is absent from the subgraph.
    assert set(selected.names) == {"a", "b"}
    # The a -> b edge connects two kept tasks and must still be there.
    assert selected.dependencies("b") == ("a",)
|
||||
|
||||
|
||||
def test_subgraph_by_names_unknown() -> None:
    """Asking for a task name that is not in the graph raises ``KeyError``."""
    dag = px.Graph.from_specs([px.TaskSpec("a", _fn)])

    with pytest.raises(KeyError):
        dag.subgraph_by_names(["nope"])
|
||||
|
||||
|
||||
def test_describe() -> None:
    """``describe()`` mentions one layer per dependency level of the graph."""
    specs = [
        px.TaskSpec("a", _fn),
        px.TaskSpec("b", _fn, ("a",)),
    ]
    summary = px.Graph.from_specs(specs).describe()

    # "a" and "b" sit on different levels, so both layer headings appear.
    for heading in ("Layer 1", "Layer 2"):
        assert heading in summary
|
||||
Reference in New Issue
Block a user