feat: 初始化PyFlowX轻量级DAG任务调度库

实现完整的DAG任务调度核心功能，包括： 1. 支持同步/异步/线程三种执行策略 2. 自动上下文注入，无需手动绑定任务依赖 3. 内置状态后端，支持断点续跑 4. 提供完整的测试用例与示例代码 5. 添加CI/CD配置与发布流程
2026-06-20 10:41:33 +08:00
parent 70f3c03986
commit 8b7777d936
21 changed files with 6003 additions and 3 deletions
@@ -0,0 +1,131 @@
 name: CI
 on:
  push:
    branches: [main, develop]
  pull_request:
    branches: [main, develop]
  workflow_dispatch:
 concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  # ─────────────────────────────────────────────────────────────
  # 后端：多平台 × 多 Python 版本矩阵测试
  # ─────────────────────────────────────────────────────────────
  backend-test:
    name: Backend (${{ matrix.os }} / py${{ matrix.python-version }})
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, windows-latest, macos-latest]
        python-version: ['3.13', '3.14']
        exclude:
          # macOS + py3.14 暂时跳过（部分依赖未发布 wheel）
          - os: macos-latest
            python-version: '3.14'
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: 安装 uv
        uses: astral-sh/setup-uv@v5
        with:
          version: latest
          enable-cache: true
          cache-dependency-glob: uv.lock
      - name: 设置 Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: 安装依赖
        run: uv sync --extra dev --frozen
      - name: Ruff 检查
        run: uv run ruff check backend/endo tests
      - name: Ruff 格式检查
        run: uv run ruff format --check backend/endo tests
      - name: 运行测试
        env:
          PYTHONPATH: backend
        run: uv run pytest -v --cov=endo --cov-report=xml --cov-report=term-missing
      - name: 上传覆盖率
        if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.13'
        uses: actions/upload-artifact@v4
        with:
          name: coverage-${{ matrix.os }}-py${{ matrix.python-version }}
          path: coverage.xml
          retention-days: 7
  # ─────────────────────────────────────────────────────────────
  # 前端：多平台构建验证
  # ─────────────────────────────────────────────────────────────
  frontend-build:
    name: Frontend (${{ matrix.os }} / node${{ matrix.node-version }})
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, windows-latest, macos-latest]
        node-version: [20, 22]
    defaults:
      run:
        working-directory: frontend
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: 安装 pnpm
        uses: pnpm/action-setup@v4
        with:
          version: 9
      - name: 设置 Node ${{ matrix.node-version }}
        uses: actions/setup-node@v4
        with:
          node-version: ${{ matrix.node-version }}
          cache: pnpm
          cache-dependency-path: frontend/pnpm-lock.yaml
      - name: 安装依赖
        run: pnpm install --frozen-lockfile
      - name: TypeScript 类型检查
        run: npx tsc --noEmit -p tsconfig.app.json
      - name: 构建
        run: pnpm run build
      - name: 上传构建产物
        if: matrix.os == 'ubuntu-latest' && matrix.node-version == 22
        uses: actions/upload-artifact@v4
        with:
          name: frontend-dist
          path: frontend/dist
          retention-days: 7
  # ─────────────────────────────────────────────────────────────
  # 聚合：所有测试通过后才标记完成
  # ─────────────────────────────────────────────────────────────
  ci-pass:
    name: CI Pass
    runs-on: ubuntu-latest
    needs: [backend-test, frontend-build]
    if: always()
    steps:
      - name: 检查依赖任务结果
        if: ${{ needs.backend-test.result != 'success' || needs.frontend-build.result != 'success' }}
        run: |
          echo "backend-test: ${{ needs.backend-test.result }}"
          echo "frontend-build: ${{ needs.frontend-build.result }}"
          exit 1
      - name: 全部通过
        run: echo "✅ 所有 CI 检查通过"
@@ -0,0 +1,253 @@
 name: Release
 on:
  push:
    tags:
      - 'v*.*.*'
  workflow_dispatch:
    inputs:
      tag:
        description: '发布版本号（如 v0.1.0）'
        required: true
        type: string
 permissions:
  contents: write
  # Trusted Publishing (OIDC) 上传 PyPI 所需
  id-token: write
 jobs:
  # ─────────────────────────────────────────────────────────────
  # 预检：发布前必须通过 CI
  # ─────────────────────────────────────────────────────────────
  pre-check:
    name: Pre-release Check
    runs-on: ubuntu-latest
    outputs:
      version: ${{ steps.meta.outputs.version }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: 解析版本号
        id: meta
        run: |
          if [ -n "${{ inputs.tag }}" ]; then
            TAG="${{ inputs.tag }}"
          else
            TAG="${GITHUB_REF#refs/tags/}"
          fi
          # 去除前缀 v
          VERSION="${TAG#v}"
          echo "tag=$TAG" >> $GITHUB_OUTPUT
          echo "version=$VERSION" >> $GITHUB_OUTPUT
          echo "发布版本: $VERSION (tag: $TAG)"
      - name: 校验版本号格式
        run: |
          VERSION="${{ steps.meta.outputs.version }}"
          if ! echo "$VERSION" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+(-[a-zA-Z0-9.]+)?$'; then
            echo "❌ 版本号格式错误: $VERSION（应为 x.y.z 或 x.y.z-rc.n）"
            exit 1
          fi
      - name: 校验 pyproject.toml 版本一致
        run: |
          # 精确提取 [project] 段的 version 字段（避免匹配到依赖的 version）
          PY_VERSION=$(awk '/^\[project\]/{f=1} f&&/^version[[:space:]]*=/{gsub(/[" ]/,"",$3); print $3; exit}' pyproject.toml)
          echo "pyproject.toml version: $PY_VERSION"
          if [ "$PY_VERSION" != "${{ steps.meta.outputs.version }}" ]; then
            echo "❌ pyproject.toml 版本($PY_VERSION) 与 tag 版本(${{ steps.meta.outputs.version }}) 不一致"
            echo "请先更新 pyproject.toml 中的 version 字段"
            exit 1
          fi
  # ─────────────────────────────────────────────────────────────
  # 构建：后端 wheel（纯 Python，单平台即可）+ 前端 dist
  # ─────────────────────────────────────────────────────────────
  build:
    name: Build Artifacts
    needs: pre-check
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: 安装 uv
        uses: astral-sh/setup-uv@v5
        with:
          version: latest
          enable-cache: true
      - name: 设置 Python 3.13
        uses: actions/setup-python@v5
        with:
          python-version: '3.13'
      - name: 安装 pnpm（前端构建依赖）
        uses: pnpm/action-setup@v4
        with:
          version: 9
      - name: 设置 Node 22（前端构建）
        uses: actions/setup-node@v4
        with:
          node-version: 22
          cache: pnpm
          cache-dependency-path: frontend/pnpm-lock.yaml
      - name: 安装前端依赖（缓存）
        working-directory: frontend
        run: pnpm install --frozen-lockfile
      - name: 构建后端 wheel + sdist（自动触发前端构建）
        run: uv build
      - name: 上传后端产物
        uses: actions/upload-artifact@v4
        with:
          name: backend-dist
          path: dist/*
          retention-days: 30
  build-frontend:
    name: Build Frontend
    needs: pre-check
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: 安装 pnpm
        uses: pnpm/action-setup@v4
        with:
          version: 9
      - name: 设置 Node 22
        uses: actions/setup-node@v4
        with:
          node-version: 22
          cache: pnpm
          cache-dependency-path: frontend/pnpm-lock.yaml
      - name: 安装依赖
        working-directory: frontend
        run: pnpm install --frozen-lockfile
      - name: 构建
        working-directory: frontend
        run: pnpm run build
      - name: 打包前端 dist
        run: |
          cd frontend
          zip -r ../endo-frontend-${{ needs.pre-check.outputs.version }}.zip dist
      - name: 上传前端产物
        uses: actions/upload-artifact@v4
        with:
          name: frontend-dist-release
          path: endo-frontend-*.zip
          retention-days: 30
  # ─────────────────────────────────────────────────────────────
  # 发布：上传到 PyPI（Trusted Publishing / OIDC）
  # ─────────────────────────────────────────────────────────────
  publish-pypi:
    name: Publish to PyPI
    needs: [pre-check, build]
    runs-on: ubuntu-latest
    environment:
      name: pypi
      url: https://pypi.org/project/endo/${{ needs.pre-check.outputs.version }}
    permissions:
      id-token: write
    steps:
      - name: 下载后端构建产物
        uses: actions/download-artifact@v4
        with:
          name: backend-dist
          path: dist
      - name: 校验产物
        run: |
          echo "待上传产物："
          ls -la dist/
          if [ -z "$(ls -A dist/*.whl dist/*.tar.gz 2>/dev/null)" ]; then
            echo "❌ 未找到 wheel 或 sdist 产物"
            exit 1
          fi
      - name: 上传到 PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          attestations: true
  # ─────────────────────────────────────────────────────────────
  # 发布：创建 GitHub Release
  # ─────────────────────────────────────────────────────────────
  release:
    name: Publish Release
    needs: [pre-check, build, build-frontend, publish-pypi]
    runs-on: ubuntu-latest
    permissions:
      contents: write
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: 下载所有构建产物
        uses: actions/download-artifact@v4
        with:
          path: release-assets
      - name: 整理发布产物
        run: |
          mkdir -p assets
          find release-assets -name "*.whl" -exec cp {} assets/ \;
          find release-assets -name "*.tar.gz" -exec cp {} assets/ \;
          find release-assets -name "*.zip" -exec cp {} assets/ \;
          ls -la assets/
      - name: 生成 Release Notes
        id: notes
        run: |
          {
            echo "## endo ${{ needs.pre-check.outputs.version }}"
            echo ""
            echo "### 下载"
            echo ""
            echo "- **后端 wheel**: \`endo-${{ needs.pre-check.outputs.version }}-py3-none-any.whl\`"
            echo "- **源码包**: \`endo-${{ needs.pre-check.outputs.version }}.tar.gz\`"
            echo "- **前端 dist**: \`endo-frontend-${{ needs.pre-check.outputs.version }}.zip\`"
            echo ""
            echo "### 安装"
            echo ""
            echo '```bash'
            echo "# 后端"
            echo "pip install endo-${{ needs.pre-check.outputs.version }}-py3-none-any.whl"
            echo ""
            echo "# 前端"
            echo "unzip endo-frontend-${{ needs.pre-check.outputs.version }}.zip -d frontend-dist"
            echo '```'
            echo ""
            echo "### 完整变更日志"
          } > RELEASE_NOTES.md
          {
            echo "content<<EOF"
            cat RELEASE_NOTES.md
            echo "EOF"
          } >> $GITHUB_OUTPUT
      - name: 创建 GitHub Release
        uses: softprops/action-gh-release@v2
        with:
          tag_name: ${{ needs.pre-check.outputs.tag }}
          name: endo ${{ needs.pre-check.outputs.version }}
          body: ${{ steps.notes.outputs.content }}
          files: assets/*
          draft: false
          prerelease: ${{ contains(needs.pre-check.outputs.version, '-') }}
          generate_release_notes: true
@@ -8,3 +8,4 @@ wheels/
 # Virtual environments
 .venv
 .coverage
@@ -0,0 +1,58 @@
 """Example 3: async aggregation with static args and Context injection.
 Shows:
  * async task functions executed with strategy="async".
  * static positional args (TaskSpec.args) for parameterised tasks.
  * Context annotation to receive the full upstream result mapping.
  * on_event callback for real-time progress.
 """
 from __future__ import annotations
 import asyncio
 from typing import Any, Dict, List
 import pyflowx as px
 async def fetch_user(uid: int) -> dict:
    await asyncio.sleep(0.2)
    return {"id": uid, "name": f"User{uid}"}
 async def fetch_posts(uid: int) -> List[int]:
    await asyncio.sleep(0.2)
    return [uid, uid + 1]
 # Context annotation → receives the full mapping of upstream results.
 def aggregate(ctx: px.Context) -> Dict[str, Any]:
    return dict(ctx)
 def main() -> None:
    graph = px.Graph.from_specs(
        [
            # Static positional args parameterise the same function twice.
            px.TaskSpec("fetch_user", fetch_user, args=(1,)),
            px.TaskSpec("fetch_posts", fetch_posts, args=(1,)),
            px.TaskSpec("aggregate", aggregate, ("fetch_user", "fetch_posts")),
        ]
    )
    print("=== Dry run ===")
    px.run(graph, strategy="async", dry_run=True)
    events: List[px.TaskEvent] = []
    print("\n=== Async execution ===")
    report = px.run(graph, strategy="async", on_event=events.append)
    for ev in events:
        print(f"  event: {ev.task} -> {ev.status.value}")
    print(f"\naggregate = {report['aggregate']}")
    print(report.describe())
 if __name__ == "__main__":
    main()
@@ -0,0 +1,81 @@
 """Example 1: ETL pipeline (sequential strategy).
 Demonstrates the core PyFlowX workflow:
  * Define tasks as plain functions.
  * Declare the DAG with a list of TaskSpec.
  * Parameter names == dependency names → automatic context injection,
    no wrappers needed (contrast with flowweaver's get_task_result boilerplate).
  * dry_run to preview, then execute and read typed results from RunReport.
 """
 from __future__ import annotations
 from typing import List
 import pyflowx as px
 # --- task functions: pure, testable, no framework coupling ------------- #
 def extract_customers() -> List[dict]:
    return [
        {"id": "C001", "name": "Alice"},
        {"id": "C002", "name": "Bob"},
    ]
 def extract_orders() -> List[dict]:
    return [
        {"id": "O001", "customer_id": "C001", "amount": 150.0},
        {"id": "O002", "customer_id": "C002", "amount": 200.5},
    ]
 # Parameter names match dependency names → automatic injection.
 def transform(
    extract_customers: List[dict],
    extract_orders: List[dict],
 ) -> List[dict]:
    cmap = {c["id"]: c for c in extract_customers}
    return [
        {**o, "customer_name": cmap[o["customer_id"]]["name"]}
        for o in extract_orders
        if o["customer_id"] in cmap
    ]
 def load(transform: List[dict]) -> int:
    print(f"  loaded {len(transform)} records")
    return len(transform)
 def main() -> None:
    graph = px.Graph.from_specs(
        [
            px.TaskSpec("extract_customers", extract_customers, tags=("extract",)),
            px.TaskSpec("extract_orders", extract_orders, tags=("extract",)),
            px.TaskSpec(
                "transform",
                transform,
                ("extract_customers", "extract_orders"),
                tags=("transform",),
            ),
            px.TaskSpec("load", load, ("transform",), retries=1, tags=("load",)),
        ]
    )
    print("=== Execution plan ===")
    print(graph.describe())
    print("\n=== Dry run (no execution) ===")
    px.run(graph, strategy="sequential", dry_run=True)
    print("\n=== Sequential execution ===")
    report = px.run(graph, strategy="sequential")
    print(report.describe())
    print(f"\nload result = {report['load']}")
    print(f"summary = {report.summary()}")
 if __name__ == "__main__":
    main()
@@ -0,0 +1,59 @@
 """Example 2: parallel execution (thread strategy).
 Same DAG run with sequential vs. thread strategy to show layer-internal
 parallelism. Tasks within a layer run concurrently; layers are barriers.
 Layer 1: [fetch_a, fetch_b]   (parallel)
 Layer 2: [merge]              (waits for both)
 """
 from __future__ import annotations
 import time
 import pyflowx as px
 def fetch_a() -> str:
    time.sleep(0.5)
    return "a"
 def fetch_b() -> str:
    time.sleep(0.5)
    return "b"
 def merge(fetch_a: str, fetch_b: str) -> str:
    return fetch_a + fetch_b
 def main() -> None:
    graph = px.Graph.from_specs(
        [
            px.TaskSpec("fetch_a", fetch_a),
            px.TaskSpec("fetch_b", fetch_b),
            px.TaskSpec("merge", merge, ("fetch_a", "fetch_b")),
        ]
    )
    print("=== Mermaid diagram ===")
    print(graph.to_mermaid("LR"))
    print("\n=== Sequential (expect ~1.0s) ===")
    start = time.time()
    report_seq = px.run(graph, strategy="sequential")
    t_seq = time.time() - start
    print(f"  result={report_seq['merge']}  time={t_seq:.2f}s")
    print("\n=== Threaded (expect ~0.5s) ===")
    start = time.time()
    report_thr = px.run(graph, strategy="thread", max_workers=2)
    t_thr = time.time() - start
    print(f"  result={report_thr['merge']}  time={t_thr:.2f}s")
    print(f"\nspeedup = {t_seq / t_thr:.2f}x")
 if __name__ == "__main__":
    main()
@@ -1,7 +1,72 @@
 [project]
 authors = [{ name = "pyflowx" }]
 classifiers = [
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.8",
    "Programming Language :: Python :: 3.9",
    "Topic :: Software Development :: Libraries :: Application Frameworks",
 ]
 description = "Lightweight, type-safe DAG task scheduler with multi-strategy execution."
 keywords = ["async", "dag", "scheduler", "task", "workflow"]
 license = { text = "MIT" }
 name = "pyflowx"
 version = "0.1.0"
 description = "Add your description here"
 readme = "README.md"
 requires-python = ">=3.8"
-dependencies = []
+version = "0.1.0"
 # graphlib_backport only needed on Python 3.8 (stdlib graphlib exists in 3.9+)
 dependencies = ["graphlib_backport >= 1.0.0; python_version < '3.9'"]
 [project.optional-dependencies]
 dev = [
    "hatch>=1.14.2",
    "httpx>=0.28.0",
    "mypy >= 1.0",
    "prek>=0.4.5",
    "pytest-asyncio>=0.24.0",
    "pytest-cov>=5.0.0",
    "pytest-html>=4.1.1",
    "pytest-mock>=3.14.0",
    "pytest-xdist>=3.6.1",
    "pytest>=8.0.0",
    "ruff>=0.8.0",
    "tox-uv>=1.13.1",
    "tox>=4.25.0",
 ]
 [build-system]
 build-backend = "hatchling.build"
 requires      = ["hatchling"]
 [tool.hatch.build.targets.wheel]
 packages = ["src/pyflowx"]
 [tool.hatch.build.targets.wheel.force-include]
 "src/pyflowx/py.typed" = "pyflowx/py.typed"
 [tool.mypy]
 # mypy 2.x requires a >=3.10 target. We check against 3.10 syntax; the
 # runtime stays 3.8-compatible via `from __future__ import annotations`
 # (all annotations are strings at runtime) and the graphlib_backport
 # conditional dependency for topological sorting.
 check_untyped_defs       = true
 disallow_incomplete_defs = true
 disallow_untyped_defs    = true
 files                    = ["src/pyflowx"]
 ignore_missing_imports   = false
 python_version           = "3.8"
 strict                   = true
 warn_return_any          = true
 warn_unused_configs      = true
 [tool.uv.sources]
 pyflowx = { workspace = true }
 [[tool.uv.index]]
 default = true
 url     = "https://mirrors.aliyun.com/pypi/simple/"
 [dependency-groups]
 dev = ["pyflowx[dev]"]
@@ -0,0 +1,75 @@
 """PyFlowX — lightweight, type-safe DAG task scheduler.
 Public API
 ----------
 * :class:`TaskSpec` — immutable task descriptor (the only thing you configure).
 * :class:`Graph` — DAG built from a list of specs; validates, layers, visualises.
 * :func:`run` — execute a graph with ``sequential`` / ``thread`` / ``async``.
 * :class:`RunReport` — typed, queryable result of a run.
 * :class:`Context` — annotation marker for whole-context injection.
 * State backends: :class:`StateBackend`, :class:`MemoryBackend`, :class:`JSONBackend`.
 Quick start
 -----------
    import pyflowx as px
    def extract() -> list[int]: return [1, 2, 3]
    def double(extract: list[int]) -> list[int]: return [x * 2 for x in extract]
    graph = px.Graph.from_specs([
        px.TaskSpec("extract", extract),
        px.TaskSpec("double", double, ("extract",)),
    ])
    report = px.run(graph, strategy="sequential")
    print(report["double"])  # [2, 4, 6]
 """
 from __future__ import annotations
 from .context import Context, build_call_args, describe_injection
 from .errors import (
    CycleError,
    DuplicateTaskError,
    InjectionError,
    MissingDependencyError,
    PyFlowXError,
    StorageError,
    TaskFailedError,
    TaskTimeoutError,
 )
 from .executors import run
 from .graph import Graph
 from .report import RunReport
 from .storage import JSONBackend, MemoryBackend, StateBackend
 from .task import TaskEvent, TaskResult, TaskSpec, TaskStatus
 __version__ = "0.1.0"
 __all__ = [
    # core types
    "TaskSpec",
    "TaskStatus",
    "TaskResult",
    "TaskEvent",
    "Context",
    "Graph",
    "RunReport",
    # execution
    "run",
    # state backends
    "StateBackend",
    "MemoryBackend",
    "JSONBackend",
    # errors
    "PyFlowXError",
    "DuplicateTaskError",
    "MissingDependencyError",
    "CycleError",
    "TaskFailedError",
    "TaskTimeoutError",
    "InjectionError",
    "StorageError",
    # helpers (advanced)
    "build_call_args",
    "describe_injection",
 ]
@@ -0,0 +1,203 @@
 """Context injection: turn upstream results into function arguments.
 This is the mechanism that lets users write plain functions whose
 parameter names *are* the dependency declarations, removing the boiler-
 plate wrappers that plague other DAG libraries (e.g. ``def wrapper():
 return fn(workflow.get_task_result('x'))``).
 Injection rules (evaluated in order)
 -----------------------------------
 1. A parameter whose **annotation is** :class:`Context` receives the full
   result mapping. Useful for tasks that need to iterate over all inputs.
 2. A parameter whose **name matches a dependency** receives that
   dependency's result.
 3. A ``**kwargs`` parameter receives *all* dependency results as a dict.
 4. ``TaskSpec.args`` / ``TaskSpec.kwargs`` supply static values for
   parameters that are *not* dependencies.
 If a parameter cannot be resolved and has no default, an
 :class:`~pyflowx.errors.InjectionError` is raised with a precise message.
 """
 from __future__ import annotations
 import inspect
 from typing import Any, Dict, List, Mapping, Set, Tuple
 from .errors import InjectionError
 from .task import Context, TaskSpec
 __all__ = ["Context", "build_call_args", "describe_injection"]
 def _is_context_annotation(annotation: Any) -> bool:
    """True when a parameter annotation is (or refers to) ``Context``.
    Handles three forms:
    * the ``Context`` alias object itself;
    * a typing alias whose ``__name__``/``_name`` is ``Context`` or ``Mapping``;
    * a *string* annotation (``from __future__ import annotations`` makes all
      annotations strings at runtime) such as ``"Context"`` or ``"px.Context"``.
    """
    if annotation is Context:
        return True
    # String annotation from `from __future__ import annotations`.
    if isinstance(annotation, str):
        # Match "Context", "px.Context", "pyflowx.Context", etc.
        return annotation == "Context" or annotation.endswith(".Context")
    # Match by qualified name to support ``from pyflowx import Context``
    # re-exports.
    name = getattr(annotation, "__name__", None) or getattr(annotation, "_name", None)
    if name in ("Context", "Mapping"):
        return True
    return False
 def build_call_args(
    spec: TaskSpec[object],
    context: Mapping[str, Any],
 ) -> Tuple[Tuple[Any, ...], Dict[str, Any]]:
    """Resolve the ``(args, kwargs)`` to call ``spec.fn`` with.
    Parameters
    ----------
    spec:
        The task spec, providing ``fn``, ``depends_on``, ``args``, ``kwargs``.
    context:
        Mapping of dependency-name -> result value. Only the task's own
        ``depends_on`` entries are guaranteed present; other tasks' results
        are excluded to keep injection deterministic.
    Returns
    -------
    (args, kwargs)
        Ready to splat into ``spec.fn(*args, **kwargs)``.
    Raises
    ------
    InjectionError
        If a required parameter cannot be satisfied, or if static
        ``kwargs`` collide with an injected dependency name.
    """
    sig = inspect.signature(spec.fn)
    params = sig.parameters
    # Detect special parameter kinds.
    var_keyword = next(
        (p for p in params.values() if p.kind == inspect.Parameter.VAR_KEYWORD),
        None,
    )
    # The subset of context relevant to this task.
    dep_context: Dict[str, Any] = {
        name: context[name] for name in spec.depends_on if name in context
    }
    # Detect collisions between static kwargs and dependency names.
    collisions = set(spec.kwargs) & set(dep_context)
    if collisions:
        raise InjectionError(
            spec.name,
            f"static kwargs {sorted(collisions)} collide with dependency names; "
            "rename the static kwarg or the dependency.",
        )
    injected_kwargs: Dict[str, Any] = {}
    leftover_dep_results: Dict[str, Any] = dict(dep_context)
    # Positional parameters consumed by spec.args. We track which param
    # names are filled positionally so they are skipped during name-based
    # injection (dependency / Context / static kwargs).
    positional_params: List[str] = []
    positional_kinds = (
        inspect.Parameter.POSITIONAL_ONLY,
        inspect.Parameter.POSITIONAL_OR_KEYWORD,
    )
    for pname, param in params.items():
        if param.kind in positional_kinds:
            positional_params.append(pname)
    # The first len(spec.args) positional params are filled by spec.args.
    args_filled: Set[str] = set(positional_params[: len(spec.args)])
    for pname, param in params.items():
        # Skip params already filled by positional spec.args.
        if pname in args_filled:
            continue
        # Rule 1: annotated as Context -> full mapping.
        if _is_context_annotation(param.annotation):
            injected_kwargs[pname] = dep_context
            continue
        # Rule 2: name matches a dependency.
        if pname in dep_context:
            injected_kwargs[pname] = dep_context[pname]
            leftover_dep_results.pop(pname, None)
            continue
        # Rule 3: handled after the loop via **kwargs.
        # Rule 4: static kwargs fill the rest.
        if pname in spec.kwargs:
            injected_kwargs[pname] = spec.kwargs[pname]
            continue
        # No source for this parameter: must have a default, else error.
        if param.default is inspect.Parameter.empty and param.kind not in (
            inspect.Parameter.VAR_POSITIONAL,
            inspect.Parameter.VAR_KEYWORD,
        ):
            raise InjectionError(
                spec.name,
                f"parameter {pname!r} has no dependency, static value, or default.",
            )
    # Rule 3: **kwargs swallows remaining dependency results.
    if var_keyword is not None and leftover_dep_results:
        # Merge static kwargs first, then dependency results (static wins
        # on collision — but we already rejected collisions above).
        merged = dict(spec.kwargs)
        merged.update(injected_kwargs)
        merged.update(leftover_dep_results)
        injected_kwargs = merged
    return tuple(spec.args), injected_kwargs
 def describe_injection(spec: TaskSpec[object]) -> str:
    """Human-readable description of how a task's args will be injected.
    Used by ``dry_run`` to show the execution plan without executing it.
    """
    sig = inspect.signature(spec.fn)
    # Determine which positional params are filled by spec.args.
    positional_params = [
        p
        for p, param in sig.parameters.items()
        if param.kind
        in (
            inspect.Parameter.POSITIONAL_ONLY,
            inspect.Parameter.POSITIONAL_OR_KEYWORD,
        )
    ]
    args_filled = set(positional_params[: len(spec.args)])
    parts = []
    for pname, param in sig.parameters.items():
        if pname in args_filled:
            idx = positional_params.index(pname)
            parts.append(f"{pname}={spec.args[idx]!r}")
        elif _is_context_annotation(param.annotation):
            parts.append(f"{pname}=<Context>")
        elif pname in spec.depends_on:
            parts.append(f"{pname}=<result:{pname}>")
        elif pname in spec.kwargs:
            parts.append(f"{pname}={spec.kwargs[pname]!r}")
        elif param.default is not inspect.Parameter.empty:
            parts.append(f"{pname}=<default>")
        elif param.kind == inspect.Parameter.VAR_KEYWORD:
            parts.append("**kwargs=<all-deps>")
        elif param.kind == inspect.Parameter.VAR_POSITIONAL:
            parts.append("*args")
        else:
            parts.append(f"{pname}=<UNRESOLVED>")
    return f"{spec.name}({', '.join(parts)})"
@@ -0,0 +1,93 @@
 """PyFlowX error hierarchy.
 All errors are concrete subclasses of :class:`PyFlowXError` so callers can
 catch the entire family with a single ``except`` clause, while still being
 able to discriminate by type for fine-grained handling.
 """
 from __future__ import annotations
 from typing import Any, Iterable, Optional
 class PyFlowXError(Exception):
    """Base class for every PyFlowX error."""
 class DuplicateTaskError(PyFlowXError):
    """Raised when a task name is registered more than once."""
    def __init__(self, name: str) -> None:
        super().__init__(f"Task '{name}' is already registered in the graph.")
        self.name = name
 class MissingDependencyError(PyFlowXError):
    """Raised when a task depends on a name that is not in the graph."""
    def __init__(self, task: str, dependency: str) -> None:
        super().__init__(
            f"Task '{task}' depends on unknown task '{dependency}'. "
            "Add the dependency before (or together with) this task."
        )
        self.task = task
        self.dependency = dependency
 class CycleError(PyFlowXError):
    """Raised when the dependency graph contains a cycle."""
    def __init__(self, cycle: Iterable[str]) -> None:
        cycle_list = list(cycle)
        chain = " -> ".join(cycle_list + cycle_list[:1])
        super().__init__(f"The dependency graph contains a cycle: {chain}")
        self.cycle = cycle_list
 class TaskFailedError(PyFlowXError):
    """Raised when a task fails after exhausting all retries.
    The original exception is preserved on :attr:`__cause__` and also exposed
    via :attr:`cause` for convenient access in user code.
    """
    def __init__(
        self,
        task: str,
        cause: BaseException,
        attempts: int,
        layer: Optional[int] = None,
    ) -> None:
        location = f" (layer {layer})" if layer is not None else ""
        super().__init__(
            f"Task '{task}' failed after {attempts} attempt(s){location}: {cause}"
        )
        self.task = task
        self.cause = cause
        self.attempts = attempts
        self.layer = layer
 class TaskTimeoutError(PyFlowXError):
    """Raised when a task exceeds its configured timeout."""
    def __init__(self, task: str, timeout: float) -> None:
        super().__init__(f"Task '{task}' timed out after {timeout:.3f}s.")
        self.task = task
        self.timeout = timeout
 class InjectionError(PyFlowXError):
    """Raised when context injection cannot satisfy a task signature."""
    def __init__(self, task: str, detail: str) -> None:
        super().__init__(f"Cannot inject context for task '{task}': {detail}")
        self.task = task
 class StorageError(PyFlowXError):
    """Raised by state backends on persistence failures."""
    def __init__(self, detail: str, cause: Optional[BaseException] = None) -> None:
        super().__init__(f"State storage error: {detail}")
        self.cause: Any = cause
@@ -0,0 +1,425 @@
 """Executors and the public :func:`run` entry point.
 Three execution strategies share a common layer-by-layer driver:
 * ``sequential`` — deterministic, one task at a time. Best for debugging.
 * ``thread``     — layer-internal concurrency via a thread pool. Best for
                   I/O-bound sync tasks.
 * ``async``      — layer-internal concurrency via ``asyncio.gather``.
                   Sync tasks are offloaded to a thread pool; async tasks
                   run on the event loop. Best for I/O-bound async tasks.
 All three honour ``retries``, ``timeout``, context injection, state
 backends (resume), and emit :class:`~pyflowx.task.TaskEvent` for observers.
 """
 from __future__ import annotations
 import asyncio
 import concurrent.futures
 import inspect
 import logging
 from datetime import datetime
 from typing import Any, Awaitable, Callable, Dict, List, Mapping, Optional, cast
 from .context import build_call_args, describe_injection
 from .errors import TaskFailedError, TaskTimeoutError
 from .graph import Graph
 from .report import RunReport
 from .storage import StateBackend, resolve_backend
 from .task import TaskEvent, TaskResult, TaskSpec, TaskStatus
 logger = logging.getLogger("pyflowx")
 # Observer callback type.
 EventCallback = Callable[[TaskEvent], None]
 # Strategy selector literal.
 Strategy = str  # "sequential" | "thread" | "async"
 def _is_async_fn(spec: TaskSpec[object]) -> bool:
    """True if ``spec.fn`` is a coroutine function."""
    return inspect.iscoroutinefunction(spec.fn)
 def _emit(
    on_event: Optional[EventCallback],
    result: TaskResult[object],
 ) -> None:
    """Fire an observer event if a callback is registered."""
    if on_event is None:
        return
    on_event(
        TaskEvent(
            task=result.spec.name,
            status=result.status,
            attempts=result.attempts,
            error=repr(result.error) if result.error else None,
            duration=result.duration,
        )
    )
 def _run_sync_with_retry(
    spec: TaskSpec[object],
    context: Mapping[str, Any],
    layer_idx: Optional[int],
 ) -> TaskResult[object]:
    """Execute a sync task with retries; return a populated TaskResult."""
    result: TaskResult[object] = TaskResult(spec=spec)
    result.started_at = datetime.now()
    max_attempts = spec.retries + 1
    args, kwargs = build_call_args(spec, context)
    while result.attempts < max_attempts:
        result.attempts += 1
        try:
            result.value = spec.fn(*args, **kwargs)
            result.status = TaskStatus.SUCCESS
            result.finished_at = datetime.now()
            return result
        except Exception as exc:  # noqa: BLE001 - user code may raise anything
            result.error = exc
            if result.attempts >= max_attempts:
                break
            logger.warning(
                "task %r failed (attempt %d/%d): %r; retrying",
                spec.name,
                result.attempts,
                max_attempts,
                exc,
            )
    result.status = TaskStatus.FAILED
    result.finished_at = datetime.now()
    raise TaskFailedError(
        task=spec.name,
        cause=result.error if result.error is not None else RuntimeError("unknown"),
        attempts=result.attempts,
        layer=layer_idx,
    )
 async def _run_async_with_retry(
    spec: TaskSpec[object],
    context: Mapping[str, Any],
    layer_idx: Optional[int],
 ) -> TaskResult[object]:
    """Execute a task (sync or async) on the event loop with retries."""
    result: TaskResult[object] = TaskResult(spec=spec)
    result.started_at = datetime.now()
    max_attempts = spec.retries + 1
    args, kwargs = build_call_args(spec, context)
    loop = asyncio.get_event_loop()
    while result.attempts < max_attempts:
        result.attempts += 1
        try:
            if _is_async_fn(spec):
                coro = cast(Awaitable[Any], spec.fn(*args, **kwargs))
                if spec.timeout is not None:
                    result.value = await asyncio.wait_for(coro, timeout=spec.timeout)
                else:
                    result.value = await coro
            else:
                # Offload sync work to a thread so the event loop stays alive.
                fn_call: Callable[[], Any] = lambda: spec.fn(*args, **kwargs)
                if spec.timeout is not None:
                    result.value = await asyncio.wait_for(
                        loop.run_in_executor(None, fn_call), timeout=spec.timeout
                    )
                else:
                    result.value = await loop.run_in_executor(None, fn_call)
            result.status = TaskStatus.SUCCESS
            result.finished_at = datetime.now()
            return result
        except asyncio.TimeoutError:
            result.error = TaskTimeoutError(spec.name, spec.timeout or 0.0)
            if result.attempts >= max_attempts:
                break
            logger.warning(
                "task %r timed out (attempt %d/%d); retrying",
                spec.name,
                result.attempts,
                max_attempts,
            )
        except Exception as exc:  # noqa: BLE001
            result.error = exc
            if result.attempts >= max_attempts:
                break
            logger.warning(
                "task %r failed (attempt %d/%d): %r; retrying",
                spec.name,
                result.attempts,
                max_attempts,
                exc,
            )
    result.status = TaskStatus.FAILED
    result.finished_at = datetime.now()
    raise TaskFailedError(
        task=spec.name,
        cause=result.error if result.error is not None else RuntimeError("unknown"),
        attempts=result.attempts,
        layer=layer_idx,
    )
 # ---------------------------------------------------------------------- #
 # Layer driver
 # ---------------------------------------------------------------------- #
 def _build_context(
    spec: TaskSpec[object],
    global_context: Mapping[str, Any],
 ) -> Mapping[str, Any]:
    """Restrict the global context to this task's dependencies."""
    return {
        dep: global_context[dep] for dep in spec.depends_on if dep in global_context
    }
 def _execute_layer_sequential(
    layer: List[str],
    graph: Graph,
    context: Dict[str, Any],
    report: RunReport,
    backend: StateBackend,
    layer_idx: int,
    on_event: Optional[EventCallback],
 ) -> None:
    """Run a layer's tasks one by one."""
    for name in layer:
        spec = graph.spec(name)
        if backend.has(name):
            cached = backend.get(name)
            context[name] = cached
            result = TaskResult(spec=spec, status=TaskStatus.SKIPPED, value=cached)
            report.results[name] = result
            _emit(on_event, result)
            logger.info("task %r skipped (cached)", name)
            continue
        result = _run_sync_with_retry(spec, _build_context(spec, context), layer_idx)
        context[name] = result.value
        backend.save(name, result.value)
        report.results[name] = result
        _emit(on_event, result)
 def _execute_layer_threaded(
    layer: List[str],
    graph: Graph,
    context: Dict[str, Any],
    report: RunReport,
    backend: StateBackend,
    layer_idx: int,
    on_event: Optional[EventCallback],
    max_workers: int,
 ) -> None:
    """Run a layer's tasks concurrently in a thread pool."""
    # First, satisfy cached tasks synchronously.
    to_run: List[str] = []
    for name in layer:
        if backend.has(name):
            cached = backend.get(name)
            context[name] = cached
            result = TaskResult(
                spec=graph.spec(name), status=TaskStatus.SKIPPED, value=cached
            )
            report.results[name] = result
            _emit(on_event, result)
        else:
            to_run.append(name)
    if not to_run:
        return
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as pool:
        future_to_name: Dict[concurrent.futures.Future[TaskResult[object]], str] = {}
        for name in to_run:
            spec = graph.spec(name)
            # Snapshot the context for this task to avoid races.
            task_ctx = _build_context(spec, context)
            fut = pool.submit(_run_sync_with_retry, spec, task_ctx, layer_idx)
            future_to_name[fut] = name
        for fut in concurrent.futures.as_completed(future_to_name):
            name = future_to_name[fut]
            result = fut.result()  # raises TaskFailedError on failure
            context[name] = result.value
            backend.save(name, result.value)
            report.results[name] = result
            _emit(on_event, result)
 async def _execute_layer_async(
    layer: List[str],
    graph: Graph,
    context: Dict[str, Any],
    report: RunReport,
    backend: StateBackend,
    layer_idx: int,
    on_event: Optional[EventCallback],
 ) -> None:
    """Run a layer's tasks concurrently on the event loop."""
    to_run: List[str] = []
    for name in layer:
        if backend.has(name):
            cached = backend.get(name)
            context[name] = cached
            result = TaskResult(
                spec=graph.spec(name), status=TaskStatus.SKIPPED, value=cached
            )
            report.results[name] = result
            _emit(on_event, result)
        else:
            to_run.append(name)
    if not to_run:
        return
    coros = []
    for name in to_run:
        spec = graph.spec(name)
        task_ctx = _build_context(spec, context)
        coros.append(_run_async_with_retry(spec, task_ctx, layer_idx))
    results = await asyncio.gather(*coros)
    for name, result in zip(to_run, results):
        context[name] = result.value
        backend.save(name, result.value)
        report.results[name] = result
        _emit(on_event, result)
 # ---------------------------------------------------------------------- #
 # Public API
 # ---------------------------------------------------------------------- #
 def run(
    graph: Graph,
    strategy: Strategy = "sequential",
    *,
    max_workers: Optional[int] = None,
    dry_run: bool = False,
    on_event: Optional[EventCallback] = None,
    state: Optional[StateBackend] = None,
 ) -> RunReport:
    """Execute a graph and return a :class:`RunReport`.
    Parameters
    ----------
    graph:
        The validated :class:`Graph` to execute.
    strategy:
        ``"sequential"`` (default), ``"thread"``, or ``"async"``.
    max_workers:
        Thread-pool size for ``"thread"``. Defaults to ``min(32, len(layer))``.
    dry_run:
        If ``True``, print the execution plan (layers + injection) and
        return an empty report without executing anything.
    on_event:
        Optional callback invoked on every status transition.
    state:
        Optional :class:`StateBackend` for resumable runs. Defaults to an
        in-memory backend (no persistence across processes).
    Raises
    ------
    ValueError
        If ``strategy`` is not recognised.
    TaskFailedError
        If any task fails after exhausting retries. The run aborts at the
        failing layer; tasks in later layers are not attempted.
    """
    if strategy not in ("sequential", "thread", "async"):
        raise ValueError(
            f"unknown strategy {strategy!r}; expected 'sequential', 'thread', or 'async'."
        )
    graph.validate()
    layers = graph.layers()
    if dry_run:
        _print_dry_run(graph, layers)
        return RunReport(success=True)
    backend = resolve_backend(state)
    report = RunReport()
    context: Dict[str, Any] = {}
    try:
        if strategy == "sequential":
            _drive_sequential(graph, layers, context, report, backend, on_event)
        elif strategy == "thread":
            _drive_threaded(
                graph, layers, context, report, backend, on_event, max_workers
            )
        else:
            _drive_async(graph, layers, context, report, backend, on_event)
    except TaskFailedError:
        report.success = False
        raise
    return report
 def _print_dry_run(graph: Graph, layers: List[List[str]]) -> None:
    """Print the execution plan without running anything."""
    print(f"Dry run: {len(graph)} tasks, {len(layers)} layers")
    for idx, layer in enumerate(layers, 1):
        print(f"  Layer {idx}: {layer}")
        for name in layer:
            print(f"    - {describe_injection(graph.spec(name))}")
 def _drive_sequential(
    graph: Graph,
    layers: List[List[str]],
    context: Dict[str, Any],
    report: RunReport,
    backend: StateBackend,
    on_event: Optional[EventCallback],
 ) -> None:
    for idx, layer in enumerate(layers, 1):
        _execute_layer_sequential(layer, graph, context, report, backend, idx, on_event)
 def _drive_threaded(
    graph: Graph,
    layers: List[List[str]],
    context: Dict[str, Any],
    report: RunReport,
    backend: StateBackend,
    on_event: Optional[EventCallback],
    max_workers: Optional[int],
 ) -> None:
    for idx, layer in enumerate(layers, 1):
        workers = max_workers or max(1, min(32, len(layer)))
        _execute_layer_threaded(
            layer, graph, context, report, backend, idx, on_event, workers
        )
 def _drive_async(
    graph: Graph,
    layers: List[List[str]],
    context: Dict[str, Any],
    report: RunReport,
    backend: StateBackend,
    on_event: Optional[EventCallback],
 ) -> None:
    asyncio.run(_async_drive(graph, layers, context, report, backend, on_event))
 async def _async_drive(
    graph: Graph,
    layers: List[List[str]],
    context: Dict[str, Any],
    report: RunReport,
    backend: StateBackend,
    on_event: Optional[EventCallback],
 ) -> None:
    for idx, layer in enumerate(layers, 1):
        await _execute_layer_async(
            layer, graph, context, report, backend, idx, on_event
        )
@@ -0,0 +1,245 @@
 """DAG construction, validation, layering and visualisation.
 Uses :mod:`graphlib` from the standard library (3.9+) or
 :mod:`graphlib_backport` (3.8) for topological sorting. The graph is
 built incrementally and validated eagerly so that misconfiguration fails
 fast — at construction time, not at execution time.
 """
 from __future__ import annotations
 import sys
 from typing import Dict, Iterable, List, Mapping, Sequence, Set, Tuple
 from .errors import CycleError, DuplicateTaskError, MissingDependencyError
 from .task import TaskSpec
 # graphlib lives in the stdlib since 3.9; fall back to the backport on 3.8.
 if sys.version_info >= (3, 9):
    import graphlib
    _TopologicalSorter = graphlib.TopologicalSorter
 else:  # pragma: no cover - exercised only on 3.8
    import graphlib  # type: ignore[no-redef]
    _TopologicalSorter = graphlib.TopologicalSorter
 class Graph:
    """An immutable-after-validation directed acyclic graph of tasks.
    The graph is built by adding :class:`~pyflowx.task.TaskSpec` instances.
    Each ``add`` performs eager validation (duplicate names, missing
    dependencies), and :meth:`validate` / :meth:`layers` perform full DAG
    validation (cycle detection) and topological layering.
    The graph holds only the *configuration*; runtime state lives in
    :class:`~pyflowx.report.RunReport`. This makes a graph safely
    re-runnable and shareable across threads.
    """
    def __init__(self) -> None:
        self._specs: Dict[str, TaskSpec[object]] = {}
        # Map task -> its direct dependencies (predecessors).
        self._deps: Dict[str, Tuple[str, ...]] = {}
    # ------------------------------------------------------------------ #
    # Construction
    # ------------------------------------------------------------------ #
    def add(self, spec: TaskSpec[object]) -> "Graph":
        """Register a task spec with eager validation.
        Returns ``self`` so calls can be chained, but the recommended
        entry point is :meth:`from_specs` which validates the whole batch
        together (allowing forward references in a single call).
        """
        self._specs[spec.name] = spec
        self._deps[spec.name] = spec.depends_on
        # Eagerly check duplicates and missing deps for the incremental API.
        self._validate_references()
        return self
    @classmethod
    def from_specs(cls, specs: Iterable[TaskSpec[object]]) -> "Graph":
        """Build a graph from an iterable of task specs.
        All specs are collected first, then validated together. This means
        a task may reference a dependency that appears *later* in the
        iterable — order does not matter, mirroring how a declarative
        config file reads.
        """
        graph = cls()
        for spec in specs:
            if spec.name in graph._specs:
                raise DuplicateTaskError(spec.name)
            graph._specs[spec.name] = spec
            graph._deps[spec.name] = spec.depends_on
        graph._validate_references()
        graph.validate()
        return graph
    # ------------------------------------------------------------------ #
    # Validation
    # ------------------------------------------------------------------ #
    def _validate_references(self) -> None:
        """Ensure every dependency name exists in the graph."""
        for name, deps in self._deps.items():
            for dep in deps:
                if dep not in self._specs:
                    raise MissingDependencyError(name, dep)
    def validate(self) -> None:
        """Run full DAG validation.
        Raises :class:`~pyflowx.errors.CycleError` if a cycle exists.
        Dependency existence is checked by :meth:`_validate_references`.
        """
        self._validate_references()
        sorter = _TopologicalSorter(self._deps)
        try:
            # prepare() raises CycleError on cycles; we don't need the
            # static_order() result here, just the validation side effect.
            sorter.prepare()
        except graphlib.CycleError as exc:
            # exc.args[1] is the list of nodes forming the cycle.
            cycle: Sequence[str] = exc.args[1] if len(exc.args) > 1 else []
            raise CycleError(list(cycle)) from exc
    # ------------------------------------------------------------------ #
    # Introspection
    # ------------------------------------------------------------------ #
    @property
    def names(self) -> List[str]:
        """All registered task names (insertion order)."""
        return list(self._specs.keys())
    def spec(self, name: str) -> TaskSpec[object]:
        """Return the spec for ``name``; ``KeyError`` if absent."""
        return self._specs[name]
    def dependencies(self, name: str) -> Tuple[str, ...]:
        """Direct predecessors of ``name``."""
        return self._deps[name]
    def all_specs(self) -> Mapping[str, TaskSpec[object]]:
        """Read-only view of name -> spec."""
        return self._specs
    def layers(self) -> List[List[str]]:
        """Group tasks into parallel-executable layers (Kahn's algorithm).
        Tasks within the same layer have no mutual dependencies and may
        run concurrently. Layers are returned in execution order.
        Raises :class:`~pyflowx.errors.CycleError` if the graph is cyclic.
        """
        self.validate()
        sorter = _TopologicalSorter(self._deps)
        result: List[List[str]] = []
        # ``get_ready`` + ``done`` gives us one layer at a time, which is
        # exactly the parallel-execution grouping we need.
        sorter.prepare()
        while sorter.is_active():
            ready = list(sorter.get_ready())
            # Sort for deterministic, reproducible execution plans.
            ready.sort()
            result.append(ready)
            for node in ready:
                sorter.done(node)
        return result
    # ------------------------------------------------------------------ #
    # Subgraph / tag filtering
    # ------------------------------------------------------------------ #
    def subgraph(self, tags: Iterable[str]) -> "Graph":
        """Return a new graph containing only tasks matching any tag.
        Dependencies are pruned to keep only edges between retained tasks;
        edges to dropped tasks are removed (the retained task no longer
        waits for them). Use this to run a slice of a large DAG for
        debugging.
        """
        wanted: Set[str] = set(tags)
        kept: List[TaskSpec[object]] = []
        for spec in self._specs.values():
            if wanted & set(spec.tags):
                pruned_deps = tuple(
                    d for d in spec.depends_on if d in self._specs and (wanted & set(self._specs[d].tags))
                )
                kept.append(
                    TaskSpec(
                        name=spec.name,
                        fn=spec.fn,
                        depends_on=pruned_deps,
                        args=spec.args,
                        kwargs=spec.kwargs,
                        retries=spec.retries,
                        timeout=spec.timeout,
                        tags=spec.tags,
                    )
                )
        return Graph.from_specs(kept)
    def subgraph_by_names(self, names: Iterable[str]) -> "Graph":
        """Return a new graph restricted to ``names`` (with pruned edges)."""
        wanted: Set[str] = set(names)
        for n in wanted:
            if n not in self._specs:
                raise KeyError(f"Unknown task name: {n!r}")
        kept: List[TaskSpec[object]] = []
        for spec in self._specs.values():
            if spec.name in wanted:
                pruned_deps = tuple(d for d in spec.depends_on if d in wanted)
                kept.append(
                    TaskSpec(
                        name=spec.name,
                        fn=spec.fn,
                        depends_on=pruned_deps,
                        args=spec.args,
                        kwargs=spec.kwargs,
                        retries=spec.retries,
                        timeout=spec.timeout,
                        tags=spec.tags,
                    )
                )
        return Graph.from_specs(kept)
    # ------------------------------------------------------------------ #
    # Visualisation
    # ------------------------------------------------------------------ #
    def to_mermaid(self, orientation: str = "TD") -> str:
        """Render the DAG as a Mermaid ``graph`` definition string.
        No external dependencies; the output can be pasted into Markdown,
        rendered by VS Code's Mermaid previewer, or saved to a file.
        """
        valid = {"TD", "TB", "BT", "LR", "RL"}
        orientation = orientation.upper()
        if orientation not in valid:
            raise ValueError(f"Invalid orientation {orientation!r}; expected one of {sorted(valid)}.")
        lines: List[str] = [f"graph {orientation}"]
        for name in self._specs:
            lines.append(f'    {name}["{name}"]')
        for name, deps in self._deps.items():
            for dep in deps:
                lines.append(f"    {dep} --> {name}")
        return "\n".join(lines) + "\n"
    # ------------------------------------------------------------------ #
    # Debug
    # ------------------------------------------------------------------ #
    def describe(self) -> str:
        """Human-readable multi-line summary for debugging."""
        out: List[str] = [f"Graph(tasks={len(self._specs)})"]
        for layer_idx, layer in enumerate(self.layers(), 1):
            out.append(f"  Layer {layer_idx}: {layer}")
        return "\n".join(out)
    def __repr__(self) -> str:
        return f"Graph(tasks={len(self._specs)})"
    def __len__(self) -> int:
        return len(self._specs)
    def __contains__(self, name: object) -> bool:
        return name in self._specs
@@ -0,0 +1,82 @@
 """Run report: typed, queryable result of a single :func:`pyflowx.run`.
 The report is the single source of truth after execution. It exposes
 per-task results via ``report["name"]`` (typed as ``Any`` because the
 mapping is heterogeneous), summary statistics, and a flag indicating
 whether the whole run succeeded.
 """
 from __future__ import annotations
 from dataclasses import dataclass, field
 from typing import Any, Dict, Iterator, List, Mapping, Optional
 from .task import TaskResult, TaskStatus
@dataclass
 class RunReport:
    """Aggregated outcome of a workflow run.
    Attributes
    ----------
    results:
        Mapping of task name -> :class:`TaskResult`. Insertion order
        matches the order tasks finished.
    success:
        ``True`` iff every non-skipped task ended in ``SUCCESS``.
    """
    results: Dict[str, TaskResult[object]] = field(default_factory=dict)
    success: bool = True
    # ---- typed access ------------------------------------------------- #
    def __getitem__(self, name: str) -> Any:
        """Return the *value* of task ``name`` (not the TaskResult).
        Raises ``KeyError`` if the task was not part of the run. Returns
        ``None`` for tasks that did not reach SUCCESS.
        """
        return self.results[name].value
    def result_of(self, name: str) -> TaskResult[object]:
        """Return the full :class:`TaskResult` for ``name``."""
        return self.results[name]
    def __contains__(self, name: object) -> bool:
        return name in self.results
    def __iter__(self) -> Iterator[str]:
        return iter(self.results)
    def __len__(self) -> int:
        return len(self.results)
    # ---- summary ------------------------------------------------------ #
    def summary(self) -> Dict[str, Any]:
        """Compact statistics dict for logging / dashboards."""
        counts: Dict[str, int] = {}
        total_duration = 0.0
        for r in self.results.values():
            counts[r.status.value] = counts.get(r.status.value, 0) + 1
            if r.duration is not None:
                total_duration += r.duration
        return {
            "success": self.success,
            "total_tasks": len(self.results),
            "by_status": counts,
            "total_duration_seconds": round(total_duration, 6),
        }
    def failed_tasks(self) -> List[str]:
        """Names of tasks that ended in FAILED status."""
        return [name for name, r in self.results.items() if r.status == TaskStatus.FAILED]
    def describe(self) -> str:
        """Human-readable multi-line report for debugging."""
        lines: List[str] = [f"RunReport(success={self.success})"]
        for name, r in self.results.items():
            dur = f"{r.duration:.3f}s" if r.duration is not None else "-"
            err = f" error={r.error!r}" if r.error else ""
            lines.append(f"  {name}: {r.status.value} ({dur} attempts={r.attempts}){err}")
        return "\n".join(lines)
@@ -0,0 +1,135 @@
 """State persistence backends for resumable runs.
 A :class:`StateBackend` stores the result of every successfully completed
 task. On a subsequent run, the executor asks the backend whether a task
 already has a stored result; if so, the task is skipped and its stored
 value is injected into downstream tasks.
 This is intentionally minimal: only *successful* results are persisted
 (failed tasks are re-run), and the storage shape is a flat
 ``{task_name: result}`` mapping. Two backends ship in-tree:
 * :class:`MemoryBackend` — fast, in-process, no I/O. Default.
 * :class:`JSONBackend` — persists to a JSON file for cross-process resume.
 Both are zero-dependency (``json`` is stdlib). Users can subclass
 :class:`StateBackend` to plug in SQLite, Redis, etc.
 """
 from __future__ import annotations
 import json
 import os
 from abc import ABC, abstractmethod
 from typing import Any, Dict, Mapping, Optional
 from .errors import StorageError
 class StateBackend(ABC):
    """Abstract base for resumable state storage."""
    @abstractmethod
    def load(self) -> Mapping[str, Any]:
        """Return the full stored mapping (may be empty)."""
    @abstractmethod
    def save(self, name: str, value: Any) -> None:
        """Persist a single task's successful result."""
    @abstractmethod
    def has(self, name: str) -> bool:
        """Whether ``name`` has a stored result."""
    @abstractmethod
    def get(self, name: str) -> Any:
        """Return the stored result for ``name`` (raise ``KeyError`` if absent)."""
    @abstractmethod
    def clear(self) -> None:
        """Remove all stored state."""
 class MemoryBackend(StateBackend):
    """In-process dict backend. Lost when the process exits."""
    def __init__(self) -> None:
        self._store: Dict[str, Any] = {}
    def load(self) -> Mapping[str, Any]:
        return dict(self._store)
    def save(self, name: str, value: Any) -> None:
        self._store[name] = value
    def has(self, name: str) -> bool:
        return name in self._store
    def get(self, name: str) -> Any:
        return self._store[name]
    def clear(self) -> None:
        self._store.clear()
 class JSONBackend(StateBackend):
    """File-backed JSON storage for cross-process resume.
    Results must be JSON-serialisable. Non-serialisable values raise
    :class:`~pyflowx.errors.StorageError` (the run itself is not aborted;
    only persistence of that one result fails).
    """
    def __init__(self, path: str) -> None:
        self._path = path
        self._store: Dict[str, Any] = {}
        self._load()
    def _load(self) -> None:
        if not os.path.exists(self._path):
            return
        try:
            with open(self._path, "r", encoding="utf-8") as fh:
                data = json.load(fh)
            if isinstance(data, dict):
                self._store = data
        except (OSError, json.JSONDecodeError) as exc:
            raise StorageError(f"cannot read state file {self._path!r}", exc) from exc
    def _flush(self) -> None:
        tmp = self._path + ".tmp"
        try:
            with open(tmp, "w", encoding="utf-8") as fh:
                json.dump(self._store, fh, ensure_ascii=False, indent=2)
            os.replace(tmp, self._path)
        except (OSError, TypeError) as exc:
            raise StorageError(f"cannot write state file {self._path!r}", exc) from exc
    def load(self) -> Mapping[str, Any]:
        return dict(self._store)
    def save(self, name: str, value: Any) -> None:
        # Validate serialisability before mutating in-memory state.
        try:
            json.dumps(value)
        except (TypeError, ValueError) as exc:
            raise StorageError(
                f"result of task {name!r} is not JSON-serialisable", exc
            ) from exc
        self._store[name] = value
        self._flush()
    def has(self, name: str) -> bool:
        return name in self._store
    def get(self, name: str) -> Any:
        return self._store[name]
    def clear(self) -> None:
        self._store.clear()
        self._flush()
 def resolve_backend(backend: Optional[StateBackend]) -> StateBackend:
    """Return ``backend`` or a fresh :class:`MemoryBackend` if ``None``."""
    return backend if backend is not None else MemoryBackend()
@@ -0,0 +1,151 @@
 """Core task data structures for PyFlowX.
 Everything here is a plain, immutable data structure — no decorators, no
 side effects. A :class:`TaskSpec` fully describes a task node; the
 :class:`Graph` (see :mod:`pyflowx.graph`) consumes a list of specs and
 builds the DAG.
 Design notes
 ------------
 * ``TaskSpec`` is a ``Generic[T]`` so that ``TaskSpec[int]`` carries the
  return type of ``fn`` all the way to :class:`RunReport`, giving callers
  typed access to ``report["name"]``.
 * ``Context`` is the only intentionally-dynamic type: results from
  upstream tasks are heterogeneous, so the cross-task mapping is
  ``Mapping[str, Any]``. Within a single task the types remain fully
  static because the function signature is checked by mypy.
 * ``TaskStatus`` is a closed enum; executors never invent ad-hoc strings.
 """
 from __future__ import annotations
 from dataclasses import dataclass, field
 from datetime import datetime
 from enum import Enum
 from typing import (
    Any,
    Callable,
    Coroutine,
    Generic,
    Mapping,
    Optional,
    Tuple,
    TypeVar,
    Union,
 )
 T = TypeVar("T")
 # A task callable may be synchronous or asynchronous. We keep the union
 # explicit so mypy understands both shapes.
 TaskFn = Union[
    Callable[..., T],
    Callable[..., Coroutine[Any, Any, T]],
 ]
 # The cross-task result mapping. Deliberately ``Any`` for values because
 # different tasks return different types; per-task typing is preserved by
 # the function signature itself.
 Context = Mapping[str, Any]
 class TaskStatus(Enum):
    """Lifecycle states of a task during a single run."""
    PENDING = "pending"
    RUNNING = "running"
    SUCCESS = "success"
    FAILED = "failed"
    SKIPPED = "skipped"  # used by resumable runs and subgraph filtering
@dataclass(frozen=True)
 class TaskSpec(Generic[T]):
    """Immutable description of a single DAG node.
    Parameters
    ----------
    name:
        Unique identifier of the task within a graph. Other tasks reference
        this name in ``depends_on``.
    fn:
        The callable to execute. May be sync or async. Its parameter names
        drive automatic context injection (see :mod:`pyflowx.context`).
    depends_on:
        Names of tasks whose results must be available before this task
        runs. Order is irrelevant; the framework topologically sorts.
    args:
        Static positional arguments appended *after* injected parameters.
        Useful for parameterised tasks (e.g. ``fetch_user(uid)``).
    kwargs:
        Static keyword arguments. Conflict with injected names raises
        :class:`~pyflowx.errors.InjectionError`.
    retries:
        Number of retry attempts on failure. ``0`` means a single attempt.
    timeout:
        Maximum execution time in seconds. ``None`` disables the timeout.
        For async tasks this uses :func:`asyncio.wait_for`; for sync tasks
        in the threaded/async executors it cancels the worker future.
    tags:
        Free-form labels used by :meth:`Graph.subgraph` for selective
        execution and debugging.
    """
    name: str
    fn: TaskFn[T]
    depends_on: Tuple[str, ...] = ()
    args: Tuple[Any, ...] = ()
    kwargs: Mapping[str, Any] = field(default_factory=dict)
    retries: int = 0
    timeout: Optional[float] = None
    tags: Tuple[str, ...] = ()
    def __post_init__(self) -> None:
        if not self.name:
            raise ValueError("TaskSpec.name must be a non-empty string.")
        if self.retries < 0:
            raise ValueError(f"TaskSpec '{self.name}': retries must be >= 0.")
        if self.timeout is not None and self.timeout <= 0:
            raise ValueError(f"TaskSpec '{self.name}': timeout must be > 0.")
        if self.name in self.depends_on:
            raise ValueError(f"TaskSpec '{self.name}' cannot depend on itself.")
@dataclass
 class TaskResult(Generic[T]):
    """Mutable per-task record produced during a run.
    A fresh :class:`TaskResult` is created for every run; the spec itself
    stays immutable. This keeps the same graph safely re-runnable.
    """
    spec: TaskSpec[T]
    status: TaskStatus = TaskStatus.PENDING
    value: Optional[T] = None
    error: Optional[BaseException] = None
    attempts: int = 0
    started_at: Optional[datetime] = None
    finished_at: Optional[datetime] = None
    @property
    def duration(self) -> Optional[float]:
        """Elapsed seconds between start and finish, or ``None``."""
        if self.started_at is None or self.finished_at is None:
            return None
        return (self.finished_at - self.started_at).total_seconds()
@dataclass(frozen=True)
 class TaskEvent:
    """Immutable event emitted during execution for observers.
    Passed to the ``on_event`` callback of :func:`pyflowx.run` so callers
    can build progress bars, metrics, or structured logs without coupling
    to executor internals.
    """
    task: str
    status: TaskStatus
    attempts: int = 0
    error: Optional[str] = None
    duration: Optional[float] = None
@@ -0,0 +1,89 @@
 """Tests for context injection rules."""
 from __future__ import annotations
 from typing import Any
 import pytest
 import pyflowx as px
 from pyflowx.context import build_call_args, describe_injection
 from pyflowx.errors import InjectionError
 def test_inject_by_parameter_name() -> None:
    def fn(a: int, b: str) -> str:
        return f"{a}{b}"
    spec = px.TaskSpec("c", fn, ("a", "b"))
    args, kwargs = build_call_args(spec, {"a": 1, "b": "x"})
    assert args == ()
    assert kwargs == {"a": 1, "b": "x"}
 def test_inject_context_annotation() -> None:
    def fn(ctx: px.Context) -> int:
        return len(ctx)
    spec = px.TaskSpec("agg", fn, ("a", "b"))
    args, kwargs = build_call_args(spec, {"a": 1, "b": 2, "c": 99})
    # Only the task's own deps are passed.
    assert kwargs == {"ctx": {"a": 1, "b": 2}}
 def test_inject_var_keyword() -> None:
    def fn(**kwargs: Any) -> int:
        return sum(kwargs.values())
    spec = px.TaskSpec("agg", fn, ("a", "b"))
    args, kwargs = build_call_args(spec, {"a": 1, "b": 2})
    assert kwargs == {"a": 1, "b": 2}
 def test_static_args_and_kwargs() -> None:
    def fn(uid: int, source: str) -> str:
        return f"{source}:{uid}"
    spec = px.TaskSpec("fetch", fn, args=(42,), kwargs={"source": "api"})
    args, kwargs = build_call_args(spec, {})
    assert args == (42,)
    assert kwargs == {"source": "api"}
 def test_default_param_not_required() -> None:
    def fn(a: int, flag: bool = True) -> int:
        return a if flag else 0
    spec = px.TaskSpec("t", fn, ("a",))
    args, kwargs = build_call_args(spec, {"a": 5})
    assert kwargs == {"a": 5}
 def test_unresolved_required_param_raises() -> None:
    def fn(a: int, missing: str) -> None:
        return None
    spec = px.TaskSpec("t", fn, ("a",))
    with pytest.raises(InjectionError) as exc_info:
        build_call_args(spec, {"a": 1})
    assert "missing" in str(exc_info.value)
 def test_static_kwargs_collide_with_dependency() -> None:
    def fn(a: int) -> int:
        return a
    spec = px.TaskSpec("t", fn, ("a",), kwargs={"a": 99})
    with pytest.raises(InjectionError):
        build_call_args(spec, {"a": 1})
 def test_describe_injection() -> None:
    def fn(a: int, ctx: px.Context, flag: bool = False) -> None:
        return None
    spec = px.TaskSpec("t", fn, ("a",))
    desc = describe_injection(spec)
    assert "a=<result:a>" in desc
    assert "ctx=<Context>" in desc
    assert "flag=<default>" in desc
@@ -0,0 +1,322 @@
 """Tests for execution: sequential, thread, async, retries, timeout, resume."""
 from __future__ import annotations
 import asyncio
 import os
 import tempfile
 import threading
 import time
 from typing import Any, List
 import pytest
 import pyflowx as px
 from pyflowx.errors import TaskFailedError, TaskTimeoutError
 from pyflowx.storage import JSONBackend, MemoryBackend
 # ---------------------------------------------------------------------- #
 # Sequential
 # ---------------------------------------------------------------------- #
 def test_sequential_basic() -> None:
    def extract() -> list[int]:
        return [1, 2, 3]
    def double(extract: list[int]) -> list[int]:
        return [x * 2 for x in extract]
    graph = px.Graph.from_specs(
        [
            px.TaskSpec("extract", extract),
            px.TaskSpec("double", double, ("extract",)),
        ]
    )
    report = px.run(graph, strategy="sequential")
    assert report.success
    assert report["extract"] == [1, 2, 3]
    assert report["double"] == [2, 4, 6]
 def test_sequential_diamond() -> None:
    order: List[str] = []
    def make(name: str) -> Any:
        def fn() -> str:
            order.append(name)
            return name
        return fn
    graph = px.Graph.from_specs(
        [
            px.TaskSpec("a", make("a")),
            px.TaskSpec("b", make("b"), ("a",)),
            px.TaskSpec("c", make("c"), ("a",)),
            px.TaskSpec("d", make("d"), ("b", "c")),
        ]
    )
    report = px.run(graph, strategy="sequential")
    assert report.success
    assert report["d"] == "d"
    assert order == ["a", "b", "c", "d"]
 def test_failure_propagates() -> None:
    def boom() -> None:
        raise ValueError("kaboom")
    def downstream(boom: None) -> int:
        return 1
    graph = px.Graph.from_specs(
        [
            px.TaskSpec("boom", boom),
            px.TaskSpec("downstream", downstream, ("boom",)),
        ]
    )
    with pytest.raises(TaskFailedError) as exc_info:
        px.run(graph, strategy="sequential")
    assert exc_info.value.task == "boom"
    assert isinstance(exc_info.value.cause, ValueError)
 def test_retries_then_succeeds() -> None:
    attempts = {"n": 0}
    def flaky() -> str:
        attempts["n"] += 1
        if attempts["n"] < 3:
            raise RuntimeError("not yet")
        return "ok"
    graph = px.Graph.from_specs([px.TaskSpec("flaky", flaky, retries=2)])
    report = px.run(graph, strategy="sequential")
    assert report.success
    assert report["flaky"] == "ok"
    assert attempts["n"] == 3
 def test_retries_exhausted() -> None:
    def always_fail() -> None:
        raise RuntimeError("nope")
    graph = px.Graph.from_specs([px.TaskSpec("f", always_fail, retries=2)])
    with pytest.raises(TaskFailedError) as exc_info:
        px.run(graph, strategy="sequential")
    assert exc_info.value.attempts == 3
 # ---------------------------------------------------------------------- #
 # Threaded
 # ---------------------------------------------------------------------- #
 def test_threaded_parallelism() -> None:
    def slow() -> str:
        time.sleep(0.3)
        return "done"
    graph = px.Graph.from_specs(
        [
            px.TaskSpec("a", slow),
            px.TaskSpec("b", slow),
            px.TaskSpec("c", slow),
        ]
    )
    start = time.time()
    report = px.run(graph, strategy="thread", max_workers=3)
    elapsed = time.time() - start
    assert report.success
    # Three 0.3s tasks in parallel should be well under 0.8s.
    assert elapsed < 0.8
 def test_threaded_layer_barrier() -> None:
    finished: List[str] = []
    lock = threading.Lock()
    def make(name: str) -> Any:
        def fn() -> str:
            time.sleep(0.1)
            with lock:
                finished.append(name)
            return name
        return fn
    graph = px.Graph.from_specs(
        [
            px.TaskSpec("a", make("a")),
            px.TaskSpec("b", make("b")),
            px.TaskSpec("c", make("c"), ("a", "b")),
        ]
    )
    report = px.run(graph, strategy="thread", max_workers=2)
    assert report.success
    # c must finish after both a and b.
    assert finished.index("c") > finished.index("a")
    assert finished.index("c") > finished.index("b")
 # ---------------------------------------------------------------------- #
 # Async
 # ---------------------------------------------------------------------- #
 def test_async_basic() -> None:
    async def fetch() -> int:
        await asyncio.sleep(0.01)
        return 42
    async def transform(fetch: int) -> int:
        return fetch * 2
    graph = px.Graph.from_specs(
        [
            px.TaskSpec("fetch", fetch),
            px.TaskSpec("transform", transform, ("fetch",)),
        ]
    )
    report = px.run(graph, strategy="async")
    assert report.success
    assert report["transform"] == 84
 def test_async_parallelism() -> None:
    async def slow() -> str:
        await asyncio.sleep(0.3)
        return "done"
    graph = px.Graph.from_specs(
        [
            px.TaskSpec("a", slow),
            px.TaskSpec("b", slow),
            px.TaskSpec("c", slow),
        ]
    )
    start = time.time()
    report = px.run(graph, strategy="async")
    elapsed = time.time() - start
    assert report.success
    assert elapsed < 0.8
 def test_async_mixed_sync_and_async() -> None:
    def sync_task() -> int:
        return 10
    async def async_task(sync_task: int) -> int:
        await asyncio.sleep(0.01)
        return sync_task + 5
    graph = px.Graph.from_specs(
        [
            px.TaskSpec("sync_task", sync_task),
            px.TaskSpec("async_task", async_task, ("sync_task",)),
        ]
    )
    report = px.run(graph, strategy="async")
    assert report.success
    assert report["async_task"] == 15
 def test_async_timeout() -> None:
    async def slow() -> None:
        await asyncio.sleep(10)
    graph = px.Graph.from_specs([px.TaskSpec("slow", slow, timeout=0.05)])
    with pytest.raises(TaskFailedError) as exc_info:
        px.run(graph, strategy="async")
    assert isinstance(exc_info.value.cause, TaskTimeoutError)
 # ---------------------------------------------------------------------- #
 # Dry run
 # ---------------------------------------------------------------------- #
 def test_dry_run_does_not_execute(capsys: pytest.CaptureFixture[str]) -> None:
    called: List[str] = []
    def fn() -> str:
        called.append("x")
        return "should-not-run"
    graph = px.Graph.from_specs([px.TaskSpec("a", fn)])
    report = px.run(graph, strategy="sequential", dry_run=True)
    assert called == []
    assert len(report) == 0
    out = capsys.readouterr().out
    assert "Dry run" in out
    assert "Layer 1" in out
 # ---------------------------------------------------------------------- #
 # State / resume
 # ---------------------------------------------------------------------- #
 def test_memory_backend_resume() -> None:
    runs: List[str] = []
    def make(name: str) -> Any:
        def fn() -> str:
            runs.append(name)
            return name
        return fn
    graph = px.Graph.from_specs(
        [
            px.TaskSpec("a", make("a")),
            px.TaskSpec("b", make("b"), ("a",)),
        ]
    )
    backend = MemoryBackend()
    px.run(graph, strategy="sequential", state=backend)
    assert runs == ["a", "b"]
    # Second run: both cached, neither re-executed.
    px.run(graph, strategy="sequential", state=backend)
    assert runs == ["a", "b"]  # unchanged
 def test_json_backend_persistence() -> None:
    with tempfile.TemporaryDirectory() as tmp:
        path = os.path.join(tmp, "state.json")
        def fn() -> int:
            return 7
        graph = px.Graph.from_specs([px.TaskSpec("a", fn)])
        px.run(graph, strategy="sequential", state=JSONBackend(path))
        # New backend reads the file; task should be skipped.
        runs: List[str] = []
        def fn2() -> int:
            runs.append("ran")
            return 8
        graph2 = px.Graph.from_specs([px.TaskSpec("a", fn2)])
        report = px.run(graph2, strategy="sequential", state=JSONBackend(path))
        assert runs == []
        assert report["a"] == 7  # cached value, not fn2's 8
 # ---------------------------------------------------------------------- #
 # Events
 # ---------------------------------------------------------------------- #
 def test_on_event_callback() -> None:
    events: List[px.TaskEvent] = []
    def fn() -> int:
        return 1
    graph = px.Graph.from_specs([px.TaskSpec("a", fn)])
    px.run(graph, strategy="sequential", on_event=events.append)
    statuses = [e.status for e in events]
    assert px.TaskStatus.SUCCESS in statuses
    assert all(e.task == "a" for e in events)
 # ---------------------------------------------------------------------- #
 # Invalid strategy
 # ---------------------------------------------------------------------- #
 def test_invalid_strategy() -> None:
    graph = px.Graph.from_specs([px.TaskSpec("a", lambda: None)])  # type: ignore[arg-type]
    with pytest.raises(ValueError):
        px.run(graph, strategy="bogus")  # type: ignore[arg-type]
@@ -0,0 +1,131 @@
 """Tests for Graph construction, validation, layering and subgraphs."""
 from __future__ import annotations
 import pytest
 import pyflowx as px
 from pyflowx.errors import CycleError, DuplicateTaskError, MissingDependencyError
 def _fn() -> None:
    return None
 def test_from_specs_builds_graph() -> None:
    graph = px.Graph.from_specs([
        px.TaskSpec("a", _fn),
        px.TaskSpec("b", _fn, ("a",)),
        px.TaskSpec("c", _fn, ("a", "b")),
    ])
    assert set(graph.names) == {"a", "b", "c"}
    assert graph.dependencies("c") == ("a", "b")
    assert len(graph) == 3
    assert "a" in graph
 def test_from_specs_allows_forward_references() -> None:
    # b depends on a, but a is declared after b — order should not matter.
    graph = px.Graph.from_specs([
        px.TaskSpec("b", _fn, ("a",)),
        px.TaskSpec("a", _fn),
    ])
    assert graph.layers() == [["a"], ["b"]]
 def test_duplicate_task_raises() -> None:
    with pytest.raises(DuplicateTaskError):
        px.Graph.from_specs([
            px.TaskSpec("a", _fn),
            px.TaskSpec("a", _fn),
        ])
 def test_missing_dependency_raises() -> None:
    with pytest.raises(MissingDependencyError) as exc_info:
        px.Graph.from_specs([px.TaskSpec("b", _fn, ("a",))])
    assert exc_info.value.task == "b"
    assert exc_info.value.dependency == "a"
 def test_cycle_detection() -> None:
    with pytest.raises(CycleError):
        px.Graph.from_specs([
            px.TaskSpec("a", _fn, ("c",)),
            px.TaskSpec("b", _fn, ("a",)),
            px.TaskSpec("c", _fn, ("b",)),
        ])
 def test_layers_grouping() -> None:
    graph = px.Graph.from_specs([
        px.TaskSpec("a", _fn),
        px.TaskSpec("b", _fn),
        px.TaskSpec("c", _fn, ("a", "b")),
        px.TaskSpec("d", _fn, ("c",)),
    ])
    layers = graph.layers()
    assert layers == [["a", "b"], ["c"], ["d"]]
 def test_self_dependency_rejected() -> None:
    with pytest.raises(ValueError):
        px.TaskSpec("a", _fn, ("a",))
 def test_to_mermaid() -> None:
    graph = px.Graph.from_specs([
        px.TaskSpec("a", _fn),
        px.TaskSpec("b", _fn, ("a",)),
    ])
    mermaid = graph.to_mermaid()
    assert mermaid.startswith("graph TD")
    assert 'a["a"]' in mermaid
    assert "a --> b" in mermaid
 def test_to_mermaid_invalid_orientation() -> None:
    graph = px.Graph.from_specs([px.TaskSpec("a", _fn)])
    with pytest.raises(ValueError):
        graph.to_mermaid("XX")
 def test_subgraph_by_tags() -> None:
    graph = px.Graph.from_specs([
        px.TaskSpec("a", _fn, tags=("ingest",)),
        px.TaskSpec("b", _fn, ("a",), tags=("ingest",)),
        px.TaskSpec("c", _fn, ("b",), tags=("report",)),
    ])
    sub = graph.subgraph(["ingest"])
    assert set(sub.names) == {"a", "b"}
    # Edge to dropped task c is removed; b no longer waits for anything
    # outside the subgraph (c was never a dep of b anyway).
    assert sub.dependencies("b") == ("a",)
 def test_subgraph_by_names() -> None:
    graph = px.Graph.from_specs([
        px.TaskSpec("a", _fn),
        px.TaskSpec("b", _fn, ("a",)),
        px.TaskSpec("c", _fn, ("b",)),
    ])
    sub = graph.subgraph_by_names(["a", "b"])
    assert set(sub.names) == {"a", "b"}
    # c is dropped, so b's dep on c (none here) — but a->b edge preserved.
    assert sub.dependencies("b") == ("a",)
 def test_subgraph_by_names_unknown() -> None:
    graph = px.Graph.from_specs([px.TaskSpec("a", _fn)])
    with pytest.raises(KeyError):
        graph.subgraph_by_names(["nope"])
 def test_describe() -> None:
    graph = px.Graph.from_specs([
        px.TaskSpec("a", _fn),
        px.TaskSpec("b", _fn, ("a",)),
    ])
    desc = graph.describe()
    assert "Layer 1" in desc
    assert "Layer 2" in desc