feat: 初始化PyFlowX轻量级DAG任务调度库

实现完整的DAG任务调度核心功能,包括:
1.  支持同步/异步/线程三种执行策略
2.  自动上下文注入,无需手动绑定任务依赖
3.  内置状态后端,支持断点续跑
4.  提供完整的测试用例与示例代码
5.  添加CI/CD配置与发布流程
This commit is contained in:
2026-06-20 10:41:33 +08:00
parent 70f3c03986
commit 8b7777d936
21 changed files with 6003 additions and 3 deletions
+81
View File
@@ -0,0 +1,81 @@
"""Example 1: ETL pipeline (sequential strategy).
Demonstrates the core PyFlowX workflow:
* Define tasks as plain functions.
* Declare the DAG as a list of TaskSpec objects.
* Parameter names == dependency names → automatic context injection,
  no wrappers needed (contrast with flowweaver's get_task_result boilerplate).
* Use dry_run to preview, then execute and read typed results from RunReport.
"""
from __future__ import annotations
from typing import List
import pyflowx as px
# --- task functions: pure, testable, no framework coupling ------------- #
def extract_customers() -> List[dict]:
    """Return the hard-coded sample customer records.

    Returns:
        A new list of dicts, each with an ``"id"`` and a ``"name"`` key.
    """
    sample_rows = (
        ("C001", "Alice"),
        ("C002", "Bob"),
    )
    return [
        {"id": customer_id, "name": customer_name}
        for customer_id, customer_name in sample_rows
    ]
def extract_orders() -> List[dict]:
    """Return the hard-coded sample order records.

    Returns:
        A new list of dicts, each with ``"id"``, ``"customer_id"`` and
        ``"amount"`` keys.
    """
    field_names = ("id", "customer_id", "amount")
    sample_rows = (
        ("O001", "C001", 150.0),
        ("O002", "C002", 200.5),
    )
    return [dict(zip(field_names, row)) for row in sample_rows]
# Parameter names match dependency names → automatic injection.
def transform(
    extract_customers: List[dict],
    extract_orders: List[dict],
) -> List[dict]:
    """Attach the customer's name to every order that has a known customer.

    Args:
        extract_customers: Customer records; each is indexed by its ``"id"``
            and supplies a ``"name"``.
        extract_orders: Order records; each carries a ``"customer_id"``.

    Returns:
        A new list with one copy of each matching order, extended with a
        ``"customer_name"`` key. Orders whose ``"customer_id"`` matches no
        customer are left out. The input records are not modified.
    """
    # Index customers by id; a later record with the same id replaces an
    # earlier one.
    customers_by_id = {}
    for customer in extract_customers:
        customers_by_id[customer["id"]] = customer

    enriched = []
    for order in extract_orders:
        if order["customer_id"] not in customers_by_id:
            continue  # no matching customer: drop the order
        row = dict(order)  # copy so the caller's record stays untouched
        row["customer_name"] = customers_by_id[order["customer_id"]]["name"]
        enriched.append(row)
    return enriched
def load(transform: List[dict]) -> int:
    """Report how many records were received and return that count.

    Args:
        transform: The records to "load"; only their number is used.

    Returns:
        The number of records in ``transform``.
    """
    record_count = len(transform)
    print(f" loaded {record_count} records")
    return record_count
def main() -> None:
    """Build the example ETL graph, preview it, run it, and print the outcome.

    Registers four tasks (two extract tasks, ``transform`` and ``load``),
    prints the graph description, performs a dry run, then executes with the
    ``"sequential"`` strategy and prints the report, the ``load`` result and
    the report summary.
    """
    # The tuple passed as the third positional argument names the tasks whose
    # results feed this task (presumably TaskSpec's dependency list; confirm
    # against the pyflowx API).
    customers_spec = px.TaskSpec(
        "extract_customers", extract_customers, tags=("extract",)
    )
    orders_spec = px.TaskSpec("extract_orders", extract_orders, tags=("extract",))
    transform_spec = px.TaskSpec(
        "transform",
        transform,
        ("extract_customers", "extract_orders"),
        tags=("transform",),
    )
    load_spec = px.TaskSpec("load", load, ("transform",), retries=1, tags=("load",))

    graph = px.Graph.from_specs(
        [customers_spec, orders_spec, transform_spec, load_spec]
    )

    print("=== Execution plan ===")
    plan_text = graph.describe()
    print(plan_text)

    print("\n=== Dry run (no execution) ===")
    px.run(graph, strategy="sequential", dry_run=True)

    print("\n=== Sequential execution ===")
    report = px.run(graph, strategy="sequential")
    report_text = report.describe()
    print(report_text)

    load_result = report["load"]
    print(f"\nload result = {load_result}")
    summary = report.summary()
    print(f"summary = {summary}")
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()