feat(profiling): 添加工作流性能分析模块与测试用例

新增了性能剖面分析能力,支持从运行报告生成任务级、图级性能指标,包括关键路径、并行度分析和瓶颈识别,同时补充了完整的单元测试覆盖。
This commit is contained in:
2026-06-28 19:59:25 +08:00
parent 3f9c52e6f1
commit 3d6d769685
3 changed files with 1021 additions and 0 deletions
+3
View File
@@ -82,6 +82,7 @@ from .errors import (
) )
from .executors import Strategy, run from .executors import Strategy, run
from .graph import Graph, GraphDefaults from .graph import Graph, GraphDefaults
from .profiling import ProfileReport, TaskProfile
from .report import RunReport from .report import RunReport
from .runner import CliExitCode, CliRunner from .runner import CliExitCode, CliRunner
from .storage import JSONBackend, MemoryBackend, StateBackend from .storage import JSONBackend, MemoryBackend, StateBackend
@@ -122,6 +123,7 @@ __all__ = [
"JSONBackend", "JSONBackend",
"MemoryBackend", "MemoryBackend",
"MissingDependencyError", "MissingDependencyError",
"ProfileReport",
"PyFlowXError", "PyFlowXError",
"RetryPolicy", "RetryPolicy",
"RunReport", "RunReport",
@@ -132,6 +134,7 @@ __all__ = [
"TaskEvent", "TaskEvent",
"TaskFailedError", "TaskFailedError",
"TaskHooks", "TaskHooks",
"TaskProfile",
"TaskResult", "TaskResult",
"TaskSpec", "TaskSpec",
"TaskStatus", "TaskStatus",
+451
View File
@@ -0,0 +1,451 @@
"""工作流执行性能评估。
基于 :class:`~pyflowx.report.RunReport` 中已有的 ``started_at`` /
``finished_at`` 时间戳进行离线分析,**零运行时开销**——不修改执行流程,
不注册回调,不引入额外计时器。
核心指标
--------
* **任务级**:每个任务的 wall-clock 耗时、状态、重试次数、等待时间
(从最早依赖完成到本任务开始)。
* **图级**:总耗时(wall-clock)、关键路径耗时(理论最短耗时)、
并行度效率(关键路径耗时 / 总耗时)。
* **关键路径**:从源点到汇点的最长依赖路径,识别真正的串行瓶颈。
* **并行度**:基于时间线重叠计算瞬时并行度,给出平均并行度与峰值并行度。
* **瓶颈识别**:按耗时排序的 Top-N 任务。
设计原则
--------
* 数据来源于 ``RunReport`` + ``Graph``,无副作用。
* 计算复杂度 O(V+E):拓扑排序 + 单次松弛,适合大规模图。
* 所有时间戳用 ``datetime``,与 :class:`TaskResult` 保持一致。
快速上手
--------
import pyflowx as px
report = px.run(graph)
profile = px.ProfileReport.from_report(report, graph)
print(profile.describe())
bottlenecks = profile.top_bottlenecks(3)
"""
from __future__ import annotations
__all__ = [
"ProfileReport",
"TaskProfile",
]
from dataclasses import dataclass
from datetime import datetime
from typing import Any
from .graph import Graph
from .report import RunReport
from .task import TaskResult, TaskStatus
@dataclass(frozen=True)
class TaskProfile:
"""单个任务的性能剖面。
属性
----
name:
任务名。
status:
终态(SUCCESS/FAILED/SKIPPED)。
duration:
wall-clock 执行耗时(秒)。SKIPPED 任务为 0.0。
attempts:
尝试次数(含首次)。
wait_time:
从最早硬依赖完成到本任务开始的等待时间(秒)。
无硬依赖或 SKIPPED 时为 0.0。
is_on_critical_path:
是否位于关键路径上。
deps:
硬依赖任务名列表。
"""
name: str
status: TaskStatus
duration: float
attempts: int
wait_time: float
is_on_critical_path: bool
deps: tuple[str, ...]
def to_dict(self) -> dict[str, Any]:
"""转为 JSON 友好的字典。"""
return {
"name": self.name,
"status": self.status.value,
"duration_seconds": round(self.duration, 6),
"attempts": self.attempts,
"wait_time_seconds": round(self.wait_time, 6),
"is_on_critical_path": self.is_on_critical_path,
"deps": list(self.deps),
}
@dataclass(frozen=True)
class ProfileReport:
"""工作流执行的性能剖面报告。
通过 :meth:`from_report` 从 :class:`RunReport` + :class:`Graph` 构建。
所有字段在构造时一次性计算完毕,后续访问为 O(1)。
"""
tasks: tuple[TaskProfile, ...]
"""所有任务的性能剖面(按拓扑序)。"""
total_duration: float
"""整次运行的 wall-clock 耗时(秒)。"""
critical_path_duration: float
"""关键路径耗时(秒):从最早任务开始到最晚任务结束的最长依赖路径。"""
critical_path: tuple[str, ...]
"""关键路径上的任务名序列(按执行顺序)。"""
avg_parallelism: float
"""平均并行度 = 任务总耗时 / wall-clock 总耗时。"""
peak_parallelism: int
"""峰值并行度:任一时刻同时运行的任务数最大值。"""
parallelism_efficiency: float
"""并行度效率 = 关键路径耗时 / wall-clock 总耗时。``1.0`` 表示完全串行,
越大表示并行化收益越低(瓶颈在关键路径上)。"""
# ------------------------------------------------------------------ #
# 构建
# ------------------------------------------------------------------ #
@classmethod
def from_report(cls, report: RunReport, graph: Graph) -> ProfileReport:
"""从运行报告与图构建性能剖面。
参数
----
report:
已完成的 :class:`RunReport`,需包含 ``started_at``/``finished_at``。
graph:
对应的 :class:`Graph`,用于依赖关系与关键路径分析。
Note
-----
本方法不修改 ``report`` 或 ``graph``,纯函数式计算。
"""
task_profiles = cls._build_task_profiles(report, graph)
total_duration = cls._calc_total_duration(report)
critical_path, critical_duration = cls._calc_critical_path(graph, report)
avg_par, peak_par = cls._calc_parallelism(report)
efficiency = critical_duration / total_duration if total_duration > 0 else 0.0
# 标记关键路径上的任务
critical_set = set(critical_path)
marked = tuple(
TaskProfile(
name=t.name,
status=t.status,
duration=t.duration,
attempts=t.attempts,
wait_time=t.wait_time,
is_on_critical_path=t.name in critical_set,
deps=t.deps,
)
for t in task_profiles
)
return cls(
tasks=marked,
total_duration=total_duration,
critical_path_duration=critical_duration,
critical_path=critical_path,
avg_parallelism=avg_par,
peak_parallelism=peak_par,
parallelism_efficiency=efficiency,
)
@staticmethod
def _build_task_profiles(report: RunReport, graph: Graph) -> tuple[TaskProfile, ...]:
"""构建每个任务的性能剖面。"""
profiles: list[TaskProfile] = []
for name, result in report.results.items():
spec = graph.specs.get(name)
deps = tuple(spec.depends_on) if spec is not None else ()
duration = result.duration or 0.0
wait_time = ProfileReport._calc_wait_time(result, deps, report)
profiles.append(
TaskProfile(
name=name,
status=result.status,
duration=duration,
attempts=result.attempts,
wait_time=wait_time,
is_on_critical_path=False, # 后续标记
deps=deps,
)
)
return tuple(profiles)
@staticmethod
def _calc_wait_time(
result: TaskResult[Any],
deps: tuple[str, ...],
report: RunReport,
) -> float:
"""计算等待时间:从最早依赖完成到本任务开始。
无硬依赖、SKIPPED 任务或时间戳缺失时返回 0.0。
"""
if not deps or result.started_at is None or result.status == TaskStatus.SKIPPED:
return 0.0
# 找出所有已完成依赖的最晚完成时间
dep_end_times: list[datetime] = []
for dep in deps:
dep_result = report.results.get(dep)
if dep_result is not None and dep_result.finished_at is not None:
dep_end_times.append(dep_result.finished_at)
if not dep_end_times:
return 0.0
latest_dep_end = max(dep_end_times)
delta = (result.started_at - latest_dep_end).total_seconds()
return max(0.0, delta)
@staticmethod
def _calc_total_duration(report: RunReport) -> float:
"""计算 wall-clock 总耗时:最早开始到最晚结束。"""
starts: list[datetime] = []
ends: list[datetime] = []
for r in report.results.values():
if r.started_at is not None:
starts.append(r.started_at)
if r.finished_at is not None:
ends.append(r.finished_at)
if not starts or not ends:
return 0.0
return (max(ends) - min(starts)).total_seconds()
@staticmethod
def _calc_critical_path(graph: Graph, report: RunReport) -> tuple[tuple[str, ...], float]:
"""计算关键路径:DAG 最长路径(按实际执行耗时)。
使用拓扑排序 + 动态规划,O(V+E)。SKIPPED 任务耗时按 0 计。
"""
# 构建耗时映射
durations: dict[str, float] = {}
for name, result in report.results.items():
durations[name] = result.duration or 0.0
# 拓扑序(使用 graph.layers 保证与分层一致)
try:
layers = graph.layers()
except Exception:
# 图校验失败时回退为空
return (), 0.0
# earliest_finish[name] = duration[name] + max(earliest_finish[dep] for dep in deps)
earliest_finish: dict[str, float] = {}
predecessor: dict[str, str | None] = {}
for layer in layers:
for name in layer:
spec = graph.specs.get(name)
deps = spec.depends_on if spec is not None else ()
if not deps:
earliest_finish[name] = durations.get(name, 0.0)
predecessor[name] = None
else:
best_dep: str | None = None
best_ef = 0.0
for dep in deps:
ef = earliest_finish.get(dep, 0.0)
if ef >= best_ef:
best_ef = ef
best_dep = dep
earliest_finish[name] = best_ef + durations.get(name, 0.0)
predecessor[name] = best_dep
if not earliest_finish:
return (), 0.0
# 找到 earliest_finish 最大的节点作为终点
end_node = max(earliest_finish, key=lambda n: earliest_finish[n])
total = earliest_finish[end_node]
# 回溯关键路径
path: list[str] = []
node: str | None = end_node
while node is not None:
path.append(node)
node = predecessor.get(node)
path.reverse()
return tuple(path), total
@staticmethod
def _calc_parallelism(report: RunReport) -> tuple[float, int]:
"""计算平均并行度与峰值并行度。
基于时间线扫描:将每个任务的 [started_at, finished_at] 区间
转为事件点(+1/-1),排序后扫描得到瞬时并行度序列。
返回 (avg_parallelism, peak_parallelism)。
无有效时间戳时返回 (0.0, 0)。
"""
events: list[tuple[float, int]] = [] # (timestamp, delta)
for r in report.results.values():
if r.started_at is None or r.finished_at is None:
continue
if r.status == TaskStatus.SKIPPED:
continue
start_ts = r.started_at.timestamp()
end_ts = r.finished_at.timestamp()
if end_ts <= start_ts:
continue
events.append((start_ts, 1))
events.append((end_ts, -1))
if not events:
return 0.0, 0
# 排序:同一时间点先处理结束(-1)再处理开始(+1),避免虚假峰值
events.sort(key=lambda e: (e[0], e[1]))
current = 0
peak = 0
# 加权面积用于计算平均并行度
area = 0.0
prev_ts = events[0][0]
for ts, delta in events:
if ts > prev_ts:
area += current * (ts - prev_ts)
current += delta
peak = max(peak, current)
prev_ts = ts
total_span = events[-1][0] - events[0][0]
avg = area / total_span if total_span > 0 else 0.0
return avg, peak
# ------------------------------------------------------------------ #
# 查询
# ------------------------------------------------------------------ #
def task(self, name: str) -> TaskProfile:
"""返回指定任务的剖面。不存在则 ``KeyError``。"""
for t in self.tasks:
if t.name == name:
return t
raise KeyError(name)
def top_bottlenecks(self, n: int = 5) -> tuple[TaskProfile, ...]:
"""返回耗时最长的 Top-N 任务(按 duration 降序)。
参数
----
n:
返回数量。``n <= 0`` 返回空元组。
"""
if n <= 0:
return ()
return tuple(sorted(self.tasks, key=lambda t: t.duration, reverse=True)[:n])
def critical_tasks(self) -> tuple[TaskProfile, ...]:
"""返回关键路径上的所有任务(按路径顺序)。"""
critical_set = set(self.critical_path)
# 保持关键路径顺序
order = {name: i for i, name in enumerate(self.critical_path)}
return tuple(sorted((t for t in self.tasks if t.name in critical_set), key=lambda t: order[t.name]))
def failed_tasks(self) -> tuple[TaskProfile, ...]:
"""返回 FAILED 状态的任务。"""
return tuple(t for t in self.tasks if t.status == TaskStatus.FAILED)
def skipped_tasks(self) -> tuple[TaskProfile, ...]:
"""返回 SKIPPED 状态的任务。"""
return tuple(t for t in self.tasks if t.status == TaskStatus.SKIPPED)
# ------------------------------------------------------------------ #
# 输出
# ------------------------------------------------------------------ #
def to_dict(self) -> dict[str, Any]:
"""转为 JSON 友好的字典。"""
return {
"tasks": [t.to_dict() for t in self.tasks],
"total_duration_seconds": round(self.total_duration, 6),
"critical_path_duration_seconds": round(self.critical_path_duration, 6),
"critical_path": list(self.critical_path),
"avg_parallelism": round(self.avg_parallelism, 4),
"peak_parallelism": self.peak_parallelism,
"parallelism_efficiency": round(self.parallelism_efficiency, 4),
"bottlenecks": [t.to_dict() for t in self.top_bottlenecks(5)],
}
def describe(self) -> str:
"""人类可读的多行性能报告。"""
lines: list[str] = []
lines.append("=" * 70)
lines.append("PyFlowX 性能剖面报告")
lines.append("=" * 70)
lines.append("")
lines.append("【图级指标】")
lines.append(f" 总耗时 (wall-clock): {self.total_duration:.3f}s")
lines.append(f" 关键路径耗时: {self.critical_path_duration:.3f}s")
lines.append(f" 平均并行度: {self.avg_parallelism:.2f}")
lines.append(f" 峰值并行度: {self.peak_parallelism}")
lines.append(f" 并行度效率: {self.parallelism_efficiency:.2%}")
lines.append(f" 任务总数: {len(self.tasks)}")
lines.append("")
# 关键路径
lines.append("【关键路径】")
if self.critical_path:
lines.append(f" {' -> '.join(self.critical_path)}")
else:
lines.append(" (无)")
lines.append("")
# Top 瓶颈
bottlenecks = self.top_bottlenecks(5)
lines.append(f"【Top {len(bottlenecks)} 瓶颈任务】")
if bottlenecks:
lines.append(f" {'任务':<30} {'耗时':>10} {'等待':>10} {'尝试':>6} {'关键路径':>8} {'状态':>8}")
lines.append(f" {'-' * 30} {'-' * 10} {'-' * 10} {'-' * 6} {'-' * 8} {'-' * 8}")
for t in bottlenecks:
critical_flag = "" if t.is_on_critical_path else ""
lines.append(
f" {t.name:<30} {t.duration:>9.3f}s {t.wait_time:>9.3f}s {t.attempts:>6} "
f"{critical_flag:>8} {t.status.value:>8}",
)
else:
lines.append(" (无)")
lines.append("")
# 全部任务详情
lines.append("【全部任务】")
if self.tasks:
lines.append(f" {'任务':<30} {'耗时':>10} {'等待':>10} {'尝试':>6} {'关键路径':>8} {'状态':>8}")
lines.append(f" {'-' * 30} {'-' * 10} {'-' * 10} {'-' * 6} {'-' * 8} {'-' * 8}")
for t in self.tasks:
critical_flag = "" if t.is_on_critical_path else ""
lines.append(
f" {t.name:<30} {t.duration:>9.3f}s {t.wait_time:>9.3f}s {t.attempts:>6} "
f"{critical_flag:>8} {t.status.value:>8}",
)
else:
lines.append(" (无)")
lines.append("")
lines.append("=" * 70)
return "\n".join(lines)
def __repr__(self) -> str:
return (
f"ProfileReport(tasks={len(self.tasks)}, "
f"total={self.total_duration:.3f}s, "
f"critical={self.critical_path_duration:.3f}s, "
f"avg_par={self.avg_parallelism:.2f}, "
f"peak_par={self.peak_parallelism})"
)
+567
View File
@@ -0,0 +1,567 @@
"""性能剖面(ProfileReport)测试.
覆盖策略:
* 构造带时间戳的 RunReport + Graph,验证关键路径、并行度、瓶颈排序。
* 边界场景:空报告、单任务、无时间戳、SKIPPED 任务、图校验失败。
* 输出格式:to_dict / describe / top_bottlenecks / critical_tasks。
"""
from __future__ import annotations
from datetime import datetime, timedelta
from typing import Any
import pyflowx as px
from pyflowx.profiling import ProfileReport, TaskProfile
from pyflowx.task import TaskResult, TaskSpec, TaskStatus
def _fn() -> int:
return 1
def _spec(name: str, deps: tuple[str, ...] = ()) -> TaskSpec[Any]:
return TaskSpec[Any](name, _fn, depends_on=deps)
def _result(
name: str,
start: datetime,
duration: float,
*,
status: TaskStatus = TaskStatus.SUCCESS,
attempts: int = 1,
) -> TaskResult[Any]:
"""构造带时间戳的 TaskResult."""
end = start + timedelta(seconds=duration) if duration > 0 else start
return TaskResult[Any](
spec=_spec(name),
status=status,
value=None,
attempts=attempts,
started_at=start if duration > 0 or status != TaskStatus.SKIPPED else None,
finished_at=end if duration > 0 or status != TaskStatus.SKIPPED else None,
)
def _skipped_result(name: str, reason: str = "skip") -> TaskResult[Any]:
"""构造 SKIPPED 结果(无时间戳)."""
return TaskResult[Any](
spec=_spec(name),
status=TaskStatus.SKIPPED,
reason=reason,
)
class TestProfileReportConstruction:
"""测试 ProfileReport 构建."""
def test_empty_report(self) -> None:
"""空报告应产生空剖面."""
report = px.RunReport()
graph = px.Graph()
profile = ProfileReport.from_report(report, graph)
assert len(profile.tasks) == 0
assert profile.total_duration == 0.0
assert profile.critical_path == ()
assert profile.critical_path_duration == 0.0
assert profile.avg_parallelism == 0.0
assert profile.peak_parallelism == 0
def test_single_task(self) -> None:
"""单任务:关键路径就是它自己,并行度为 1."""
start = datetime(2024, 1, 1, 0, 0, 0)
report = px.RunReport()
report.results["a"] = _result("a", start, 1.5)
graph = px.Graph.from_specs([_spec("a")])
profile = ProfileReport.from_report(report, graph)
assert len(profile.tasks) == 1
assert profile.tasks[0].name == "a"
assert profile.tasks[0].duration == 1.5
assert profile.tasks[0].is_on_critical_path
assert profile.total_duration == 1.5
assert profile.critical_path == ("a",)
assert profile.critical_path_duration == 1.5
assert profile.avg_parallelism == 1.0
assert profile.peak_parallelism == 1
assert profile.parallelism_efficiency == 1.0
def test_serial_chain(self) -> None:
"""串行链 a -> b -> c:关键路径为全部,效率 100%."""
start = datetime(2024, 1, 1, 0, 0, 0)
report = px.RunReport()
report.results["a"] = _result("a", start, 1.0)
report.results["b"] = _result("b", start + timedelta(seconds=1), 2.0)
report.results["c"] = _result("c", start + timedelta(seconds=3), 1.5)
graph = px.Graph.from_specs([
_spec("a"),
_spec("b", deps=("a",)),
_spec("c", deps=("b",)),
])
profile = ProfileReport.from_report(report, graph)
assert profile.total_duration == 4.5
assert profile.critical_path_duration == 4.5
assert profile.critical_path == ("a", "b", "c")
assert profile.parallelism_efficiency == 1.0
assert profile.peak_parallelism == 1
assert profile.avg_parallelism == 1.0
def test_parallel_tasks(self) -> None:
"""并行任务 a, b 同时执行:关键路径取较长者,效率 < 1."""
start = datetime(2024, 1, 1, 0, 0, 0)
report = px.RunReport()
report.results["a"] = _result("a", start, 1.0)
report.results["b"] = _result("b", start, 2.0)
graph = px.Graph.from_specs([_spec("a"), _spec("b")])
profile = ProfileReport.from_report(report, graph)
# wall-clock = 2.0, 关键路径 = 2.0 (b), 效率 = 1.0
# 因为关键路径定义就是最长路径,与 wall-clock 相同
assert profile.total_duration == 2.0
assert profile.critical_path_duration == 2.0
assert profile.critical_path == ("b",)
assert profile.peak_parallelism == 2
# 平均并行度 = (1.0 + 2.0) / 2.0 = 1.5
assert profile.avg_parallelism == 1.5
def test_parallel_with_join(self) -> None:
"""a, b 并行后 join 到 c:关键路径 a->c 或 b->c."""
start = datetime(2024, 1, 1, 0, 0, 0)
report = px.RunReport()
report.results["a"] = _result("a", start, 1.0)
report.results["b"] = _result("b", start, 3.0)
report.results["c"] = _result("c", start + timedelta(seconds=3), 1.0)
graph = px.Graph.from_specs([
_spec("a"),
_spec("b"),
_spec("c", deps=("a", "b")),
])
profile = ProfileReport.from_report(report, graph)
# 关键路径 = b -> c (3 + 1 = 4)
assert profile.critical_path_duration == 4.0
assert profile.critical_path == ("b", "c")
assert profile.tasks[0].is_on_critical_path is False # a 不在关键路径
# task("b") 在关键路径上
assert profile.task("b").is_on_critical_path
assert profile.task("c").is_on_critical_path
def test_skipped_task_no_timestamp(self) -> None:
"""SKIPPED 任务无时间戳:不影响并行度计算."""
start = datetime(2024, 1, 1, 0, 0, 0)
report = px.RunReport()
report.results["a"] = _result("a", start, 1.0)
report.results["b"] = _skipped_result("b")
graph = px.Graph.from_specs([_spec("a"), _spec("b")])
profile = ProfileReport.from_report(report, graph)
# b 是 SKIPPEDduration=0
assert profile.task("b").status == TaskStatus.SKIPPED
assert profile.task("b").duration == 0.0
assert profile.peak_parallelism == 1 # 只有 a 在跑
class TestWaitTime:
"""测试等待时间计算."""
def test_no_deps_zero_wait(self) -> None:
"""无依赖任务等待时间为 0."""
start = datetime(2024, 1, 1, 0, 0, 0)
report = px.RunReport()
report.results["a"] = _result("a", start, 1.0)
graph = px.Graph.from_specs([_spec("a")])
profile = ProfileReport.from_report(report, graph)
assert profile.task("a").wait_time == 0.0
def test_wait_after_dep_completes(self) -> None:
"""b 在 a 完成后等待 0.5s 才开始."""
start = datetime(2024, 1, 1, 0, 0, 0)
report = px.RunReport()
report.results["a"] = _result("a", start, 1.0)
report.results["b"] = _result("b", start + timedelta(seconds=1.5), 1.0)
graph = px.Graph.from_specs([
_spec("a"),
_spec("b", deps=("a",)),
])
profile = ProfileReport.from_report(report, graph)
assert profile.task("b").wait_time == 0.5
def test_wait_negative_clamped_to_zero(self) -> None:
"""b 在 a 完成前就开始(异常情况)应钳制为 0."""
start = datetime(2024, 1, 1, 0, 0, 0)
report = px.RunReport()
report.results["a"] = _result("a", start, 2.0)
# b 在 a 还没完成时就开始(不应该但可能发生)
report.results["b"] = _result("b", start + timedelta(seconds=1), 1.0)
graph = px.Graph.from_specs([
_spec("a"),
_spec("b", deps=("a",)),
])
profile = ProfileReport.from_report(report, graph)
# a 在 t=2 结束,b 在 t=1 开始,delta = -1,钳制为 0
assert profile.task("b").wait_time == 0.0
def test_skipped_task_zero_wait(self) -> None:
"""SKIPPED 任务等待时间为 0."""
start = datetime(2024, 1, 1, 0, 0, 0)
report = px.RunReport()
report.results["a"] = _result("a", start, 1.0)
report.results["b"] = _skipped_result("b")
graph = px.Graph.from_specs([
_spec("a"),
_spec("b", deps=("a",)),
])
profile = ProfileReport.from_report(report, graph)
assert profile.task("b").wait_time == 0.0
class TestCriticalPath:
"""测试关键路径分析."""
def test_diamond_dependency(self) -> None:
"""菱形依赖:a -> b -> d, a -> c -> d,关键路径取较长分支."""
start = datetime(2024, 1, 1, 0, 0, 0)
report = px.RunReport()
report.results["a"] = _result("a", start, 1.0)
report.results["b"] = _result("b", start + timedelta(seconds=1), 3.0)
report.results["c"] = _result("c", start + timedelta(seconds=1), 1.0)
report.results["d"] = _result("d", start + timedelta(seconds=4), 1.0)
graph = px.Graph.from_specs([
_spec("a"),
_spec("b", deps=("a",)),
_spec("c", deps=("a",)),
_spec("d", deps=("b", "c")),
])
profile = ProfileReport.from_report(report, graph)
# 关键路径:a -> b -> d = 1 + 3 + 1 = 5
assert profile.critical_path_duration == 5.0
assert profile.critical_path == ("a", "b", "d")
def test_graph_validation_failure_returns_empty(self) -> None:
"""图校验失败(有环)应回退为空关键路径."""
start = datetime(2024, 1, 1, 0, 0, 0)
report = px.RunReport()
report.results["a"] = _result("a", start, 1.0)
# 手动构造带环的图(绕过校验)
graph = px.Graph()
graph.specs["a"] = _spec("a", deps=("b",))
graph.specs["b"] = _spec("b", deps=("a",))
graph.deps["a"] = ("b",)
graph.deps["b"] = ("a",)
profile = ProfileReport.from_report(report, graph)
# layers() 抛 CycleError,回退为空
assert profile.critical_path == ()
assert profile.critical_path_duration == 0.0
class TestParallelism:
"""测试并行度计算."""
def test_no_timestamps_zero_parallelism(self) -> None:
"""所有任务无时间戳:并行度为 0."""
report = px.RunReport()
report.results["a"] = TaskResult[Any](spec=_spec("a"), status=TaskStatus.SUCCESS)
graph = px.Graph.from_specs([_spec("a")])
profile = ProfileReport.from_report(report, graph)
assert profile.avg_parallelism == 0.0
assert profile.peak_parallelism == 0
def test_zero_duration_excluded(self) -> None:
"""零耗时任务(end <= start)不参与并行度计算."""
start = datetime(2024, 1, 1, 0, 0, 0)
report = px.RunReport()
report.results["a"] = _result("a", start, 0.0) # 零耗时
report.results["b"] = _result("b", start, 1.0)
graph = px.Graph.from_specs([_spec("a"), _spec("b")])
profile = ProfileReport.from_report(report, graph)
# 只有 b 参与,峰值 = 1
assert profile.peak_parallelism == 1
def test_skipped_with_timestamps_excluded(self) -> None:
"""SKIPPED 任务即使带时间戳也不参与并行度计算."""
start = datetime(2024, 1, 1, 0, 0, 0)
report = px.RunReport()
# SKIPPED 但带时间戳(异常但可能发生)
report.results["a"] = _result("a", start, 1.0, status=TaskStatus.SKIPPED)
report.results["b"] = _result("b", start, 1.0)
graph = px.Graph.from_specs([_spec("a"), _spec("b")])
profile = ProfileReport.from_report(report, graph)
# a 是 SKIPPED,被排除;只有 b 参与
assert profile.peak_parallelism == 1
def test_peak_parallelism_three_tasks(self) -> None:
"""三个任务完全重叠:峰值并行度 = 3."""
start = datetime(2024, 1, 1, 0, 0, 0)
report = px.RunReport()
report.results["a"] = _result("a", start, 3.0)
report.results["b"] = _result("b", start, 3.0)
report.results["c"] = _result("c", start, 3.0)
graph = px.Graph.from_specs([_spec("a"), _spec("b"), _spec("c")])
profile = ProfileReport.from_report(report, graph)
assert profile.peak_parallelism == 3
assert profile.avg_parallelism == 3.0
class TestQueries:
"""测试查询方法."""
def test_task_lookup(self) -> None:
"""task(name) 应返回对应剖面."""
start = datetime(2024, 1, 1, 0, 0, 0)
report = px.RunReport()
report.results["a"] = _result("a", start, 1.0)
report.results["b"] = _result("b", start, 2.0)
graph = px.Graph.from_specs([_spec("a"), _spec("b")])
profile = ProfileReport.from_report(report, graph)
assert profile.task("a").name == "a"
assert profile.task("b").duration == 2.0
def test_task_lookup_not_found(self) -> None:
"""task(name) 不存在应抛 KeyError."""
report = px.RunReport()
graph = px.Graph()
profile = ProfileReport.from_report(report, graph)
try:
profile.task("missing")
except KeyError:
pass
else:
raise AssertionError("应抛出 KeyError")
def test_top_bottlenecks(self) -> None:
"""top_bottlenecks 应按耗时降序返回."""
start = datetime(2024, 1, 1, 0, 0, 0)
report = px.RunReport()
report.results["a"] = _result("a", start, 1.0)
report.results["b"] = _result("b", start, 3.0)
report.results["c"] = _result("c", start, 2.0)
graph = px.Graph.from_specs([_spec("a"), _spec("b"), _spec("c")])
profile = ProfileReport.from_report(report, graph)
top3 = profile.top_bottlenecks(3)
assert len(top3) == 3
assert top3[0].name == "b"
assert top3[1].name == "c"
assert top3[2].name == "a"
def test_top_bottlenecks_zero_or_negative(self) -> None:
"""n <= 0 应返回空元组."""
start = datetime(2024, 1, 1, 0, 0, 0)
report = px.RunReport()
report.results["a"] = _result("a", start, 1.0)
graph = px.Graph.from_specs([_spec("a")])
profile = ProfileReport.from_report(report, graph)
assert profile.top_bottlenecks(0) == ()
assert profile.top_bottlenecks(-1) == ()
def test_critical_tasks(self) -> None:
"""critical_tasks 应返回关键路径上的任务(按路径顺序)."""
start = datetime(2024, 1, 1, 0, 0, 0)
report = px.RunReport()
report.results["a"] = _result("a", start, 1.0)
report.results["b"] = _result("b", start + timedelta(seconds=1), 3.0)
report.results["c"] = _result("c", start + timedelta(seconds=1), 1.0)
report.results["d"] = _result("d", start + timedelta(seconds=4), 1.0)
graph = px.Graph.from_specs([
_spec("a"),
_spec("b", deps=("a",)),
_spec("c", deps=("a",)),
_spec("d", deps=("b", "c")),
])
profile = ProfileReport.from_report(report, graph)
# 关键路径 a -> b -> d
critical = profile.critical_tasks()
assert len(critical) == 3
assert [t.name for t in critical] == ["a", "b", "d"]
def test_failed_tasks(self) -> None:
"""failed_tasks 应返回 FAILED 状态的任务."""
start = datetime(2024, 1, 1, 0, 0, 0)
report = px.RunReport()
report.results["a"] = _result("a", start, 1.0, status=TaskStatus.FAILED)
report.results["b"] = _result("b", start, 1.0)
graph = px.Graph.from_specs([_spec("a"), _spec("b")])
profile = ProfileReport.from_report(report, graph)
failed = profile.failed_tasks()
assert len(failed) == 1
assert failed[0].name == "a"
def test_skipped_tasks(self) -> None:
"""skipped_tasks 应返回 SKIPPED 状态的任务."""
start = datetime(2024, 1, 1, 0, 0, 0)
report = px.RunReport()
report.results["a"] = _result("a", start, 1.0)
report.results["b"] = _skipped_result("b")
graph = px.Graph.from_specs([_spec("a"), _spec("b")])
profile = ProfileReport.from_report(report, graph)
skipped = profile.skipped_tasks()
assert len(skipped) == 1
assert skipped[0].name == "b"
class TestOutputFormats:
"""测试输出格式."""
def test_to_dict_structure(self) -> None:
"""to_dict 应返回包含所有字段的字典."""
start = datetime(2024, 1, 1, 0, 0, 0)
report = px.RunReport()
report.results["a"] = _result("a", start, 1.5)
graph = px.Graph.from_specs([_spec("a")])
profile = ProfileReport.from_report(report, graph)
d = profile.to_dict()
assert "tasks" in d
assert "total_duration_seconds" in d
assert "critical_path_duration_seconds" in d
assert "critical_path" in d
assert "avg_parallelism" in d
assert "peak_parallelism" in d
assert "parallelism_efficiency" in d
assert "bottlenecks" in d
assert len(d["tasks"]) == 1
assert d["tasks"][0]["name"] == "a"
assert d["tasks"][0]["status"] == "success"
assert d["tasks"][0]["duration_seconds"] == 1.5
assert d["tasks"][0]["is_on_critical_path"] is True
def test_describe_contains_key_sections(self) -> None:
"""describe 应包含关键章节标题."""
start = datetime(2024, 1, 1, 0, 0, 0)
report = px.RunReport()
report.results["a"] = _result("a", start, 1.0)
graph = px.Graph.from_specs([_spec("a")])
profile = ProfileReport.from_report(report, graph)
text = profile.describe()
assert "PyFlowX 性能剖面报告" in text
assert "【图级指标】" in text
assert "【关键路径】" in text
assert "【Top" in text
assert "【全部任务】" in text
assert "a" in text
def test_describe_empty_report(self) -> None:
"""空报告的 describe 应不崩溃且包含章节标题."""
report = px.RunReport()
graph = px.Graph()
profile = ProfileReport.from_report(report, graph)
text = profile.describe()
assert "【图级指标】" in text
assert "(无)" in text
def test_repr(self) -> None:
"""__repr__ 应包含关键指标."""
start = datetime(2024, 1, 1, 0, 0, 0)
report = px.RunReport()
report.results["a"] = _result("a", start, 1.0)
graph = px.Graph.from_specs([_spec("a")])
profile = ProfileReport.from_report(report, graph)
r = repr(profile)
assert "ProfileReport" in r
assert "tasks=1" in r
assert "total=1.000s" in r
def test_task_profile_to_dict(self) -> None:
"""TaskProfile.to_dict 应返回正确字段."""
tp = TaskProfile(
name="x",
status=TaskStatus.SUCCESS,
duration=1.5,
attempts=2,
wait_time=0.3,
is_on_critical_path=True,
deps=("a", "b"),
)
d = tp.to_dict()
assert d["name"] == "x"
assert d["status"] == "success"
assert d["duration_seconds"] == 1.5
assert d["attempts"] == 2
assert d["wait_time_seconds"] == 0.3
assert d["is_on_critical_path"] is True
assert d["deps"] == ["a", "b"]
class TestIntegrationWithRun:
"""与真实 run() 集成测试."""
def test_profile_from_real_run(self) -> None:
"""从真实 run() 结果构建剖面."""
graph = px.Graph.from_specs([
px.TaskSpec("a", lambda: 1),
px.TaskSpec("b", lambda: 2, depends_on=("a",)),
px.TaskSpec("c", lambda: 3, depends_on=("a",)),
])
report = px.run(graph, strategy="sequential")
profile = ProfileReport.from_report(report, graph)
assert len(profile.tasks) == 3
assert profile.critical_path_duration > 0
# sequential 策略下并行度应为 1
assert profile.peak_parallelism == 1
def test_profile_from_thread_run(self) -> None:
"""从 thread 策略 run() 结果构建剖面,验证并行度 > 1."""
import time
def slow() -> int:
time.sleep(0.05)
return 1
graph = px.Graph.from_specs([
px.TaskSpec("a", slow),
px.TaskSpec("b", slow),
px.TaskSpec("c", slow),
])
report = px.run(graph, strategy="thread", max_workers=3)
profile = ProfileReport.from_report(report, graph)
# 三个任务并行,峰值应 >= 2(可能因调度时机不到 3)
assert profile.peak_parallelism >= 2
assert profile.critical_path_duration > 0