Compare commits
198 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| c55a37173a | |||
| 960b8672f4 | |||
| 4fd1d70b58 | |||
| 6fb9223066 | |||
| 1f7127357e | |||
| 58ee84ded6 | |||
| 9a96e5d052 | |||
| c9c7529c58 | |||
| c498d9b1c9 | |||
| b36e279f92 | |||
| 58d6f1faad | |||
| d93da0d8b4 | |||
| 701c455c42 | |||
| e174b64495 | |||
| 3afb25bb5e | |||
| fbd17536fd | |||
| 32ca8c1208 | |||
| a7ff68d279 | |||
| de368ea810 | |||
| 6a3e3a57cd | |||
| 7089944306 | |||
| ec5e348694 | |||
| 12d9f2f647 | |||
| 6ffcbecade | |||
| e76d93187b | |||
| 52e20e3f93 | |||
| 3f966a230e | |||
| 5d0b211a44 | |||
| 6931f36fd1 | |||
| db02443463 | |||
| eb8e1402bc | |||
| c93f45dcb8 | |||
| a0b1814024 | |||
| 3a2826d3f9 | |||
| dbd30689ab | |||
| 5eb59b8a66 | |||
| 8e7b866de2 | |||
| 1b4f9bfa6a | |||
| 2d39272330 | |||
| f699bb9167 | |||
| 35f07e96e1 | |||
| 1f274fe828 | |||
| 85793ff9d5 | |||
| 37ac4b8025 | |||
| 0edeadb846 | |||
| f63db6c71a | |||
| 4d397606e6 | |||
| f24388b151 | |||
| 535b7cba31 | |||
| 3f68bed3fd | |||
| 2e2ca812a1 | |||
| 8de565d0cb | |||
| 5480c48e67 | |||
| c6653d5117 | |||
| d194a991a0 | |||
| 4446658170 | |||
| 1d26f9d3e7 | |||
| d9644ca5d1 | |||
| d3c2d53449 | |||
| 9cfcfb38e4 | |||
| 69db241611 | |||
| 66e6295a24 | |||
| aebb4fce68 | |||
| 7784c8ff86 | |||
| 77918a5568 | |||
| 7e4c615dc7 | |||
| ac5082523e | |||
| 0df6f7c8ac | |||
| 4b66176ce6 | |||
| cf6b6fd059 | |||
| 6f93e6eb6d | |||
| 43e1aad1fe | |||
| 467634f8c7 | |||
| ce31f60441 | |||
| 3d6d769685 | |||
| 3f9c52e6f1 | |||
| 8fadf6edd8 | |||
| abc1152538 | |||
| 5e561b4b3a | |||
| 40f641611b | |||
| 232e7293d9 | |||
| a1bae58e56 | |||
| cbc7cc0a75 | |||
| d0ff7d7b4d | |||
| d154f67ce0 | |||
| 9999071119 | |||
| bdd70e9c43 | |||
| c15b38516a | |||
| 7d4e8a40ce | |||
| 1b2d6d6a2c | |||
| df890f0f16 | |||
| b62a544569 | |||
| d58fc5536e | |||
| c3b86b603d | |||
| 327bd6e069 | |||
| 22f8d2110d | |||
| 2a1f2f7175 | |||
| 9d033e1c0b | |||
| 336f7b7292 | |||
| 65dcbcbf62 | |||
| 7fa97a01e3 | |||
| 83da5135d0 | |||
| 7463a60649 | |||
| 87dd010342 | |||
| bdfee7bee4 | |||
| b954fb1622 | |||
| a7b7a82dff | |||
| 40f0478146 | |||
| b808b880f8 | |||
| e073ff41ee | |||
| ea0c51de5e | |||
| 2b3f4b82d3 | |||
| 1e23c48efc | |||
| 5c8ec281ff | |||
| 6f01cde8ac | |||
| bcd189ae60 | |||
| 20c4fb87c5 | |||
| a98eb6e344 | |||
| 752ff618b2 | |||
| f15f235ecf | |||
| 9d79cddbd6 | |||
| af9aab395a | |||
| 6f334fde73 | |||
| 2ccd84ac3b | |||
| ec30af3edb | |||
| 10bbc07118 | |||
| 194cf3c343 | |||
| 1880cd7a34 | |||
| d43c9e4044 | |||
| 22ac9fc4dd | |||
| 7ded8df05e | |||
| fd282db28f | |||
| 6f64d9d6dc | |||
| a2889fbb08 | |||
| 024b597e44 | |||
| 1eb7942aa9 | |||
| 9285ae3782 | |||
| a88797f410 | |||
| b047b05aaf | |||
| 78a274ce5b | |||
| ab8faec863 | |||
| 936a009212 | |||
| f10f8d09a6 | |||
| 0d6a78f320 | |||
| c9a4192c85 | |||
| 0afdb54e5c | |||
| 9e99a1f1ba | |||
| 50575c6e91 | |||
| f8436f6b8c | |||
| 5c0f51e272 | |||
| 4e3622ef02 | |||
| f69ddc5133 | |||
| 477d901281 | |||
| 0df795237d | |||
| 413ab40044 | |||
| d4a1a5c2de | |||
| 843e9369fe | |||
| 48f6d8a7f0 | |||
| 0b97846d77 | |||
| 50e74180a2 | |||
| 71e6ba316a | |||
| 707e2ac07c | |||
| 983d47bd2e | |||
| 9cc91d1153 | |||
| 2f3041c169 | |||
| 6a004a54b9 | |||
| 2d0873af45 | |||
| 4cc21be562 | |||
| 98cf3b54a1 | |||
| af8a074484 | |||
| ff1122cb68 | |||
| cbc02c5aee | |||
| c8e9354e87 | |||
| 1ecff5fdf7 | |||
| c856c9b6a6 | |||
| ea591d1088 | |||
| cae51856d2 | |||
| be03662e4c | |||
| db18ca4978 | |||
| 7de55614a6 | |||
| 939cd724ec | |||
| 5ddfe8510c | |||
| cd38e1246a | |||
| febcd90a31 | |||
| 58bafd48cc | |||
| 179e5b3811 | |||
| 4884fd53e5 | |||
| 60083bcb6e | |||
| 56c018e72e | |||
| 22ae4b0084 | |||
| 08eb743ea9 | |||
| c06d0284c4 | |||
| 6cc693d15f | |||
| 13f6110b18 | |||
| 6d4b5e4a1f | |||
| e00868e3b1 | |||
| 4de55336f1 | |||
| fad964b370 |
@@ -0,0 +1,46 @@
|
||||
# 版本控制
|
||||
.git
|
||||
.gitignore
|
||||
.github
|
||||
|
||||
# Python 缓存与构建产物
|
||||
__pycache__
|
||||
*.pyc
|
||||
*.pyo
|
||||
*.pyd
|
||||
*.egg-info
|
||||
*.egg
|
||||
dist
|
||||
build
|
||||
.eggs
|
||||
|
||||
# 测试与覆盖率
|
||||
.pytest_cache
|
||||
.coverage
|
||||
htmlcov
|
||||
.tox
|
||||
coverage.xml
|
||||
|
||||
# 虚拟环境
|
||||
.venv
|
||||
venv
|
||||
env
|
||||
|
||||
# 工具缓存
|
||||
.uv-cache
|
||||
.ruff_cache
|
||||
.pyrefly_cache
|
||||
.mypy_cache
|
||||
|
||||
# IDE 与编辑器
|
||||
.idea
|
||||
.vscode
|
||||
*.swp
|
||||
*.swo
|
||||
|
||||
# 文档(按需保留)
|
||||
docs
|
||||
|
||||
# 系统文件
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
+19
-123
@@ -2,137 +2,33 @@ name: CI
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main, develop]
|
||||
pull_request:
|
||||
branches: [main, develop]
|
||||
workflow_dispatch:
|
||||
branches: [ main, develop ]
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
# ─────────────────────────────────────────────────────────────
|
||||
# lint:代码风格与格式检查(单平台即可)
|
||||
# ─────────────────────────────────────────────────────────────
|
||||
lint:
|
||||
name: Lint (ruff)
|
||||
ci:
|
||||
name: Lint, Typecheck & Test
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
image: pyflowx-ci:latest
|
||||
env:
|
||||
UV_LINK_MODE: copy
|
||||
# ---- 国内源 ----
|
||||
PIP_INDEX_URL: https://pypi.tuna.tsinghua.edu.cn/simple
|
||||
PIP_TRUSTED_HOST: pypi.tuna.tsinghua.edu.cn
|
||||
UV_INDEX_URL: https://pypi.tuna.tsinghua.edu.cn/simple
|
||||
UV_TRUSTED_HOST: pypi.tuna.tsinghua.edu.cn
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
- uses: http://gitea:3000/zhou/checkout.git@main
|
||||
|
||||
- name: 安装 uv
|
||||
uses: astral-sh/setup-uv@v5
|
||||
with:
|
||||
version: latest
|
||||
enable-cache: true
|
||||
cache-dependency-glob: uv.lock
|
||||
- name: Sync dependencies
|
||||
run: uv sync --frozen
|
||||
|
||||
- name: 设置 Python 3.13
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.13'
|
||||
- name: Ruff check
|
||||
run: ruff check src tests
|
||||
|
||||
- name: 安装依赖
|
||||
run: uv sync --extra dev --frozen
|
||||
|
||||
- name: Ruff 检查
|
||||
run: uv run ruff check src tests examples
|
||||
|
||||
- name: Ruff 格式检查
|
||||
run: uv run ruff format --check src tests examples
|
||||
|
||||
# ─────────────────────────────────────────────────────────────
|
||||
# typecheck:mypy 严格类型检查
|
||||
# ─────────────────────────────────────────────────────────────
|
||||
typecheck:
|
||||
name: Typecheck (mypy)
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: 安装 uv
|
||||
uses: astral-sh/setup-uv@v5
|
||||
with:
|
||||
version: latest
|
||||
enable-cache: true
|
||||
cache-dependency-glob: uv.lock
|
||||
|
||||
- name: 设置 Python 3.13
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.13'
|
||||
|
||||
- name: 安装依赖
|
||||
run: uv sync --extra dev --frozen
|
||||
|
||||
- name: Mypy 严格类型检查
|
||||
run: uv run mypy
|
||||
|
||||
# ─────────────────────────────────────────────────────────────
|
||||
# test:多平台 × 多 Python 版本矩阵测试 + 覆盖率
|
||||
# ─────────────────────────────────────────────────────────────
|
||||
test:
|
||||
name: Test (${{ matrix.os }} / py${{ matrix.python-version }})
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [ubuntu-latest, windows-latest, macos-latest]
|
||||
python-version: ['3.8', '3.9', '3.10', '3.11', '3.12', '3.13']
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: 安装 uv
|
||||
uses: astral-sh/setup-uv@v5
|
||||
with:
|
||||
version: latest
|
||||
enable-cache: true
|
||||
cache-dependency-glob: uv.lock
|
||||
|
||||
- name: 设置 Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: 安装依赖
|
||||
run: uv sync --extra dev --frozen
|
||||
|
||||
- name: 运行测试(含覆盖率,强制 100%)
|
||||
run: uv run pytest -v --cov=pyflowx --cov-report=xml --cov-report=term-missing --cov-fail-under=100
|
||||
|
||||
- name: 运行示例冒烟测试
|
||||
run: |
|
||||
uv run python examples/etl_pipeline.py
|
||||
uv run python examples/parallel_run.py
|
||||
uv run python examples/async_aggregation.py
|
||||
|
||||
- name: 上传覆盖率
|
||||
if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.13'
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: coverage-${{ matrix.os }}-py${{ matrix.python-version }}
|
||||
path: coverage.xml
|
||||
retention-days: 7
|
||||
|
||||
# ─────────────────────────────────────────────────────────────
|
||||
# 聚合:所有检查通过后才标记完成
|
||||
# ─────────────────────────────────────────────────────────────
|
||||
ci-pass:
|
||||
name: CI Pass
|
||||
runs-on: ubuntu-latest
|
||||
needs: [lint, typecheck, test]
|
||||
if: always()
|
||||
steps:
|
||||
- name: 检查依赖任务结果
|
||||
if: ${{ needs.lint.result != 'success' || needs.typecheck.result != 'success' || needs.test.result != 'success' }}
|
||||
run: |
|
||||
echo "lint: ${{ needs.lint.result }}"
|
||||
echo "typecheck: ${{ needs.typecheck.result }}"
|
||||
echo "test: ${{ needs.test.result }}"
|
||||
exit 1
|
||||
- name: 全部通过
|
||||
run: echo "✅ 所有 CI 检查通过"
|
||||
- name: Tox test (py38, py313)
|
||||
run: uvx tox run -e py38,py313
|
||||
|
||||
+42
-177
@@ -2,192 +2,57 @@ name: Release
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- 'v*.*.*'
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
tag:
|
||||
description: '发布版本号(如 v0.1.0)'
|
||||
required: true
|
||||
type: string
|
||||
tags: ['v*.*.*']
|
||||
|
||||
permissions:
|
||||
contents: write
|
||||
# Trusted Publishing (OIDC) 上传 PyPI 所需
|
||||
id-token: write
|
||||
|
||||
jobs:
|
||||
# ─────────────────────────────────────────────────────────────
|
||||
# 预检:版本号校验 + 与 pyproject.toml 一致性检查
|
||||
# ─────────────────────────────────────────────────────────────
|
||||
pre-check:
|
||||
name: Pre-release Check
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
version: ${{ steps.meta.outputs.version }}
|
||||
tag: ${{ steps.meta.outputs.tag }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: 解析版本号
|
||||
id: meta
|
||||
run: |
|
||||
if [ -n "${{ inputs.tag }}" ]; then
|
||||
TAG="${{ inputs.tag }}"
|
||||
else
|
||||
TAG="${GITHUB_REF#refs/tags/}"
|
||||
fi
|
||||
# 去除前缀 v
|
||||
VERSION="${TAG#v}"
|
||||
echo "tag=$TAG" >> $GITHUB_OUTPUT
|
||||
echo "version=$VERSION" >> $GITHUB_OUTPUT
|
||||
echo "发布版本: $VERSION (tag: $TAG)"
|
||||
|
||||
- name: 校验版本号格式
|
||||
run: |
|
||||
VERSION="${{ steps.meta.outputs.version }}"
|
||||
if ! echo "$VERSION" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+(-[a-zA-Z0-9.]+)?$'; then
|
||||
echo "❌ 版本号格式错误: $VERSION(应为 x.y.z 或 x.y.z-rc.n)"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: 校验 pyproject.toml 版本一致
|
||||
run: |
|
||||
# 精确提取 [project] 段的 version 字段(避免匹配到依赖的 version)
|
||||
PY_VERSION=$(awk '/^\[project\]/{f=1} f&&/^version[[:space:]]*=/{gsub(/[" ]/,"",$3); print $3; exit}' pyproject.toml)
|
||||
echo "pyproject.toml version: $PY_VERSION"
|
||||
if [ "$PY_VERSION" != "${{ steps.meta.outputs.version }}" ]; then
|
||||
echo "❌ pyproject.toml 版本($PY_VERSION) 与 tag 版本(${{ steps.meta.outputs.version }}) 不一致"
|
||||
echo "请先更新 pyproject.toml 中的 version 字段"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# ─────────────────────────────────────────────────────────────
|
||||
# 构建:wheel + sdist(纯 Python,单平台即可)
|
||||
# ─────────────────────────────────────────────────────────────
|
||||
build:
|
||||
name: Build Artifacts
|
||||
needs: pre-check
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: 安装 uv
|
||||
uses: astral-sh/setup-uv@v5
|
||||
with:
|
||||
version: latest
|
||||
enable-cache: true
|
||||
|
||||
- name: 设置 Python 3.13
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.13'
|
||||
|
||||
- name: 安装依赖
|
||||
run: uv sync --extra dev --frozen
|
||||
|
||||
- name: 构建 wheel + sdist
|
||||
run: uv build
|
||||
|
||||
- name: 校验产物
|
||||
run: |
|
||||
echo "待上传产物:"
|
||||
ls -la dist/
|
||||
if [ -z "$(ls -A dist/*.whl dist/*.tar.gz 2>/dev/null)" ]; then
|
||||
echo "❌ 未找到 wheel 或 sdist 产物"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: 上传构建产物
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: dist
|
||||
path: dist/*
|
||||
retention-days: 30
|
||||
|
||||
# ─────────────────────────────────────────────────────────────
|
||||
# 发布:上传到 PyPI(Trusted Publishing / OIDC)
|
||||
# ─────────────────────────────────────────────────────────────
|
||||
publish-pypi:
|
||||
name: Publish to PyPI
|
||||
needs: [pre-check, build]
|
||||
runs-on: ubuntu-latest
|
||||
environment:
|
||||
name: pypi
|
||||
url: https://pypi.org/project/pyflowx/${{ needs.pre-check.outputs.version }}
|
||||
permissions:
|
||||
id-token: write
|
||||
steps:
|
||||
- name: 下载构建产物
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: dist
|
||||
path: dist
|
||||
|
||||
- name: 上传到 PyPI
|
||||
uses: pypa/gh-action-pypi-publish@release/v1
|
||||
with:
|
||||
attestations: true
|
||||
|
||||
# ─────────────────────────────────────────────────────────────
|
||||
# 发布:创建 GitHub Release
|
||||
# ─────────────────────────────────────────────────────────────
|
||||
release:
|
||||
name: Publish Release
|
||||
needs: [pre-check, build, publish-pypi]
|
||||
name: Build, Publish & Release
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: write
|
||||
container:
|
||||
image: pyflowx-ci:latest
|
||||
env:
|
||||
UV_LINK_MODE: copy
|
||||
# ---- 国内源 ----
|
||||
PIP_INDEX_URL: https://pypi.tuna.tsinghua.edu.cn/simple
|
||||
PIP_TRUSTED_HOST: pypi.tuna.tsinghua.edu.cn
|
||||
UV_INDEX_URL: https://pypi.tuna.tsinghua.edu.cn/simple
|
||||
UV_TRUSTED_HOST: pypi.tuna.tsinghua.edu.cn
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
- uses: http://gitea:3000/zhou/checkout.git@v4
|
||||
|
||||
- name: 下载构建产物
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: dist
|
||||
path: assets
|
||||
|
||||
- name: 整理发布产物
|
||||
- name: Build distributions
|
||||
run: uv build
|
||||
|
||||
- name: Publish to pypi
|
||||
run: uv publish --token '${{ secrets.PYPI_TOKEN }}'
|
||||
|
||||
- name: Create Gitea Release & Upload Assets
|
||||
env:
|
||||
GITEA_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
TAG_NAME: ${{ github.ref_name }}
|
||||
REPO: ${{ github.repository }}
|
||||
GITEA_URL: http://172.17.0.1:3000
|
||||
run: |
|
||||
ls -la assets/
|
||||
set -e
|
||||
# 1. 创建 Release
|
||||
RELEASE_ID=$(curl -sS -X POST "$GITEA_URL/api/v1/repos/$REPO/releases" \
|
||||
-H "Authorization: token $GITEA_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"tag_name\":\"$TAG_NAME\",\"name\":\"Release $TAG_NAME\",\"body\":\"Automated release from CI\",\"draft\":false,\"prerelease\":false}" \
|
||||
| python3 -c "import sys,json;print(json.load(sys.stdin)['id'])")
|
||||
|
||||
- name: 生成 Release Notes
|
||||
id: notes
|
||||
run: |
|
||||
{
|
||||
echo "## pyflowx ${{ needs.pre-check.outputs.version }}"
|
||||
echo ""
|
||||
echo "### 下载"
|
||||
echo ""
|
||||
echo "- **Wheel**: \`pyflowx-${{ needs.pre-check.outputs.version }}-py3-none-any.whl\`"
|
||||
echo "- **源码包**: \`pyflowx-${{ needs.pre-check.outputs.version }}.tar.gz\`"
|
||||
echo ""
|
||||
echo "### 安装"
|
||||
echo ""
|
||||
echo '```bash'
|
||||
echo "pip install pyflowx==${{ needs.pre-check.outputs.version }}"
|
||||
echo '```'
|
||||
echo ""
|
||||
echo "### 完整变更日志"
|
||||
} > RELEASE_NOTES.md
|
||||
{
|
||||
echo "content<<EOF"
|
||||
cat RELEASE_NOTES.md
|
||||
echo "EOF"
|
||||
} >> $GITHUB_OUTPUT
|
||||
echo "Created release id=$RELEASE_ID"
|
||||
|
||||
- name: 创建 GitHub Release
|
||||
uses: softprops/action-gh-release@v2
|
||||
with:
|
||||
tag_name: ${{ needs.pre-check.outputs.tag }}
|
||||
name: pyflowx ${{ needs.pre-check.outputs.version }}
|
||||
body: ${{ steps.notes.outputs.content }}
|
||||
files: assets/*
|
||||
draft: false
|
||||
prerelease: ${{ contains(needs.pre-check.outputs.version, '-') }}
|
||||
generate_release_notes: true
|
||||
# 2. 上传 dist/ 下所有文件作为附件
|
||||
for f in dist/*; do
|
||||
echo "Uploading $f ..."
|
||||
curl -sS -X POST "$GITEA_URL/api/v1/repos/$REPO/releases/$RELEASE_ID/assets?name=$(basename $f)" \
|
||||
-H "Authorization: token $GITEA_TOKEN" \
|
||||
-H "Content-Type: application/octet-stream" \
|
||||
--data-binary "@$f"
|
||||
done
|
||||
|
||||
|
||||
|
||||
@@ -9,3 +9,9 @@ wheels/
|
||||
# Virtual environments
|
||||
.venv
|
||||
.coverage
|
||||
.idea
|
||||
*_profile.html
|
||||
|
||||
# Sphinx 文档构建输出
|
||||
docs/_build/
|
||||
.trae/refs
|
||||
|
||||
@@ -7,10 +7,7 @@ repos:
|
||||
hooks:
|
||||
# Run the linter
|
||||
- id: ruff
|
||||
args: [ --fix, --exit-non-zero-on-fix ]
|
||||
# Run the formatter
|
||||
- id: ruff-format
|
||||
args: [ --config=pyproject.toml]
|
||||
args: [--fix, --exit-non-zero-on-fix]
|
||||
- repo: https://gitcode.com/gh_mirrors/pr/pre-commit-hooks.git
|
||||
rev: v5.0.0
|
||||
hooks:
|
||||
@@ -18,5 +15,5 @@ repos:
|
||||
- id: debug-statements
|
||||
- id: fix-byte-order-marker
|
||||
- id: trailing-whitespace
|
||||
args: [ --markdown-linebreak-ext=md ]
|
||||
args: [--markdown-linebreak-ext=md]
|
||||
- id: end-of-file-fixer
|
||||
|
||||
+1
-1
@@ -1 +1 @@
|
||||
3.8
|
||||
3.11
|
||||
|
||||
@@ -0,0 +1,23 @@
|
||||
# ReadTheDocs 配置
|
||||
# https://docs.readthedocs.io/en/stable/config-file/v2.html
|
||||
version: 2
|
||||
|
||||
# 构建配置
|
||||
build:
|
||||
os: ubuntu-24.04
|
||||
tools:
|
||||
python: "3.11"
|
||||
|
||||
# Python 依赖与构建命令
|
||||
python:
|
||||
install:
|
||||
- method: pip
|
||||
path: .
|
||||
extra_requirements:
|
||||
- docs
|
||||
|
||||
# Sphinx 构建
|
||||
sphinx:
|
||||
configuration: docs/conf.py
|
||||
builder: html
|
||||
fail_on_warning: false
|
||||
@@ -0,0 +1,15 @@
|
||||
# PYTHON
|
||||
.coverage
|
||||
.pytest_cache/
|
||||
.ruff_cache/
|
||||
.tox/
|
||||
.venv/
|
||||
__pycache__/
|
||||
|
||||
# NODEJS
|
||||
node_modules/
|
||||
|
||||
# IDE
|
||||
.idea
|
||||
.trae
|
||||
.vscode
|
||||
@@ -0,0 +1,108 @@
|
||||
# 文档整理与 Sphinx 文档搭建计划
|
||||
|
||||
## Context
|
||||
|
||||
最近完成 CLI 重构:新增 `pf` 统一入口,13 个工具迁移到 YAML 配置并删除了对应 .py 入口脚本,`run()` 的 verbose 统一应用到 spec。但文档未同步:README 仍引用旧命令(`yamlrun`、`python build.py`),模块结构表缺漏;`runner.py` 的 `_apply_verbose_to_graph` 成为死代码;项目缺少可发布的 Sphinx 文档。本次任务整理这些遗留,并搭建 ReadTheDocs 文档站。
|
||||
|
||||
## 任务范围
|
||||
|
||||
### 1. 清理死代码
|
||||
- 删除 `src/pyflowx/runner.py` 的 `_apply_verbose_to_graph` 函数(line 38-68),功能已移入 `executors.py` 的 `run()`。
|
||||
- 删除 `tests/test_runner.py` 中对应测试(line 610-636,`TestApplyVerboseToGraph` 类)。
|
||||
- 清理 `runner.py` 顶部 `from dataclasses import replace` 若变为未使用。
|
||||
|
||||
### 2. 修复版本不一致
|
||||
- `src/pyflowx/__init__.py:105` 硬编码 `__version__ = "0.4.5"`,`pyproject.toml:25` 为 `0.3.5`。
|
||||
- 统一为 `0.4.5`(以 `__init__.py` 中的值为准;`pyproject.toml` 虽是版本的来源,但 bumpversion 工具应同时更新两者)。
|
||||
|
||||
### 3. 更新 README.md
|
||||
- L304-308:`python build.py clean/build/test` → `pf pymake clean/build/test`。
|
||||
- L335-351、L435:`yamlrun pipeline.yaml ...` → `pf yamlrun pipeline.yaml ...`(6 处)。
|
||||
- L311:`verbose=True(默认)` 描述保留,但 CLI 示例改为 `pf`。
|
||||
- L558-574 模块结构表:补充 `cli/pf.py`(统一入口)、`cli/configs/`(YAML 工具配置)、`cli/_ops/`(工具函数)、`profiling.py`、`registry.py`。
|
||||
- 顶部增加「文档」徽章链接到 ReadTheDocs。
|
||||
|
||||
### 4. 搭建 Sphinx 文档结构
|
||||
新建 `docs/` 目录:
|
||||
|
||||
```
|
||||
docs/
|
||||
├── conf.py # Sphinx 配置
|
||||
├── index.rst # 首页与目录
|
||||
├── installation.rst # 安装
|
||||
├── quickstart.rst # 快速上手(从 README 提炼)
|
||||
├── guide/
|
||||
│ ├── task.rst # TaskSpec 任务描述
|
||||
│ ├── graph.rst # Graph DAG 构建
|
||||
│ ├── execution.rst # 执行策略与 run()
|
||||
│ ├── yaml.rst # YAML 任务编排
|
||||
│ └── cli.rst # pf 统一入口与工具列表
|
||||
├── api.rst # API 参考(automodule 自动生成)
|
||||
└── changelog.rst # 变更日志摘要
|
||||
```
|
||||
|
||||
**conf.py 要点**:
|
||||
- 扩展:`sphinx.ext.autodoc`、`sphinx.ext.napoleon`(支持 Google/NumPy docstring)、`sphinx.ext.viewcode`、`myst_parser`(支持 Markdown)
|
||||
- 主题:`sphinx_rtd_theme`
|
||||
- 项目版本从 `pyflowx.__version__` 动态读取
|
||||
- `autodoc_default_options`:`members: True, undoc-members: True, show-inheritance: True`
|
||||
|
||||
**api.rst**:用 `automodule:: pyflowx` 抓取 `__all__` 的 56 个公共符号。
|
||||
|
||||
### 5. ReadTheDocs 配置
|
||||
- 新建 `.readthedocs.yaml`:Python 3.11,`pip install -e .[docs]`,`sphinx-build -b html docs/ docs/_build/`。
|
||||
- `.gitignore` 增加 `docs/_build/`。
|
||||
|
||||
### 6. pyproject.toml 补充 docs 依赖
|
||||
```toml
|
||||
docs = [
|
||||
"sphinx>=7.0",
|
||||
"sphinx-rtd-theme>=2.0",
|
||||
"myst-parser>=3.0",
|
||||
]
|
||||
```
|
||||
并在 `[dependency-groups]` 的 dev 中加入 `pyflowx[docs]`。
|
||||
|
||||
## 关键文件
|
||||
|
||||
| 文件 | 操作 |
|
||||
|------|------|
|
||||
| `src/pyflowx/runner.py` | 删除 `_apply_verbose_to_graph` |
|
||||
| `tests/test_runner.py` | 删除 `TestApplyVerboseToGraph` |
|
||||
| `src/pyflowx/__init__.py` | 版本统一(已 0.4.5,确认) |
|
||||
| `pyproject.toml` | 版本 → 0.4.5;加 docs 依赖 |
|
||||
| `README.md` | 更新 CLI 示例与模块结构表 |
|
||||
| `docs/conf.py` | 新建 |
|
||||
| `docs/*.rst` | 新建 |
|
||||
| `.readthedocs.yaml` | 新建 |
|
||||
| `.gitignore` | 加 docs/_build/ |
|
||||
|
||||
## 验证
|
||||
|
||||
1. **测试与 lint**:
|
||||
```bash
|
||||
uv run pytest tests/ -q
|
||||
uv run ruff check src/ tests/ docs/conf.py
|
||||
uv run pyrefly check src/pyflowx/runner.py
|
||||
```
|
||||
|
||||
2. **Sphinx 构建本地验证**:
|
||||
```bash
|
||||
uv sync --extra docs
|
||||
uv run sphinx-build -b html docs/ docs/_build/
|
||||
```
|
||||
确认无 warning,打开 `docs/_build/index.html` 检查页面。
|
||||
|
||||
3. **pf 功能回归**:
|
||||
```bash
|
||||
pf gitt c
|
||||
pf pymake b --dry-run
|
||||
```
|
||||
|
||||
4. **RTD 配置校验**:`.readthedocs.yaml` 语法正确,`docs/conf.py` 能独立构建。
|
||||
|
||||
## 不在范围
|
||||
|
||||
- 不统一各模块 docstring 风格(napoleon 兼容 Google/NumPy,够用)。
|
||||
- 不重构现有 CLI 工具 YAML。
|
||||
- 不新增中文文档翻译(文档用中文撰写,与项目既有风格一致)。
|
||||
@@ -0,0 +1,11 @@
|
||||
---
|
||||
alwaysApply: true
|
||||
scene: git_message
|
||||
---
|
||||
|
||||
在此处编写规则,自定义 AI 生成提交信息的风格。
|
||||
|
||||
## 提交信息格式
|
||||
- 提交信息必须使用中文。
|
||||
- 提交信息必须包含变更的类型(例如 "fix"、"feat"、"refactor" 等)。
|
||||
- 提交信息必须尽量简洁明了,不要超过一个段落。
|
||||
@@ -0,0 +1,157 @@
|
||||
# Python 开发规范
|
||||
|
||||
本规范结合 Python 最佳实践,作为编写与审查 Python 代码的统一标准。
|
||||
详细操作指南见 `.agents/skills/` 下相应技能。
|
||||
|
||||
## 工具链(以 pyproject.toml 为准)
|
||||
|
||||
| 工具 | 用途 | 配置要点 |
|
||||
|------|------|---------|
|
||||
| **ruff** | lint + format | `line-length=120`,`target-version="py38"` |
|
||||
| **pyrefly** | 类型检查 | `preset="strict"`,`python-version="3.8"` |
|
||||
| **pytest** | 测试 | `asyncio_default_fixture_loop_scope="function"`,marker `slow` |
|
||||
| **coverage** | 覆盖率 | `branch=true`,`fail_under=95`,`concurrency=["thread"]` |
|
||||
| **pre-commit** | 提交前检查 | ruff `--fix` + trailing-whitespace + end-of-file-fixer |
|
||||
|
||||
验证(每次修改后必做):
|
||||
|
||||
```bash
|
||||
uvx --from pyflowx pymake tc
|
||||
uvx --from pyflowx pymake cov
|
||||
```
|
||||
|
||||
## 兼容性
|
||||
|
||||
- **最低 Python 3.8**:用 `from __future__ import annotations` 延迟注解求值;
|
||||
按版本用 `typing.List`(3.8) → 内置泛型(3.9) → `X | Y`(3.10) → `typing.override`(3.12)。
|
||||
- **版本守卫**:`if sys.version_info >= (3, X):` 引入高版本 API;低版本回退分支加 `# pragma: no cover`。
|
||||
- **零运行时依赖**:仅依赖标准库(3.8 需 `graphlib_backport`、`typing-extensions`)。
|
||||
新增依赖须审慎,优先用标准库。
|
||||
|
||||
## 类型注解
|
||||
|
||||
- **公共 API 必须有完整类型注解**,包括返回类型;私有函数也应有注解。
|
||||
- 泛型用 `TypeVar`;PEP 696 `default=` 仅 3.13+ 标准库支持,3.8–3.12 用 `typing_extensions.TypeVar`。
|
||||
- `Mapping`/`Sequence` 用于只读参数,`dict`/`list` 用于可变返回。
|
||||
- `Any` 仅用于真正动态场景(如 `Context` 跨任务异构映射);任务内部类型必须完全静态。
|
||||
- 禁用裸 `# type: ignore`;确需时加具体规则码(如 `# type: ignore[union-attr]`)。
|
||||
- **`TYPE_CHECKING` 守卫**:仅类型检查需要的导入放 `if TYPE_CHECKING:` 块内,避免循环依赖。
|
||||
- **类型收窄**:用 `assert isinstance(x, Y)` 辅助 pyrefly 推断;`cast()` 仅用于类型系统无法表达的场景。
|
||||
|
||||
## 数据结构
|
||||
|
||||
- **不可变优先**:配置/描述类用 `@dataclass(frozen=True)`;可变类属性标注 `RUF012` 豁免。
|
||||
- **缓存**:实例级用 `functools.cached_property`,按参数键控用 `functools.lru_cache`;
|
||||
不可哈希参数需 try/except 回退。修改被缓存数据源后必须手动清空缓存。
|
||||
- **抽象基类**:接口用 `abc.ABC` + `@abstractmethod`(如 `StateBackend`)。
|
||||
- **枚举**:状态/标志值用 `enum.Enum`(如 `TaskStatus`),禁止裸字符串/魔术数字;枚举值用 `UPPER_SNAKE`。
|
||||
- **`__repr__`**:可变类实现 `__repr__`(含关键字段);`frozen=True` dataclass 自动生成。
|
||||
|
||||
## 模块与导入
|
||||
|
||||
- **单一职责**:每模块只做一件事(`task.py` 数据结构、`executors.py` 执行、`command.py` 命令、`compose.py` 组合)。禁止跨职责边界。
|
||||
- **导入顺序**(ruff isort):`__future__` → 标准库 → 第三方 → 本地,各组间空行。
|
||||
- **惰性导入**:仅为打破循环依赖时使用,函数体内导入并注释说明;顶层导入是默认。
|
||||
- **`__all__`**:定义 `__all__` 显式声明导出符号,位置紧跟在 `from __future__` 导入之后。
|
||||
- **禁用 star imports**:`from x import *` 污染命名空间、破坏类型检查(`__init__.py` 聚合经 `__all__` 控制为例外)。
|
||||
- **避免 `utils.py`/`helpers.py`**:按职责归入对应模块。
|
||||
|
||||
## 函数设计
|
||||
|
||||
- **模块级函数优于 Mixin**:共享逻辑用模块级函数,类只持有状态与薄方法。
|
||||
- **静态方法慎用**:纯函数直接放模块级。
|
||||
- **参数 ≤ 5 个**为宜;超出用 dataclass 封装参数对象。
|
||||
- **单一职责**:一个函数做一件事;过长函数考虑拆分。
|
||||
- **异常范围要窄**:只捕获预期异常(如 `(TypeError, ValueError, KeyError, AttributeError)`),
|
||||
**禁止** `except Exception` 掩盖 bug;捕获后至少 `logger.warning` 记录。
|
||||
- **可变默认参数**:`def f(x=[])` 是经典坑;用 `None` 哨兵或 `field(default_factory=list)`。
|
||||
|
||||
## 异常处理
|
||||
|
||||
- **自定义异常家族**:继承公共基类(如 `PyFlowXError`),按错误场景分类。
|
||||
- **异常包装**:`raise NewError(...) from exc` 保留因果链。
|
||||
- **不要吞异常**:捕获后必须处理(记录/包装/重抛),禁止空 `except: pass`。
|
||||
- **钩子/回调异常**:第三方回调异常仅记录,不影响主流程。
|
||||
|
||||
## 并发与线程安全
|
||||
|
||||
- **进程全局状态**(`os.environ`/`os.chdir`)在并发场景下必须用全局锁(`threading.RLock`)序列化。
|
||||
- **条件评估不可有可变状态**:组合条件(NOT/AND/OR)不得修改共享 `_reason`,避免竞态。
|
||||
- **批量 I/O**:循环内多次写盘改为批量一次(`contextmanager` 包裹延迟落盘)。
|
||||
- **信号量限流**:`concurrency_key` + `Semaphore` 按组限流。
|
||||
|
||||
## 测试
|
||||
|
||||
详细操作指南见 `.agents/skills/pyflowx-testing` 技能。硬约束:
|
||||
|
||||
- **覆盖率 ≥ 95%**(branch coverage),不得下降。
|
||||
- **公共 API 优先测试**:用公共接口(`has`/`get`),不访问私有方法;
|
||||
故障注入等场景可临时访问私有属性,docstring 注明原因。
|
||||
- **命名**:`test_<被测对象>_<场景>`。
|
||||
- **断言**:原生 `assert x == 1`,禁用 `self.assertEqual`;`pytest.raises` 必填 `match=`。
|
||||
- **Mock 优先级**:`monkeypatch` > 内联 stub > `unittest.mock` > `pytest-mock`。
|
||||
禁用 `@patch` 装饰器、`mock.patch.object` 上下文、`pytest-mock` 的 `mocker` fixture。
|
||||
- **fixture**:`tmp_path`/`monkeypatch`/`capsys` 优先;autouse 仅全局必需时用。
|
||||
- **slow 标记**:耗时测试加 `@pytest.mark.slow`,CI 可 `-m "not slow"` 跳过。
|
||||
- **测试代码也跑 ruff**:`tests/**` 忽略 `ARG001`/`ARG002`。
|
||||
|
||||
## 代码风格
|
||||
|
||||
- **行宽 120**(ruff formatter 处理)。
|
||||
- **docstring**:公共 API 必须有;中文叙述 + 中文注释是本项目既有风格。
|
||||
- **打印和日志**:使用中文打印和日志,避免使用英文。
|
||||
- **命名**:`snake_case` 函数/变量,`PascalCase` 类,`UPPER_SNAKE` 常量,`_leading_underscore` 私有。
|
||||
- **字符串引号**:ruff 默认双引号。
|
||||
- **末尾单 `\n`**、**无尾随空格**(pre-commit 强制)。
|
||||
- **不用 emoji**:除非用户明确要求。
|
||||
|
||||
## Pythonic 风格
|
||||
|
||||
- **`is` 比较 `None`/`True`/`False`**:单例用 `is`,值用 `==`(PEP 8;对应 pycodestyle 规则 E711/E712)。
|
||||
- **EAFP 优于 LBYL**:先尝试再处理异常,而非先检查再执行(避免竞态窗口)。
|
||||
- **truthiness**:`if items:` 优于 `if len(items) > 0:`。
|
||||
- **字符串格式化**:首选 f-string;`%` 仅用于 `logging` 延迟格式化。
|
||||
- **推导式**优于 `map`+`filter`;> 2 层拆为显式循环。
|
||||
- **`enumerate`** 替代 `range(len())`;**`zip`** 并行迭代(3.10+ 用 `strict=True`)。
|
||||
- **解包** `a, b = pair` 优于索引访问;忽略值用 `_`。
|
||||
- **海象运算符 `:=`**(3.8+):赋值+判断合一,但不滥用。
|
||||
|
||||
## 日志
|
||||
|
||||
- **`logging.getLogger(__name__)`**:每模块独立 logger,禁用 `print` 调试残留。
|
||||
- **结构化上下文**:`extra={...}` 传字段;`logger.warning("task %r failed: %s", name, exc)` 优于 f-string(延迟格式化)。
|
||||
- **日志级别**:`DEBUG` 诊断 / `INFO` 关键流程 / `WARNING` 可恢复异常 / `ERROR` 需人工介入。
|
||||
- **禁止在日志中记录密码/密钥**:脱敏后再记录。
|
||||
|
||||
## 路径与资源
|
||||
|
||||
- **优先 `pathlib.Path`**:`Path("a") / "b"` 而非 `os.path.join`(ruff `PTH` 强制);
|
||||
禁止字符串拼接路径。类型注解用 `Path`,边界 `str` 立即包装。
|
||||
- **`with` 语句**:文件、锁、连接、临时目录一律用 `with` 或 `contextlib.contextmanager`;
|
||||
多资源用 `contextlib.ExitStack`。
|
||||
- **显式关闭**:长生命周期对象(连接池、线程池)实现 `close()`,但优先 `with`。
|
||||
- **批量操作**:循环内多次 acquire/release 改为批量一次。
|
||||
|
||||
## 安全
|
||||
|
||||
- **禁用 `eval`/`exec`**:处理不可信输入时绝不使用;用 `ast.literal_eval` 或专用解析器。
|
||||
- **`subprocess`**:禁用 `shell=True` 除非命令完全可信;优先 `list[str]` 形式。
|
||||
- **凭证不入仓**:密钥/token/密码放 `.env` 或环境变量,`.gitignore` 必须包含 `.env`。
|
||||
- **日志脱敏**:记录请求/响应时移除 `Authorization`、`password` 等字段。
|
||||
- **依赖审计**:`uv lock` 后审阅新增依赖,避免引入已知 CVE 的包。
|
||||
|
||||
## 性能要点
|
||||
|
||||
- **避免重复计算**:循环内查询应缓存或预构建映射(如 `{name: spec}`)。
|
||||
- **避免双重查找**:`has(k)` + `get(k)` 改为单次 `get(k)` + `KeyError` 回退。
|
||||
- **统一校验**:入口校验一次,下游路径不重复(如 `run()` 统一 `validate()`,`layers()` 不再重复)。
|
||||
- **事件 emit**:任务生命周期必须 emit `RUNNING` → `SUCCESS`/`FAILED`/`SKIPPED`,
|
||||
不要留死分支(`# pragma: no cover` 是清理信号,应激活或删除)。
|
||||
|
||||
## Git 与提交
|
||||
|
||||
- **自动提交**:任务完成后自动 `git add`(按文件名)+ `git commit` + `git push`(仅当分支已跟踪远程时执行 push;新分支跳过 push 并在总结中说明)。
|
||||
- **不修改 git config**。
|
||||
- **不运行破坏性命令**(`push --force`/`reset --hard`/`clean -f`)除非用户明确要求。
|
||||
- **staging**:按文件名添加,不用 `git add -A`/`git add .`,避免误加敏感文件。
|
||||
- **commit message**:简洁,聚焦"为什么"而非"是什么";遵循仓库既有风格。
|
||||
@@ -0,0 +1,134 @@
|
||||
---
|
||||
alwaysApply: true
|
||||
---
|
||||
|
||||
# 自驱动开发规则
|
||||
|
||||
本规则定义一种"目标驱动、闭环执行"的工作模式:仅在任务开始时与用户确认一次目标与边界,后续由 Agent 自主完成"计划 → 编码 → 测试 → 文档 → 验证"的迭代循环,直到用户目标达成。
|
||||
|
||||
## 核心原则
|
||||
|
||||
- **目标导向**:始终以用户最终目标为准绳,所有阶段产出都应服务于该目标。
|
||||
- **闭环执行**:每个子任务必须走完"计划 → 实现 → 测试 → 文档 → 验证"五步;禁止跳步留半成品。
|
||||
- **自主决策**:初始确认之后,实现路径、API 形态、重构范围、文件命名、测试组织、错误修复策略等由 Agent 自行决断,不再逐项请示。**可逆操作(编辑文件、运行测试、修复 lint、调整实现)直接执行,不询问**;只有不可逆/高风险操作才暂停。
|
||||
- **透明沟通**:每个阶段开始前用一句话说明意图;关键节点(完成、阻塞、转向)给简短更新;不复述内部思考,**不在收尾时停下询问"是否继续"或"是否提交"**——直接输出总结并结束。
|
||||
- **安全边界**:仅在高风险、不可逆操作或真正阻塞时才暂停找用户。
|
||||
|
||||
## 初始确认(一次性,仅在最开始)
|
||||
|
||||
任务启动时,用 `AskUserQuestion` 一次性确认以下信息(已由项目规范覆盖的不必重复确认):
|
||||
|
||||
1. **目标与范围**:要解决什么问题?交付物是什么?显式列出不在范围内的内容。
|
||||
2. **验收标准**:怎样算"完成"?可观测的判定条件(功能、性能、覆盖率阈值)。
|
||||
3. **特殊约束**:除 `python-standards.md` 之外的约束(兼容性、依赖限制、API 兼容策略等)。
|
||||
4. **测试要求**:覆盖率门槛(项目默认 ≥95%,branch);是否需要新增 `slow` 标记。
|
||||
|
||||
**git commit/push 不在确认范围内**:任务完成后自动 commit + push(仅当分支已跟踪远程时执行 push;新分支跳过 push 并在总结中说明),遵循 `.trae/rules/git-commit-message.md` 风格。仅 force-push、reset --hard、clean -f、修改 git config 等真正破坏性操作才需暂停确认。
|
||||
|
||||
确认后,将目标与验收标准固化进 `TaskCreate` 任务列表,后续不再就同一信息反复询问。
|
||||
|
||||
## 迭代循环
|
||||
|
||||
下列五个阶段构成一个完整闭环。未达到验收标准时,回到「计划」开启下一轮;达到验收标准时,进入「收尾」。
|
||||
|
||||
### 1. 计划(Plan)
|
||||
|
||||
- 用 Explore/Glob/Grep 研究相关代码与既有模式,避免凭空设计。
|
||||
- 用 `TaskCreate` 把目标拆为可独立验证的子任务;每完成一项立即 `TaskUpdate` 为 completed。
|
||||
- 优先复用现有抽象;不为本轮假想需求设计接口。
|
||||
- 不过早抽象:三处相似才考虑提取,否则就地写。
|
||||
|
||||
### 2. 实现(Code)
|
||||
|
||||
- 严格遵守 `.trae/rules/python-standards.md` 与既有代码风格。
|
||||
- 优先 Edit 现有文件;新增文件需有明确职责边界。
|
||||
- 不引入运行时依赖(项目零依赖原则);确需引入须在计划阶段说明。
|
||||
- 公共 API 必须有完整类型注解与中文 docstring。
|
||||
- 不写未被要求的功能、不为未来场景预留扩展点。
|
||||
|
||||
### 3. 测试(Test)
|
||||
|
||||
- 新增/修改的公共 API 必须配套测试;优先通过公共接口测试,故障注入可访问私有属性并在 docstring 注明。
|
||||
- Mock 优先级:`monkeypatch` > 内联 stub > `unittest.mock` > `pytest-mock`;禁用 `@patch` 装饰器。
|
||||
- 必跑校验(每次修改后):
|
||||
|
||||
```bash
|
||||
uvx --from pyflowx pymake tc
|
||||
uvx --from pyflowx pymake cov
|
||||
```
|
||||
|
||||
- 测试失败时定位根因再修复,不通过放宽断言或 `# pragma: no cover` 绕过。
|
||||
- 覆盖率不得低于上一次的值(项目门槛 95%,branch)。
|
||||
|
||||
### 4. 文档(Docs)
|
||||
|
||||
- 同步更新 docstring、README、模块结构说明。
|
||||
- 行为变更须同步更新 `.agents/skills/pyflowx-development/SKILL.md` 中的对应章节。
|
||||
- 跨会话有价值的设计决策、约束、陷阱,追加到 memory(`project_memory.md` 或对应 `topics.md`)。
|
||||
- 不主动新建 `*.md` 文档;除非用户明确要求。
|
||||
|
||||
### 5. 验证(Verify)
|
||||
|
||||
- 逐条对照初始确认的「验收标准」核验;未满足则回到「计划」继续下一轮。
|
||||
- 全套门禁通过:ruff、pyrefly、pytest、coverage。
|
||||
- 给出本轮变更清单(改了哪些文件、为什么)。
|
||||
|
||||
## 暂停条件(仅在以下情况中断自驱动找用户)
|
||||
|
||||
1. **歧义无法自决**:需求存在多种合理解读且无既有约定可循。
|
||||
2. **高风险/不可逆操作**:删除非临时文件、`git push --force`、`reset --hard`、删表、修改 CI 配置、修改 git config、卸载依赖等。**普通 `git commit`/`push` 不属于此类**(任务完成后自动执行)。
|
||||
3. **不可恢复的失败**:根因不在本仓库、需外部环境/权限配合、或经两轮尝试仍无法定位。
|
||||
4. **超出初始确认范围**:用户目标在执行中发现需要显著扩大范围或改变方向。
|
||||
5. **用户主动询问**:用户在对话中提出新问题或要求澄清。
|
||||
|
||||
**注意**:"目标已达成"**不是**暂停条件——验收标准全部满足后直接进入收尾并结束任务,不询问"是否扩展范围"或"是否提交"。
|
||||
|
||||
非以上情况,一律继续自驱动,不要为"求确认"而暂停。
|
||||
|
||||
## 决策判据:该问还是自决
|
||||
|
||||
遇到不确定时,按以下顺序判断:
|
||||
|
||||
1. **是否不可逆/高风险?** 是 → 暂停确认(如删除文件、`push --force`、修改 CI 配置、卸载依赖)。否 → 继续。
|
||||
2. **是否在初始确认范围内?** 是 → 按确认执行,不询问。否 → 视为"超出初始确认范围",暂停。
|
||||
3. **是否有既有约定可循?** 是 → 按约定执行(参考 `python-standards.md`、`project_memory.md`)。否 → 视为"歧义无法自决",暂停。
|
||||
4. **是否可逆?** 是 → 直接执行,即使结果可能不完美(可在后续迭代修正)。否 → 暂停。
|
||||
|
||||
**可直接自决(不询问)的典型情况**:
|
||||
|
||||
- 测试失败、覆盖率不达标、lint/类型检查报错 → 定位根因并修复。
|
||||
- 代码风格选择(命名、模块划分、参数顺序)→ 自决。
|
||||
- 文件编辑、运行测试、运行校验命令 → 直接执行。
|
||||
- 任务完成后输出收尾总结 → 直接输出,不询问下一步。
|
||||
- 显式指定 `name` 参数以保持测试兼容性 → 自决。
|
||||
- 重命名局部变量以避免遮蔽 → 自决。
|
||||
|
||||
**必须暂停询问的典型情况**:
|
||||
|
||||
- 删除非临时文件、重命名公共模块/包。
|
||||
- `git push --force`、`reset --hard`、`clean -f`、修改 git config(普通 commit/push 自动执行,无需询问)。
|
||||
- 引入新的运行时依赖(违反项目零依赖原则)。
|
||||
- 修改 CI 配置、pre-commit 钩子、pyproject.toml 的工具链配置。
|
||||
- 卸载或降级既有依赖。
|
||||
|
||||
## 沟通风格
|
||||
|
||||
- 阶段切换时一句话说明即可;不要把内部推理写给用户看。
|
||||
- 完成子任务后用一两句总结改了什么、下一步做什么。
|
||||
- 遇到阻塞时直接说明:卡在哪、试了什么、需要用户做什么。
|
||||
- **不在收尾时询问"是否需要提交"或"是否扩展范围"**——直接输出总结并结束。用户后续若有新需求,由用户主动提出。
|
||||
- 不使用 emoji,除非用户明确要求。
|
||||
|
||||
## 工具使用
|
||||
|
||||
- 独立操作尽量并行调用(多个 Read/Grep/Glob 一批发出)。
|
||||
- 用 `TaskCreate`/`TaskUpdate` 维护进度,不批量推迟标记。
|
||||
- 长命令用后台运行(`run_in_background`),完成会自动通知。
|
||||
- 文件操作一律用专用工具:Read/Edit/Write/Glob/Grep,不用 `cat`/`sed`/`grep`/`find`。
|
||||
|
||||
## 收尾
|
||||
|
||||
- 验收标准全部满足后,**直接输出最终总结并结束任务**:交付物、关键决策、遗留事项。
|
||||
- **自动提交**:收尾时自动 `git add`(按文件名)+ `git commit`(遵循 `.trae/rules/git-commit-message.md` 风格)+ `git push`(仅当分支已跟踪远程时执行;新分支跳过 push 并在总结中说明);**不询问**"是否需要提交"或"是否扩展范围"。
|
||||
- 若验收标准未全部满足,回到「计划」继续下一轮,不停下询问。
|
||||
- 将本次会话的关键产出与决策更新到 memory,便于后续会话续接。
|
||||
@@ -0,0 +1,135 @@
|
||||
---
|
||||
name: "pyflowx-testing"
|
||||
description: "PyFlowX 项目的测试编写规范与 mock 使用指南。在编写或审查测试、选择 mock 工具、设计 fixture、处理 asyncio 测试时调用。"
|
||||
---
|
||||
|
||||
# PyFlowX 测试规范
|
||||
|
||||
本技能是 `.trae/rules/python-standards.md` 测试章节的详细展开。
|
||||
规则文件仅保留硬约束指针,本文件提供完整操作指南。
|
||||
|
||||
## 总则
|
||||
|
||||
- **覆盖率 ≥ 95%**(branch coverage),不得下降。
|
||||
- **公共 API 优先测试**:测试用公共接口(`has`/`get`),不访问私有方法
|
||||
(如 `_expired`)。兼容旧测试的私有方法应删除并迁移测试。
|
||||
例外:`_store`/`_flush` 等内部状态在无法用公共 API 触发时(如模拟过期、
|
||||
故障注入),可临时访问私有属性,并在 docstring 注明原因。
|
||||
- **命名**:`test_<被测对象>_<场景>`,如 `test_storage_key_cache_key_exception_returns_name`。
|
||||
- **每个测试一个断言重点**;多个断言要语义相关。
|
||||
- **slow 标记**:耗时测试加 `@pytest.mark.slow`,CI 可 `-m "not slow"` 跳过。
|
||||
- **测试代码也跑 ruff**:`tests/**` 忽略 `ARG001`/`ARG002`(未用 fixture 参数)。
|
||||
- **断言风格**:用原生 `assert` + 比较运算符(`assert x == 1`),
|
||||
不用 `self.assertEqual`;pytest 会生成更清晰的 diff。
|
||||
|
||||
## Mock 工具选择(强制)
|
||||
|
||||
**优先级**:`monkeypatch` > 内联 stub > `unittest.mock`;`pytest-mock` 禁用(见下方"禁止")。
|
||||
|
||||
| 场景 | 工具 | 示例 |
|
||||
|------|------|------|
|
||||
| 替换模块属性 / 环境变量 / 工作目录 | `monkeypatch` | `monkeypatch.setattr(subprocess, "run", fake_run)` |
|
||||
| `os.environ["KEY"]` 临时设置 | `monkeypatch.setenv` | `monkeypatch.setenv("LOCALAPPDATA", "C:\\...")` |
|
||||
| 切换 cwd | `monkeypatch.chdir` | `monkeypatch.chdir(tmp_path)` |
|
||||
| 一次性 stub 函数 | 内联 lambda / 闭包 | `ran = []; monkeypatch.setattr(subprocess, "run", lambda *c, **__: ran.append(c))` |
|
||||
| 复杂 spy(记录调用次数/参数/返回序列) | `unittest.mock.MagicMock` | 仅当 lambda 不足以表达时 |
|
||||
| `with patch(...)` 上下文 | **禁用**(用 monkeypatch) | monkeypatch 自动 teardown 更安全 |
|
||||
|
||||
**禁止**:
|
||||
- 不用 `pytest-mock` 的 `mocker` fixture(项目虽在 dev 依赖声明,但实际
|
||||
测试代码未使用;为保持风格统一,新代码继续用 `monkeypatch`)。
|
||||
- 不用 `unittest.mock.patch` 装饰器(`@patch("x.y")`),它隐藏依赖且
|
||||
与 pytest fixture 模式不兼容;用 `monkeypatch.setattr` 替代。
|
||||
- 不用 `mock.patch.object` 作为上下文管理器,除非被测代码本身就是
|
||||
contextmanager(此时用 `monkeypatch.setattr` 仍更简单)。
|
||||
|
||||
## monkeypatch 使用规范
|
||||
|
||||
- **类型注解**:fixture 参数标注 `monkeypatch: pytest.MonkeyPatch`。
|
||||
- **作用域**:monkeypatch 自动在测试结束时撤销,**禁止**手动
|
||||
`monkeypatch.setattr(x, "y", original)` 恢复(多余且容易遗漏)。
|
||||
例外:在单个测试内需要中途恢复时,优先用 `with monkeypatch.context() as m:` 限定补丁范围;`monkeypatch.undo()` 会全量撤销(包括其他 fixture 通过同一 monkeypatch 打的补丁),仅在确有必要时使用。
|
||||
- **替换目标**:替换"被测代码看到的对象",而非全局对象本身。
|
||||
- 错误:`monkeypatch.setattr("os.path.exists", fake)` —— 替换全局,影响其他模块。
|
||||
- 正确:`monkeypatch.setattr(pyflowx.command.shutil, "which", fake)` ——
|
||||
通过被测模块的命名空间定位其引用的 `shutil.which`,依赖关系一目了然(注意:`pyflowx.command.shutil` 与全局 `shutil` 是同一模块对象,补丁在测试期间对所有使用方可见,测试结束后自动撤销)。
|
||||
- **属性 vs 字符串路径**:优先属性访问形式 `monkeypatch.setattr(obj, "attr", val)`
|
||||
而非字符串路径 `monkeypatch.setattr("pkg.mod.obj.attr", val)`,
|
||||
前者有 IDE 跳转与重构支持。
|
||||
- **记录调用**:用闭包 `ran: list[tuple] = []` + `lambda *a, **k: ran.append((a, k))`
|
||||
替代 `MagicMock`,可读性更好且无需导入。
|
||||
|
||||
## Stub 与 Spy 模式
|
||||
|
||||
- **轻量 stub**:内联定义 `class MockResult: returncode = 0; stdout = ""`,
|
||||
替代 `MagicMock(return_value=...)`,类型明确且不引入 mock 依赖。
|
||||
- **状态收集**:闭包 + list 比 `mock.call_args_list` 更易断言:
|
||||
```python
|
||||
calls: list[list[str]] = []
|
||||
|
||||
|
||||
def fake_run(cmd: list[str], **_: Any) -> MockResult:
|
||||
calls.append(cmd)
|
||||
return MockResult()
|
||||
|
||||
|
||||
monkeypatch.setattr(subprocess, "run", fake_run)
|
||||
assert calls == [["clear"]]
|
||||
```
|
||||
- **副作用序列**:需要按调用次数返回不同值时,用 `iter([...])` + `next`(需要循环重复时用 `itertools.cycle`)或
|
||||
手动计数器,而非 `side_effect=[...]`(mock 专有 API)。
|
||||
- **异常注入**:`def raise_oserror(*a, **k): raise OSError("...")`,
|
||||
用 `pytest.raises(OSError)` 验证,而非 `side_effect=OSError`。
|
||||
|
||||
## 异常断言
|
||||
|
||||
- **`pytest.raises`**:必填 `match=` 正则(除非异常消息完全不可预测),
|
||||
避免误捕获同类异常:
|
||||
```python
|
||||
with pytest.raises(StorageError, match="cannot write"):
|
||||
b.save("a", 1)
|
||||
```
|
||||
- **异常链**:验证 `__cause__` 时用 `exc_info.value.__cause__`,
|
||||
确认 `raise X from Y` 因果链完整。
|
||||
- **禁止** `try/except + assert False`:用 `pytest.raises` 替代。
|
||||
|
||||
## Fixture 规范
|
||||
|
||||
- **`tmp_path`**:处理临时文件,自动清理,禁止 `tempfile.mkdtemp()` 手动管理。
|
||||
- **`monkeypatch`**:环境变量、cwd、模块属性 mock(见上)。
|
||||
- **`capsys`/`capfd`**:捕获 stdout/stderr,验证日志或命令输出。
|
||||
- **autouse fixture**:仅在全局必需时用(如 `conftest.py` 的
|
||||
`packtool_tmp_workdir` 自动切到 tmp_path);否则显式声明参数。
|
||||
- **fixture 命名**:`snake_case`,描述"提供什么"而非"测试什么"
|
||||
(`sample_graph` 优于 `test_data`)。
|
||||
- **fixture 作用域**:默认 `function`;`module`/`session` 仅当构造昂贵且
|
||||
只读时,并加注释说明无副作用。
|
||||
|
||||
## asyncio 测试
|
||||
|
||||
- **fixture `loop_scope="function"`**(pyproject 已配置默认值)。
|
||||
- **async 测试**:`async def test_x():`,pytest-asyncio 自动驱动。
|
||||
- **await 检查**:测试异步函数必须 `await` 结果,禁止仅验证返回 coroutine 对象。
|
||||
- **异步 mock**:用 `AsyncMock`(3.8+ 在 `unittest.mock`)或
|
||||
`async def fake(): return value`,禁用 `MagicMock(return_value=coro)`。
|
||||
|
||||
## 参数化
|
||||
|
||||
- **`@pytest.mark.parametrize`**:用 `ids` 参数提供可读标识:
|
||||
```python
|
||||
@pytest.mark.parametrize(
|
||||
("strategy", "expected_workers"),
|
||||
[("sequential", 1), ("thread", 8), ("async", 1)],
|
||||
ids=["seq", "thread-8", "async"],
|
||||
)
|
||||
```
|
||||
- **参数命名**:参数元组用有意义名称,而非 `("a", "b")`。
|
||||
- **组合爆炸**:参数组合 > 20 时拆分测试,避免单个测试函数臃肿。
|
||||
|
||||
## 测试组织
|
||||
|
||||
- **文件命名**:`test_<被测模块>.py`(`test_storage.py` 对应 `storage.py`)。
|
||||
- **类分组**:仅在测试逻辑强相关时用 `class TestXxx:` 分组;默认用模块级函数。
|
||||
- **docstring**:每个测试函数一句话说明"测试什么场景",复杂场景补充"为什么"。
|
||||
- **setup/teardown**:优先 fixture;`setup_method`/`teardown_method` 仅在
|
||||
无法用 fixture 表达时(罕见)。
|
||||
Vendored
-1
@@ -18,7 +18,6 @@
|
||||
"evenBetterToml.formatter.arrayAutoCollapse": true,
|
||||
"evenBetterToml.formatter.arrayAutoExpand": true,
|
||||
"evenBetterToml.formatter.arrayTrailingComma": true,
|
||||
"evenBetterToml.formatter.columnWidth": 120,
|
||||
"evenBetterToml.formatter.compactEntries": false,
|
||||
"evenBetterToml.formatter.indentEntries": false,
|
||||
"evenBetterToml.formatter.indentTables": false,
|
||||
|
||||
+63
@@ -0,0 +1,63 @@
|
||||
# 使用国内镜像源拉取基础镜像
|
||||
# 备选镜像源前缀:docker.1ms.run / dockerpull.com / docker.xuanyuan.me
|
||||
FROM docker.m.daocloud.io/python:3.13-slim
|
||||
|
||||
# 国内镜像源(清华)
|
||||
ENV PIP_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple
|
||||
ENV PIP_TRUSTED_HOST=pypi.tuna.tsinghua.edu.cn
|
||||
ENV UV_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple
|
||||
ENV UV_TRUSTED_HOST=pypi.tuna.tsinghua.edu.cn
|
||||
|
||||
# 环境变量:非交互 + 路径配置
|
||||
ENV DEBIAN_FRONTEND=noninteractive \
|
||||
LANG=C.UTF-8 \
|
||||
LC_ALL=C.UTF-8 \
|
||||
UV_LINK_MODE=copy \
|
||||
UV_CACHE_DIR=/uv-cache \
|
||||
UV_PYTHON_INSTALL_DIR=/uv-python \
|
||||
UV_PROJECT_ENVIRONMENT=/opt/venv \
|
||||
PATH="/opt/venv/bin:${PATH}"
|
||||
|
||||
# 配置 apt 国内镜像(阿里云)并安装系统依赖
|
||||
RUN sed -i 's|deb.debian.org|mirrors.aliyun.com|g' /etc/apt/sources.list.d/debian.sources \
|
||||
&& apt-get update \
|
||||
&& apt-get install -y --no-install-recommends \
|
||||
ca-certificates \
|
||||
curl \
|
||||
git \
|
||||
jq \
|
||||
build-essential \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# 配置 pip 国内镜像(阿里云)作为配置文件级兜底;注意上方 PIP_INDEX_URL 环境变量(清华)优先级高于 pip.conf
|
||||
RUN mkdir -p /etc/pip \
|
||||
&& printf '[global]\nindex-url = https://mirrors.aliyun.com/pypi/simple/\ntrusted-host = mirrors.aliyun.com\n' \
|
||||
> /etc/pip/pip.conf \
|
||||
&& mkdir -p /root/.config/pip \
|
||||
&& ln -sf /etc/pip/pip.conf /root/.config/pip/pip.conf
|
||||
|
||||
# 安装 uv 并预装 Python 3.8 / 3.13
|
||||
RUN pip install --no-cache-dir uv -i https://mirrors.aliyun.com/pypi/simple/ \
|
||||
&& uv python install 3.8 3.13
|
||||
|
||||
# 安装 Node.js 20.x(actions/checkout 需要)
|
||||
RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
|
||||
apt-get install -y nodejs && \
|
||||
node --version
|
||||
|
||||
# 预装项目 dev 依赖(复制依赖描述文件与 src/;注意 src/ 变更会使后续依赖层缓存失效)
|
||||
WORKDIR /workspace
|
||||
COPY pyproject.toml tox.ini README.md ./
|
||||
COPY src/ ./src/
|
||||
|
||||
# 同步依赖到 /opt/venv(CI 时直接复用)
|
||||
RUN uv sync --frozen --no-install-project 2>/dev/null || uv sync --no-install-project
|
||||
|
||||
# 预装 tox 环境(py38 + py313)
|
||||
RUN uvx tox run -e py38,py313 --notest 2>/dev/null || true
|
||||
|
||||
# 持久化 uv 缓存目录(CI 可挂载到宿主机加速)
|
||||
VOLUME ["/uv-cache"]
|
||||
|
||||
# 默认入口
|
||||
CMD ["/bin/bash"]
|
||||
@@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2026 endo Team
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
@@ -2,11 +2,12 @@
|
||||
|
||||
> 轻量、类型安全的 DAG 任务调度器。
|
||||
|
||||
[](https://github.com/pyflowx/pyflowx/actions/workflows/ci.yml)
|
||||
[](https://github.com/gookeryoung/pyflowx/actions/workflows/ci.yml)
|
||||
[](https://pypi.org/project/pyflowx/)
|
||||
[](https://pypi.org/project/pyflowx/)
|
||||
[](https://github.com/pyflowx/pyflowx)
|
||||
[](https://github.com/pyflowx/pyflowx/blob/main/LICENSE)
|
||||
[](https://pyflowx.readthedocs.io/zh/latest/)
|
||||
[](https://github.com/gookeryoung/pyflowx)
|
||||
[](https://github.com/gookeryoung/pyflowx/blob/main/LICENSE)
|
||||
|
||||
PyFlowX 把"任务依赖"这件事做到极致简单:**参数名就是依赖声明**。无需装饰器、
|
||||
无需样板包装器,写一个普通函数,框架按参数名自动注入上游结果。
|
||||
@@ -14,15 +15,26 @@ PyFlowX 把"任务依赖"这件事做到极致简单:**参数名就是依赖
|
||||
## 特性
|
||||
|
||||
- **零样板** —— 参数名即依赖,框架自动注入上游结果
|
||||
- **三种执行策略** —— `sequential`(调试)/ `thread`(I/O 密集同步)/ `async`(I/O 密集异步)
|
||||
- **四种执行策略** —— `sequential`(调试)/ `thread`(I/O 密集同步)/ `async`(I/O 密集异步)/ `dependency`(依赖驱动,最大化并行)
|
||||
- **类型安全** —— `TaskSpec[T]` 把返回类型一路传到 `RunReport`,mypy strict 通过
|
||||
- **DAG 校验** —— 构建时即时校验重名、缺失依赖、环
|
||||
- **自动分层** —— Kahn 算法分组,同层任务可并行
|
||||
- **重试与超时** —— 每个任务独立配置 `retries` 与 `timeout`
|
||||
- **断点续跑** —— `MemoryBackend` / `JSONBackend`,成功结果可缓存复用
|
||||
- **可观测** —— `on_event` 回调、`dry_run` 预览、Mermaid 可视化
|
||||
- **零运行时依赖** —— 仅依赖标准库(3.8 需 `graphlib_backport`)
|
||||
- **100% 测试覆盖** —— 分支覆盖率达 100%
|
||||
- **重试与超时** —— 每个任务独立配置 `RetryPolicy`(max_attempts/delay/backoff/jitter/retry_on)与 `timeout`
|
||||
- **软依赖** —— `soft_depends_on` 仅用于上下文注入,不参与拓扑分层
|
||||
- **并发限制** —— `concurrency_key` + `concurrency_limits` 按组限流
|
||||
- **任务钩子** —— `TaskHooks`(pre_run/post_run/on_failure)生命周期回调
|
||||
- **断点续跑** —— `MemoryBackend` / `JSONBackend`,成功结果可缓存复用;`batch()` 批量落盘
|
||||
- **缓存键** —— `cache_key` 函数基于输入计算稳定键,使不同输入产生独立缓存
|
||||
- **命令任务** —— `cmd` 参数直接执行外部命令,支持列表/shell/可调用对象
|
||||
- **条件执行** —— `conditions` 参数按平台、环境变量、应用安装等条件跳过任务
|
||||
- **图组合** —— `compose` / `GraphComposer` 编程式展开多图字符串引用
|
||||
- **任务模板** —— `task_template` 工厂批量生成相似 TaskSpec
|
||||
- **图级默认值** —— `GraphDefaults` 统一配置 retry/timeout/concurrency 等
|
||||
- **CLI 运行器** —— `CliRunner` 把多个图映射为命令行子命令,替代 Makefile
|
||||
- **可观测** —— `on_event` 回调(RUNNING/SUCCESS/FAILED/SKIPPED)、`dry_run` 预览、`verbose` 生命周期日志、Mermaid 可视化
|
||||
- **YAML 任务编排** —— GitHub Actions 风格的声明式任务图,支持 `jobs`/`needs`/`strategy.matrix`/`if` 等 CI/CD 概念,从 YAML 文件直接加载执行
|
||||
- **最小依赖** —— 仅依赖标准库 + PyYAML(3.8 需 `graphlib_backport`、`typing-extensions`)
|
||||
- **97% 测试覆盖** —— 分支覆盖率 >= 95%
|
||||
|
||||
## 安装
|
||||
|
||||
@@ -41,13 +53,16 @@ uv add pyflowx
|
||||
```python
|
||||
import pyflowx as px
|
||||
|
||||
|
||||
def extract() -> list[int]:
|
||||
return [1, 2, 3]
|
||||
|
||||
|
||||
# 参数名 extract 自动匹配上游任务名 → 自动注入
|
||||
def double(extract: list[int]) -> list[int]:
|
||||
return [x * 2 for x in extract]
|
||||
|
||||
|
||||
graph = px.Graph.from_specs([
|
||||
px.TaskSpec("extract", extract),
|
||||
px.TaskSpec("double", double, ("extract",)),
|
||||
@@ -61,36 +76,92 @@ print(report["double"]) # [2, 4, 6]
|
||||
|
||||
### TaskSpec —— 任务描述
|
||||
|
||||
`TaskSpec` 是不可变的任务描述符,是唯一需要配置的东西:
|
||||
`TaskSpec` 是不可变的任务描述符(`Generic[T]`,返回类型一路传到 `RunReport`),是唯一需要配置的东西:
|
||||
|
||||
```python
|
||||
px.TaskSpec(
|
||||
name="fetch_user", # 唯一标识
|
||||
fn=fetch_user, # 同步或异步函数
|
||||
depends_on=("auth",), # 依赖的任务名
|
||||
args=(uid,), # 静态位置参数(追加在注入参数后)
|
||||
kwargs={"timeout": 30}, # 静态关键字参数
|
||||
retries=3, # 失败重试次数(0 = 仅一次)
|
||||
timeout=30.0, # 超时秒数(None = 不限制)
|
||||
tags=("api", "user"), # 自由标签,用于子图过滤
|
||||
name="fetch_user", # 唯一标识
|
||||
fn=fetch_user, # 同步或异步函数
|
||||
cmd=["curl", "..."], # 或: 执行命令(覆盖 fn)
|
||||
depends_on=("auth",), # 硬依赖(参与拓扑分层)
|
||||
soft_depends_on=("cache",), # 软依赖(仅注入,不参与分层)
|
||||
args=(uid,), # 静态位置参数(追加在注入参数后)
|
||||
kwargs={"timeout": 30}, # 静态关键字参数
|
||||
retry=px.RetryPolicy(max_attempts=3, delay=1.0, backoff=2.0), # 重试策略
|
||||
timeout=30.0, # 超时秒数(None = 不限制)
|
||||
tags=("api", "user"), # 自由标签,用于子图过滤
|
||||
conditions=(is_prod,), # 条件函数列表(全部为 True 才执行)
|
||||
priority=10, # 同层内优先级(高优先执行,默认 0)
|
||||
concurrency_key="db", # 并发分组键(配合 concurrency_limits 限流)
|
||||
cache_key=lambda ctx: str(ctx.get("uid")), # 缓存键函数(不同输入独立缓存)
|
||||
hooks=px.TaskHooks(pre_run=..., post_run=..., on_failure=...), # 生命周期钩子
|
||||
cwd=Path("/tmp"), # 命令工作目录(仅 cmd 模式)
|
||||
env={"DEBUG": "1"}, # 环境变量覆盖(fn 与 cmd 模式均生效)
|
||||
verbose=True, # 打印命令输出(仅 cmd 模式)
|
||||
skip_if_missing=True, # 命令不存在时自动跳过(仅 list[str] cmd)
|
||||
allow_upstream_skip=False, # 上游 SKIPPED/FAILED 时是否仍执行
|
||||
continue_on_error=False, # 本任务失败是否不中断整体
|
||||
)
|
||||
```
|
||||
|
||||
支持两种任务形态:
|
||||
|
||||
- **函数任务**(`fn`):普通 Python 函数,参数名驱动自动注入
|
||||
- **命令任务**(`cmd`):执行外部命令,支持 `list[str]`、`str`(shell)、`Callable` 三种形态
|
||||
|
||||
`skip_if_missing=True` 时,`list[str]` 类型的 `cmd` 会通过 `shutil.which` 检查命令是否存在,不存在则跳过任务(标记为 `SKIPPED`)而非失败。适用于构建工具场景,避免因未安装某些工具而导致整个图执行失败。
|
||||
|
||||
### Graph —— DAG 构建
|
||||
|
||||
```python
|
||||
graph = px.Graph.from_specs([...]) # 整批校验(推荐)
|
||||
# 图级默认值:TaskSpec 字段为 None 时回退
|
||||
defaults = px.GraphDefaults(retry=px.RetryPolicy(max_attempts=2), timeout=60.0)
|
||||
|
||||
graph = px.Graph.from_specs([...], defaults=defaults) # 整批校验(推荐)
|
||||
# 或增量构建
|
||||
graph = px.Graph()
|
||||
graph = px.Graph(defaults=defaults)
|
||||
graph.add(px.TaskSpec("a", fn_a))
|
||||
graph.add(px.TaskSpec("b", fn_b, ("a",)))
|
||||
|
||||
graph.validate() # 显式校验(环检测)
|
||||
graph.layers() # 拓扑分层
|
||||
graph.to_mermaid() # Mermaid 可视化
|
||||
graph.describe() # 人类可读摘要
|
||||
graph.subgraph(("api",)) # 按标签切片
|
||||
graph.validate() # 显式校验(环检测)
|
||||
graph.layers() # 拓扑分层(run() 入口已统一校验,直接调用需自行先 validate)
|
||||
graph.to_mermaid() # Mermaid 可视化
|
||||
graph.describe() # 人类可读摘要
|
||||
graph.subgraph(("api",)) # 按标签切片
|
||||
graph.subgraph_by_names(("a", "b")) # 按名称切片
|
||||
graph.map("fetch", [1, 2, 3], lambda i: TaskSpec(f"fetch_{i}", ...)) # 批量 fan-out
|
||||
```
|
||||
|
||||
### 图组合 —— compose
|
||||
|
||||
`compose` / `GraphComposer` 把带字符串引用的多个图展开为纯 `Graph`:
|
||||
|
||||
```python
|
||||
graphs = {
|
||||
"build": px.Graph.from_specs([px.TaskSpec("b", cmd=["echo", "b"])]),
|
||||
"all": px.Graph.from_specs(["build", px.TaskSpec("t", cmd=["echo", "t"])]),
|
||||
}
|
||||
resolved = px.compose(graphs) # "all" 图中的 "build" 引用被展开
|
||||
```
|
||||
|
||||
引用格式:`"command_name"`(整个图)或 `"command_name.task_name"`(特定任务)。
|
||||
`CliRunner` 内部自动调用 `compose`。
|
||||
|
||||
### 任务模板 —— task_template
|
||||
|
||||
`task_template` 工厂批量生成相似 TaskSpec:
|
||||
|
||||
```python
|
||||
fetch = px.task_template(
|
||||
fn=fetch_url,
|
||||
retry=px.RetryPolicy(max_attempts=5),
|
||||
timeout=30.0,
|
||||
tags=("api",),
|
||||
)
|
||||
graph = px.Graph.from_specs([
|
||||
fetch("users", url="https://api.example.com/users"),
|
||||
fetch("posts", url="https://api.example.com/posts"),
|
||||
])
|
||||
```
|
||||
|
||||
### run —— 执行
|
||||
@@ -98,23 +169,26 @@ graph.subgraph_by_names(("a", "b")) # 按名称切片
|
||||
```python
|
||||
report = px.run(
|
||||
graph,
|
||||
strategy="async", # sequential | thread | async
|
||||
max_workers=8, # thread 策略的线程池大小
|
||||
dry_run=False, # True = 仅打印计划
|
||||
on_event=callback, # 状态转换回调
|
||||
strategy="async", # sequential | thread | async | dependency
|
||||
max_workers=8, # thread 策略的线程池大小
|
||||
concurrency_limits={"db": 2}, # 按 concurrency_key 限流
|
||||
dry_run=False, # True = 仅打印计划
|
||||
verbose=False, # True = 打印任务生命周期日志
|
||||
on_event=callback, # 状态转换回调(RUNNING/SUCCESS/FAILED/SKIPPED)
|
||||
state=px.JSONBackend("state.json"), # 断点续跑后端
|
||||
continue_on_error=False, # True = 单任务失败不中断整体
|
||||
)
|
||||
```
|
||||
|
||||
### RunReport —— 结果
|
||||
|
||||
```python
|
||||
report["task_name"] # 任务返回值
|
||||
report["task_name"] # 任务返回值
|
||||
report.result_of("task_name") # 完整 TaskResult
|
||||
report.success # 整体是否成功
|
||||
report.summary() # 统计字典
|
||||
report.failed_tasks() # 失败任务名列表
|
||||
report.describe() # 人类可读报告
|
||||
report.success # 整体是否成功
|
||||
report.summary() # 统计字典
|
||||
report.failed_tasks() # 失败任务名列表
|
||||
report.describe() # 人类可读报告
|
||||
```
|
||||
|
||||
## 上下文注入规则
|
||||
@@ -122,21 +196,24 @@ report.describe() # 人类可读报告
|
||||
按顺序求值:
|
||||
|
||||
1. **标注为 `Context`** 的参数 → 接收完整上游结果映射
|
||||
2. **名称匹配依赖** 的参数 → 接收该依赖的结果
|
||||
2. **名称匹配依赖** 的参数 → 接收该依赖的结果(含软依赖,缺失时注入默认值)
|
||||
3. **`**kwargs`** 参数 → 接收所有依赖结果(dict)
|
||||
4. **`TaskSpec.args` / `kwargs`** → 为非依赖参数提供静态值
|
||||
|
||||
```python
|
||||
from typing import Any, Dict
|
||||
|
||||
|
||||
def aggregate(ctx: px.Context) -> Dict[str, Any]:
|
||||
"""ctx 包含所有 depends_on 任务的返回值。"""
|
||||
return dict(ctx)
|
||||
|
||||
|
||||
def merge(fetch_a: str, fetch_b: str) -> str:
|
||||
"""fetch_a / fetch_b 自动注入。"""
|
||||
return fetch_a + fetch_b
|
||||
|
||||
|
||||
def fetch_user(uid: int) -> dict: # uid 来自 TaskSpec.args
|
||||
...
|
||||
```
|
||||
@@ -148,37 +225,225 @@ def fetch_user(uid: int) -> dict: # uid 来自 TaskSpec.args
|
||||
| `sequential` | 串行 | 调试、CPU 密集 | 直接调用 | 事件循环 |
|
||||
| `thread` | 线程池 | I/O 密集同步 | 线程池 | 不支持 |
|
||||
| `async` | 事件循环 | I/O 密集异步 | 卸载到线程池 | 事件循环 |
|
||||
| `dependency` | 依赖驱动 | 最大化并行度 | 卸载到线程池 | 事件循环 |
|
||||
|
||||
所有策略都遵循 `retries`、`timeout`、上下文注入、状态后端,并发出 `TaskEvent`。
|
||||
所有策略都遵循 `RetryPolicy`、`timeout`、上下文注入、状态后端、`concurrency_limits`,
|
||||
并发出 `TaskEvent`(RUNNING/SUCCESS/FAILED/SKIPPED)。`dependency` 策略无层屏障:
|
||||
任务在其所有硬依赖完成后立即启动。
|
||||
|
||||
## 示例
|
||||
## 命令任务
|
||||
|
||||
仓库 `examples/` 目录包含完整示例:
|
||||
`TaskSpec` 的 `cmd` 参数支持执行外部命令,无需包装 Python 函数:
|
||||
|
||||
- [`etl_pipeline.py`](examples/etl_pipeline.py) —— ETL 流水线(sequential)
|
||||
- [`parallel_run.py`](examples/parallel_run.py) —— 并行执行对比(thread vs sequential)
|
||||
- [`async_aggregation.py`](examples/async_aggregation.py) —— 异步聚合 + Context 注入
|
||||
```python
|
||||
graph = px.Graph.from_specs([
|
||||
# 命令列表(推荐,参数无需转义)
|
||||
px.TaskSpec("list_files", cmd=["ls", "-la"]),
|
||||
# shell 字符串(支持管道、重定向)
|
||||
px.TaskSpec("check_git", cmd="git status | head"),
|
||||
# 带工作目录与超时
|
||||
px.TaskSpec("build", cmd=["make", "all"], cwd=Path("/project"), timeout=300),
|
||||
# 命令不存在时自动跳过(而非失败)
|
||||
px.TaskSpec("optional_tool", cmd=["maturin", "build"], skip_if_missing=True),
|
||||
])
|
||||
```
|
||||
|
||||
运行:
|
||||
`verbose=True` 时打印执行的命令、工作目录、返回码与输出;`verbose=False` 时静默执行(失败信息仍包含 stderr)。
|
||||
|
||||
`skip_if_missing=True` 时,`list[str]` 类型的 `cmd` 会通过 `shutil.which` 检查命令是否存在,不存在则跳过任务(标记为 `SKIPPED`)而非失败。适用于构建工具场景,避免因未安装某些工具而导致整个图执行失败。对于 `str`(shell)和 `Callable` 类型的 `cmd`,此参数无效。
|
||||
|
||||
## 条件执行
|
||||
|
||||
`conditions` 参数让任务按条件跳过(标记为 `SKIPPED`):
|
||||
|
||||
```python
|
||||
from pyflowx.conditions import IS_WINDOWS, BuiltinConditions
|
||||
|
||||
graph = px.Graph.from_specs([
|
||||
# 仅在 Windows 上运行
|
||||
px.TaskSpec("win_only", cmd="dir", conditions=(IS_WINDOWS,)),  # dir 是 cmd.exe 内建命令,需用 shell 字符串形式
|
||||
# 仅在 git 已安装时运行
|
||||
px.TaskSpec(
|
||||
"git_check",
|
||||
cmd=["git", "--version"],
|
||||
conditions=(BuiltinConditions.HAS_INSTALLED("git"),),
|
||||
),
|
||||
# 组合条件
|
||||
px.TaskSpec(
|
||||
"prod_deploy",
|
||||
fn=deploy,
|
||||
conditions=(
|
||||
BuiltinConditions.ENV_VAR_EQUALS("ENV", "prod"),
|
||||
BuiltinConditions.HAS_INSTALLED("docker"),
|
||||
),
|
||||
),
|
||||
])
|
||||
```
|
||||
|
||||
内置条件:`IS_WINDOWS` / `IS_LINUX` / `IS_MACOS` / `IS_POSIX` / `PYTHON_VERSION` / `HAS_INSTALLED` / `ENV_VAR_EXISTS` / `ENV_VAR_EQUALS` / `NOT` / `AND` / `OR`。
|
||||
|
||||
## CLI 运行器
|
||||
|
||||
`CliRunner` 把多个 Graph 映射为命令行子命令,适合构建项目专属构建工具(替代 Makefile):
|
||||
|
||||
```python
|
||||
runner = px.CliRunner(
|
||||
strategy="sequential",
|
||||
description="My Build Tool",
|
||||
graphs={
|
||||
"clean": clean_graph,
|
||||
"build": build_graph,
|
||||
"test": test_graph,
|
||||
},
|
||||
)
|
||||
runner.run_cli() # 解析 sys.argv 并执行
|
||||
```
|
||||
|
||||
命令行用法:
|
||||
|
||||
```bash
|
||||
python examples/etl_pipeline.py
|
||||
python examples/parallel_run.py
|
||||
python examples/async_aggregation.py
|
||||
pf pymake clean # 执行 clean 图
|
||||
pf pymake build --strategy thread # 覆盖执行策略
|
||||
pf pymake test --dry-run # 仅打印执行计划
|
||||
pf pymake --list # 列出所有命令
|
||||
pf pymake --quiet # 静默模式
|
||||
```
|
||||
|
||||
`verbose=True`(默认)时打印任务生命周期(开始/成功/失败/跳过)与命令输出;`--quiet` 关闭。
|
||||
|
||||
## YAML 任务编排
|
||||
|
||||
PyFlowX 支持 GitHub Actions 风格的声明式 YAML 任务编排,从 YAML 文件直接加载任务图。
|
||||
|
||||
### 编程式 API
|
||||
|
||||
```python
|
||||
import pyflowx as px
|
||||
|
||||
# 从 YAML 文件加载任务图
|
||||
graph = px.Graph.from_yaml("pipeline.yaml")
|
||||
report = px.run(graph, strategy="thread")
|
||||
|
||||
# 或用函数式 API
|
||||
graph = px.load_yaml("pipeline.yaml")
|
||||
graph = px.parse_yaml_string("""
|
||||
jobs:
|
||||
hello:
|
||||
cmd: ["echo", "hello"]
|
||||
""")
|
||||
```
|
||||
|
||||
### CLI 入口
|
||||
|
||||
通过 `pf` 统一入口调用(详见 [pf 工具](#cli-工具) 章节):
|
||||
|
||||
```bash
|
||||
# 执行 YAML 任务图
|
||||
pf yamlrun pipeline.yaml
|
||||
|
||||
# 指定执行策略
|
||||
pf yamlrun pipeline.yaml --strategy thread
|
||||
|
||||
# 仅打印任务分层,不执行
|
||||
pf yamlrun pipeline.yaml --dry-run
|
||||
|
||||
# 列出所有任务名
|
||||
pf yamlrun pipeline.yaml --list
|
||||
|
||||
# 静默模式
|
||||
pf yamlrun pipeline.yaml --quiet
|
||||
```
|
||||
|
||||
### YAML Schema(GitHub Actions 风格)
|
||||
|
||||
```yaml
|
||||
strategy: thread # 图级默认策略
|
||||
defaults: # 图级默认值
|
||||
retry: {max_attempts: 3}
|
||||
verbose: true
|
||||
env: {CI: "true"}
|
||||
|
||||
jobs:
|
||||
setup:
|
||||
cmd: ["git", "clone", "..."]
|
||||
runs-on: linux
|
||||
|
||||
build:
|
||||
needs: [setup] # 依赖列表
|
||||
cmd: ["python", "-m", "build"]
|
||||
timeout: 300
|
||||
retry: {max_attempts: 2, delay: 1.0}
|
||||
|
||||
test:
|
||||
needs: [build]
|
||||
cmd: ["python${{ matrix.version }}", "-m", "pytest"] # 矩阵占位符
|
||||
strategy:
|
||||
matrix: # 笛卡尔积展开为 6 个任务
|
||||
version: ["3.8", "3.9", "3.10"]
|
||||
os: ["linux", "macos"]
|
||||
if: "env.CI" # 条件: 环境变量存在
|
||||
|
||||
lint:
|
||||
needs: [build]
|
||||
cmd: ["ruff", "check"]
|
||||
if: "env.CI == 'true'" # 条件: 环境变量等于
|
||||
|
||||
deploy:
|
||||
needs: [test, lint] # 矩阵依赖自动展开
|
||||
cmd: ["twine", "upload"]
|
||||
if: "env.DEPLOY_TOKEN != ''"
|
||||
allow-upstream-skip: true
|
||||
concurrency-key: deploy_lock
|
||||
```
|
||||
|
||||
### 字段映射
|
||||
|
||||
| YAML 字段 | TaskSpec 字段 | 说明 |
|
||||
|-----------|---------------|------|
|
||||
| `jobs.<id>` | `name` | job ID 作为任务名 |
|
||||
| `cmd` / `run` | `cmd` | `cmd` 为列表形式,`run` 为 shell 字符串 |
|
||||
| `needs` | `depends_on` | 依赖列表(矩阵任务自动展开) |
|
||||
| `if` | `conditions` | `success()` / `always()` / `env.VAR` / `env.VAR == 'x'` |
|
||||
| `strategy.matrix` | 矩阵扇出 | 笛卡尔积展开为多个任务 |
|
||||
| `${{ matrix.key }}` | 占位符 | 在 cmd/run/cwd/env 中替换 |
|
||||
| `timeout` | `timeout` | 超时秒数 |
|
||||
| `retry` | `retry` | `{max_attempts, delay, backoff, jitter}` |
|
||||
| `cwd` | `cwd` | 工作目录 |
|
||||
| `env` | `env` | 环境变量 |
|
||||
| `verbose` | `verbose` | 详细输出 |
|
||||
| `continue-on-error` | `continue_on_error` | 失败不中止整图 |
|
||||
| `skip-if-missing` | `skip_if_missing` | 命令不存在时跳过 |
|
||||
| `allow-upstream-skip` | `allow_upstream_skip` | 上游跳过时仍执行 |
|
||||
| `priority` | `priority` | 同层优先级 |
|
||||
| `concurrency-key` | `concurrency_key` | 并发限制键 |
|
||||
| `tags` | `tags` | 自由标签 |
|
||||
| `runs-on` | `tags`(追加) | 运行环境标签 |
|
||||
|
||||
## 断点续跑
|
||||
|
||||
```python
|
||||
from pyflowx import JSONBackend
|
||||
|
||||
# 第一次运行:成功结果写入 state.json
|
||||
backend = JSONBackend("state.json")
|
||||
backend = JSONBackend("state.json", ttl=3600) # ttl 秒数,过期条目自动忽略
|
||||
report = px.run(graph, strategy="sequential", state=backend)
|
||||
|
||||
# 第二次运行:已缓存任务自动跳过
|
||||
# 第二次运行:已缓存任务自动跳过(状态为 SKIPPED)
|
||||
report = px.run(graph, strategy="sequential", state=backend)
|
||||
# report.results 中缓存任务状态为 SKIPPED
|
||||
```
|
||||
|
||||
`run()` 内部以 `backend.batch()` 包裹整个执行:所有 `save` 延迟到运行结束时统一落盘一次
|
||||
(`JSONBackend` 从 O(N²) 降为 O(N) 磁盘写入;`MemoryBackend` 为 no-op)。
|
||||
|
||||
**缓存键**:默认存储键为任务名。配置 `cache_key` 函数后,键为 `"name:cache_key_value"`,
|
||||
使不同输入产生独立缓存条目:
|
||||
|
||||
```python
|
||||
px.TaskSpec(
|
||||
"fetch_user",
|
||||
fn=fetch_user,
|
||||
cache_key=lambda ctx: str(ctx.get("uid")), # 不同 uid 独立缓存
|
||||
)
|
||||
```
|
||||
|
||||
## 错误处理
|
||||
@@ -219,23 +484,93 @@ except px.PyFlowXError:
|
||||
|
||||
PyFlowX 专注于**单机 DAG 调度**的极致简洁,适合 ETL、数据处理、CI 流水线等场景。
|
||||
|
||||
## 高级特性
|
||||
|
||||
### 并发限制
|
||||
|
||||
按 `concurrency_key` 分组限流,避免压垮下游资源:
|
||||
|
||||
```python
|
||||
graph = px.Graph.from_specs([
|
||||
px.TaskSpec("q1", fn=query_db, concurrency_key="db"),
|
||||
px.TaskSpec("q2", fn=query_db, concurrency_key="db"),
|
||||
px.TaskSpec("q3", fn=query_db, concurrency_key="db"),
|
||||
])
|
||||
# 同一时刻最多 2 个 "db" 组任务运行
|
||||
px.run(graph, strategy="async", concurrency_limits={"db": 2})
|
||||
```
|
||||
|
||||
### 任务钩子
|
||||
|
||||
`TaskHooks` 在任务生命周期触发(异常仅记录,不影响任务状态):
|
||||
|
||||
```python
|
||||
hooks = px.TaskHooks(
|
||||
pre_run=lambda spec: print(f"start {spec.name}"),
|
||||
post_run=lambda spec, value: print(f"done {spec.name}"),
|
||||
on_failure=lambda spec, exc: alert(spec.name, exc),
|
||||
)
|
||||
px.TaskSpec("task", fn=work, hooks=hooks)
|
||||
```
|
||||
|
||||
### 优先级
|
||||
|
||||
同层内按 `priority` 降序执行(稳定排序):
|
||||
|
||||
```python
|
||||
px.TaskSpec("low", fn=work, priority=0)
|
||||
px.TaskSpec("high", fn=work, priority=10) # 同层内先执行
|
||||
```
|
||||
|
||||
## 开发
|
||||
|
||||
```bash
|
||||
# 安装开发依赖
|
||||
uv sync --extra dev
|
||||
|
||||
# 运行测试(含覆盖率)
|
||||
uv run pytest --cov=pyflowx --cov-fail-under=100
|
||||
# 运行测试(含覆盖率,阈值 95%)
|
||||
uv run pytest --cov=pyflowx --cov-fail-under=95
|
||||
|
||||
# 类型检查
|
||||
uv run mypy
|
||||
|
||||
# 代码风格
|
||||
uv run ruff check src tests examples
|
||||
uv run ruff format --check src tests examples
|
||||
uv run ruff check src tests
|
||||
uv run ruff format --check src tests
|
||||
```
|
||||
|
||||
## 模块结构
|
||||
|
||||
### 核心
|
||||
|
||||
| 模块 | 职责 |
|
||||
|------|------|
|
||||
| `task.py` | 纯数据结构:`TaskSpec`、`RetryPolicy`、`TaskHooks`、`TaskStatus` |
|
||||
| `graph.py` | DAG 构建、校验、分层、可视化 |
|
||||
| `compose.py` | 多图组合:`GraphComposer` / `compose` |
|
||||
| `context.py` | 上下文注入:参数名→依赖解析 |
|
||||
| `command.py` | 命令执行:`run_command`(list/shell/Callable) |
|
||||
| `conditions.py` | 条件执行:内置条件与组合器 |
|
||||
| `executors.py` | 执行器与 `run` 入口:四种策略共享模块级辅助;verbose 统一应用到 spec |
|
||||
| `storage.py` | 状态后端:`MemoryBackend` / `JSONBackend`(batch flush) |
|
||||
| `runner.py` | CLI 运行器:`CliRunner` |
|
||||
| `report.py` | 运行结果:`RunReport` / `TaskResult` |
|
||||
| `yaml_loader.py` | YAML 任务编排:GitHub Actions 风格 schema 解析(`load_yaml` / `parse_yaml_string` / `run_cli`) |
|
||||
| `registry.py` | 函数注册中心:`register_fn` / `get_fn` / `has_fn`(YAML 的 `fn:` 引用) |
|
||||
| `profiling.py` | 性能分析:`Profiler` 任务耗时统计 |
|
||||
| `errors.py` | 错误家族:`PyFlowXError` 子类 |
|
||||
| `ops/` | 工具函数(dev/files/llm/media/system),被 YAML 的 `fn:` 引用 |
|
||||
|
||||
### CLI 工具
|
||||
|
||||
| 模块 | 职责 |
|
||||
|------|------|
|
||||
| `cli/pf.py` | 统一入口:`pf <tool> [command]`,自动发现 `configs/*.yaml` 并路由 |
|
||||
| `configs/` | YAML 工具配置(clr/taskkill/which/msdownload/sglang/dockercmd/envdev 等) |
|
||||
| `cli/yamlrun.py` | YAML pipeline 执行器,`pf yamlrun pipeline.yaml` 调用 |
|
||||
| `cli/profiler.py` | 性能分析 CLI |
|
||||
| `cli/emlmanager.py` | 邮件管理 CLI |
|
||||
|
||||
## 许可证
|
||||
|
||||
MIT
|
||||
|
||||
+106
@@ -0,0 +1,106 @@
|
||||
API 参考
|
||||
========
|
||||
|
||||
任务描述
|
||||
--------
|
||||
|
||||
.. autoclass:: pyflowx.TaskSpec
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
:exclude-members: args, kwargs
|
||||
|
||||
.. autoclass:: pyflowx.RetryPolicy
|
||||
:members:
|
||||
:undoc-members:
|
||||
|
||||
.. autoclass:: pyflowx.TaskHooks
|
||||
:members:
|
||||
:undoc-members:
|
||||
|
||||
.. autoclass:: pyflowx.TaskStatus
|
||||
:members:
|
||||
:undoc-members:
|
||||
|
||||
图构建
|
||||
------
|
||||
|
||||
.. autoclass:: pyflowx.Graph
|
||||
:members:
|
||||
:undoc-members:
|
||||
:exclude-members: from_specs, from_yaml
|
||||
|
||||
.. autoclass:: pyflowx.GraphDefaults
|
||||
:members:
|
||||
:undoc-members:
|
||||
|
||||
.. autofunction:: pyflowx.compose
|
||||
.. autofunction:: pyflowx.task_template
|
||||
|
||||
执行
|
||||
----
|
||||
|
||||
.. autofunction:: pyflowx.run
|
||||
|
||||
.. autoclass:: pyflowx.RunReport
|
||||
:members:
|
||||
:undoc-members:
|
||||
|
||||
.. autoclass:: pyflowx.TaskResult
|
||||
:members:
|
||||
:undoc-members:
|
||||
|
||||
YAML 编排
|
||||
---------
|
||||
|
||||
.. autofunction:: pyflowx.load_yaml
|
||||
.. autofunction:: pyflowx.parse_yaml_string
|
||||
.. autofunction:: pyflowx.run_yaml
|
||||
.. autofunction:: pyflowx.run_cli
|
||||
.. autofunction:: pyflowx.build_cli_parser
|
||||
|
||||
函数注册
|
||||
--------
|
||||
|
||||
.. autofunction:: pyflowx.register_fn
|
||||
.. autofunction:: pyflowx.get_fn
|
||||
.. autofunction:: pyflowx.has_fn
|
||||
|
||||
命令执行
|
||||
--------
|
||||
|
||||
.. autofunction:: pyflowx.run_command
|
||||
|
||||
CLI 运行器
|
||||
----------
|
||||
|
||||
.. autoclass:: pyflowx.CliRunner
|
||||
:members:
|
||||
:undoc-members:
|
||||
|
||||
状态后端
|
||||
--------
|
||||
|
||||
.. autoclass:: pyflowx.StateBackend
|
||||
:members:
|
||||
:undoc-members:
|
||||
|
||||
.. autoclass:: pyflowx.MemoryBackend
|
||||
:members:
|
||||
:undoc-members:
|
||||
|
||||
.. autoclass:: pyflowx.JSONBackend
|
||||
:members:
|
||||
:undoc-members:
|
||||
|
||||
错误家族
|
||||
--------
|
||||
|
||||
.. autoexception:: pyflowx.PyFlowXError
|
||||
.. autoexception:: pyflowx.DuplicateTaskError
|
||||
.. autoexception:: pyflowx.MissingDependencyError
|
||||
.. autoexception:: pyflowx.CycleError
|
||||
.. autoexception:: pyflowx.TaskFailedError
|
||||
.. autoexception:: pyflowx.TaskTimeoutError
|
||||
.. autoexception:: pyflowx.InjectionError
|
||||
.. autoexception:: pyflowx.StorageError
|
||||
@@ -0,0 +1,45 @@
|
||||
变更日志
|
||||
========
|
||||
|
||||
0.4.5
|
||||
-----
|
||||
|
||||
CLI 重构
|
||||
~~~~~~~~
|
||||
|
||||
- 新增 ``pf`` 统一入口:通过 ``pf <tool> [command] [options]`` 调用所有工具
|
||||
- 13 个工具迁移到 YAML 配置(filedate/filelevel/folderback/folderzip/screenshot/sshcopyid/lscalc/bumpversion/autofmt/piptool/packtool/pdftool/gittool)
|
||||
- YAML 配置支持 ``cli:`` 段声明命令行参数 schema,由 ``build_cli_parser`` 自动生成 argparse
|
||||
- 删除 13 个冗余 ``.py`` 入口脚本,统一通过 ``pf`` 调用
|
||||
- ``run()`` 在 ``verbose=True`` 时自动把 verbose 标记应用到所有 spec
|
||||
- 全局选项 ``--verbose`` 改为 ``--quiet``(默认显示执行过程)
|
||||
- ``cmd`` 任务成功时打印 stdout(此前被静默丢弃)
|
||||
- ``gittool`` 用 ``CLEAN_EXCLUDES`` 数组变量配置 ``git clean -e`` 参数
|
||||
|
||||
YAML 任务编排
|
||||
~~~~~~~~~~~~~
|
||||
|
||||
- 支持 ``variables`` 变量定义,``${VAR}`` 在 cmd/env/cwd 中替换
|
||||
- 列表变量展开为 cmd 数组多个元素
|
||||
- ``cli:`` 段支持 subcommands/positional/options 三级 schema
|
||||
- 支持 ``type: path`` 自动转为 ``pathlib.Path``
|
||||
|
||||
文档
|
||||
~~~~
|
||||
|
||||
- 搭建 Sphinx 文档,发布到 ReadTheDocs
|
||||
- 更新 README:CLI 示例改为 ``pf`` 统一入口,模块结构表补全
|
||||
|
||||
0.3.x
|
||||
-----
|
||||
|
||||
- 新增 YAML 任务编排(GitHub Actions 风格 schema)
|
||||
- 新增 ``fn:`` 函数引用与 ``register_fn`` / ``get_fn`` 注册中心
|
||||
- 新增 ``compose`` / ``GraphComposer`` 多图组合
|
||||
- 新增 ``task_template`` 任务模板工厂
|
||||
- 新增 ``concurrency_key`` + ``concurrency_limits`` 并发限制
|
||||
- 新增 ``JSONBackend`` 断点续跑与 ``batch()`` 批量落盘
|
||||
- 新增 ``cache_key`` 缓存键函数
|
||||
- 新增条件执行(``IS_WINDOWS`` / ``HAS_INSTALLED`` / ``ENV_VAR_EQUALS`` 等)
|
||||
- 四种执行策略:``sequential`` / ``thread`` / ``async`` / ``dependency``
|
||||
- 参数名即依赖的上下文注入机制
|
||||
@@ -0,0 +1,65 @@
|
||||
"""Sphinx 配置.
|
||||
|
||||
ReadTheDocs 构建 PyFlowX 文档站。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# 确保 src/ 在 sys.path 中, autodoc 能导入 pyflowx
|
||||
sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "src"))
|
||||
|
||||
from pyflowx import __version__
|
||||
|
||||
# -- 项目信息 --------------------------------------------------------------
|
||||
project = "PyFlowX"
|
||||
author = "pyflowx"
|
||||
copyright = "2024, pyflowx"
|
||||
release = __version__
|
||||
version = __version__
|
||||
|
||||
# -- Sphinx 配置 -----------------------------------------------------------
|
||||
extensions = [
|
||||
"sphinx.ext.autodoc",
|
||||
"sphinx.ext.napoleon",
|
||||
"sphinx.ext.viewcode",
|
||||
"sphinx.ext.intersphinx",
|
||||
"myst_parser",
|
||||
]
|
||||
|
||||
# -- 主题 ------------------------------------------------------------------
|
||||
html_theme = "sphinx_rtd_theme"
|
||||
html_static_path = ["_static"]
|
||||
|
||||
# -- autodoc 配置 ----------------------------------------------------------
|
||||
autodoc_default_options = {
|
||||
"members": True,
|
||||
"undoc-members": True,
|
||||
"show-inheritance": True,
|
||||
"member-order": "bysource",
|
||||
}
|
||||
# Show type hints in the parameter/return descriptions rather than in the signature.
# The autodoc option is spelled ``autodoc_typehints`` (no underscore between "type" and "hints").
autodoc_typehints = "description"
|
||||
autodoc_typehints_format = "short"
|
||||
|
||||
# -- napoleon 配置 (Google/NumPy docstring 兼容) --------------------------
|
||||
napoleon_google_docstring = True
|
||||
napoleon_numpy_docstring = True
|
||||
napoleon_include_init_with_doc = False
|
||||
napoleon_include_private_with_doc = False
|
||||
napoleon_include_special_with_doc = True
|
||||
|
||||
# -- intersphinx -----------------------------------------------------------
|
||||
intersphinx_mapping = {
|
||||
"python": ("https://docs.python.org/3", None),
|
||||
}
|
||||
|
||||
# -- 全局选项 ---------------------------------------------------------------
|
||||
language = "zh_CN"
|
||||
master_doc = "index"
|
||||
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
|
||||
source_suffix = {
|
||||
".rst": "restructuredtext",
|
||||
".md": "markdown",
|
||||
}
|
||||
@@ -0,0 +1,158 @@
|
||||
pf 统一 CLI 入口
|
||||
================
|
||||
|
||||
所有工具通过 ``pf <tool> [command] [options]`` 调用。工具定义在 ``cli/configs/`` 目录下的 YAML 文件中。
|
||||
|
||||
基本用法
|
||||
--------
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
pf # 列出所有可用工具
|
||||
pf filedate # 查看 filedate 工具帮助
|
||||
pf filedate add a.txt # 调用 filedate 的 add 子命令
|
||||
pf gitt c # 调用 gittool 的 c 子命令
|
||||
pf pymake b # 调用 pymake 的 b 别名
|
||||
|
||||
全局选项
|
||||
--------
|
||||
|
||||
所有 YAML 工具支持以下全局选项:
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
:widths: 25 75
|
||||
|
||||
* - 选项
|
||||
- 说明
|
||||
* - ``--dry-run``
|
||||
- 仅打印执行计划,不执行
|
||||
* - ``--quiet`` / ``-q``
|
||||
- 减少输出,不显示执行过程
|
||||
* - ``--strategy``
|
||||
- 执行策略(``sequential`` / ``thread`` / ``async`` / ``dependency``)
|
||||
* - ``--list``
|
||||
- 列出所有任务名后退出
|
||||
|
||||
默认 ``verbose`` 开启,显示执行过程(任务开始/命令/返回码/任务成功)。``--quiet`` 关闭。
|
||||
|
||||
YAML 配置工具
|
||||
--------------
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
:widths: 20 15 65
|
||||
|
||||
* - 工具
|
||||
- 别名
|
||||
- 说明
|
||||
* - ``filedate``
|
||||
- ``fd``
|
||||
- 文件日期处理
|
||||
* - ``filelevel``
|
||||
- ``fl``
|
||||
- 文件等级重命名
|
||||
* - ``folderback``
|
||||
- ``fb``
|
||||
- 文件夹备份
|
||||
* - ``folderzip``
|
||||
- ``fz``
|
||||
- 文件夹压缩
|
||||
* - ``gittool``
|
||||
- ``gitt``
|
||||
- Git 执行工具
|
||||
* - ``lscalc``
|
||||
- ``ls``
|
||||
- LS-DYNA 计算工具
|
||||
* - ``packtool``
|
||||
- ``pack``
|
||||
- Python 打包工具
|
||||
* - ``pdftool``
|
||||
- ``pdf``
|
||||
- PDF 文件工具集
|
||||
* - ``piptool``
|
||||
- ``pip``
|
||||
- pip 包管理工具
|
||||
* - ``screenshot``
|
||||
- ``ss``
|
||||
- 截图工具
|
||||
* - ``sshcopyid``
|
||||
- ``ssh``
|
||||
- SSH 密钥部署工具
|
||||
* - ``autofmt``
|
||||
- ``af``
|
||||
- 自动格式化工具
|
||||
* - ``bumpversion``
|
||||
- ``bump``
|
||||
- 版本号自动管理工具
|
||||
|
||||
传统工具
|
||||
--------
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
:widths: 20 80
|
||||
|
||||
* - 工具
|
||||
- 说明
|
||||
* - ``pymake``
|
||||
- 构建工具(替代 Makefile),如 ``pf pymake b`` 构建
|
||||
* - ``yamlrun``
|
||||
- YAML pipeline 执行器,``pf yamlrun pipeline.yaml``
|
||||
* - ``profiler``
|
||||
- 性能分析
|
||||
* - ``emlman``
|
||||
- 邮件管理
|
||||
* - ``reseticon``
|
||||
- 重置图标缓存
|
||||
|
||||
自定义工具
|
||||
----------
|
||||
|
||||
在 ``cli/configs/`` 目录新建 ``<tool>.yaml`` 即可被 ``pf`` 自动发现:
|
||||
|
||||
.. code-block:: yaml
|
||||
|
||||
# cli/configs/mytool.yaml
|
||||
strategy: sequential
|
||||
variables:
|
||||
MSG: "hello"
|
||||
cli:
|
||||
description: "我的工具"
|
||||
usage: "pf mytool [command]"
|
||||
subcommands:
|
||||
greet:
|
||||
help: "打招呼"
|
||||
jobs:
|
||||
greet:
|
||||
cmd: ["echo", "${MSG}"]
|
||||
|
||||
执行::
|
||||
|
||||
pf mytool greet
|
||||
|
||||
CliRunner(编程式)
|
||||
-------------------
|
||||
|
||||
``CliRunner`` 把多个 Graph 映射为命令行子命令,适合构建项目专属构建工具:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
runner = px.CliRunner(
|
||||
strategy="sequential",
|
||||
description="My Build Tool",
|
||||
graphs={
|
||||
"clean": clean_graph,
|
||||
"build": build_graph,
|
||||
"test": test_graph,
|
||||
},
|
||||
)
|
||||
runner.run_cli() # 解析 sys.argv 并执行
|
||||
|
||||
命令行::
|
||||
|
||||
pf pymake clean
|
||||
pf pymake build --strategy thread
|
||||
pf pymake test --dry-run
|
||||
pf pymake --list
|
||||
pf pymake --quiet
|
||||
@@ -0,0 +1,93 @@
|
||||
执行策略与 run()
|
||||
=================
|
||||
|
||||
``run()`` 是执行入口,支持四种策略:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
report = px.run(
|
||||
graph,
|
||||
strategy="async", # sequential | thread | async | dependency
|
||||
max_workers=8, # thread 策略的线程池大小
|
||||
concurrency_limits={"db": 2}, # 按 concurrency_key 限流
|
||||
dry_run=False, # True = 仅打印计划
|
||||
verbose=True, # True = 打印执行过程
|
||||
on_event=callback, # 状态转换回调
|
||||
state=px.JSONBackend("state.json"), # 断点续跑后端
|
||||
continue_on_error=False, # True = 单任务失败不中断整体
|
||||
)
|
||||
|
||||
策略对比
|
||||
--------
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
:widths: 18 18 30 16 18
|
||||
|
||||
* - 策略
|
||||
- 并发模型
|
||||
- 适用场景
|
||||
- 同步任务
|
||||
- 异步任务
|
||||
* - ``sequential``
|
||||
- 串行
|
||||
- 调试、CPU 密集
|
||||
- 直接调用
|
||||
- 事件循环
|
||||
* - ``thread``
|
||||
- 线程池
|
||||
- I/O 密集同步
|
||||
- 线程池
|
||||
- 不支持
|
||||
* - ``async``
|
||||
- 事件循环
|
||||
- I/O 密集异步
|
||||
- 卸载到线程池
|
||||
- 事件循环
|
||||
* - ``dependency``
|
||||
- 依赖驱动
|
||||
- 最大化并行度
|
||||
- 卸载到线程池
|
||||
- 事件循环
|
||||
|
||||
所有策略都遵循 ``RetryPolicy``、``timeout``、上下文注入、状态后端、``concurrency_limits``,
|
||||
并且会发出 ``TaskEvent`` 事件(RUNNING/SUCCESS/FAILED/SKIPPED)。``dependency`` 策略无层屏障:
|
||||
任务在其所有硬依赖完成后立即启动。
|
||||
|
||||
上下文注入规则
|
||||
--------------
|
||||
|
||||
按顺序求值:
|
||||
|
||||
1. **标注为** ``Context`` 的参数 → 接收完整上游结果映射
|
||||
2. **名称匹配依赖** 的参数 → 接收该依赖的结果(含软依赖,缺失时注入默认值)
|
||||
3. **可变关键字参数** ``**kwargs`` → 接收所有依赖结果(dict)
|
||||
4. **静态参数** ``TaskSpec.args`` / ``kwargs`` → 为非依赖参数提供静态值
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from typing import Any, Dict
|
||||
|
||||
def aggregate(ctx: px.Context) -> Dict[str, Any]:
|
||||
"""ctx 包含所有 depends_on 任务的返回值。"""
|
||||
return dict(ctx)
|
||||
|
||||
def merge(fetch_a: str, fetch_b: str) -> str:
|
||||
"""fetch_a / fetch_b 自动注入。"""
|
||||
return fetch_a + fetch_b
|
||||
|
||||
断点续跑
|
||||
--------
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from pyflowx import JSONBackend
|
||||
|
||||
backend = JSONBackend("state.json", ttl=3600)
|
||||
report = px.run(graph, strategy="sequential", state=backend)
|
||||
|
||||
``run()`` 内部以 ``backend.batch()`` 包裹整个执行:所有 ``save`` 延迟到运行结束时统一落盘一次。
|
||||
|
||||
缓存键:默认存储键为任务名。配置 ``cache_key`` 函数后,键为 ``"name:cache_key_value"``。
|
||||
|
||||
完整 API 说明详见 :doc:`/api`。
|
||||
@@ -0,0 +1,50 @@
|
||||
Graph —— DAG 构建
|
||||
=================
|
||||
|
||||
``Graph`` 管理任务集合,提供构建、校验、分层、可视化能力。
|
||||
|
||||
构建方式
|
||||
--------
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
# 图级默认值:TaskSpec 字段为 None 时回退
|
||||
defaults = px.GraphDefaults(retry=px.RetryPolicy(max_attempts=2), timeout=60.0)
|
||||
|
||||
graph = px.Graph.from_specs([...], defaults=defaults) # 整批校验(推荐)
|
||||
|
||||
# 或增量构建
|
||||
graph = px.Graph(defaults=defaults)
|
||||
graph.add(px.TaskSpec("a", fn_a))
|
||||
graph.add(px.TaskSpec("b", fn_b, ("a",)))
|
||||
|
||||
常用方法
|
||||
--------
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
graph.validate() # 显式校验(环检测)
|
||||
graph.layers() # 拓扑分层(Kahn 算法)
|
||||
graph.to_mermaid() # Mermaid 可视化
|
||||
graph.describe() # 人类可读摘要
|
||||
graph.subgraph(("api",)) # 按标签切片
|
||||
graph.subgraph_by_names(("a", "b")) # 按名称切片
|
||||
graph.map("fetch", [1, 2, 3], lambda i: TaskSpec(f"fetch_{i}", ...)) # 批量 fan-out
|
||||
|
||||
图组合
|
||||
------
|
||||
|
||||
``compose`` / ``GraphComposer`` 把带字符串引用的多个图展开为纯 ``Graph``:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
graphs = {
|
||||
"build": px.Graph.from_specs([px.TaskSpec("b", cmd=["echo", "b"])]),
|
||||
"all": px.Graph.from_specs(["build", px.TaskSpec("t", cmd=["echo", "t"])]),
|
||||
}
|
||||
resolved = px.compose(graphs) # "all" 图中的 "build" 引用被展开
|
||||
|
||||
引用格式:``"command_name"``(整个图)或 ``"command_name.task_name"``(特定任务)。
|
||||
``CliRunner`` 内部自动调用 ``compose``。
|
||||
|
||||
完整方法说明详见 :doc:`/api`。
|
||||
@@ -0,0 +1,89 @@
|
||||
TaskSpec —— 任务描述
|
||||
=====================
|
||||
|
||||
``TaskSpec`` 是不可变的任务描述符(``Generic[T]``,返回类型一路传到 ``RunReport``),是唯一需要配置的东西。
|
||||
|
||||
主要参数说明:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
px.TaskSpec(
|
||||
name="fetch_user", # 唯一标识
|
||||
fn=fetch_user, # 同步或异步函数
|
||||
cmd=["curl", "..."], # 或: 执行命令(覆盖 fn)
|
||||
depends_on=("auth",), # 硬依赖(参与拓扑分层)
|
||||
soft_depends_on=("cache",), # 软依赖(仅注入,不参与分层)
|
||||
args=(uid,), # 静态位置参数(追加在注入参数后)
|
||||
kwargs={"timeout": 30}, # 静态关键字参数
|
||||
retry=px.RetryPolicy(max_attempts=3, delay=1.0, backoff=2.0),
|
||||
timeout=30.0, # 超时秒数(None = 不限制)
|
||||
tags=("api", "user"), # 自由标签,用于子图过滤
|
||||
conditions=(is_prod,), # 条件函数列表(全部为 True 才执行)
|
||||
priority=10, # 同层内优先级(高优先执行,默认 0)
|
||||
concurrency_key="db", # 并发分组键(配合 concurrency_limits 限流)
|
||||
cache_key=lambda ctx: str(ctx.get("uid")), # 缓存键函数
|
||||
hooks=px.TaskHooks(pre_run=..., post_run=..., on_failure=...),
|
||||
cwd=Path("/tmp"), # 命令工作目录(仅 cmd 模式)
|
||||
env={"DEBUG": "1"}, # 环境变量覆盖
|
||||
verbose=True, # 打印命令输出(仅 cmd 模式)
|
||||
skip_if_missing=True, # 命令不存在时自动跳过(仅 list[str] cmd)
|
||||
allow_upstream_skip=False, # 上游 SKIPPED/FAILED 时是否仍执行
|
||||
continue_on_error=False, # 本任务失败是否不中断整体
|
||||
)
|
||||
|
||||
两种任务形态
|
||||
------------
|
||||
|
||||
- **函数任务**(``fn``):普通 Python 函数,参数名驱动自动注入
|
||||
- **命令任务**(``cmd``):执行外部命令,支持 ``list[str]``、``str``(shell)、``Callable`` 三种形态
|
||||
|
||||
``skip_if_missing=True`` 时,``list[str]`` 类型的 ``cmd`` 会通过 ``shutil.which`` 检查命令是否存在,不存在则跳过任务(标记为 ``SKIPPED``)而非失败。
|
||||
|
||||
重试策略
|
||||
--------
|
||||
|
||||
``RetryPolicy`` 配置重试次数、延迟、退避:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
retry = px.RetryPolicy(
|
||||
max_attempts=3, # 最大尝试次数
|
||||
delay=1.0, # 初始延迟秒数
|
||||
backoff=2.0, # 退避倍数
|
||||
jitter=0.1, # 随机抖动(避免惊群)
|
||||
retry_on=(ConnectionError,), # 仅对这些异常重试
|
||||
)
|
||||
|
||||
任务钩子
|
||||
--------
|
||||
|
||||
``TaskHooks`` 在任务生命周期触发(异常仅记录,不影响任务状态):
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
hooks = px.TaskHooks(
|
||||
pre_run=lambda spec: print(f"start {spec.name}"),
|
||||
post_run=lambda spec, value: print(f"done {spec.name}"),
|
||||
on_failure=lambda spec, exc: alert(spec.name, exc),
|
||||
)
|
||||
px.TaskSpec("task", fn=work, hooks=hooks)
|
||||
|
||||
任务模板
|
||||
--------
|
||||
|
||||
``task_template`` 工厂批量生成相似 TaskSpec:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
fetch = px.task_template(
|
||||
fn=fetch_url,
|
||||
retry=px.RetryPolicy(max_attempts=5),
|
||||
timeout=30.0,
|
||||
tags=("api",),
|
||||
)
|
||||
graph = px.Graph.from_specs([
|
||||
fetch("users", url="https://api.example.com/users"),
|
||||
fetch("posts", url="https://api.example.com/posts"),
|
||||
])
|
||||
|
||||
完整字段说明详见 :doc:`/api`。
|
||||
@@ -0,0 +1,164 @@
|
||||
YAML 任务编排
|
||||
=============
|
||||
|
||||
PyFlowX 支持 GitHub Actions 风格的声明式 YAML 任务编排,从 YAML 文件直接加载任务图。
|
||||
|
||||
编程式 API
|
||||
----------
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import pyflowx as px
|
||||
|
||||
# 从 YAML 文件加载任务图
|
||||
graph = px.Graph.from_yaml("pipeline.yaml")
|
||||
report = px.run(graph, strategy="thread")
|
||||
|
||||
# 或用函数式 API
|
||||
graph = px.load_yaml("pipeline.yaml")
|
||||
|
||||
# 从字符串解析
|
||||
graph = px.parse_yaml_string("""
|
||||
jobs:
|
||||
hello:
|
||||
cmd: ["echo", "hello"]
|
||||
""")
|
||||
|
||||
YAML Schema
|
||||
-----------
|
||||
|
||||
.. code-block:: yaml
|
||||
|
||||
strategy: thread # 图级默认策略
|
||||
defaults: # 图级默认值
|
||||
retry: {max_attempts: 3}
|
||||
verbose: true
|
||||
env: {CI: "true"}
|
||||
|
||||
variables: # 变量定义 (可在 cmd/env 中 ${VAR} 引用)
|
||||
OUTPUT: "dist"
|
||||
|
||||
jobs:
|
||||
setup:
|
||||
cmd: ["git", "clone", "..."]
|
||||
runs-on: linux
|
||||
|
||||
build:
|
||||
needs: [setup] # 依赖列表
|
||||
cmd: ["python", "-m", "build"]
|
||||
timeout: 300
|
||||
retry: {max_attempts: 2, delay: 1.0}
|
||||
|
||||
test:
|
||||
needs: [build]
|
||||
cmd: ["python${{ matrix.version }}", "-m", "pytest"]
|
||||
strategy:
|
||||
matrix: # 笛卡尔积展开为 6 个任务
|
||||
version: ["3.8", "3.9", "3.10"]
|
||||
os: ["linux", "macos"]
|
||||
if: "env.CI" # 条件: 环境变量存在
|
||||
|
||||
lint:
|
||||
needs: [build]
|
||||
cmd: ["ruff", "check"]
|
||||
if: "env.CI == 'true'"
|
||||
|
||||
deploy:
|
||||
needs: [test, lint] # 矩阵依赖自动展开
|
||||
cmd: ["twine", "upload"]
|
||||
if: "env.DEPLOY_TOKEN != ''"
|
||||
allow-upstream-skip: true
|
||||
concurrency-key: deploy_lock
|
||||
|
||||
字段映射
|
||||
--------
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
:widths: 30 30 40
|
||||
|
||||
* - YAML 字段
|
||||
- TaskSpec 字段
|
||||
- 说明
|
||||
* - ``jobs.<id>``
|
||||
- ``name``
|
||||
- job ID 作为任务名
|
||||
* - ``cmd`` / ``run``
|
||||
- ``cmd``
|
||||
- ``cmd`` 为列表形式,``run`` 为 shell 字符串
|
||||
* - ``needs``
|
||||
- ``depends_on``
|
||||
- 依赖列表(矩阵任务自动展开)
|
||||
* - ``if``
|
||||
- ``conditions``
|
||||
- ``success()`` / ``always()`` / ``env.VAR`` / ``env.VAR == 'x'``
|
||||
* - ``strategy.matrix``
|
||||
- 矩阵扇出
|
||||
- 笛卡尔积展开为多个任务
|
||||
* - ``${{ matrix.key }}``
|
||||
- 占位符
|
||||
- 在 cmd/run/cwd/env 中替换
|
||||
* - ``timeout``
|
||||
- ``timeout``
|
||||
- 超时秒数
|
||||
* - ``retry``
|
||||
- ``retry``
|
||||
- ``{max_attempts, delay, backoff, jitter}``
|
||||
* - ``cwd``
|
||||
- ``cwd``
|
||||
- 工作目录
|
||||
* - ``env``
|
||||
- ``env``
|
||||
- 环境变量
|
||||
* - ``verbose``
|
||||
- ``verbose``
|
||||
- 详细输出
|
||||
* - ``continue-on-error``
|
||||
- ``continue_on_error``
|
||||
- 失败不中止整图
|
||||
* - ``skip-if-missing``
|
||||
- ``skip_if_missing``
|
||||
- 命令不存在时跳过
|
||||
* - ``allow-upstream-skip``
|
||||
- ``allow_upstream_skip``
|
||||
- 上游跳过时仍执行
|
||||
* - ``priority``
|
||||
- ``priority``
|
||||
- 同层优先级
|
||||
* - ``concurrency-key``
|
||||
- ``concurrency_key``
|
||||
- 并发限制键
|
||||
* - ``tags``
|
||||
- ``tags``
|
||||
- 自由标签
|
||||
* - ``runs-on``
|
||||
- ``tags``(追加)
|
||||
- 运行环境标签
|
||||
|
||||
CLI 配置段(``cli:``)
|
||||
----------------------
|
||||
|
||||
工具 YAML 还可定义 ``cli:`` 段,声明命令行参数 schema,由 ``pf`` 自动解析:
|
||||
|
||||
.. code-block:: yaml
|
||||
|
||||
cli:
|
||||
description: "FileDate - 文件日期处理工具"
|
||||
usage: "pf filedate <command> [files...]"
|
||||
subcommands:
|
||||
add:
|
||||
help: "添加日期前缀"
|
||||
positional:
|
||||
- name: FILES
|
||||
nargs: "+"
|
||||
type: path
|
||||
help: "文件路径"
|
||||
options:
|
||||
- name: CLEAR
|
||||
flag: "--clear"
|
||||
action: store_true
|
||||
help: "清除已有日期前缀"
|
||||
|
||||
支持的 ``type``:``str`` / ``int`` / ``float`` / ``path``。
|
||||
|
||||
完整 API 说明详见 :doc:`/api`。
|
||||
@@ -0,0 +1,56 @@
|
||||
PyFlowX 文档
|
||||
============
|
||||
|
||||
PyFlowX 是一个轻量、类型安全的 DAG 任务调度器:**参数名就是依赖声明**。
|
||||
无需装饰器、无需样板包装器,写一个普通函数,框架按参数名自动注入上游结果。
|
||||
|
||||
特性
|
||||
----
|
||||
|
||||
- **零样板** —— 参数名即依赖,框架自动注入上游结果
|
||||
- **四种执行策略** —— sequential(串行)、thread(线程池)、async(事件循环)、dependency(依赖驱动,最大化并行)
|
||||
- **类型安全** —— ``TaskSpec[T]`` 把返回类型一路传到 ``RunReport``
|
||||
- **DAG 校验** —— 构建时即时校验重名、缺失依赖、环
|
||||
- **自动分层** —— Kahn 算法分组,同层任务可并行
|
||||
- **重试与超时** —— 每个任务独立配置 ``RetryPolicy`` 与 ``timeout``
|
||||
- **并发限制** —— ``concurrency_key`` + ``concurrency_limits`` 按组限流
|
||||
- **断点续跑** —— ``MemoryBackend`` / ``JSONBackend``,成功结果可缓存复用
|
||||
- **命令任务** —— ``cmd`` 参数直接执行外部命令
|
||||
- **条件执行** —— ``conditions`` 按平台、环境变量等条件跳过任务
|
||||
- **YAML 任务编排** —— GitHub Actions 风格声明式任务图
|
||||
- **pf 统一 CLI** —— ``pf <tool> [command]`` 调用所有工具
|
||||
- **最小依赖** —— 仅依赖标准库 + PyYAML
|
||||
|
||||
文档导航
|
||||
--------
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
:caption: 入门
|
||||
|
||||
installation
|
||||
quickstart
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
:caption: 用户指南
|
||||
|
||||
guide/task
|
||||
guide/graph
|
||||
guide/execution
|
||||
guide/yaml
|
||||
guide/cli
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
:caption: 参考
|
||||
|
||||
api
|
||||
changelog
|
||||
|
||||
索引
|
||||
----
|
||||
|
||||
* :ref:`genindex`
|
||||
* :ref:`modindex`
|
||||
* :ref:`search`
|
||||
@@ -0,0 +1,51 @@
|
||||
安装
|
||||
====
|
||||
|
||||
PyFlowX 支持 Python 3.8+,仅依赖标准库与 PyYAML(Python 3.8 额外需要 ``graphlib_backport``,Python 3.13 以下版本额外需要 ``typing-extensions``)。
|
||||
|
||||
pip 安装
|
||||
--------
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
pip install pyflowx
|
||||
|
||||
uv 安装
|
||||
-------
|
||||
|
||||
推荐使用 `uv <https://docs.astral.sh/uv/>`_:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
uv add pyflowx
|
||||
|
||||
可选依赖
|
||||
--------
|
||||
|
||||
``office`` —— PDF/图片处理(pdftool、screenshot 等工具需要):
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
pip install pyflowx[office]
|
||||
|
||||
``dev`` —— 开发工具链(ruff、pyrefly、pytest、tox 等):
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
pip install pyflowx[dev]
|
||||
|
||||
验证安装
|
||||
--------
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
pf --version
|
||||
|
||||
输出示例::
|
||||
|
||||
PyFlowX 0.4.5
|
||||
|
||||
下一步
|
||||
------
|
||||
|
||||
前往 :doc:`quickstart` 开始使用。
|
||||
@@ -0,0 +1,87 @@
|
||||
快速上手
|
||||
========
|
||||
|
||||
核心思想:**参数名即依赖**。写一个普通函数,参数名匹配上游任务名,框架自动注入结果。
|
||||
|
||||
最小示例
|
||||
--------
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import pyflowx as px
|
||||
|
||||
def extract() -> list[int]:
|
||||
return [1, 2, 3]
|
||||
|
||||
# 参数名 extract 自动匹配上游任务名 → 自动注入
|
||||
def double(extract: list[int]) -> list[int]:
|
||||
return [x * 2 for x in extract]
|
||||
|
||||
graph = px.Graph.from_specs([
|
||||
px.TaskSpec("extract", extract),
|
||||
px.TaskSpec("double", double, ("extract",)),
|
||||
])
|
||||
|
||||
report = px.run(graph, strategy="sequential")
|
||||
print(report["double"]) # [2, 4, 6]
|
||||
|
||||
三种任务形态
|
||||
------------
|
||||
|
||||
1. **函数任务**(``fn``):普通 Python 函数,参数名驱动自动注入
|
||||
2. **命令任务**(``cmd``):执行外部命令,支持 ``list[str]`` / ``str``(shell)/ ``Callable``
|
||||
3. **YAML 声明式**:从 YAML 文件加载任务图
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
graph = px.Graph.from_specs([
|
||||
px.TaskSpec("list", cmd=["ls", "-la"]),
|
||||
px.TaskSpec("greet", fn=lambda: "hello"),
|
||||
])
|
||||
|
||||
执行策略
|
||||
--------
|
||||
|
||||
PyFlowX 提供四种执行策略:
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
:widths: 20 20 60
|
||||
|
||||
* - 策略
|
||||
- 并发模型
|
||||
- 适用场景
|
||||
* - ``sequential``
|
||||
- 串行
|
||||
- 调试、CPU 密集
|
||||
* - ``thread``
|
||||
- 线程池
|
||||
- I/O 密集同步
|
||||
* - ``async``
|
||||
- 事件循环
|
||||
- I/O 密集异步
|
||||
* - ``dependency``
|
||||
- 依赖驱动
|
||||
- 最大化并行度(默认推荐)
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
report = px.run(graph, strategy="dependency")
|
||||
|
||||
结果访问
|
||||
--------
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
report["task_name"] # 任务返回值
|
||||
report.result_of("task_name") # 完整 TaskResult
|
||||
report.success # 整体是否成功
|
||||
report.summary() # 统计字典
|
||||
report.failed_tasks() # 失败任务名列表
|
||||
|
||||
下一步
|
||||
------
|
||||
|
||||
- :doc:`guide/task` —— TaskSpec 详细配置
|
||||
- :doc:`guide/yaml` —— YAML 声明式任务编排
|
||||
- :doc:`guide/cli` —— ``pf`` 统一 CLI 入口
|
||||
+81
-28
@@ -6,28 +6,36 @@ classifiers = [
|
||||
"Programming Language :: Python :: 3.11",
|
||||
"Programming Language :: Python :: 3.12",
|
||||
"Programming Language :: Python :: 3.13",
|
||||
"Programming Language :: Python :: 3.14",
|
||||
"Programming Language :: Python :: 3.8",
|
||||
"Programming Language :: Python :: 3.9",
|
||||
"Topic :: Software Development :: Libraries :: Application Frameworks",
|
||||
]
|
||||
dependencies = ["graphlib_backport >= 1.0.0; python_version < '3.9'"]
|
||||
dependencies = [
|
||||
"graphlib_backport >= 1.0.0; python_version < '3.9'",
|
||||
"pyyaml>=6.0.1",
|
||||
"typing-extensions>=4.13.2; python_version < '3.13'",
|
||||
]
|
||||
description = "Lightweight, type-safe DAG task scheduler with multi-strategy execution."
|
||||
keywords = ["async", "dag", "scheduler", "task", "workflow"]
|
||||
license = { text = "MIT" }
|
||||
name = "pyflowx"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.8"
|
||||
version = "0.1.2"
|
||||
version = "0.4.7"
|
||||
|
||||
[project.scripts]
|
||||
pyflowx-demo = "pyflowx.__main__:main"
|
||||
emlman = "pyflowx.cli.emlmanager:main"
|
||||
pf = "pyflowx.cli.pf:main"
|
||||
pxp = "pyflowx.cli.profiler:main"
|
||||
yamlrun = "pyflowx.cli.yamlrun:main"
|
||||
|
||||
[project.optional-dependencies]
|
||||
dev = [
|
||||
"hatch>=1.14.2",
|
||||
"httpx>=0.28.0",
|
||||
"mypy >= 1.0",
|
||||
"prek>=0.4.5",
|
||||
"pyrefly>=1.1.1",
|
||||
"pytest-asyncio>=0.24.0",
|
||||
"pytest-cov>=5.0.0",
|
||||
"pytest-html>=4.1.1",
|
||||
@@ -37,53 +45,98 @@ dev = [
|
||||
"ruff>=0.8.0",
|
||||
"tox-uv>=1.13.1",
|
||||
"tox>=4.25.0",
|
||||
"types-PyYAML>=6.0.12",
|
||||
]
|
||||
docs = ["myst-parser>=3.0", "sphinx-rtd-theme>=2.0", "sphinx>=7.0"]
|
||||
office = [
|
||||
"pillow>=10.4.0",
|
||||
"pymupdf>=1.24.11",
|
||||
"pypdf>=5.9.0",
|
||||
"pytesseract>=0.3.13",
|
||||
]
|
||||
|
||||
[build-system]
|
||||
build-backend = "hatchling.build"
|
||||
requires = ["hatchling"]
|
||||
|
||||
[tool.uv]
|
||||
required-version = ">=0.5.0"
|
||||
|
||||
[[tool.uv.index]]
|
||||
default = true
|
||||
url = "https://mirrors.aliyun.com/pypi/simple/"
|
||||
|
||||
[tool.hatch.build.targets.wheel]
|
||||
packages = ["src/pyflowx"]
|
||||
|
||||
[tool.hatch.build.targets.wheel.force-include]
|
||||
"src/pyflowx/py.typed" = "pyflowx/py.typed"
|
||||
|
||||
[tool.mypy]
|
||||
# mypy 2.x requires a >=3.10 target. We check against 3.10 syntax; the
|
||||
# runtime stays 3.8-compatible via `from __future__ import annotations`
|
||||
# (all annotations are strings at runtime) and the graphlib_backport
|
||||
# conditional dependency for topological sorting.
|
||||
check_untyped_defs = true
|
||||
disallow_incomplete_defs = true
|
||||
disallow_untyped_defs = true
|
||||
files = ["src/pyflowx"]
|
||||
ignore_missing_imports = false
|
||||
python_version = "3.8"
|
||||
strict = true
|
||||
warn_return_any = true
|
||||
warn_unused_configs = true
|
||||
|
||||
[tool.uv.sources]
|
||||
pyflowx = { workspace = true }
|
||||
|
||||
[[tool.uv.index]]
|
||||
default = true
|
||||
url = "https://mirrors.aliyun.com/pypi/simple/"
|
||||
|
||||
[dependency-groups]
|
||||
dev = ["pyflowx[dev]"]
|
||||
dev = ["pyflowx[dev,docs,office]"]
|
||||
|
||||
[tool.coverage.run]
|
||||
branch = true
|
||||
concurrency = ["thread"]
|
||||
omit = ["src/pyflowx/examples/*", "tests/*"]
|
||||
omit = ["src/pyflowx/cli/*", "tests/*"]
|
||||
source = ["pyflowx"]
|
||||
|
||||
[tool.coverage.report]
|
||||
exclude_lines = ["if TYPE_CHECKING:", "if __name__ == .__main__.:", "pragma: no cover", "raise NotImplementedError"]
|
||||
fail_under = 95
|
||||
show_missing = true
|
||||
exclude_lines = [
|
||||
"if TYPE_CHECKING:",
|
||||
"if __name__ == .__main__.:",
|
||||
"pragma: no cover",
|
||||
"raise NotImplementedError",
|
||||
]
|
||||
fail_under = 95
|
||||
show_missing = true
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
asyncio_default_fixture_loop_scope = "function"
|
||||
markers = ["slow: marks tests as slow (deselect with '-m \"not slow\"')"]
|
||||
|
||||
# Ruff 配置 - 与 .pre-commit-config.yaml 保持一致
|
||||
[tool.ruff]
|
||||
line-length = 120
|
||||
target-version = "py38"
|
||||
|
||||
[tool.ruff.lint]
|
||||
ignore = [
|
||||
"E501", # line too long (handled by formatter)
|
||||
"PLC0415", # import should be at top-level (intentional for lazy imports)
|
||||
"PLR0913", # too many arguments
|
||||
"PLR0915", # too many statements (intentional for complex methods)
|
||||
"PLR2004", # magic value comparison
|
||||
"PTH119", # os.path.basename (intentional for sys.argv)
|
||||
"PTH123", # pathlib open() replacement
|
||||
"RUF001", # ambiguous unicode characters in string
|
||||
"RUF002", # ambiguous unicode characters in docstring
|
||||
"RUF003", # ambiguous unicode characters in comment
|
||||
"RUF012", # mutable class attributes (intentional for config)
|
||||
"SIM108", # use ternary operator
|
||||
]
|
||||
select = [
|
||||
"ARG", # flake8-unused-arguments
|
||||
"B", # flake8-bugbear
|
||||
"C4", # flake8-comprehensions
|
||||
"E", # pycodestyle errors
|
||||
"F", # Pyflakes
|
||||
"I", # isort
|
||||
"PL", # Pylint
|
||||
"PTH", # flake8-use-pathlib
|
||||
"RUF", # Ruff-specific rules
|
||||
"SIM", # flake8-simplify
|
||||
"UP", # pyupgrade
|
||||
"W", # pycodestyle warnings
|
||||
]
|
||||
|
||||
[tool.ruff.lint.per-file-ignores]
|
||||
"**/tests/**" = ["ARG001", "ARG002"]
|
||||
|
||||
[tool.pyrefly]
|
||||
preset = "strict"
|
||||
project-includes = ["**/*.ipynb", "**/*.py*"]
|
||||
python-version = "3.8"
|
||||
|
||||
+114
-27
@@ -4,9 +4,15 @@
|
||||
--------
|
||||
* :class:`TaskSpec` —— 不可变任务描述符(唯一需要配置的东西)。
|
||||
* :class:`Graph` —— 由一组 spec 构建的 DAG;负责校验、分层、可视化。
|
||||
* :func:`run` —— 以 ``sequential`` / ``thread`` / ``async`` 策略执行图。
|
||||
* :func:`run` —— 以 ``sequential`` / ``thread`` / ``async`` / ``dependency``
|
||||
策略执行图。
|
||||
* :class:`RunReport` —— 类型化、可查询的运行结果。
|
||||
* :class:`Context` —— 整体上下文注入的标注标记。
|
||||
* :class:`RetryPolicy` —— 重试策略(max_attempts/delay/backoff/jitter/retry_on)。
|
||||
* :class:`TaskHooks` —— 任务生命周期钩子(pre_run/post_run/on_failure)。
|
||||
* :class:`GraphDefaults` —— 图级默认值。
|
||||
* :func:`compose` —— 编程式组合多图。
|
||||
* :func:`task_template` —— 批量生成相似 TaskSpec 的工厂。
|
||||
* 状态后端::class:`StateBackend`、:class:`MemoryBackend`、:class:`JSONBackend`。
|
||||
|
||||
快速上手
|
||||
@@ -18,14 +24,51 @@
|
||||
|
||||
graph = px.Graph.from_specs([
|
||||
px.TaskSpec("extract", extract),
|
||||
px.TaskSpec("double", double, ("extract",)),
|
||||
px.TaskSpec("double", double, depends_on=("extract",)),
|
||||
])
|
||||
report = px.run(graph, strategy="sequential")
|
||||
print(report["double"]) # [2, 4, 6]
|
||||
|
||||
命令行任务示例
|
||||
--------------
|
||||
import pyflowx as px
|
||||
from pyflowx.conditions import IS_WINDOWS, BuiltinConditions
|
||||
|
||||
graph = px.Graph.from_specs([
|
||||
px.TaskSpec("list_files", cmd=["ls", "-la"]),
|
||||
px.TaskSpec("check_git", cmd="git status"),
|
||||
px.TaskSpec(
|
||||
"win_only",
|
||||
cmd=["dir"],
|
||||
conditions=(IS_WINDOWS,)
|
||||
),
|
||||
px.TaskSpec(
|
||||
"git_check",
|
||||
cmd=["git", "--version"],
|
||||
conditions=(BuiltinConditions.HAS_INSTALLED("git"),)
|
||||
),
|
||||
px.TaskSpec(
|
||||
"optional_build",
|
||||
cmd=["maturin", "build"],
|
||||
skip_if_missing=True
|
||||
),
|
||||
])
|
||||
report = px.run(graph)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from .command import run_command
|
||||
from .compose import GraphComposer, compose
|
||||
from .conditions import (
|
||||
IS_LINUX,
|
||||
IS_MACOS,
|
||||
IS_POSIX,
|
||||
IS_WINDOWS,
|
||||
BuiltinConditions,
|
||||
Condition,
|
||||
Constants,
|
||||
)
|
||||
from .context import Context, build_call_args, describe_injection
|
||||
from .errors import (
|
||||
CycleError,
|
||||
@@ -37,39 +80,83 @@ from .errors import (
|
||||
TaskFailedError,
|
||||
TaskTimeoutError,
|
||||
)
|
||||
from .executors import run
|
||||
from .graph import Graph
|
||||
from .executors import Strategy, run
|
||||
from .graph import Graph, GraphDefaults
|
||||
from .profiling import ProfileReport, TaskProfile
|
||||
from .registry import FnRegistry, get_fn, has_fn, register_fn
|
||||
from .report import RunReport
|
||||
from .runner import CliExitCode, CliRunner
|
||||
from .storage import JSONBackend, MemoryBackend, StateBackend
|
||||
from .task import TaskEvent, TaskResult, TaskSpec, TaskStatus
|
||||
from .task import (
|
||||
CacheKeyFn,
|
||||
RetryPolicy,
|
||||
TaskCmd,
|
||||
TaskEvent,
|
||||
TaskHooks,
|
||||
TaskResult,
|
||||
TaskSpec,
|
||||
TaskStatus,
|
||||
cmd,
|
||||
task,
|
||||
task_template,
|
||||
)
|
||||
from .yaml_loader import YamlLoadError, build_cli_parser, load_yaml, parse_yaml_string, run_cli, run_yaml
|
||||
|
||||
__version__ = "0.1.2"
|
||||
__version__ = "0.4.7"
|
||||
|
||||
__all__ = [
|
||||
# 核心类型
|
||||
"IS_LINUX",
|
||||
"IS_MACOS",
|
||||
"IS_POSIX",
|
||||
"IS_WINDOWS",
|
||||
"BuiltinConditions",
|
||||
"CacheKeyFn",
|
||||
"CliExitCode",
|
||||
"CliRunner",
|
||||
"Condition",
|
||||
"Constants",
|
||||
"Context",
|
||||
"CycleError",
|
||||
"DuplicateTaskError",
|
||||
"FnRegistry",
|
||||
"Graph",
|
||||
"GraphComposer",
|
||||
"GraphDefaults",
|
||||
"InjectionError",
|
||||
"JSONBackend",
|
||||
"MemoryBackend",
|
||||
"MissingDependencyError",
|
||||
"ProfileReport",
|
||||
"PyFlowXError",
|
||||
"RetryPolicy",
|
||||
"RunReport",
|
||||
"StateBackend",
|
||||
"StorageError",
|
||||
"Strategy",
|
||||
"TaskCmd",
|
||||
"TaskEvent",
|
||||
"TaskFailedError",
|
||||
"TaskHooks",
|
||||
"TaskProfile",
|
||||
"TaskResult",
|
||||
"TaskSpec",
|
||||
"TaskStatus",
|
||||
"TaskResult",
|
||||
"TaskEvent",
|
||||
"Context",
|
||||
"Graph",
|
||||
"RunReport",
|
||||
# 执行
|
||||
"run",
|
||||
# 状态后端
|
||||
"StateBackend",
|
||||
"MemoryBackend",
|
||||
"JSONBackend",
|
||||
# 错误
|
||||
"PyFlowXError",
|
||||
"DuplicateTaskError",
|
||||
"MissingDependencyError",
|
||||
"CycleError",
|
||||
"TaskFailedError",
|
||||
"TaskTimeoutError",
|
||||
"InjectionError",
|
||||
"StorageError",
|
||||
# 辅助(高级)
|
||||
"YamlLoadError",
|
||||
"build_call_args",
|
||||
"build_cli_parser",
|
||||
"cmd",
|
||||
"compose",
|
||||
"describe_injection",
|
||||
"get_fn",
|
||||
"has_fn",
|
||||
"load_yaml",
|
||||
"parse_yaml_string",
|
||||
"register_fn",
|
||||
"run",
|
||||
"run_cli",
|
||||
"run_command",
|
||||
"run_yaml",
|
||||
"task",
|
||||
"task_template",
|
||||
]
|
||||
|
||||
@@ -1,9 +0,0 @@
|
||||
from pyflowx.examples.async_aggregation import main as async_aggregation_main
|
||||
from pyflowx.examples.etl_pipeline import main as etl_pipeline_main
|
||||
from pyflowx.examples.parallel_run import main as parallel_run_main
|
||||
|
||||
|
||||
def main():
    """Run every bundled example, one after another.

    The examples are executed in a fixed order: async aggregation, the ETL
    pipeline, then the parallel run.
    """
    for run_example in (async_aggregation_main, etl_pipeline_main, parallel_run_main):
        run_example()
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,202 @@
|
||||
"""PyFlowX 统一 CLI 入口.
|
||||
|
||||
通过 ``pf <tool> [command] [options]`` 调用所有工具,
|
||||
工具定义在 ``configs/`` 目录下的 YAML 文件中.
|
||||
|
||||
用法
|
||||
----
|
||||
pf # 列出所有可用工具
|
||||
pf filedate # 查看 filedate 工具帮助
|
||||
pf filedate add a.txt # 调用 filedate 的 add 子命令
|
||||
pf pymake b # 调用 pymake 的 b 别名
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Sequence
|
||||
|
||||
import pyflowx as px
|
||||
|
||||
|
||||
class PfApp:
    """Unified ``pf`` entry-point application.

    Routes ``pf <tool> [command]`` either to a YAML-configured tool (a
    ``<tool>.yaml`` file under the ``configs`` directory) or to a legacy
    Python tool that exposes its own ``main()`` function.
    """

    _CONFIGS_DIR = Path(__file__).parent.parent / "configs"

    # Tool name (including short aliases) -> YAML config file stem.
    _TOOL_ALIASES: dict[str, str] = {
        "autofmt": "autofmt",
        "af": "autofmt",
        "bump": "bumpversion",
        "bumpversion": "bumpversion",
        "bv": "bumpversion",
        "clr": "clr",
        "clearscreen": "clr",
        "dockercmd": "dockercmd",
        "docker": "dockercmd",
        "envdev": "envdev",
        "env": "envdev",
        "filedate": "filedate",
        "fd": "filedate",
        "filelevel": "filelevel",
        "fl": "filelevel",
        "folderback": "folderback",
        "foldback": "folderback",
        "fb": "folderback",
        "folderzip": "folderzip",
        "foldzip": "folderzip",
        "fz": "folderzip",
        "git": "gittool",
        "gitt": "gittool",
        "gittool": "gittool",
        "gt": "gittool",
        "ls": "lscalc",
        "lscalc": "lscalc",
        "msdown": "msdownload",
        "msdownload": "msdownload",
        "msd": "msdownload",
        "pack": "packtool",
        "packtool": "packtool",
        "pk": "packtool",
        "pdf": "pdftool",
        "pdftool": "pdftool",
        "pt": "pdftool",
        "pip": "piptool",
        "pymake": "pymake",
        "piptool": "piptool",
        "pp": "piptool",
        "reseticon": "reseticoncache",
        "reseticoncache": "reseticoncache",
        "ric": "reseticoncache",
        "screenshot": "screenshot",
        "scrcap": "screenshot",
        "ss": "screenshot",
        "sglang": "sglang",
        "sg": "sglang",
        "ssh": "sshcopyid",
        "sshcopy": "sshcopyid",
        "sshcopyid": "sshcopyid",
        "sc": "sshcopyid",
        "taskk": "taskkill",
        "taskkill": "taskkill",
        "tk": "taskkill",
        "wch": "which",
        "which": "which",
    }

    # Legacy tools: ``module:function`` targets with their own main()
    # (logic too complex to express in YAML).
    _LEGACY_TOOLS: dict[str, str] = {
        "emlman": "pyflowx.cli.emlmanager:main",
        "profiler": "pyflowx.cli.profiler:main",
        "pxp": "pyflowx.cli.profiler:main",
        "yamlrun": "pyflowx.cli.yamlrun:main",
    }

    def __init__(self, argv: Sequence[str] | None = None) -> None:
        """Store the argument vector; defaults to ``sys.argv[1:]``."""
        self._argv = list(argv) if argv is not None else sys.argv[1:]

    def run(self) -> int:
        """Dispatch to the requested tool and return the process exit code.

        With no arguments the tool list is printed and 0 is returned; an
        unknown tool name prints an error to stderr and returns 1.
        """
        if not self._argv:
            self._list_tools()
            return 0

        tool_name = self._argv[0]
        rest_argv = self._argv[1:]

        resolved = self._resolve_tool(tool_name)
        if resolved is None:
            print(f"错误: 未知工具 '{tool_name}'", file=sys.stderr)
            print("运行 'pf' 查看可用工具列表", file=sys.stderr)
            return 1

        tool_type, target = resolved
        if tool_type == "legacy":
            return self._run_legacy(target, rest_argv)
        return self._run_yaml(target, rest_argv)

    def _list_tools(self) -> None:
        """Print every available YAML tool (with description) and legacy tool."""
        print("PyFlowX 工具列表:")
        print()
        print("YAML 配置工具:")
        # Aliases collapse to their canonical config name, listed once each.
        yaml_tools = sorted(set(self._TOOL_ALIASES.values()))
        for tool in yaml_tools:
            print(f" pf {tool:<15} - {self._tool_description(tool)}")
        print()
        print("传统工具:")
        for tool in sorted(self._LEGACY_TOOLS):
            print(f" pf {tool:<15}")
        print()
        print("示例:")
        print(" pf filedate add a.txt")
        print(" pf pymake b")

    def _tool_description(self, tool_name: str) -> str:
        """Return ``cli.description`` from the tool's YAML file, or ``""``.

        This is a best-effort lookup used only for the tool listing: a
        missing file, a missing ``yaml`` package, or a malformed document
        all yield an empty description instead of an error.
        """
        config_path = self._CONFIGS_DIR / f"{tool_name}.yaml"
        if not config_path.exists():
            return ""
        try:
            import yaml

            data = yaml.safe_load(config_path.read_text(encoding="utf-8"))
            if isinstance(data, dict) and isinstance(data.get("cli"), dict):
                return str(data["cli"].get("description", ""))
        except Exception:
            # Deliberately swallowed: the listing must not fail because one
            # config file cannot be read or parsed.
            pass
        return ""

    def _resolve_tool(self, name: str) -> tuple[str, str] | None:
        """Resolve a tool name to ``(kind, target)``.

        ``kind`` is ``"yaml"`` (target is the YAML file stem) or ``"legacy"``
        (target is a ``module:function`` path). YAML aliases take precedence.
        Returns ``None`` for an unknown name.
        """
        if name in self._TOOL_ALIASES:
            return ("yaml", self._TOOL_ALIASES[name])
        if name in self._LEGACY_TOOLS:
            return ("legacy", self._LEGACY_TOOLS[name])
        return None

    @staticmethod
    def _exit_code(code: object) -> int:
        """Translate a ``SystemExit.code`` value into an integer exit status.

        Follows the interpreter's own rules: ``None`` means success, an
        integer is used as-is, and any other object is printed to stderr
        with exit status 1.
        """
        if code is None:
            return 0
        if isinstance(code, int):
            return code
        print(code, file=sys.stderr)
        return 1

    def _run_legacy(self, module_path: str, argv: list[str]) -> int:
        """Import ``module:function`` and call it with ``sys.argv`` swapped in.

        ``sys.argv`` is temporarily replaced so the legacy tool's own argument
        parsing sees only its arguments, and is always restored afterwards.
        A ``SystemExit`` raised by the tool is converted into a return code.
        """
        module_name, func_name = module_path.split(":", 1)
        module = importlib.import_module(module_name)
        func = getattr(module, func_name)

        original_argv = sys.argv
        sys.argv = [f"pf {module_name.split('.')[-1]}", *argv]
        try:
            func()
            return 0
        except SystemExit as exc:
            # exc.code may be a message string (sys.exit("...")); int() on it
            # would raise ValueError, so map it explicitly.
            return self._exit_code(exc.code)
        finally:
            sys.argv = original_argv

    def _run_yaml(self, target: str, argv: list[str]) -> int:
        """Run the YAML-configured tool ``target`` via ``px.run_cli``.

        Returns 1 (after printing an error to stderr) when the config file
        does not exist; otherwise returns whatever ``px.run_cli`` returns.
        """
        config_path = self._CONFIGS_DIR / f"{target}.yaml"
        if not config_path.exists():
            print(f"错误: 未找到配置文件 '{config_path}'", file=sys.stderr)
            print("运行 'pf' 查看可用工具列表", file=sys.stderr)
            return 1

        print(f"运行配置文件 '{config_path}'")
        return px.run_cli(config_path, argv)
|
||||
|
||||
|
||||
def main() -> None:
    """Console entry point for ``pf``: run the app and exit with its return code."""
    exit_code = PfApp().run()
    sys.exit(exit_code)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,272 @@
|
||||
"""pxp —— PyFlowX 性能分析器.
|
||||
|
||||
分析包含 ``px`` 调用的 Python 脚本,生成工作流执行性能剖面报告。
|
||||
|
||||
工作原理
|
||||
--------
|
||||
1. 注入 hook:monkey-patch ``pyflowx.run`` / ``pyflowx.executors.run`` /
|
||||
``pyflowx.runner.run``,捕获最后一次执行的 ``Graph`` 与 ``RunReport``。
|
||||
2. 执行目标脚本:用 ``runpy.run_path`` 以 ``__main__`` 身份执行,
|
||||
捕获 ``SystemExit``(脚本可能调 ``sys.exit``)。
|
||||
3. 生成报告:从捕获的 report + graph 构建 :class:`ProfileReport`,
|
||||
默认输出 HTML 并自动打开浏览器。
|
||||
|
||||
使用方式
|
||||
--------
|
||||
# 分析 pymake.py,生成 HTML 报告并打开浏览器
|
||||
pxp pymake.py
|
||||
|
||||
# 传递参数给被分析脚本(用 -- 分隔)
|
||||
pxp pymake.py -- t
|
||||
|
||||
# 指定输出文件
|
||||
pxp pymake.py -o report.html
|
||||
|
||||
# 不打开浏览器
|
||||
pxp pymake.py --no-browser
|
||||
|
||||
# 输出纯文本报告
|
||||
pxp pymake.py -E text
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
__all__ = ["main"]
|
||||
|
||||
import argparse
|
||||
import runpy
|
||||
import sys
|
||||
import webbrowser
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from .. import executors as _executors
|
||||
from .. import runner as _runner
|
||||
from ..profiling import ProfileReport
|
||||
from ..report import RunReport
|
||||
|
||||
|
||||
def _build_parser() -> argparse.ArgumentParser:
|
||||
"""构建参数解析器。"""
|
||||
parser = argparse.ArgumentParser(
|
||||
prog="pxp",
|
||||
description="PyFlowX 性能分析器:分析包含 px 调用的脚本,生成性能剖面报告。",
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
epilog=(
|
||||
"示例:\n"
|
||||
" pxp pymake.py # 分析并打开 HTML 报告\n"
|
||||
" pxp pymake.py -- t # 传递参数 t 给脚本\n"
|
||||
" pxp pymake.py -E text # 输出纯文本报告\n"
|
||||
" pxp pymake.py -o out.html # 指定输出文件\n"
|
||||
),
|
||||
)
|
||||
_ = parser.add_argument(
|
||||
"--export",
|
||||
"-E",
|
||||
choices=["html", "text"],
|
||||
default="html",
|
||||
help="导出格式(默认: html)",
|
||||
)
|
||||
_ = parser.add_argument(
|
||||
"--no-browser",
|
||||
action="store_true",
|
||||
help="不自动打开浏览器(仅 HTML 格式有效)",
|
||||
)
|
||||
_ = parser.add_argument(
|
||||
"-o",
|
||||
"--output",
|
||||
help="输出文件路径(默认: <script>_profile.html)",
|
||||
)
|
||||
return parser
|
||||
|
||||
|
||||
def _capture_px_run() -> dict[str, Any]:
    """Install hooks that record the graph and report of ``run()`` calls.

    Returns a dict that is filled in as the hooked ``run`` executes:
    ``"graph"`` holds the graph passed to the most recent call and
    ``"report"`` the corresponding ``RunReport``. The dict also carries a
    ``"_restore"`` callable that undoes every patch.

    Three references to ``run`` are replaced: ``_executors.run``,
    ``_runner.run`` and the top-level ``pyflowx.run``. In addition
    ``RunReport.__init__`` is wrapped so that a report instance created
    while the hooked ``run`` is in progress is recorded even if ``run``
    raises instead of returning it. Reports created outside a hooked call
    are ignored.
    """
    captured: dict[str, Any] = {}
    saved_exec_run = _executors.run
    saved_runner_run = _runner.run
    # Imported here rather than at module level (the original notes this
    # avoids a circular import).
    import pyflowx as px_mod

    saved_px_run = px_mod.run
    saved_report_init = RunReport.__init__
    # True only while patched_run is executing.
    capturing = False

    def patched_report_init(self: RunReport, *args: Any, **kwargs: Any) -> None:
        saved_report_init(self, *args, **kwargs)
        if not capturing:
            return
        captured["report"] = self

    def patched_run(graph: Any, *args: Any, **kwargs: Any) -> RunReport:
        nonlocal capturing
        captured["graph"] = graph
        capturing = True
        try:
            result = saved_exec_run(graph, *args, **kwargs)
        finally:
            capturing = False
        # On a normal return the returned report is the authoritative one.
        captured["report"] = result
        return result

    def _restore() -> None:
        _executors.run = saved_exec_run  # type: ignore[assignment]
        _runner.run = saved_runner_run  # type: ignore[assignment]
        px_mod.run = saved_px_run  # type: ignore[assignment]
        RunReport.__init__ = saved_report_init  # type: ignore[assignment]

    # Apply all patches.
    RunReport.__init__ = patched_report_init  # type: ignore[assignment]
    _executors.run = patched_run  # type: ignore[assignment]
    _runner.run = patched_run  # type: ignore[assignment]
    px_mod.run = patched_run  # type: ignore[assignment]

    captured["_restore"] = _restore
    return captured
|
||||
|
||||
|
||||
def _run_target_script(script: Path, script_args: list[str]) -> dict[str, Any]:
|
||||
"""执行目标脚本。
|
||||
|
||||
将脚本所在目录加入 ``sys.path``,设置 ``sys.argv``,然后用
|
||||
``runpy.run_path`` 以 ``__main__`` 身份执行。捕获 ``SystemExit``。
|
||||
|
||||
Returns
|
||||
-------
|
||||
dict[str, Any]
|
||||
脚本模块的全局变量字典(含 ``main`` 等定义)。
|
||||
"""
|
||||
sys.argv = [str(script), *script_args]
|
||||
script_dir = str(script.parent.resolve())
|
||||
if script_dir not in sys.path:
|
||||
sys.path.insert(0, script_dir)
|
||||
return runpy.run_path(str(script), run_name="__main__")
|
||||
|
||||
|
||||
def _try_call_main(module_globals: dict[str, Any]) -> None:
|
||||
"""若模块定义了 ``main`` 可调用对象,调用它。
|
||||
|
||||
用于脚本无 ``if __name__ == "__main__"`` 块的场景(如通过 entry points
|
||||
注册的 CLI 工具脚本)。``main`` 通常调用 ``CliRunner.run_cli()``,
|
||||
后者读取 ``sys.argv[1:]`` 执行对应命令。
|
||||
"""
|
||||
main_fn = module_globals.get("main")
|
||||
if callable(main_fn):
|
||||
main_fn()
|
||||
|
||||
|
||||
def _output_report(
|
||||
profile: ProfileReport,
|
||||
export: str,
|
||||
output: str | None,
|
||||
script_stem: str,
|
||||
no_browser: bool,
|
||||
) -> None:
|
||||
"""输出性能报告。"""
|
||||
if export == "text":
|
||||
print(profile.describe())
|
||||
return
|
||||
|
||||
# HTML 格式
|
||||
html = profile.to_html()
|
||||
if output:
|
||||
out_path = Path(output)
|
||||
else:
|
||||
out_path = Path.cwd() / f"{script_stem}_profile.html"
|
||||
out_path.write_text(html, encoding="utf-8")
|
||||
print(f"HTML 报告已生成: {out_path}")
|
||||
|
||||
if not no_browser:
|
||||
try:
|
||||
webbrowser.open(f"file://{out_path.resolve()}")
|
||||
except Exception as e:
|
||||
print(f"警告:无法打开浏览器: {e}", file=sys.stderr)
|
||||
|
||||
|
||||
def main() -> None:
    """``pxp`` command-line entry point.

    Parses pxp's own options, runs the target script with the ``run`` hooks
    installed, then builds and outputs a ``ProfileReport`` from the captured
    graph and report. Exits with status 2 when no script is given or the
    script does not exist, and with status 1 when no ``run()`` call was
    captured.
    """
    parser = _build_parser()
    pxp_args, remaining = parser.parse_known_args()

    # argparse can leave the ``--`` separator in the unparsed remainder;
    # it separates pxp options from script arguments and is not an argument.
    if remaining and remaining[0] == "--":
        remaining = remaining[1:]

    if not remaining:
        parser.print_help()
        sys.exit(2)

    script_str = remaining[0]
    script_args = remaining[1:]
    if script_args and script_args[0] == "--":
        # ``pxp script.py -- args``: drop the separator so the script does
        # not receive a literal "--" as its first argument.
        script_args = script_args[1:]
    script_path = Path(script_str).resolve()

    if not script_path.is_file():
        print(f"错误:脚本不存在: {script_path}", file=sys.stderr)
        sys.exit(2)

    # Install hooks
    captured = _capture_px_run()
    restore = captured.pop("_restore", None)

    try:
        # Run the target script
        print(f"正在分析: {script_path}")
        if script_args:
            print(f"脚本参数: {script_args}")
        print("-" * 60)

        module_globals: dict[str, Any] = {}
        try:
            module_globals = _run_target_script(script_path, script_args)
        except SystemExit:
            # The script called sys.exit; this is expected.
            pass
        except Exception as e:
            print(f"警告:脚本执行抛出异常: {e}", file=sys.stderr)

        # If running the script captured no run(), fall back to calling the
        # module's main() (for scripts without an ``if __name__ == "__main__"``
        # block).
        if captured.get("report") is None and module_globals:
            try:
                _try_call_main(module_globals)
            except SystemExit:
                pass
            except Exception as e:
                print(f"警告:调用 main() 抛出异常: {e}", file=sys.stderr)
    finally:
        # Always undo the monkey-patches, even on KeyboardInterrupt.
        if restore is not None:
            restore()

    # Check that a run() call was captured
    report = captured.get("report")
    graph = captured.get("graph")
    if report is None or graph is None:
        print("错误:未捕获到 px.run() 调用,无法生成性能报告", file=sys.stderr)
        print("请确保脚本通过 px.run() 或 CliRunner 执行任务流图。", file=sys.stderr)
        sys.exit(1)

    # Build and emit the report
    profile = ProfileReport.from_report(report, graph)
    _output_report(
        profile,
        export=pxp_args.export,
        output=pxp_args.output,
        script_stem=script_path.stem,
        no_browser=pxp_args.no_browser,
    )
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,109 @@
|
||||
"""YAML 任务编排执行工具.
|
||||
|
||||
从 YAML 文件加载 GitHub Actions 风格的任务图并执行.
|
||||
支持串并行编排、矩阵扇出、条件执行等 CI/CD 核心概念.
|
||||
|
||||
用法
|
||||
----
|
||||
yamlrun pipeline.yaml # 执行 YAML 任务图
|
||||
yamlrun pipeline.yaml --strategy thread # 指定执行策略
|
||||
yamlrun pipeline.yaml --dry-run # 仅打印任务分层, 不执行
|
||||
yamlrun pipeline.yaml --list # 列出所有任务名
|
||||
yamlrun pipeline.yaml --quiet # 静默模式
|
||||
|
||||
示例 YAML
|
||||
----------
|
||||
::
|
||||
|
||||
strategy: thread
|
||||
jobs:
|
||||
setup:
|
||||
cmd: ["git", "clone", "https://github.com/foo/bar"]
|
||||
build:
|
||||
needs: [setup]
|
||||
cmd: ["python", "-m", "build"]
|
||||
test:
|
||||
needs: [build]
|
||||
cmd: ["pytest"]
|
||||
strategy:
|
||||
matrix:
|
||||
python: ["3.8", "3.9"]
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import cast
|
||||
|
||||
import pyflowx as px
|
||||
from pyflowx.executors import Strategy
|
||||
|
||||
|
||||
def main() -> None:
    """Entry point of the YAML task-orchestration runner.

    Loads a task graph from a YAML file, then, depending on the flags,
    lists its tasks, prints the layer plan only (``--dry-run``), or
    executes it and prints a success/failure/skip summary. Exits with
    status 1 when the file is missing, cannot be loaded, execution raises a
    ``px.PyFlowXError``, or any task failed.
    """
    parser = argparse.ArgumentParser(
        description="YamlRun - 从 YAML 文件加载并执行任务图",
        usage="yamlrun <file.yaml> [--strategy STRATEGY] [--dry-run] [--list] [--quiet]",
    )
    parser.add_argument("file", type=str, help="YAML 任务图文件路径")
    parser.add_argument(
        "--strategy",
        type=str,
        default=None,
        help="执行策略: sequential/thread/async/dependency (默认: YAML 中指定的策略或 dependency)",
    )
    parser.add_argument("--dry-run", action="store_true", help="仅打印任务分层, 不执行")
    parser.add_argument("--list", action="store_true", help="列出所有任务名后退出")
    parser.add_argument("--quiet", action="store_true", help="静默模式, 不打印详细输出")
    args = parser.parse_args()

    file_path = Path(args.file)
    # is_file() rather than exists(): a directory is not a loadable YAML file.
    if not file_path.is_file():
        print(f"错误: 文件不存在: {file_path}", file=sys.stderr)
        sys.exit(1)

    try:
        graph = px.Graph.from_yaml(file_path)
    except px.YamlLoadError as e:
        print(f"错误: YAML 加载失败: {e}", file=sys.stderr)
        sys.exit(1)

    if args.list:
        print("任务列表:")
        for name in graph.names:
            spec = graph.spec(name)
            deps = ", ".join(spec.depends_on) if spec.depends_on else "(无依赖)"
            print(f" - {name} (依赖: {deps})")
        sys.exit(0)

    layers = graph.layers()
    print(f"任务分层 ({len(layers)} 层):")
    for i, layer in enumerate(layers):
        print(f" 层 {i + 1}: {layer}")

    if args.dry_run:
        print("\n[dry-run] 跳过执行")
        sys.exit(0)

    # Command-line flag wins, then the YAML default, then "dependency".
    strategy = args.strategy or graph.defaults.strategy or "dependency"
    print(f"\n执行策略: {strategy}")
    print(f"任务总数: {len(graph.names)}")
    print("-" * 40)

    try:
        report = px.run(graph, strategy=cast(Strategy, strategy), verbose=not args.quiet)
    except px.PyFlowXError as e:
        # NOTE(review): assumes TaskFailedError derives from PyFlowXError -
        # confirm. Report the failure cleanly instead of a raw traceback.
        print("-" * 40)
        print(f"错误: 任务执行失败: {e}", file=sys.stderr)
        sys.exit(1)

    print("-" * 40)
    succeeded = report.succeeded_tasks()
    failed = report.failed_tasks()
    skipped = report.skipped_tasks()
    print(f"完成: {len(succeeded)} 成功 / {len(failed)} 失败 / {len(skipped)} 跳过 (共 {len(graph.names)})")

    if failed:
        print(f"失败任务: {failed}")
        sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,100 @@
|
||||
"""命令执行器:把 :class:`~pyflowx.task.TaskSpec` 的 ``cmd`` 字段(list /
|
||||
shell 字符串 / 可调用对象)转换为统一执行入口。
|
||||
|
||||
历史背景:原 ``task.py`` 的模块文档声明其为"纯数据结构",但 ``_run_command``
|
||||
属于命令执行逻辑,违反单一职责。此处将其抽离,``TaskSpec`` 仅持有配置,
|
||||
执行逻辑集中于本模块,便于独立测试与维护。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
from typing import Any, List, Union, cast
|
||||
|
||||
from .task import TaskSpec
|
||||
|
||||
__all__ = ["run_command"]
|
||||
|
||||
|
||||
def run_command(spec: TaskSpec[Any]) -> Any:  # noqa: PLR0912
    """Execute the command described by ``spec.cmd``.

    ``spec.cmd`` may be a callable, an argument list, or a shell string.

    - Callable: invoked with no arguments and its result returned; any
      ``Exception`` is re-raised as :class:`RuntimeError`. ``cwd``, ``env``
      and ``timeout`` are not applied on this path.
    - List / string: run through :func:`subprocess.run` (a string is run
      with ``shell=True``). ``spec.cwd``, ``spec.timeout`` and ``spec.env``
      (merged over ``os.environ``) are passed to the subprocess.

    Returns
    -------
    The callable's return value, or ``None`` after a subprocess that exits
    with status 0.

    Raises
    ------
    RuntimeError
        The callable raised, the executable was not found, the command
        timed out, another ``OSError`` occurred, or the exit status was
        non-zero (stderr is appended when output was captured, i.e. when
        ``spec.verbose`` is false).
    """
    cmd = spec.cmd
    verbose = spec.verbose
    cwd = spec.cwd
    timeout = spec.timeout
    env_override = spec.env

    # Callable: call directly and return its result.
    if callable(cmd) and not isinstance(cmd, (list, str)):
        name = getattr(cmd, "__name__", "callable")
        if verbose:
            print(f"[verbose] 执行可调用命令: {name}", flush=True)
            if cwd is not None:
                print(f"[verbose] 工作目录: {cwd}", flush=True)
        try:
            return cmd()
        except Exception as e:
            raise RuntimeError(f"可调用命令执行异常: {name}: {e}") from e

    is_list = isinstance(cmd, list)
    if is_list:
        # str() each argument: subprocess accepts path-like items, and the
        # display string must not fail on them.
        cmd_str = " ".join(str(arg) for arg in cmd)  # type: ignore[union-attr]
        verb = "执行命令"
        label = "命令"
    else:
        cmd_str = cast(str, cmd)
        verb = "执行 Shell"
        label = "Shell 命令"

    if verbose:
        print(f"[verbose] {verb}: {cmd_str}", flush=True)
        if cwd is not None:
            print(f"[verbose] 工作目录: {cwd}", flush=True)

    # Merge environment overrides over the current environment; None lets
    # the child inherit the environment unchanged.
    run_env: dict[str, str] | None = None
    if env_override:
        run_env = dict(os.environ)
        run_env.update(env_override)

    try:
        result = subprocess.run(
            cast(Union[str, List[str]], cmd),
            shell=not is_list,
            cwd=cwd,
            env=run_env,
            timeout=timeout,
            # In verbose mode the child writes straight to our stdout/stderr.
            capture_output=not verbose,
            text=True,
            check=False,
        )
    except FileNotFoundError:
        raise RuntimeError(f"{label}未找到: {cmd_str}") from None
    except subprocess.TimeoutExpired:
        raise RuntimeError(f"{label}执行超时: {cmd_str} ({timeout}s)") from None
    except OSError as e:
        raise RuntimeError(f"{label}执行异常: {cmd_str}: {e}") from e

    if verbose:
        print(f"[verbose] 返回码: {result.returncode}", flush=True)

    if result.returncode == 0:
        # Captured stdout is replayed so successful output is still visible.
        if not verbose and result.stdout:
            print(result.stdout, end="", flush=True)
        return None

    err_msg = f"{label}执行失败: `{cmd_str}`, 返回码: {result.returncode}"
    if not verbose and result.stderr.strip():
        err_msg += f"\n{result.stderr.strip()}"
    raise RuntimeError(err_msg)
|
||||
@@ -0,0 +1,115 @@
|
||||
"""图组合:将带字符串引用的多个图展开为纯 :class:`~pyflowx.graph.Graph`。
|
||||
|
||||
历史背景:原 ``graph.py`` 同时承载 DAG 构建/校验/分层与多图组合逻辑,
|
||||
职责过载。组合逻辑(:class:`GraphComposer` / :func:`compose`)与单图 DAG
|
||||
模型正交,此处抽离为独立模块,便于按需导入与独立演进。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import replace
|
||||
from typing import Any
|
||||
|
||||
from .graph import Graph
|
||||
from .task import TaskSpec
|
||||
|
||||
__all__ = ["GraphComposer", "compose"]
|
||||
|
||||
|
||||
class GraphComposer:
    """Expand graphs that contain string references into plain spec graphs.

    Reference formats:

    * ``"command_name"`` - the whole graph registered under that name.
    * ``"command_name.task_name"`` - a single task of that graph.

    References are expanded in order and chained: the tasks of a later
    reference depend on the last task of the previous reference. The
    graph's own ``TaskSpec`` objects are then appended and chained serially
    in their order of appearance.
    """

    def __init__(self, graphs: dict[str, Graph]) -> None:
        """Store the mapping of command name to (possibly unresolved) graph."""
        self.graphs = graphs

    def resolve_all(self) -> dict[str, Graph]:
        """Expand the references of every graph and return a new mapping."""
        return {cmd_name: self.expand_refs(graph, cmd_name) for cmd_name, graph in self.graphs.items()}

    @staticmethod
    def _with_dependency(spec: TaskSpec[Any], dep_name: str) -> TaskSpec[Any]:
        """Return a copy of ``spec`` that also depends on ``dep_name``.

        Existing dependencies keep their order and duplicates are dropped,
        so the resulting tuple is deterministic across runs.
        """
        return replace(spec, depends_on=tuple(dict.fromkeys((*spec.depends_on, dep_name))))

    def expand_refs(self, graph: Graph, current_cmd: str, _stack: tuple[str, ...] = ()) -> Graph:
        """Expand the string references of ``graph``.

        Parameters
        ----------
        graph
            Graph to expand. Returned unchanged when it has no pending refs.
        current_cmd
            Name under which ``graph`` is registered.
        _stack
            Internal: names of the commands currently being expanded,
            outermost first and excluding ``current_cmd``. Used for cycle
            detection; callers normally omit it.

        Raises
        ------
        ValueError
            A reference is unknown or forms a (direct or indirect) cycle.
        """
        pending_refs = graph._pending_refs
        if not pending_refs:
            return graph

        all_specs: list[TaskSpec[Any]] = []
        previous_ref_last_task: str | None = None

        for ref in pending_refs:
            expanded_specs = self.parse_ref(ref, current_cmd, _stack)
            if previous_ref_last_task and expanded_specs:
                # The first task, and every task without dependencies,
                # must wait for the previous reference to finish.
                for i, task in enumerate(expanded_specs):
                    if i == 0 or not task.depends_on:
                        expanded_specs[i] = self._with_dependency(task, previous_ref_last_task)
            if expanded_specs:
                previous_ref_last_task = expanded_specs[-1].name
                all_specs.extend(expanded_specs)

        original_specs = list(graph.all_specs().values())
        if original_specs:
            first = original_specs[0]
            if previous_ref_last_task:
                first = self._with_dependency(first, previous_ref_last_task)
            all_specs.append(first)
            # Chain the graph's own tasks serially in declaration order.
            for previous_task, current_task in zip(original_specs, original_specs[1:]):
                all_specs.append(self._with_dependency(current_task, previous_task.name))

        return Graph.from_specs(all_specs, defaults=graph.defaults)

    def parse_ref(self, ref: str, current_cmd: str, _stack: tuple[str, ...] = ()) -> list[TaskSpec[Any]]:
        """Resolve one string reference to the list of specs it denotes.

        Parameters
        ----------
        ref
            ``"command"`` or ``"command.task"``.
        current_cmd
            Command whose graph contains the reference.
        _stack
            Internal: enclosing commands being expanded (see
            :meth:`expand_refs`).

        Raises
        ------
        ValueError
            The reference names the current command, closes a reference
            cycle, or names an unknown command or task.
        """
        if ref == current_cmd:
            raise ValueError(f"循环引用: 命令 '{current_cmd}' 引用了自己")

        if "." in ref:
            cmd_name, task_name = ref.split(".", 1)
            if cmd_name not in self.graphs:
                raise ValueError(f"引用的命令 '{cmd_name}' 不存在")
            ref_specs = self.graphs[cmd_name].all_specs()
            if task_name not in ref_specs:
                raise ValueError(f"任务 '{task_name}' 不存在于命令 '{cmd_name}' 中")
            return [ref_specs[task_name]]

        cmd_name = ref
        if cmd_name not in self.graphs:
            raise ValueError(f"引用的命令 '{cmd_name}' 不存在")

        # Whole-command references are expanded recursively; without this
        # check an indirect cycle (a -> b -> a) would recurse until
        # RecursionError.
        chain = (*_stack, current_cmd)
        if cmd_name in chain:
            raise ValueError(f"循环引用: {' -> '.join((*chain, cmd_name))}")

        ref_graph = self.expand_refs(self.graphs[cmd_name], cmd_name, chain)
        return list(ref_graph.all_specs().values())
|
||||
|
||||
|
||||
def compose(
    graphs: dict[str, Graph],
) -> dict[str, Graph]:
    """Resolve string references across several graphs programmatically.

    Equivalent to building a :class:`GraphComposer` and calling
    ``resolve_all()``; provided as a plain function for callers that do not
    go through :class:`~pyflowx.runner.CliRunner`.

    Examples
    --------
    >>> graphs = {
    ...     "build": px.Graph.from_specs([px.TaskSpec("b", cmd=["echo", "b"])]),
    ...     "all": px.Graph.from_specs(["build", px.TaskSpec("t", cmd=["echo", "t"])]),
    ... }
    >>> resolved = px.compose(graphs)
    >>> "b" in resolved["all"].all_specs()
    True
    """
    composer = GraphComposer(graphs)
    return composer.resolve_all()
|
||||
@@ -0,0 +1,250 @@
|
||||
"""条件判断模块.
|
||||
|
||||
所有条件均为 ``Callable[[Context], bool]``,接收依赖上下文映射(可能为空)。
|
||||
这使得条件可基于上游任务的运行时返回值做决策,实现动态分支。
|
||||
|
||||
内置条件分两类:
|
||||
1. *静态条件* —— 不依赖上下文(平台/环境变量/安装检查),通过 ``_static``
|
||||
包装忽略传入的 context,便于作为模块级常量使用。
|
||||
2. *上下文条件* —— 基于上游结果判断,如 :meth:`BuiltinConditions.DEP_EQUALS`。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable
|
||||
|
||||
from .task import Condition, Context
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
__all__ = ["BuiltinConditions", "Condition", "Constants"]
|
||||
|
||||
|
||||
class Constants:
    """Platform flags derived from ``sys.platform`` when the class is created."""

    # Exactly one of the first three is true on Windows, Linux and macOS.
    IS_WINDOWS: bool = sys.platform == "win32"
    IS_LINUX: bool = sys.platform == "linux"
    IS_MACOS: bool = sys.platform == "darwin"
    # Everything that is not Windows is treated as POSIX.
    IS_POSIX: bool = not IS_WINDOWS
|
||||
|
||||
|
||||
def _static(predicate: Callable[[], bool], name: str) -> Condition:
|
||||
"""将无参谓词包装为忽略上下文的 :class:`Condition`。"""
|
||||
|
||||
def _cond(_ctx: Context) -> bool:
|
||||
return predicate()
|
||||
|
||||
_cond.__name__ = name
|
||||
return _cond
|
||||
|
||||
|
||||
def _cond_name(cond: Condition) -> str:
|
||||
"""获取条件的可读名称。"""
|
||||
return getattr(cond, "__name__", repr(cond))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# 模块级静态条件常量
|
||||
# ---------------------------------------------------------------------- #
|
||||
# Context-ignoring platform conditions. Each lambda reads the matching
# ``Constants`` attribute when the condition is called, not when it is built.
IS_WINDOWS: Condition = _static(lambda: Constants.IS_WINDOWS, "IS_WINDOWS")
IS_LINUX: Condition = _static(lambda: Constants.IS_LINUX, "IS_LINUX")
IS_MACOS: Condition = _static(lambda: Constants.IS_MACOS, "IS_MACOS")
IS_POSIX: Condition = _static(lambda: Constants.IS_POSIX, "IS_POSIX")
|
||||
|
||||
|
||||
class BuiltinConditions:
|
||||
"""内置条件判断函数集合.
|
||||
|
||||
静态条件工厂返回忽略上下文的 :class:`Condition`;上下文条件工厂返回
|
||||
会读取依赖结果的 :class:`Condition`。
|
||||
"""
|
||||
|
||||
# ------------------------------------------------------------------ #
|
||||
# 静态条件
|
||||
# ------------------------------------------------------------------ #
|
||||
@staticmethod
|
||||
def IS_WINDOWS() -> Condition:
|
||||
"""检查是否为 Windows 平台."""
|
||||
return IS_WINDOWS
|
||||
|
||||
@staticmethod
|
||||
def IS_LINUX() -> Condition:
|
||||
"""检查是否为 Linux 平台."""
|
||||
return IS_LINUX
|
||||
|
||||
@staticmethod
|
||||
def IS_MACOS() -> Condition:
|
||||
"""检查是否为 macOS 平台."""
|
||||
return IS_MACOS
|
||||
|
||||
@staticmethod
|
||||
def IS_POSIX() -> Condition:
|
||||
"""检查是否为 POSIX 平台."""
|
||||
return IS_POSIX
|
||||
|
||||
@staticmethod
|
||||
def PYTHON_VERSION(major: int, minor: int | None = None) -> Condition:
|
||||
"""检查 Python 版本是否匹配."""
|
||||
if minor is None:
|
||||
return _static(lambda: sys.version_info.major == major, f"PYTHON_VERSION({major})")
|
||||
return _static(
|
||||
lambda: sys.version_info.major == major and sys.version_info.minor == minor,
|
||||
f"PYTHON_VERSION({major},{minor})",
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def PYTHON_VERSION_AT_LEAST(major: int, minor: int = 0) -> Condition:
|
||||
"""检查 Python 版本是否 >= 指定版本."""
|
||||
return _static(lambda: sys.version_info >= (major, minor), f"PYTHON_VERSION_AT_LEAST({major},{minor})")
|
||||
|
||||
@staticmethod
|
||||
def IS_RUNNING(app_name: str) -> Condition:
|
||||
"""检查指定应用是否正在运行."""
|
||||
|
||||
def _check() -> bool:
|
||||
if Constants.IS_WINDOWS:
|
||||
result = subprocess.run(
|
||||
["tasklist", "/nh", "/fi", f"imagename eq {app_name}"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
check=False,
|
||||
)
|
||||
return app_name.lower() in result.stdout.lower()
|
||||
else:
|
||||
result = subprocess.run(["pgrep", "-x", app_name], capture_output=True, check=False)
|
||||
return result.returncode == 0
|
||||
|
||||
return _static(_check, f"IS_RUNNING({app_name!r})")
|
||||
|
||||
@staticmethod
|
||||
def HAS_INSTALLED(app_name: str) -> Condition:
|
||||
"""检查指定应用是否已安装."""
|
||||
return _static(lambda: shutil.which(app_name) is not None, f"HAS_INSTALLED({app_name!r})")
|
||||
|
||||
@staticmethod
|
||||
def DIR_EXISTS(path: Path) -> Condition:
|
||||
"""路径是否存在."""
|
||||
return _static(path.exists, f"DIR_EXISTS({path!r})")
|
||||
|
||||
@staticmethod
|
||||
def ENV_VAR_EXISTS(var_name: str) -> Condition:
|
||||
"""检查环境变量是否存在."""
|
||||
return _static(lambda: var_name in os.environ, f"ENV_VAR_EXISTS({var_name!r})")
|
||||
|
||||
@staticmethod
|
||||
def ENV_VAR_EQUALS(var_name: str, value: str) -> Condition:
|
||||
"""检查环境变量是否等于指定值."""
|
||||
return _static(
|
||||
lambda: os.environ.get(var_name) == value,
|
||||
f"ENV_VAR_EQUALS({var_name!r},{value!r})",
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def FILE_CONTENT_EXISTS(path: Path | str, content: str) -> Condition:
|
||||
"""检查文件是否包含指定内容."""
|
||||
|
||||
def _check() -> bool:
|
||||
p = Path(path)
|
||||
if not p.exists():
|
||||
return False
|
||||
try:
|
||||
return content in p.read_text(encoding="utf-8")
|
||||
except (OSError, UnicodeDecodeError):
|
||||
return False
|
||||
|
||||
return _static(_check, f"FILE_CONTENT_EXISTS({path!r},{content!r})")
|
||||
|
||||
# ------------------------------------------------------------------ #
|
||||
# 上下文条件:基于上游依赖结果
|
||||
# ------------------------------------------------------------------ #
|
||||
@staticmethod
|
||||
def DEP_EQUALS(dep_name: str, value: Any) -> Condition:
|
||||
"""上游任务 ``dep_name`` 的返回值等于 ``value`` 时为真。
|
||||
|
||||
若依赖未在上下文中(被跳过或未执行),返回 ``False``。
|
||||
"""
|
||||
|
||||
def _cond(ctx: Context) -> bool:
|
||||
return dep_name in ctx and ctx[dep_name] == value
|
||||
|
||||
_cond.__name__ = f"DEP_EQUALS({dep_name!r},{value!r})"
|
||||
return _cond
|
||||
|
||||
@staticmethod
|
||||
def DEP_MATCHES(dep_name: str, predicate: Callable[[Any], bool]) -> Condition:
|
||||
"""上游任务 ``dep_name`` 的返回值满足 ``predicate`` 时为真。
|
||||
|
||||
依赖不存在时返回 ``False``。
|
||||
"""
|
||||
|
||||
def _cond(ctx: Context) -> bool:
|
||||
if dep_name not in ctx:
|
||||
return False
|
||||
try:
|
||||
return predicate(ctx[dep_name])
|
||||
except Exception as exc:
|
||||
logger.warning("DEP_MATCHES predicate %r raised: %r", dep_name, exc)
|
||||
return False
|
||||
|
||||
_cond.__name__ = f"DEP_MATCHES({dep_name!r},{getattr(predicate, '__name__', 'pred')})"
|
||||
return _cond
|
||||
|
||||
@staticmethod
|
||||
def DEP_PRESENT(dep_name: str) -> Condition:
|
||||
"""上游任务 ``dep_name`` 存在于上下文(即已成功执行)时为真。"""
|
||||
|
||||
def _cond(ctx: Context) -> bool:
|
||||
return dep_name in ctx and ctx[dep_name] is not None
|
||||
|
||||
_cond.__name__ = f"DEP_PRESENT({dep_name!r})"
|
||||
return _cond
|
||||
|
||||
@staticmethod
def DEP_TRUTHY(dep_name: str) -> Condition:
    """Build a condition that is true when the result of ``dep_name`` is truthy.

    The result is fetched with ``ctx.get``, so a missing entry yields
    ``None`` and the condition evaluates to ``False``.

    Args:
        dep_name: Name of the upstream task whose result is inspected.

    Returns:
        A callable taking the context; its ``__name__`` is set to
        ``DEP_TRUTHY(<dep_name>)``.
    """

    def _cond(ctx: Context) -> bool:
        result = ctx.get(dep_name)
        return True if result else False

    _cond.__name__ = f"DEP_TRUTHY({dep_name!r})"
    return _cond
|
||||
|
||||
# ------------------------------------------------------------------ #
|
||||
# 逻辑组合
|
||||
# ------------------------------------------------------------------ #
|
||||
@staticmethod
def NOT(condition: Condition) -> Condition:
    """Build the logical negation of ``condition``.

    Args:
        condition: Condition whose outcome is inverted.

    Returns:
        A callable taking the context; its ``__name__`` is set to
        ``NOT(<name of condition>)``, the inner name coming from
        ``_cond_name``.
    """

    def _cond(ctx: Context) -> bool:
        outcome = condition(ctx)
        return not outcome

    inner_label = _cond_name(condition)
    _cond.__name__ = f"NOT({inner_label})"
    return _cond
|
||||
|
||||
@staticmethod
def AND(*conditions: Condition) -> Condition:
    """Build the logical conjunction of ``conditions``.

    Conditions are evaluated in order and evaluation stops at the first
    falsy outcome. With no conditions the result is ``True``.

    Args:
        *conditions: Conditions that must all hold.

    Returns:
        A callable taking the context; its ``__name__`` is set to
        ``AND(<names joined by ", ">)``, the names coming from
        ``_cond_name``.
    """

    def _cond(ctx: Context) -> bool:
        for check in conditions:
            if not check(ctx):
                return False
        return True

    labels = [_cond_name(check) for check in conditions]
    _cond.__name__ = f"AND({', '.join(labels)})"
    return _cond
|
||||
|
||||
@staticmethod
def OR(*conditions: Condition) -> Condition:
    """Build the logical disjunction of ``conditions``.

    Conditions are evaluated in order and evaluation stops at the first
    truthy outcome. With no conditions the result is ``False``.

    Args:
        *conditions: Conditions of which at least one must hold.

    Returns:
        A callable taking the context; its ``__name__`` is set to
        ``OR(<names joined by ", ">)``, the names coming from
        ``_cond_name``.
    """

    def _cond(ctx: Context) -> bool:
        for check in conditions:
            if check(ctx):
                return True
        return False

    labels = [_cond_name(check) for check in conditions]
    _cond.__name__ = f"OR({', '.join(labels)})"
    return _cond
|
||||
@@ -0,0 +1,65 @@
|
||||
# autofmt - 自动格式化工具
|
||||
# 用法:
|
||||
# pf autofmt fmt --target .
|
||||
# pf autofmt lint --target .
|
||||
# pf autofmt lint --target . --fix
|
||||
# pf autofmt doc --root-dir .
|
||||
# pf autofmt sync --root-dir .
|
||||
strategy: thread
|
||||
variables:
|
||||
TARGET: "."
|
||||
ROOT_DIR: "."
|
||||
FIX: false
|
||||
cli:
|
||||
description: "AutoFmt - 自动格式化工具"
|
||||
usage: "pf autofmt <command> [options]"
|
||||
subcommands:
|
||||
fmt:
|
||||
help: "格式化代码"
|
||||
options:
|
||||
- name: TARGET
|
||||
flag: "--target"
|
||||
type: str
|
||||
default: "."
|
||||
help: "目标路径 (默认: .)"
|
||||
lint:
|
||||
help: "代码检查"
|
||||
options:
|
||||
- name: TARGET
|
||||
flag: "--target"
|
||||
type: str
|
||||
default: "."
|
||||
help: "目标路径 (默认: .)"
|
||||
- name: FIX
|
||||
flag: "--fix"
|
||||
action: "store_true"
|
||||
help: "自动修复问题"
|
||||
doc:
|
||||
help: "自动添加文档字符串"
|
||||
options:
|
||||
- name: ROOT_DIR
|
||||
flag: "--root-dir"
|
||||
type: str
|
||||
default: "."
|
||||
help: "根目录 (默认: .)"
|
||||
sync:
|
||||
help: "同步 pyproject 配置"
|
||||
options:
|
||||
- name: ROOT_DIR
|
||||
flag: "--root-dir"
|
||||
type: str
|
||||
default: "."
|
||||
help: "根目录 (默认: .)"
|
||||
jobs:
|
||||
fmt:
|
||||
cmd: ["ruff", "format", "${TARGET}"]
|
||||
lint:
|
||||
cmd: ["ruff", "check", "${TARGET}"]
|
||||
lint_fix:
|
||||
cmd: ["ruff", "check", "--fix", "--unsafe-fixes", "${TARGET}"]
|
||||
doc:
|
||||
fn: auto_add_docstrings
|
||||
args: ["${ROOT_DIR}"]
|
||||
sync:
|
||||
fn: sync_pyproject_config
|
||||
args: ["${ROOT_DIR}"]
|
||||
@@ -0,0 +1,27 @@
|
||||
# bumpversion - 版本号自动管理工具
|
||||
# 用法:
|
||||
# pf bumpversion
|
||||
# pf bumpversion minor --no-tag
|
||||
strategy: sequential
|
||||
variables:
|
||||
PART: patch
|
||||
NO_TAG: false
|
||||
cli:
|
||||
description: "BumpVersion - 版本号自动管理工具"
|
||||
usage: "pf bumpversion [part] [options]"
|
||||
positional:
|
||||
- name: PART
|
||||
type: str
|
||||
default: patch
|
||||
help: "版本部分: patch, minor, major"
|
||||
options:
|
||||
- name: NO_TAG
|
||||
flag: "--no-tag"
|
||||
action: "store_true"
|
||||
help: "提交后不创建 git tag"
|
||||
jobs:
|
||||
bump:
|
||||
fn: bump_project_version
|
||||
args: ["${PART}"]
|
||||
kwargs:
|
||||
no_tag: ${NO_TAG}
|
||||
@@ -0,0 +1,10 @@
|
||||
# clr - 清屏工具
|
||||
# 用法:
|
||||
# pf clr
|
||||
strategy: sequential
|
||||
cli:
|
||||
description: "清屏工具 (跨平台)"
|
||||
usage: "pf clr"
|
||||
jobs:
|
||||
clear:
|
||||
fn: clear_screen_run
|
||||
@@ -0,0 +1,24 @@
|
||||
# dockercmd - Docker 镜像登录工具
|
||||
# 用法:
|
||||
# pf dockercmd login
|
||||
# pf dockercmd login --username myuser
|
||||
strategy: sequential
|
||||
variables:
|
||||
USERNAME: ""
|
||||
cli:
|
||||
description: "DockerCmd - Docker 镜像登录工具"
|
||||
usage: "pf dockercmd <command> [options]"
|
||||
subcommands:
|
||||
login:
|
||||
help: "登录腾讯云 Docker 镜像仓库"
|
||||
options:
|
||||
- name: USERNAME
|
||||
flag: "--username"
|
||||
type: str
|
||||
default: ""
|
||||
help: "Docker 用户名 (默认: 当前系统用户)"
|
||||
jobs:
|
||||
login:
|
||||
fn: docker_login_tencent
|
||||
kwargs:
|
||||
username: ${USERNAME}
|
||||
@@ -0,0 +1,78 @@
|
||||
# envdev - 开发环境镜像源配置工具
|
||||
# 用法:
|
||||
# pf envdev
|
||||
# pf envdev --python-mirror aliyun --conda-mirror ustc --rust-mirror ustc --rust-version nightly
|
||||
# 说明
|
||||
# 配置 Python / Conda / Rust 镜像源 (Linux 还会安装 Qt 库、中文字体、Docker).
|
||||
# 所有镜像源参数互不影响, 可单独使用.
|
||||
# Linux 专用操作 (系统镜像/Qt/字体/Docker) 在非 Linux 平台上由函数内部跳过.
|
||||
strategy: thread
|
||||
variables:
|
||||
PYTHON_MIRROR: tsinghua
|
||||
CONDA_MIRROR: tsinghua
|
||||
RUST_MIRROR: tsinghua
|
||||
RUST_VERSION: stable
|
||||
cli:
|
||||
description: "EnvDev - 开发环境镜像源配置工具"
|
||||
usage: "pf envdev [options]"
|
||||
options:
|
||||
- name: PYTHON_MIRROR
|
||||
flag: "--python-mirror"
|
||||
type: str
|
||||
default: tsinghua
|
||||
help: "Python 镜像源: tsinghua/aliyun/huaweicloud/ustc/zju (默认: tsinghua)"
|
||||
- name: CONDA_MIRROR
|
||||
flag: "--conda-mirror"
|
||||
type: str
|
||||
default: tsinghua
|
||||
help: "Conda 镜像源: tsinghua/ustc/bsfu/aliyun (默认: tsinghua)"
|
||||
- name: RUST_MIRROR
|
||||
flag: "--rust-mirror"
|
||||
type: str
|
||||
default: tsinghua
|
||||
help: "Rust 镜像源: tsinghua/ustc/aliyun (默认: tsinghua)"
|
||||
- name: RUST_VERSION
|
||||
flag: "--rust-version"
|
||||
type: str
|
||||
default: stable
|
||||
help: "Rust 版本: stable/nightly/beta (默认: stable)"
|
||||
jobs:
|
||||
# Linux 系统镜像配置 (函数内部判断平台与已配置状态, 非自动跳过)
|
||||
setup_linux_mirror:
|
||||
fn: setup_linux_system_mirror
|
||||
# 安装 Qt 依赖 (仅 Linux, 函数内部判断)
|
||||
install_qt_libs:
|
||||
fn: install_linux_qt_libs
|
||||
needs: [setup_linux_mirror]
|
||||
allow-upstream-skip: true
|
||||
# 安装中文字体 (仅 Linux, 函数内部判断)
|
||||
install_fonts:
|
||||
fn: install_linux_fonts
|
||||
needs: [setup_linux_mirror]
|
||||
allow-upstream-skip: true
|
||||
# 安装 Docker (仅 Linux, 函数内部判断)
|
||||
install_docker:
|
||||
fn: install_linux_docker
|
||||
needs: [setup_linux_mirror]
|
||||
allow-upstream-skip: true
|
||||
# 配置 Python 镜像源 (跨平台)
|
||||
setup_python:
|
||||
fn: setup_python_mirror
|
||||
args: ["${PYTHON_MIRROR}"]
|
||||
# 配置 Conda 镜像源 (跨平台)
|
||||
setup_conda:
|
||||
fn: setup_conda_mirror
|
||||
args: ["${CONDA_MIRROR}"]
|
||||
# 配置 Rust 镜像源 (跨平台)
|
||||
setup_rust:
|
||||
fn: setup_rust_mirror
|
||||
args: ["${RUST_MIRROR}", "${RUST_VERSION}"]
|
||||
# 下载 Rustup 安装脚本 (跨平台, 已安装时由函数内部跳过)
|
||||
download_rustup:
|
||||
fn: download_rustup_script
|
||||
# 安装 Rust 工具链 (rustup 未安装时由函数内部跳过)
|
||||
install_rust:
|
||||
fn: install_rust_toolchain
|
||||
args: ["${RUST_VERSION}"]
|
||||
needs: [setup_rust, download_rustup]
|
||||
allow-upstream-skip: true
|
||||
@@ -0,0 +1,36 @@
|
||||
# filedate - 文件日期处理工具
|
||||
# 用法:
|
||||
# pf filedate add file1.txt file2.txt
|
||||
# pf filedate clear file1.txt file2.txt
|
||||
strategy: thread
|
||||
variables:
|
||||
FILES: []
|
||||
cli:
|
||||
description: "FileDate - 文件日期处理工具"
|
||||
usage: "pf filedate <command> [files...]"
|
||||
subcommands:
|
||||
add:
|
||||
help: "添加日期前缀"
|
||||
positional:
|
||||
- name: FILES
|
||||
nargs: "+"
|
||||
type: path
|
||||
help: "文件路径"
|
||||
clear:
|
||||
help: "清除日期前缀"
|
||||
positional:
|
||||
- name: FILES
|
||||
nargs: "+"
|
||||
type: path
|
||||
help: "文件路径"
|
||||
jobs:
|
||||
add:
|
||||
fn: process_files_date
|
||||
args: ["${FILES}"]
|
||||
kwargs:
|
||||
clear: false
|
||||
clear:
|
||||
fn: process_files_date
|
||||
args: ["${FILES}"]
|
||||
kwargs:
|
||||
clear: true
|
||||
@@ -0,0 +1,28 @@
|
||||
# filelevel - 文件等级重命名工具
|
||||
# 用法:
|
||||
# pf filelevel set file.txt --level 2
|
||||
strategy: thread
|
||||
variables:
|
||||
FILES: []
|
||||
LEVEL: 0
|
||||
cli:
|
||||
description: "FileLevel - 文件等级重命名工具"
|
||||
usage: "pf filelevel <command> [files...] [options]"
|
||||
subcommands:
|
||||
set:
|
||||
help: "设置文件等级"
|
||||
positional:
|
||||
- name: FILES
|
||||
nargs: "+"
|
||||
type: path
|
||||
help: "文件路径"
|
||||
options:
|
||||
- name: LEVEL
|
||||
flag: "--level"
|
||||
type: int
|
||||
required: true
|
||||
help: "文件等级 (0-4)"
|
||||
jobs:
|
||||
set:
|
||||
fn: process_files_level
|
||||
args: ["${FILES}", "${LEVEL}"]
|
||||
@@ -0,0 +1,34 @@
|
||||
# folderback - 文件夹备份工具
|
||||
# 用法:
|
||||
# pf folderback
|
||||
# pf folderback --src ./project --dst ./backup --max-zip 10
|
||||
strategy: thread
|
||||
variables:
|
||||
SRC: "."
|
||||
DST: "./backup"
|
||||
MAX_ZIP: 5
|
||||
cli:
|
||||
description: "FolderBack - 文件夹备份工具"
|
||||
usage: "pf folderback [options]"
|
||||
options:
|
||||
- name: SRC
|
||||
flag: "--src"
|
||||
type: str
|
||||
default: "."
|
||||
help: "源文件夹路径 (默认: 当前目录)"
|
||||
- name: DST
|
||||
flag: "--dst"
|
||||
type: str
|
||||
default: "./backup"
|
||||
help: "目标文件夹路径 (默认: ./backup)"
|
||||
- name: MAX_ZIP
|
||||
flag: "--max-zip"
|
||||
type: int
|
||||
default: 5
|
||||
help: "最大备份数量 (默认: 5)"
|
||||
jobs:
|
||||
backup:
|
||||
fn: backup_folder
|
||||
args: ["${SRC}", "${DST}"]
|
||||
kwargs:
|
||||
max_zip: ${MAX_ZIP}
|
||||
@@ -0,0 +1,21 @@
|
||||
# folderzip - 文件夹压缩工具
|
||||
# 用法:
|
||||
# pf folderzip
|
||||
# pf folderzip --cwd ./project
|
||||
strategy: thread
|
||||
variables:
|
||||
CWD: "."
|
||||
cli:
|
||||
description: "FolderZip - 文件夹压缩工具"
|
||||
usage: "pf folderzip [options]"
|
||||
options:
|
||||
- name: CWD
|
||||
flag: "--cwd"
|
||||
type: str
|
||||
required: false
|
||||
default: "."
|
||||
help: "工作目录 (默认: 当前目录)"
|
||||
jobs:
|
||||
zip:
|
||||
fn: zip_folders
|
||||
args: ["${CWD}"]
|
||||
@@ -0,0 +1,51 @@
|
||||
# gittool - Git 执行工具
|
||||
# 用法:
|
||||
# pf gittool a
|
||||
# pf gittool c
|
||||
# pf gittool i
|
||||
# pf gittool isub
|
||||
# pf gittool p
|
||||
# pf gittool pl
|
||||
strategy: thread
|
||||
variables:
|
||||
# git clean -e 参数列表 (展开为 cmd 数组元素)
|
||||
CLEAN_EXCLUDES: ["-e", ".venv", "-e", ".tox", "-e", ".pytest_cache",
|
||||
"-e", ".ruff_cache", "-e", "node_modules",
|
||||
"-e", ".idea", "-e", ".vscode",
|
||||
"-e", ".trae", "-e", ".qoder",
|
||||
"-e", ".editorconfig", "-e", "idea.config",
|
||||
"-e", "idea_modules.xml", "-e", "vcs.xml"]
|
||||
cli:
|
||||
description: "GitTool - Git 执行工具"
|
||||
usage: "pf gittool <command>"
|
||||
subcommands:
|
||||
a:
|
||||
help: "添加并提交"
|
||||
c:
|
||||
help: "清理并查看状态"
|
||||
i:
|
||||
help: "初始化并提交"
|
||||
isub:
|
||||
help: "初始化子目录"
|
||||
p:
|
||||
help: "推送"
|
||||
pl:
|
||||
help: "拉取"
|
||||
jobs:
|
||||
a:
|
||||
fn: git_add_commit
|
||||
args: ["chore: update"]
|
||||
clean:
|
||||
cmd: ["git", "clean", "-xfd", "${CLEAN_EXCLUDES}"]
|
||||
c:
|
||||
needs: [clean]
|
||||
cmd: ["git", "status", "--porcelain"]
|
||||
i:
|
||||
fn: git_init_add_commit
|
||||
args: ["init commit"]
|
||||
isub:
|
||||
fn: init_sub_dirs
|
||||
p:
|
||||
cmd: ["git", "push"]
|
||||
pl:
|
||||
cmd: ["git", "pull"]
|
||||
@@ -0,0 +1,51 @@
|
||||
# lscalc - LS-DYNA 计算工具
|
||||
# 用法:
|
||||
# pf lscalc run input.k --ncpu 4
|
||||
# pf lscalc status
|
||||
strategy: thread
|
||||
variables:
|
||||
INPUT_FILE: input.k
|
||||
NCPU: 4
|
||||
cli:
|
||||
description: "LSCalc - LS-DYNA 计算工具"
|
||||
usage: "pf lscalc <command> [options]"
|
||||
subcommands:
|
||||
run:
|
||||
help: "运行 LS-DYNA 计算"
|
||||
positional:
|
||||
- name: INPUT_FILE
|
||||
type: str
|
||||
help: "输入文件路径"
|
||||
options:
|
||||
- name: NCPU
|
||||
flag: "--ncpu"
|
||||
type: int
|
||||
default: 4
|
||||
help: "CPU 核心数 (默认: 4)"
|
||||
mpi:
|
||||
help: "运行 LS-DYNA MPI 计算"
|
||||
positional:
|
||||
- name: INPUT_FILE
|
||||
type: str
|
||||
help: "输入文件路径"
|
||||
options:
|
||||
- name: NCPU
|
||||
flag: "--ncpu"
|
||||
type: int
|
||||
default: 4
|
||||
help: "CPU 核心数 (默认: 4)"
|
||||
status:
|
||||
help: "检查 LS-DYNA 进程状态"
|
||||
jobs:
|
||||
run:
|
||||
fn: run_ls_dyna
|
||||
args: ["${INPUT_FILE}"]
|
||||
kwargs:
|
||||
ncpu: ${NCPU}
|
||||
mpi:
|
||||
fn: run_ls_dyna_mpi
|
||||
args: ["${INPUT_FILE}"]
|
||||
kwargs:
|
||||
ncpu: ${NCPU}
|
||||
status:
|
||||
fn: check_ls_dyna_status
|
||||
@@ -0,0 +1,34 @@
|
||||
# msdownload - ModelScope 下载工具
|
||||
# 用法:
|
||||
# pf msdownload Qwen/Qwen2.5-Coder-32B-Instruct
|
||||
# pf msdownload AI-ModelScope/MNIST --type dataset --dir ./data
|
||||
strategy: thread
|
||||
variables:
|
||||
NAME: ""
|
||||
TYPE: model
|
||||
DIR: null
|
||||
cli:
|
||||
description: "MSDownload - ModelScope 模型/数据集下载工具"
|
||||
usage: "pf msdownload <name> [--type TYPE] [--dir DIR]"
|
||||
positional:
|
||||
- name: NAME
|
||||
type: str
|
||||
help: "目标名称 (如: Qwen/Qwen2.5-Coder-32B-Instruct)"
|
||||
options:
|
||||
- name: TYPE
|
||||
flag: "--type"
|
||||
type: str
|
||||
default: model
|
||||
help: "目标类型: model / dataset / space (默认: model)"
|
||||
- name: DIR
|
||||
flag: "--dir"
|
||||
type: str
|
||||
default: null
|
||||
help: "下载目录 (默认: ~/.models/<name>)"
|
||||
jobs:
|
||||
download:
|
||||
fn: msdownload_run
|
||||
args: ["${NAME}"]
|
||||
kwargs:
|
||||
target_type: ${TYPE}
|
||||
download_dir: ${DIR}
|
||||
@@ -0,0 +1,107 @@
|
||||
# packtool - Python 打包工具
|
||||
# 用法:
|
||||
# pf packtool src --project-dir . --output-dir .pypack
|
||||
# pf packtool deps requests numpy --lib-dir libs
|
||||
# pf packtool wheel --project-dir . --output-dir dist
|
||||
# pf packtool embed --version 3.10 --output-dir python
|
||||
# pf packtool zip --source-dir . --output-file package.zip
|
||||
# pf packtool clean
|
||||
strategy: thread
|
||||
variables:
|
||||
PROJECT_DIR: "."
|
||||
OUTPUT_DIR: ".pypack"
|
||||
LIB_DIR: "libs"
|
||||
DEPENDENCIES: []
|
||||
VERSION: "3.10"
|
||||
OUTPUT_FILE: "package.zip"
|
||||
SOURCE_DIR: "."
|
||||
cli:
|
||||
description: "PackTool - Python 打包工具"
|
||||
usage: "pf packtool <command> [options]"
|
||||
subcommands:
|
||||
src:
|
||||
help: "打包源码"
|
||||
options:
|
||||
- name: PROJECT_DIR
|
||||
flag: "--project-dir"
|
||||
type: path
|
||||
default: "."
|
||||
help: "项目目录 (默认: .)"
|
||||
- name: OUTPUT_DIR
|
||||
flag: "--output-dir"
|
||||
type: str
|
||||
default: ".pypack"
|
||||
help: "输出目录 (默认: .pypack)"
|
||||
deps:
|
||||
help: "打包依赖"
|
||||
positional:
|
||||
- name: DEPENDENCIES
|
||||
nargs: "*"
|
||||
type: str
|
||||
help: "依赖包列表"
|
||||
options:
|
||||
- name: LIB_DIR
|
||||
flag: "--lib-dir"
|
||||
type: path
|
||||
default: "libs"
|
||||
help: "依赖库目录 (默认: libs)"
|
||||
wheel:
|
||||
help: "构建 wheel"
|
||||
options:
|
||||
- name: PROJECT_DIR
|
||||
flag: "--project-dir"
|
||||
type: path
|
||||
default: "."
|
||||
help: "项目目录 (默认: .)"
|
||||
- name: OUTPUT_DIR
|
||||
flag: "--output-dir"
|
||||
type: path
|
||||
default: "dist"
|
||||
help: "输出目录 (默认: dist)"
|
||||
embed:
|
||||
help: "安装嵌入式 Python"
|
||||
options:
|
||||
- name: VERSION
|
||||
flag: "--version"
|
||||
type: str
|
||||
default: "3.10"
|
||||
help: "Python 版本 (默认: 3.10)"
|
||||
- name: OUTPUT_DIR
|
||||
flag: "--output-dir"
|
||||
type: path
|
||||
default: "python"
|
||||
help: "输出目录 (默认: python)"
|
||||
zip:
|
||||
help: "创建 zip 包"
|
||||
options:
|
||||
- name: SOURCE_DIR
|
||||
flag: "--source-dir"
|
||||
type: path
|
||||
default: "."
|
||||
help: "源目录 (默认: .)"
|
||||
- name: OUTPUT_FILE
|
||||
flag: "--output-file"
|
||||
type: path
|
||||
default: "package.zip"
|
||||
help: "输出文件 (默认: package.zip)"
|
||||
clean:
|
||||
help: "清理构建目录"
|
||||
jobs:
|
||||
src:
|
||||
fn: pack_source
|
||||
args: ["${PROJECT_DIR}", "${OUTPUT_DIR}"]
|
||||
deps:
|
||||
fn: pack_dependencies
|
||||
args: ["${LIB_DIR}", "${DEPENDENCIES}"]
|
||||
wheel:
|
||||
fn: pack_wheel
|
||||
args: ["${PROJECT_DIR}", "${OUTPUT_DIR}"]
|
||||
embed:
|
||||
fn: install_embed_python
|
||||
args: ["${VERSION}", "${OUTPUT_DIR}"]
|
||||
zip:
|
||||
fn: create_zip_package
|
||||
args: ["${SOURCE_DIR}", "${OUTPUT_FILE}"]
|
||||
clean:
|
||||
fn: clean_build_dir
|
||||
args: ["${OUTPUT_DIR}"]
|
||||
@@ -0,0 +1,303 @@
|
||||
# pdftool - PDF 文件工具集
|
||||
# 用法:
|
||||
# pf pdftool m a.pdf b.pdf --output merged.pdf
|
||||
# pf pdftool s input.pdf --output-dir split
|
||||
# pf pdftool c input.pdf --output compressed.pdf
|
||||
# pf pdftool e input.pdf --output encrypted.pdf --password 123456
|
||||
# pf pdftool d input.pdf --output decrypted.pdf --password 123456
|
||||
# pf pdftool xt input.pdf --output output.txt
|
||||
# pf pdftool xi input.pdf --output-dir images
|
||||
# pf pdftool w input.pdf --output watermarked.pdf --text CONFIDENTIAL
|
||||
# pf pdftool r input.pdf --output rotated.pdf --rotation 90
|
||||
# pf pdftool crop input.pdf --output cropped.pdf --left 10 --top 10 --right 10 --bottom 10
|
||||
# pf pdftool i input.pdf
|
||||
# pf pdftool ocr input.pdf --output ocr.pdf --lang chi_sim+eng
|
||||
# pf pdftool img input.pdf --output-dir images --dpi 300
|
||||
# pf pdftool repair input.pdf --output repaired.pdf
|
||||
strategy: thread
|
||||
variables:
|
||||
INPUT: input.pdf
|
||||
INPUTS: []
|
||||
OUTPUT: output.pdf
|
||||
OUTPUT_DIR: output
|
||||
PASSWORD: ""
|
||||
TEXT: CONFIDENTIAL
|
||||
ROTATION: 90
|
||||
MARGINS: [10, 10, 10, 10]
|
||||
DPI: 300
|
||||
LANG: chi_sim+eng
|
||||
ORDER: []
|
||||
LEFT: 10
|
||||
TOP: 10
|
||||
RIGHT: 10
|
||||
BOTTOM: 10
|
||||
cli:
|
||||
description: "PdfTool - PDF 文件工具集"
|
||||
usage: "pf pdftool <command> [options]"
|
||||
subcommands:
|
||||
m:
|
||||
help: "合并 PDF"
|
||||
positional:
|
||||
- name: INPUTS
|
||||
nargs: "+"
|
||||
type: path
|
||||
help: "输入 PDF 文件列表"
|
||||
options:
|
||||
- name: OUTPUT
|
||||
flag: "--output"
|
||||
type: path
|
||||
default: "merged.pdf"
|
||||
help: "输出文件 (默认: merged.pdf)"
|
||||
s:
|
||||
help: "拆分 PDF"
|
||||
positional:
|
||||
- name: INPUT
|
||||
type: path
|
||||
help: "输入 PDF 文件"
|
||||
options:
|
||||
- name: OUTPUT_DIR
|
||||
flag: "--output-dir"
|
||||
type: path
|
||||
default: "split"
|
||||
help: "输出目录 (默认: split)"
|
||||
c:
|
||||
help: "压缩 PDF"
|
||||
positional:
|
||||
- name: INPUT
|
||||
type: path
|
||||
help: "输入 PDF 文件"
|
||||
options:
|
||||
- name: OUTPUT
|
||||
flag: "--output"
|
||||
type: path
|
||||
default: "compressed.pdf"
|
||||
help: "输出文件 (默认: compressed.pdf)"
|
||||
e:
|
||||
help: "加密 PDF"
|
||||
positional:
|
||||
- name: INPUT
|
||||
type: path
|
||||
help: "输入 PDF 文件"
|
||||
options:
|
||||
- name: OUTPUT
|
||||
flag: "--output"
|
||||
type: path
|
||||
default: "encrypted.pdf"
|
||||
help: "输出文件 (默认: encrypted.pdf)"
|
||||
- name: PASSWORD
|
||||
flag: "--password"
|
||||
type: str
|
||||
required: true
|
||||
help: "密码 (必填)"
|
||||
d:
|
||||
help: "解密 PDF"
|
||||
positional:
|
||||
- name: INPUT
|
||||
type: path
|
||||
help: "输入 PDF 文件"
|
||||
options:
|
||||
- name: OUTPUT
|
||||
flag: "--output"
|
||||
type: path
|
||||
default: "decrypted.pdf"
|
||||
help: "输出文件 (默认: decrypted.pdf)"
|
||||
- name: PASSWORD
|
||||
flag: "--password"
|
||||
type: str
|
||||
required: true
|
||||
help: "密码 (必填)"
|
||||
xt:
|
||||
help: "提取文本"
|
||||
positional:
|
||||
- name: INPUT
|
||||
type: path
|
||||
help: "输入 PDF 文件"
|
||||
options:
|
||||
- name: OUTPUT
|
||||
flag: "--output"
|
||||
type: path
|
||||
default: "output.txt"
|
||||
help: "输出文件 (默认: output.txt)"
|
||||
xi:
|
||||
help: "提取图片"
|
||||
positional:
|
||||
- name: INPUT
|
||||
type: path
|
||||
help: "输入 PDF 文件"
|
||||
options:
|
||||
- name: OUTPUT_DIR
|
||||
flag: "--output-dir"
|
||||
type: path
|
||||
default: "images"
|
||||
help: "输出目录 (默认: images)"
|
||||
w:
|
||||
help: "添加水印"
|
||||
positional:
|
||||
- name: INPUT
|
||||
type: path
|
||||
help: "输入 PDF 文件"
|
||||
options:
|
||||
- name: OUTPUT
|
||||
flag: "--output"
|
||||
type: path
|
||||
default: "watermarked.pdf"
|
||||
help: "输出文件 (默认: watermarked.pdf)"
|
||||
- name: TEXT
|
||||
flag: "--text"
|
||||
type: str
|
||||
default: "CONFIDENTIAL"
|
||||
help: "水印文字 (默认: CONFIDENTIAL)"
|
||||
r:
|
||||
help: "旋转 PDF"
|
||||
positional:
|
||||
- name: INPUT
|
||||
type: path
|
||||
help: "输入 PDF 文件"
|
||||
options:
|
||||
- name: OUTPUT
|
||||
flag: "--output"
|
||||
type: path
|
||||
default: "rotated.pdf"
|
||||
help: "输出文件 (默认: rotated.pdf)"
|
||||
- name: ROTATION
|
||||
flag: "--rotation"
|
||||
type: int
|
||||
default: 90
|
||||
help: "旋转角度 (默认: 90)"
|
||||
crop:
|
||||
help: "裁剪 PDF"
|
||||
positional:
|
||||
- name: INPUT
|
||||
type: path
|
||||
help: "输入 PDF 文件"
|
||||
options:
|
||||
- name: OUTPUT
|
||||
flag: "--output"
|
||||
type: path
|
||||
default: "cropped.pdf"
|
||||
help: "输出文件 (默认: cropped.pdf)"
|
||||
- name: LEFT
|
||||
flag: "--left"
|
||||
type: int
|
||||
default: 10
|
||||
help: "左边距 (默认: 10)"
|
||||
- name: TOP
|
||||
flag: "--top"
|
||||
type: int
|
||||
default: 10
|
||||
help: "上边距 (默认: 10)"
|
||||
- name: RIGHT
|
||||
flag: "--right"
|
||||
type: int
|
||||
default: 10
|
||||
help: "右边距 (默认: 10)"
|
||||
- name: BOTTOM
|
||||
flag: "--bottom"
|
||||
type: int
|
||||
default: 10
|
||||
help: "下边距 (默认: 10)"
|
||||
i:
|
||||
help: "查看 PDF 信息"
|
||||
positional:
|
||||
- name: INPUT
|
||||
type: path
|
||||
help: "输入 PDF 文件"
|
||||
ocr:
|
||||
help: "PDF OCR 识别"
|
||||
positional:
|
||||
- name: INPUT
|
||||
type: path
|
||||
help: "输入 PDF 文件"
|
||||
options:
|
||||
- name: OUTPUT
|
||||
flag: "--output"
|
||||
type: path
|
||||
default: "ocr.pdf"
|
||||
help: "输出文件 (默认: ocr.pdf)"
|
||||
- name: LANG
|
||||
flag: "--lang"
|
||||
type: str
|
||||
default: "chi_sim+eng"
|
||||
help: "识别语言 (默认: chi_sim+eng)"
|
||||
img:
|
||||
help: "PDF 转图片"
|
||||
positional:
|
||||
- name: INPUT
|
||||
type: path
|
||||
help: "输入 PDF 文件"
|
||||
options:
|
||||
- name: OUTPUT_DIR
|
||||
flag: "--output-dir"
|
||||
type: path
|
||||
default: "images"
|
||||
help: "输出目录 (默认: images)"
|
||||
- name: DPI
|
||||
flag: "--dpi"
|
||||
type: int
|
||||
default: 300
|
||||
help: "DPI (默认: 300)"
|
||||
repair:
|
||||
help: "修复 PDF"
|
||||
positional:
|
||||
- name: INPUT
|
||||
type: path
|
||||
help: "输入 PDF 文件"
|
||||
options:
|
||||
- name: OUTPUT
|
||||
flag: "--output"
|
||||
type: path
|
||||
default: "repaired.pdf"
|
||||
help: "输出文件 (默认: repaired.pdf)"
|
||||
jobs:
|
||||
m:
|
||||
fn: pdf_merge
|
||||
args: ["${INPUTS}", "${OUTPUT}"]
|
||||
s:
|
||||
fn: pdf_split
|
||||
args: ["${INPUT}", "${OUTPUT_DIR}"]
|
||||
c:
|
||||
fn: pdf_compress
|
||||
args: ["${INPUT}", "${OUTPUT}"]
|
||||
e:
|
||||
fn: pdf_encrypt
|
||||
args: ["${INPUT}", "${OUTPUT}", "${PASSWORD}"]
|
||||
d:
|
||||
fn: pdf_decrypt
|
||||
args: ["${INPUT}", "${OUTPUT}", "${PASSWORD}"]
|
||||
xt:
|
||||
fn: pdf_extract_text
|
||||
args: ["${INPUT}", "${OUTPUT}"]
|
||||
xi:
|
||||
fn: pdf_extract_images
|
||||
args: ["${INPUT}", "${OUTPUT_DIR}"]
|
||||
w:
|
||||
fn: pdf_add_watermark
|
||||
args: ["${INPUT}", "${OUTPUT}"]
|
||||
kwargs:
|
||||
text: "${TEXT}"
|
||||
r:
|
||||
fn: pdf_rotate
|
||||
args: ["${INPUT}", "${OUTPUT}"]
|
||||
kwargs:
|
||||
rotation: ${ROTATION}
|
||||
crop:
|
||||
fn: pdf_crop
|
||||
args: ["${INPUT}", "${OUTPUT}"]
|
||||
kwargs:
|
||||
margins: "${MARGINS}"
|
||||
i:
|
||||
fn: pdf_info
|
||||
args: ["${INPUT}"]
|
||||
ocr:
|
||||
fn: pdf_ocr
|
||||
args: ["${INPUT}", "${OUTPUT}"]
|
||||
kwargs:
|
||||
lang: "${LANG}"
|
||||
img:
|
||||
fn: pdf_to_images
|
||||
args: ["${INPUT}", "${OUTPUT_DIR}"]
|
||||
kwargs:
|
||||
dpi: ${DPI}
|
||||
repair:
|
||||
fn: pdf_repair
|
||||
args: ["${INPUT}", "${OUTPUT}"]
|
||||
@@ -0,0 +1,78 @@
|
||||
# piptool - pip 包管理工具
|
||||
# 用法:
|
||||
# pf piptool i requests
|
||||
# pf piptool u requests
|
||||
# pf piptool r requests
|
||||
# pf piptool d requests
|
||||
# pf piptool up
|
||||
# pf piptool f
|
||||
strategy: thread
|
||||
variables:
|
||||
PACKAGES: []
|
||||
OFFLINE: false
|
||||
cli:
|
||||
description: "PipTool - pip 包管理工具"
|
||||
usage: "pf piptool <command> [packages...] [options]"
|
||||
subcommands:
|
||||
i:
|
||||
help: "安装包"
|
||||
positional:
|
||||
- name: PACKAGES
|
||||
nargs: "+"
|
||||
type: str
|
||||
help: "包名列表"
|
||||
u:
|
||||
help: "卸载包"
|
||||
positional:
|
||||
- name: PACKAGES
|
||||
nargs: "+"
|
||||
type: str
|
||||
help: "包名列表"
|
||||
r:
|
||||
help: "重装包"
|
||||
positional:
|
||||
- name: PACKAGES
|
||||
nargs: "+"
|
||||
type: str
|
||||
help: "包名列表"
|
||||
options:
|
||||
- name: OFFLINE
|
||||
flag: "--offline"
|
||||
action: "store_true"
|
||||
help: "离线模式"
|
||||
d:
|
||||
help: "下载包"
|
||||
positional:
|
||||
- name: PACKAGES
|
||||
nargs: "+"
|
||||
type: str
|
||||
help: "包名列表"
|
||||
options:
|
||||
- name: OFFLINE
|
||||
flag: "--offline"
|
||||
action: "store_true"
|
||||
help: "离线模式"
|
||||
up:
|
||||
help: "升级 pip"
|
||||
f:
|
||||
help: "导出依赖"
|
||||
jobs:
|
||||
i:
|
||||
cmd: ["pip", "install", "${PACKAGES}"]
|
||||
u:
|
||||
fn: pip_uninstall
|
||||
args: ["${PACKAGES}"]
|
||||
r:
|
||||
fn: pip_reinstall
|
||||
args: ["${PACKAGES}"]
|
||||
kwargs:
|
||||
offline: ${OFFLINE}
|
||||
d:
|
||||
fn: pip_download
|
||||
args: ["${PACKAGES}"]
|
||||
kwargs:
|
||||
offline: ${OFFLINE}
|
||||
up:
|
||||
cmd: ["python", "-m", "pip", "install", "--upgrade", "pip"]
|
||||
f:
|
||||
fn: pip_freeze
|
||||
@@ -0,0 +1,125 @@
|
||||
# pymake - 项目构建工具
|
||||
# 用法
|
||||
# pf pymake <command>
|
||||
# 命令
|
||||
# b: 构建 Python 主包 (uv build)
|
||||
# ba: 构建所有包 (Python + Rust)
|
||||
# bc: 构建 Rust 核心模块 (maturin build)
|
||||
# bump: 升级版本号 (清理 + 检查 + add + bumpversion)
|
||||
# bumpmi: 升级次版本号 (bumpversion minor)
|
||||
# c: 清理构建产物 (调用 gitt c)
|
||||
# cov: 测试并生成覆盖率
|
||||
# doc: 构建 Sphinx 文档
|
||||
# lint: 代码格式化与检查 (ruff)
|
||||
# p: 推送代码 (清理 + push + push tags)
|
||||
# pb: 发布到 PyPI (twine + hatch)
|
||||
# sync: 同步依赖 (uv sync)
|
||||
# t: 运行测试
|
||||
# tc: 类型检查 (pyrefly + ruff)
|
||||
# tf: 快速测试 (无 slow)
|
||||
# tox: 多版本测试 (tox)
|
||||
strategy: thread
|
||||
variables:
|
||||
CWD: "."
|
||||
cli:
|
||||
description: "PyMake - 项目构建工具"
|
||||
usage: "pf pymake <command>"
|
||||
options:
|
||||
- name: CWD
|
||||
flag: "--cwd"
|
||||
type: path
|
||||
required: false
|
||||
default: "."
|
||||
help: "工作目录 (默认: 当前目录)"
|
||||
subcommands:
|
||||
b: {help: "构建 Python 主包 (uv build)"}
|
||||
ba: {help: "构建所有包 (Python + Rust)"}
|
||||
bc: {help: "构建 Rust 核心模块 (maturin build)"}
|
||||
bump: {help: "升级版本号 (清理 + 检查 + add + bumpversion)"}
|
||||
bumpmi: {help: "升级次版本号 (bumpversion minor)"}
|
||||
c: {help: "清理构建产物 (调用 gitt c)"}
|
||||
cov: {help: "测试并生成覆盖率"}
|
||||
doc: {help: "构建 Sphinx 文档"}
|
||||
lint: {help: "代码格式化与检查 (ruff)"}
|
||||
p: {help: "推送代码 (清理 + push + push tags)"}
|
||||
pb: {help: "发布到 PyPI (twine + hatch)"}
|
||||
sync: {help: "同步依赖 (uv sync)"}
|
||||
t: {help: "运行测试"}
|
||||
tc: {help: "类型检查 (pyrefly + ruff)"}
|
||||
tf: {help: "快速测试 (无 slow)"}
|
||||
tox: {help: "多版本测试 (tox)"}
|
||||
jobs:
|
||||
# 单任务别名
|
||||
b:
|
||||
cmd: ["uv", "build"]
|
||||
cwd: ${CWD}
|
||||
bc:
|
||||
cmd: ["maturin", "build", "-r"]
|
||||
cwd: ${CWD}
|
||||
sync:
|
||||
cmd: ["uv", "sync"]
|
||||
cwd: ${CWD}
|
||||
c:
|
||||
cmd: ["pf", "gitt", "c"]
|
||||
cwd: ${CWD}
|
||||
t:
|
||||
cmd: ["pytest", "-m", "not slow", "-n", "8", "--dist", "loadfile", "--color=yes", "--durations=10"]
|
||||
cwd: ${CWD}
|
||||
tf:
|
||||
cmd: ["pytest", "-m", "not slow", "--dist", "loadfile", "--color=yes", "--durations=10"]
|
||||
cwd: ${CWD}
|
||||
bumpversion:
|
||||
cmd: ["pf", "bumpversion", "patch"]
|
||||
needs: [git_add_all]
|
||||
cwd: ${CWD}
|
||||
bumpmi:
|
||||
cmd: ["pf", "bumpversion", "minor"]
|
||||
cwd: ${CWD}
|
||||
doc:
|
||||
cmd: ["sphinx-build", "-b", "html", "docs", "docs/_build"]
|
||||
cwd: ${CWD}
|
||||
lint:
|
||||
cmd: ["ruff", "check", "--fix", "--unsafe-fixes"]
|
||||
cwd: ${CWD}
|
||||
tox:
|
||||
cmd: ["tox", "-p", "auto"]
|
||||
cwd: ${CWD}
|
||||
|
||||
# 内部 job (不暴露为 subcommand)
|
||||
test_coverage:
|
||||
cmd: ["pytest", "--cov", "-n", "8", "--dist", "loadfile", "--tb=short", "-v", "--color=yes", "--durations=10"]
|
||||
needs: [c]
|
||||
cwd: ${CWD}
|
||||
pyrefly_check:
|
||||
cmd: ["pyrefly", "check", "."]
|
||||
cwd: ${CWD}
|
||||
git_add_all:
|
||||
cmd: ["git", "add", "-A"]
|
||||
needs: [tc]
|
||||
cwd: ${CWD}
|
||||
git_push:
|
||||
cmd: ["git", "push"]
|
||||
cwd: ${CWD}
|
||||
git_push_tags:
|
||||
cmd: ["git", "push", "--tags"]
|
||||
cwd: ${CWD}
|
||||
twine_publish:
|
||||
cmd: ["twine", "upload", "--disable-progress-bar"]
|
||||
cwd: ${CWD}
|
||||
publish_python:
|
||||
cmd: ["hatch", "publish"]
|
||||
cwd: ${CWD}
|
||||
|
||||
# 聚合 job (方向 B: 有 needs 无 cmd/fn)
|
||||
ba:
|
||||
needs: [b, bc]
|
||||
bump:
|
||||
needs: [bumpversion]
|
||||
cov:
|
||||
needs: [test_coverage]
|
||||
tc:
|
||||
needs: [c, pyrefly_check, lint]
|
||||
p:
|
||||
needs: [c, git_push, git_push_tags]
|
||||
pb:
|
||||
needs: [twine_publish, publish_python]
|
||||
@@ -0,0 +1,13 @@
|
||||
# reseticoncache - 重置 Windows 图标缓存
|
||||
# 用法
|
||||
# pf reseticon
|
||||
# 说明
|
||||
# 杀掉 explorer → 删除 IconCache.db → 删除 iconcache* → 重启 explorer
|
||||
# 仅在 Windows 上有效, 非 Windows 平台打印提示并跳过
|
||||
strategy: sequential
|
||||
cli:
|
||||
description: "重置 Windows 图标缓存"
|
||||
usage: "pf reseticon"
|
||||
jobs:
|
||||
reset:
|
||||
fn: reset_icon_cache_run
|
||||
@@ -0,0 +1,34 @@
|
||||
# screenshot - 截图工具
|
||||
# 用法:
|
||||
# pf screenshot full
|
||||
# pf screenshot area --filename custom.png
|
||||
strategy: thread
|
||||
variables:
|
||||
FILENAME: null
|
||||
cli:
|
||||
description: "Screenshot - 截图工具"
|
||||
usage: "pf screenshot <command> [options]"
|
||||
subcommands:
|
||||
full:
|
||||
help: "全屏截图"
|
||||
options:
|
||||
- name: FILENAME
|
||||
flag: "--filename"
|
||||
type: str
|
||||
help: "文件名"
|
||||
area:
|
||||
help: "区域截图"
|
||||
options:
|
||||
- name: FILENAME
|
||||
flag: "--filename"
|
||||
type: str
|
||||
help: "文件名"
|
||||
jobs:
|
||||
full:
|
||||
fn: take_screenshot_full
|
||||
kwargs:
|
||||
filename: "${FILENAME}"
|
||||
area:
|
||||
fn: take_screenshot_area
|
||||
kwargs:
|
||||
filename: "${FILENAME}"
|
||||
@@ -0,0 +1,60 @@
|
||||
# sglang - SGLang 本地模型服务
|
||||
# 用法:
|
||||
# pf sglang
|
||||
# pf sglang --model ~/.models/Qwen2.5-Coder-32B-Instruct-AWQ
|
||||
# pf sglang --port 9000 --mem 0.8
|
||||
strategy: sequential
|
||||
variables:
|
||||
MODEL: "~/.models/Qwen2.5-Coder-32B-Instruct-AWQ"
|
||||
PORT: 8000
|
||||
CTX_LEN: 32768
|
||||
MEM: 0.75
|
||||
HOST: "0.0.0.0"
|
||||
LOG_LEVEL: "info"
|
||||
cli:
|
||||
description: "SGLang - 本地模型服务启动工具"
|
||||
usage: "pf sglang [options]"
|
||||
options:
|
||||
- name: MODEL
|
||||
flag: "--model"
|
||||
type: str
|
||||
default: "~/.models/Qwen2.5-Coder-32B-Instruct-AWQ"
|
||||
help: "模型路径"
|
||||
- name: PORT
|
||||
flag: "--port"
|
||||
type: int
|
||||
default: 8000
|
||||
help: "服务端口 (默认: 8000)"
|
||||
- name: CTX_LEN
|
||||
flag: "--ctx-len"
|
||||
type: int
|
||||
default: 32768
|
||||
help: "最大上下文长度 (默认: 32768)"
|
||||
- name: MEM
|
||||
flag: "--mem"
|
||||
type: float
|
||||
default: 0.75
|
||||
help: "显存占比 0-1 (默认: 0.75)"
|
||||
- name: HOST
|
||||
flag: "--host"
|
||||
type: str
|
||||
default: "0.0.0.0"
|
||||
help: "主机地址 (默认: 0.0.0.0)"
|
||||
- name: LOG_LEVEL
|
||||
flag: "--log-level"
|
||||
type: str
|
||||
default: "info"
|
||||
help: "日志级别 (默认: info)"
|
||||
jobs:
|
||||
install:
|
||||
fn: install_sglang
|
||||
run:
|
||||
fn: run_sglang
|
||||
needs: [install]
|
||||
kwargs:
|
||||
model: ${MODEL}
|
||||
port: ${PORT}
|
||||
ctx_len: ${CTX_LEN}
|
||||
mem_fraction: ${MEM}
|
||||
host: ${HOST}
|
||||
log_level: ${LOG_LEVEL}
|
||||
@@ -0,0 +1,49 @@
|
||||
# sshcopyid - SSH 密钥部署工具
|
||||
# 用法:
|
||||
# pf sshcopyid hostname username password
|
||||
# pf sshcopyid server user pass --port 2222
|
||||
strategy: thread
|
||||
variables:
|
||||
HOSTNAME: ""
|
||||
USERNAME: ""
|
||||
PASSWORD: ""
|
||||
PORT: 22
|
||||
KEYPATH: "~/.ssh/id_rsa.pub"
|
||||
TIMEOUT: 30
|
||||
cli:
|
||||
description: "SSHCopyID - SSH 密钥部署工具"
|
||||
usage: "pf sshcopyid <hostname> <username> <password> [options]"
|
||||
positional:
|
||||
- name: HOSTNAME
|
||||
type: str
|
||||
help: "远程服务器主机名或 IP 地址"
|
||||
- name: USERNAME
|
||||
type: str
|
||||
help: "远程服务器用户名"
|
||||
- name: PASSWORD
|
||||
type: str
|
||||
help: "远程服务器密码"
|
||||
options:
|
||||
- name: PORT
|
||||
flag: "--port"
|
||||
type: int
|
||||
default: 22
|
||||
help: "SSH 端口 (默认: 22)"
|
||||
- name: KEYPATH
|
||||
flag: "--keypath"
|
||||
type: str
|
||||
default: "~/.ssh/id_rsa.pub"
|
||||
help: "公钥文件路径"
|
||||
- name: TIMEOUT
|
||||
flag: "--timeout"
|
||||
type: int
|
||||
default: 30
|
||||
help: "SSH 操作超时秒数 (默认: 30)"
|
||||
jobs:
|
||||
deploy:
|
||||
fn: ssh_copy_id
|
||||
args: ["${HOSTNAME}", "${USERNAME}", "${PASSWORD}"]
|
||||
kwargs:
|
||||
port: ${PORT}
|
||||
keypath: "${KEYPATH}"
|
||||
timeout: ${TIMEOUT}
|
||||
@@ -0,0 +1,18 @@
|
||||
# taskkill - 进程终止工具
|
||||
# 用法:
|
||||
# pf taskkill chrome.exe python node
|
||||
strategy: thread
|
||||
variables:
|
||||
PROCESS_NAMES: []
|
||||
cli:
|
||||
description: "TaskKill - 进程终止工具 (跨平台)"
|
||||
usage: "pf taskkill <process_name> [process_name ...]"
|
||||
positional:
|
||||
- name: PROCESS_NAMES
|
||||
nargs: "+"
|
||||
type: str
|
||||
help: "进程名称 (如: chrome.exe python node)"
|
||||
jobs:
|
||||
kill:
|
||||
fn: taskkill_run
|
||||
args: ["${PROCESS_NAMES}"]
|
||||
@@ -0,0 +1,18 @@
|
||||
# which - 命令查找工具
|
||||
# 用法:
|
||||
# pf which python ls ps gcc
|
||||
strategy: thread
|
||||
variables:
|
||||
COMMANDS: []
|
||||
cli:
|
||||
description: "Which - 命令查找工具 (跨平台)"
|
||||
usage: "pf which <command> [command ...]"
|
||||
positional:
|
||||
- name: COMMANDS
|
||||
nargs: "+"
|
||||
type: str
|
||||
help: "要查找的命令名称, 如: python ls ps gcc"
|
||||
jobs:
|
||||
find:
|
||||
fn: which_run
|
||||
args: ["${COMMANDS}"]
|
||||
+49
-74
@@ -1,106 +1,92 @@
|
||||
"""上下文注入:把上游结果转换为函数参数。
|
||||
|
||||
本机制让用户可以编写普通函数,其参数名*就是*依赖声明,从而消除其他
|
||||
DAG 库中泛滥的样板包装器(如 ``def wrapper(): return fn(workflow.get_task_result('x'))``)。
|
||||
DAG 库中泛滥的样板包装器。
|
||||
|
||||
注入规则(按顺序求值)
|
||||
----------------------
|
||||
1. **标注为** :class:`Context` 的参数接收完整结果映射。适用于需要遍历
|
||||
所有输入的任务。
|
||||
2. **名称匹配某个依赖**的参数接收该依赖的结果。
|
||||
1. **标注为** :class:`Context` 的参数接收完整结果映射(含硬依赖与软依赖)。
|
||||
2. **名称匹配某个依赖**(硬或软)的参数接收该依赖的结果。
|
||||
3. ``**kwargs`` 参数以 dict 形式接收*所有*依赖结果。
|
||||
4. ``TaskSpec.args`` / ``TaskSpec.kwargs`` 为*非依赖*参数提供静态值。
|
||||
|
||||
若某参数无法解析且无默认值,则抛出 :class:`~pyflowx.errors.InjectionError`,
|
||||
并附带精确错误信息。
|
||||
若某参数无法解析且无默认值,则抛出 :class:`~pyflowx.errors.InjectionError`。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import inspect
|
||||
from typing import Any, Dict, List, Mapping, Set, Tuple
|
||||
from functools import lru_cache
|
||||
from typing import Any, Mapping
|
||||
|
||||
from .errors import InjectionError
|
||||
from .task import Context, TaskSpec
|
||||
|
||||
__all__ = ["Context", "build_call_args", "describe_injection", "_is_context_annotation"]
|
||||
__all__ = ["Context", "_is_context_annotation", "build_call_args", "describe_injection"]
|
||||
|
||||
|
||||
@lru_cache(maxsize=1024)
def _cached_signature(fn: Any) -> inspect.Signature:
    """Return ``inspect.signature(fn)``, memoised per callable object.

    Results live in an LRU cache holding at most 1024 entries and keyed by
    ``fn`` itself, so introspecting the same callable again is a cache hit.
    ``fn`` must be hashable: ``lru_cache`` raises ``TypeError`` otherwise,
    and callers are expected to fall back to direct introspection.
    """
    signature = inspect.signature(fn)
    return signature
|
||||
|
||||
|
||||
def _signature(fn: Any) -> inspect.Signature:
    """Return the signature of ``fn``, using the cache whenever possible.

    The cached lookup is tried first. If it raises ``TypeError`` (which is
    what ``lru_cache`` does for an unhashable argument), the signature is
    computed directly and not cached.
    """
    try:
        result = _cached_signature(fn)
    except TypeError:
        # Unhashable callables cannot be cache keys; introspect them directly.
        result = inspect.signature(fn)
    return result
|
||||
|
||||
|
||||
def _is_context_annotation(annotation: Any) -> bool:
    """Tell whether a parameter annotation denotes ``Context``.

    Three spellings are recognised:

    * a *string* annotation (what ``from __future__ import annotations``
      leaves behind at runtime) that is exactly ``"Context"`` or whose last
      dotted segment is ``Context``, e.g. ``"px.Context"``;
    * the ``Context`` object itself;
    * any object whose ``__name__`` (or, failing that, ``_name``) is
      ``"Context"`` or ``"Mapping"``.
    """
    if isinstance(annotation, str):
        # The last dotted segment equals "Context" exactly when the string is
        # "Context" itself or ends with ".Context".
        return annotation.rpartition(".")[2] == "Context"
    if annotation is Context:
        return True
    label = getattr(annotation, "__name__", None) or getattr(annotation, "_name", None)
    return label in ("Context", "Mapping")
|
||||
|
||||
|
||||
def build_call_args(
|
||||
spec: TaskSpec[object],
|
||||
spec: TaskSpec[Any],
|
||||
context: Mapping[str, Any],
|
||||
) -> Tuple[Tuple[Any, ...], Dict[str, Any]]:
|
||||
) -> tuple[tuple[Any, ...], dict[str, Any]]:
|
||||
"""解析用于调用 ``spec.fn`` 的 ``(args, kwargs)``。
|
||||
|
||||
参数
|
||||
----
|
||||
spec:
|
||||
任务 spec,提供 ``fn``、``depends_on``、``args``、``kwargs``。
|
||||
context:
|
||||
依赖名 -> 结果值的映射。仅保证本任务自身的 ``depends_on`` 条目
|
||||
存在;其他任务的结果被排除,以保持注入的确定性。
|
||||
|
||||
返回
|
||||
----
|
||||
(args, kwargs)
|
||||
可直接展开为 ``spec.fn(*args, **kwargs)``。
|
||||
|
||||
抛出
|
||||
----
|
||||
InjectionError
|
||||
若必需参数无法满足,或静态 ``kwargs`` 与注入依赖名冲突。
|
||||
``context`` 必须已包含所有硬依赖与软依赖的结果(软依赖被跳过时由
|
||||
执行器填入 :attr:`TaskSpec.defaults` 中的默认值)。
|
||||
"""
|
||||
sig = inspect.signature(spec.fn)
|
||||
fn = spec.effective_fn
|
||||
sig = _signature(fn)
|
||||
params = sig.parameters
|
||||
|
||||
# 检测特殊参数类型。
|
||||
var_keyword = next(
|
||||
(p for p in params.values() if p.kind == inspect.Parameter.VAR_KEYWORD),
|
||||
None,
|
||||
)
|
||||
|
||||
# 与本任务相关的上下文子集。
|
||||
dep_context: Dict[str, Any] = {
|
||||
name: context[name] for name in spec.depends_on if name in context
|
||||
}
|
||||
# 本任务相关的上下文子集:硬依赖 + 软依赖。
|
||||
all_deps = set(spec.depends_on) | set(spec.soft_depends_on)
|
||||
dep_context: dict[str, Any] = {name: context[name] for name in all_deps if name in context}
|
||||
|
||||
# 检测静态 kwargs 与依赖名的冲突。
|
||||
collisions = set(spec.kwargs) & set(dep_context)
|
||||
if collisions:
|
||||
raise InjectionError(
|
||||
spec.name,
|
||||
f"static kwargs {sorted(collisions)} collide with dependency names; "
|
||||
"rename the static kwarg or the dependency.",
|
||||
+ "rename the static kwarg or the dependency.",
|
||||
)
|
||||
|
||||
injected_kwargs: Dict[str, Any] = {}
|
||||
leftover_dep_results: Dict[str, Any] = dict(dep_context)
|
||||
injected_kwargs: dict[str, Any] = {}
|
||||
leftover_dep_results: dict[str, Any] = dict(dep_context)
|
||||
|
||||
# 被 spec.args 消费的位置参数。记录哪些参数名已被位置填充,
|
||||
# 以便在基于名称的注入(依赖 / Context / 静态 kwargs)时跳过。
|
||||
positional_params: List[str] = []
|
||||
positional_params: list[str] = []
|
||||
positional_kinds = (
|
||||
inspect.Parameter.POSITIONAL_ONLY,
|
||||
inspect.Parameter.POSITIONAL_OR_KEYWORD,
|
||||
@@ -108,33 +94,25 @@ def build_call_args(
|
||||
for pname, param in params.items():
|
||||
if param.kind in positional_kinds:
|
||||
positional_params.append(pname)
|
||||
# 前 len(spec.args) 个位置参数由 spec.args 填充。
|
||||
args_filled: Set[str] = set(positional_params[: len(spec.args)])
|
||||
args_filled: set[str] = set(positional_params[: len(spec.args)])
|
||||
|
||||
for pname, param in params.items():
|
||||
# 跳过已被位置 spec.args 填充的参数。
|
||||
if pname in args_filled:
|
||||
continue
|
||||
|
||||
# 规则 1:标注为 Context -> 完整映射。
|
||||
if _is_context_annotation(param.annotation):
|
||||
injected_kwargs[pname] = dep_context
|
||||
continue
|
||||
|
||||
# 规则 2:名称匹配某个依赖。
|
||||
if pname in dep_context:
|
||||
injected_kwargs[pname] = dep_context[pname]
|
||||
leftover_dep_results.pop(pname, None)
|
||||
continue
|
||||
|
||||
# 规则 3:在循环后通过 **kwargs 处理。
|
||||
|
||||
# 规则 4:静态 kwargs 填充其余参数。
|
||||
if pname in spec.kwargs:
|
||||
injected_kwargs[pname] = spec.kwargs[pname]
|
||||
continue
|
||||
|
||||
# 该参数无来源:必须有默认值,否则报错。
|
||||
if param.default is inspect.Parameter.empty and param.kind not in (
|
||||
inspect.Parameter.VAR_POSITIONAL,
|
||||
inspect.Parameter.VAR_KEYWORD,
|
||||
@@ -144,9 +122,7 @@ def build_call_args(
|
||||
f"parameter {pname!r} has no dependency, static value, or default.",
|
||||
)
|
||||
|
||||
# 规则 3:**kwargs 吞掉剩余依赖结果。
|
||||
if var_keyword is not None and leftover_dep_results:
|
||||
# 先合并静态 kwargs,再合并依赖结果(冲突已在上方拒绝)。
|
||||
merged = dict(spec.kwargs)
|
||||
merged.update(injected_kwargs)
|
||||
merged.update(leftover_dep_results)
|
||||
@@ -155,13 +131,10 @@ def build_call_args(
|
||||
return tuple(spec.args), injected_kwargs
|
||||
|
||||
|
||||
def describe_injection(spec: TaskSpec[object]) -> str:
|
||||
"""生成任务参数注入方式的人类可读描述。
|
||||
|
||||
供 ``dry_run`` 使用,在不执行的情况下展示执行计划。
|
||||
"""
|
||||
sig = inspect.signature(spec.fn)
|
||||
# 确定哪些位置参数由 spec.args 填充。
|
||||
def describe_injection(spec: TaskSpec[Any]) -> str:
|
||||
"""生成任务参数注入方式的人类可读描述。供 ``dry_run`` 使用。"""
|
||||
fn = spec.effective_fn
|
||||
sig = _signature(fn)
|
||||
positional_params = [
|
||||
p
|
||||
for p, param in sig.parameters.items()
|
||||
@@ -172,6 +145,7 @@ def describe_injection(spec: TaskSpec[object]) -> str:
|
||||
)
|
||||
]
|
||||
args_filled = set(positional_params[: len(spec.args)])
|
||||
all_deps = set(spec.depends_on) | set(spec.soft_depends_on)
|
||||
parts = []
|
||||
for pname, param in sig.parameters.items():
|
||||
if pname in args_filled:
|
||||
@@ -179,8 +153,9 @@ def describe_injection(spec: TaskSpec[object]) -> str:
|
||||
parts.append(f"{pname}={spec.args[idx]!r}")
|
||||
elif _is_context_annotation(param.annotation):
|
||||
parts.append(f"{pname}=<Context>")
|
||||
elif pname in spec.depends_on:
|
||||
parts.append(f"{pname}=<result:{pname}>")
|
||||
elif pname in all_deps:
|
||||
tag = "soft" if pname in spec.soft_depends_on else "dep"
|
||||
parts.append(f"{pname}=<{tag}:{pname}>")
|
||||
elif pname in spec.kwargs:
|
||||
parts.append(f"{pname}={spec.kwargs[pname]!r}")
|
||||
elif param.default is not inspect.Parameter.empty:
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Iterable, Optional
|
||||
from typing import Any, Iterable
|
||||
|
||||
|
||||
class PyFlowXError(Exception):
|
||||
@@ -27,7 +27,7 @@ class MissingDependencyError(PyFlowXError):
|
||||
def __init__(self, task: str, dependency: str) -> None:
|
||||
super().__init__(
|
||||
f"Task '{task}' depends on unknown task '{dependency}'. "
|
||||
"Add the dependency before (or together with) this task."
|
||||
+ "Add the dependency before (or together with) this task."
|
||||
)
|
||||
self.task = task
|
||||
self.dependency = dependency
|
||||
@@ -55,12 +55,10 @@ class TaskFailedError(PyFlowXError):
|
||||
task: str,
|
||||
cause: BaseException,
|
||||
attempts: int,
|
||||
layer: Optional[int] = None,
|
||||
layer: int | None = None,
|
||||
) -> None:
|
||||
location = f" (layer {layer})" if layer is not None else ""
|
||||
super().__init__(
|
||||
f"Task '{task}' failed after {attempts} attempt(s){location}: {cause}"
|
||||
)
|
||||
super().__init__(f"Task '{task}' failed after {attempts} attempt(s){location}: {cause}")
|
||||
self.task = task
|
||||
self.cause = cause
|
||||
self.attempts = attempts
|
||||
@@ -87,6 +85,6 @@ class InjectionError(PyFlowXError):
|
||||
class StorageError(PyFlowXError):
|
||||
"""状态后端在持久化失败时抛出。"""
|
||||
|
||||
def __init__(self, detail: str, cause: Optional[BaseException] = None) -> None:
|
||||
def __init__(self, detail: str, cause: BaseException | None = None) -> None:
|
||||
super().__init__(f"State storage error: {detail}")
|
||||
self.cause: Any = cause
|
||||
|
||||
@@ -1,58 +0,0 @@
|
||||
"""Example 3: async aggregation with static args and Context injection.
|
||||
|
||||
Shows:
|
||||
* async task functions executed with strategy="async".
|
||||
* static positional args (TaskSpec.args) for parameterised tasks.
|
||||
* Context annotation to receive the full upstream result mapping.
|
||||
* on_event callback for real-time progress.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from typing import Any, Dict, List
|
||||
|
||||
import pyflowx as px
|
||||
|
||||
|
||||
async def fetch_user(uid: int) -> dict:
|
||||
await asyncio.sleep(0.2)
|
||||
return {"id": uid, "name": f"User{uid}"}
|
||||
|
||||
|
||||
async def fetch_posts(uid: int) -> List[int]:
|
||||
await asyncio.sleep(0.2)
|
||||
return [uid, uid + 1]
|
||||
|
||||
|
||||
# Context annotation → receives the full mapping of upstream results.
|
||||
def aggregate(ctx: px.Context) -> Dict[str, Any]:
|
||||
return dict(ctx)
|
||||
|
||||
|
||||
def main() -> None:
|
||||
graph = px.Graph.from_specs(
|
||||
[
|
||||
# Static positional args parameterise the same function twice.
|
||||
px.TaskSpec("fetch_user", fetch_user, args=(1,)),
|
||||
px.TaskSpec("fetch_posts", fetch_posts, args=(1,)),
|
||||
px.TaskSpec("aggregate", aggregate, ("fetch_user", "fetch_posts")),
|
||||
]
|
||||
)
|
||||
|
||||
print("=== Dry run ===")
|
||||
px.run(graph, strategy="async", dry_run=True)
|
||||
|
||||
events: List[px.TaskEvent] = []
|
||||
print("\n=== Async execution ===")
|
||||
report = px.run(graph, strategy="async", on_event=events.append)
|
||||
|
||||
for ev in events:
|
||||
print(f" event: {ev.task} -> {ev.status.value}")
|
||||
|
||||
print(f"\naggregate = {report['aggregate']}")
|
||||
print(report.describe())
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,81 +0,0 @@
|
||||
"""Example 1: ETL pipeline (sequential strategy).
|
||||
|
||||
Demonstrates the core PyFlowX workflow:
|
||||
* Define tasks as plain functions.
|
||||
* Declare the DAG with a list of TaskSpec.
|
||||
* Parameter names == dependency names → automatic context injection,
|
||||
no wrappers needed (contrast with flowweaver's get_task_result boilerplate).
|
||||
* dry_run to preview, then execute and read typed results from RunReport.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import List
|
||||
|
||||
import pyflowx as px
|
||||
|
||||
# --- task functions: pure, testable, no framework coupling ------------- #
|
||||
|
||||
|
||||
def extract_customers() -> List[dict]:
|
||||
return [
|
||||
{"id": "C001", "name": "Alice"},
|
||||
{"id": "C002", "name": "Bob"},
|
||||
]
|
||||
|
||||
|
||||
def extract_orders() -> List[dict]:
|
||||
return [
|
||||
{"id": "O001", "customer_id": "C001", "amount": 150.0},
|
||||
{"id": "O002", "customer_id": "C002", "amount": 200.5},
|
||||
]
|
||||
|
||||
|
||||
# Parameter names match dependency names → automatic injection.
|
||||
def transform(
|
||||
extract_customers: List[dict],
|
||||
extract_orders: List[dict],
|
||||
) -> List[dict]:
|
||||
cmap = {c["id"]: c for c in extract_customers}
|
||||
return [
|
||||
{**o, "customer_name": cmap[o["customer_id"]]["name"]}
|
||||
for o in extract_orders
|
||||
if o["customer_id"] in cmap
|
||||
]
|
||||
|
||||
|
||||
def load(transform: List[dict]) -> int:
|
||||
print(f" loaded {len(transform)} records")
|
||||
return len(transform)
|
||||
|
||||
|
||||
def main() -> None:
|
||||
graph = px.Graph.from_specs(
|
||||
[
|
||||
px.TaskSpec("extract_customers", extract_customers, tags=("extract",)),
|
||||
px.TaskSpec("extract_orders", extract_orders, tags=("extract",)),
|
||||
px.TaskSpec(
|
||||
"transform",
|
||||
transform,
|
||||
("extract_customers", "extract_orders"),
|
||||
tags=("transform",),
|
||||
),
|
||||
px.TaskSpec("load", load, ("transform",), retries=1, tags=("load",)),
|
||||
]
|
||||
)
|
||||
|
||||
print("=== Execution plan ===")
|
||||
print(graph.describe())
|
||||
|
||||
print("\n=== Dry run (no execution) ===")
|
||||
px.run(graph, strategy="sequential", dry_run=True)
|
||||
|
||||
print("\n=== Sequential execution ===")
|
||||
report = px.run(graph, strategy="sequential")
|
||||
print(report.describe())
|
||||
print(f"\nload result = {report['load']}")
|
||||
print(f"summary = {report.summary()}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,59 +0,0 @@
|
||||
"""Example 2: parallel execution (thread strategy).
|
||||
|
||||
Same DAG run with sequential vs. thread strategy to show layer-internal
|
||||
parallelism. Tasks within a layer run concurrently; layers are barriers.
|
||||
|
||||
Layer 1: [fetch_a, fetch_b] (parallel)
|
||||
Layer 2: [merge] (waits for both)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import time
|
||||
|
||||
import pyflowx as px
|
||||
|
||||
|
||||
def fetch_a() -> str:
|
||||
time.sleep(0.5)
|
||||
return "a"
|
||||
|
||||
|
||||
def fetch_b() -> str:
|
||||
time.sleep(0.5)
|
||||
return "b"
|
||||
|
||||
|
||||
def merge(fetch_a: str, fetch_b: str) -> str:
|
||||
return fetch_a + fetch_b
|
||||
|
||||
|
||||
def main() -> None:
|
||||
graph = px.Graph.from_specs(
|
||||
[
|
||||
px.TaskSpec("fetch_a", fetch_a),
|
||||
px.TaskSpec("fetch_b", fetch_b),
|
||||
px.TaskSpec("merge", merge, ("fetch_a", "fetch_b")),
|
||||
]
|
||||
)
|
||||
|
||||
print("=== Mermaid diagram ===")
|
||||
print(graph.to_mermaid("LR"))
|
||||
|
||||
print("\n=== Sequential (expect ~1.0s) ===")
|
||||
start = time.time()
|
||||
report_seq = px.run(graph, strategy="sequential")
|
||||
t_seq = time.time() - start
|
||||
print(f" result={report_seq['merge']} time={t_seq:.2f}s")
|
||||
|
||||
print("\n=== Threaded (expect ~0.5s) ===")
|
||||
start = time.time()
|
||||
report_thr = px.run(graph, strategy="thread", max_workers=2)
|
||||
t_thr = time.time() - start
|
||||
print(f" result={report_thr['merge']} time={t_thr:.2f}s")
|
||||
|
||||
print(f"\nspeedup = {t_seq / t_thr:.2f}x")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
+735
-271
File diff suppressed because it is too large
Load Diff
+422
-126
@@ -1,31 +1,141 @@
|
||||
"""DAG 构建、校验、分层与可视化。
|
||||
|
||||
使用标准库的 :mod:`graphlib`(3.9+)或 :mod:`graphlib_backport`(3.8)
|
||||
进行拓扑排序。图以增量方式构建并即时校验,使配置错误在构建时(而非
|
||||
执行时)快速失败。
|
||||
进行拓扑排序。图以增量方式构建并即时校验,使配置错误在构建时(而非执行时)快速失败。
|
||||
|
||||
支持:
|
||||
* 图级默认值 :class:`GraphDefaults`,TaskSpec 字段为 ``None`` 时回退。
|
||||
* :meth:`Graph.map` 工厂批量生成 fan-out 任务。
|
||||
* 字符串引用与 :func:`compose` 编程式组合多个图。
|
||||
* 软依赖:仅用于上下文注入,不参与拓扑分层。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
__all__ = [
|
||||
"Graph",
|
||||
"GraphDefaults",
|
||||
]
|
||||
|
||||
import inspect
|
||||
import sys
|
||||
from typing import Dict, Iterable, List, Mapping, Sequence, Set, Tuple
|
||||
from dataclasses import dataclass, field, replace
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable, Iterable, Mapping, Sequence
|
||||
|
||||
from .errors import CycleError, DuplicateTaskError, MissingDependencyError
|
||||
from .task import TaskSpec
|
||||
from .task import Context, RetryPolicy, TaskSpec
|
||||
|
||||
# graphlib 自 3.9 起进入标准库;3.8 回退到 backport。
|
||||
if sys.version_info >= (3, 9): # pragma: no cover
|
||||
import graphlib
|
||||
import graphlib # pyright: ignore[reportUnreachable]
|
||||
|
||||
_TopologicalSorter = graphlib.TopologicalSorter
|
||||
else: # pragma: no cover
|
||||
import graphlib # type: ignore[import-untyped] # pragma: no cover
|
||||
import graphlib # type: ignore[import-untyped]
|
||||
|
||||
_TopologicalSorter = graphlib.TopologicalSorter # pragma: no cover
|
||||
|
||||
|
||||
@dataclass
class GraphDefaults:
    """Graph-wide fallback settings for tasks that leave a field unset.

    As described by the original author, only the optional task settings
    (retry / timeout / strategy / env / cwd / tags / priority /
    continue_on_error / concurrency_key) fall back to these values; the
    mandatory ones (name / fn / cmd) never do.
    """

    # Fallback retry policy; ``None`` means the graph supplies none.
    retry: RetryPolicy | None = None
    # Fallback timeout; ``None`` means the graph supplies none.
    timeout: float | None = None
    # Fallback execution strategy name; ``None`` means the graph supplies none.
    strategy: str | None = None
    # Fallback tags; the empty tuple means the graph supplies none.
    tags: tuple[str, ...] = ()
    # Fallback environment mapping; ``None`` means the graph supplies none.
    env: Mapping[str, str] | None = None
    cwd: Any = None  # Path | None
    # Fallback priority; 0 is treated as "not set".
    priority: int = 0
    # When true, tasks that did not opt in are also marked continue_on_error.
    continue_on_error: bool = False
    # Fallback concurrency key; ``None`` means the graph supplies none.
    concurrency_key: str | None = None
    # When true, tasks that did not opt in are also marked verbose.
    verbose: bool = False
|
||||
|
||||
|
||||
def _prune_deps(spec: TaskSpec[Any], keep: Callable[[str], bool]) -> TaskSpec[Any]:
    """Return a copy of ``spec`` whose dependencies are filtered by ``keep``.

    Both ``depends_on`` and ``soft_depends_on`` are rebuilt as tuples holding
    only the names for which ``keep(name)`` is truthy; order is preserved and
    ``spec`` itself is left untouched.
    """
    hard = tuple(filter(keep, spec.depends_on))
    soft = tuple(filter(keep, spec.soft_depends_on))
    return replace(spec, depends_on=hard, soft_depends_on=soft)
|
||||
|
||||
|
||||
def _make_namespaced_fn(orig_fn: Any, ns: str, dep_names: set[str]) -> Any:
    """Wrap ``orig_fn`` so it can be called with ``ns:``-prefixed dependency names.

    After a subgraph is merged under a namespace, its internal dependency
    names carry a prefix (e.g. ``build:extract``), and such names cannot be
    Python parameter names. The wrapper therefore takes everything through
    ``**kwargs``, strips the prefix, and calls ``orig_fn`` with the original
    names.

    Only values that ``orig_fn`` can actually receive are forwarded: names
    matching one of its keyword-capable parameters, or everything when it
    declares ``**kwargs``. Consequences:

    * a dependency declared purely for ordering (no matching parameter) is
      dropped instead of raising ``TypeError``;
    * un-prefixed keys (static kwargs, dependencies on tasks outside the
      subgraph) reach ``orig_fn`` instead of being discarded;
    * when a prefixed and an un-prefixed key map to the same name, the
      prefixed (dependency) value wins.

    A parameter counts as the context parameter when its annotation is the
    ``Context`` object, the string ``"Context"``, or a string ending in
    ``".Context"``. The first such parameter receives a dict of dependency
    results keyed by the un-prefixed names.

    Parameters
    ----------
    orig_fn:
        The task callable to wrap; may be ``None``.
    ns:
        Namespace; the prefix is ``f"{ns}:"``.
    dep_names:
        Un-prefixed names of the dependencies that live inside the subgraph.

    Returns
    -------
    ``orig_fn`` unchanged when it is ``None``, when ``dep_names`` is empty, or
    when its signature cannot be introspected; otherwise the wrapper, whose
    ``__signature__`` advertises ``(**kwargs)`` or ``(ctx: Context, **kwargs)``.
    """
    if not dep_names or orig_fn is None:
        return orig_fn
    try:
        orig_sig = inspect.signature(orig_fn)
    except (TypeError, ValueError):
        return orig_fn

    prefix = f"{ns}:"
    # prefixed dependency name -> original parameter name
    name_map: dict[str, str] = {f"{prefix}{orig}": orig for orig in dep_names}

    keyword_kinds = (
        inspect.Parameter.POSITIONAL_OR_KEYWORD,
        inspect.Parameter.KEYWORD_ONLY,
    )
    context_param_name: str | None = None
    accepted: set[str] = set()  # parameters that may be passed by keyword
    takes_var_keyword = False
    for p in orig_sig.parameters.values():
        ann = p.annotation
        # Un-annotated parameters can never be the context parameter.
        if context_param_name is None and ann is not inspect.Parameter.empty:
            if isinstance(ann, str):
                is_context = ann == "Context" or ann.endswith(".Context")
            else:
                is_context = ann is Context
            if is_context:
                context_param_name = p.name
                continue
        if p.kind is inspect.Parameter.VAR_KEYWORD:
            takes_var_keyword = True
        elif p.kind in keyword_kinds:
            accepted.add(p.name)

    def _translate(received: Mapping[str, Any]) -> dict[str, Any]:
        """Map injected kwargs to the keyword arguments ``orig_fn`` accepts."""
        plain: dict[str, Any] = {}
        renamed: dict[str, Any] = {}
        for key, value in received.items():
            if key.startswith(prefix):
                renamed[key[len(prefix) :]] = value
            else:
                plain[key] = value
        # Dependency results take precedence over same-named plain values.
        plain.update(renamed)
        if takes_var_keyword:
            return plain
        return {key: value for key, value in plain.items() if key in accepted}

    if context_param_name is not None:

        def wrapper(ctx: Any = None, **kwargs: Any) -> Any:
            # ctx is keyed by prefixed dependency names; map back to originals.
            orig_ctx: dict[str, Any] = {name_map.get(k, k): v for k, v in (ctx or {}).items()}
            # Prefixed dependencies that arrived as kwargs belong in the context too.
            for k, v in kwargs.items():
                if k in name_map:
                    orig_ctx[name_map[k]] = v
            call_kwargs = _translate(kwargs)
            call_kwargs[context_param_name] = orig_ctx
            return orig_fn(**call_kwargs)

        ctx_param = inspect.Parameter("ctx", inspect.Parameter.POSITIONAL_OR_KEYWORD, annotation=Context)
        kw_param = inspect.Parameter("kwargs", inspect.Parameter.VAR_KEYWORD)
        wrapper.__signature__ = inspect.Signature(  # type: ignore[attr-defined]
            parameters=[ctx_param, kw_param],
            return_annotation=orig_sig.return_annotation,
        )
    else:

        def wrapper(**kwargs: Any) -> Any:  # type: ignore[no-redef]
            return orig_fn(**_translate(kwargs))

        kw_param = inspect.Parameter("kwargs", inspect.Parameter.VAR_KEYWORD)
        wrapper.__signature__ = inspect.Signature(  # type: ignore[attr-defined]
            parameters=[kw_param],
            return_annotation=orig_sig.return_annotation,
        )

    wrapper.__name__ = f"{ns}_{getattr(orig_fn, '__name__', 'fn')}"
    wrapper.__doc__ = getattr(orig_fn, "__doc__", None)
    return wrapper
|
||||
|
||||
|
||||
@dataclass
|
||||
class Graph:
|
||||
"""校验后不可变的有向无环任务图。
|
||||
"""校验后的有向无环任务图。
|
||||
|
||||
通过添加 :class:`~pyflowx.task.TaskSpec` 实例构建。每次 ``add`` 都
|
||||
执行即时校验(重名、缺失依赖),:meth:`validate` / :meth:`layers`
|
||||
@@ -35,69 +145,188 @@ class Graph:
|
||||
这使图可安全重复运行并在线程间共享。
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self._specs: Dict[str, TaskSpec[object]] = {}
|
||||
# 任务 -> 其直接依赖(前驱)。
|
||||
self._deps: Dict[str, Tuple[str, ...]] = {}
|
||||
specs: dict[str, TaskSpec[Any]] = field(default_factory=dict)
|
||||
deps: dict[str, tuple[str, ...]] = field(default_factory=dict)
|
||||
defaults: GraphDefaults = field(default_factory=GraphDefaults)
|
||||
namespace: str | None = None
|
||||
|
||||
# 待解析的字符串引用列表(由 GraphComposer 消费);为空表示无引用。
|
||||
_pending_refs: list[str] = field(default_factory=list)
|
||||
|
||||
# resolved_spec 缓存:避免执行期每个任务多次重复 dataclasses.replace 判断。
|
||||
# 在 specs / defaults 变更时失效。
|
||||
_resolved_cache: dict[str, TaskSpec[Any]] = field(default_factory=dict)
|
||||
|
||||
# ------------------------------------------------------------------ #
|
||||
# 构建
|
||||
# ------------------------------------------------------------------ #
|
||||
def add(self, spec: TaskSpec[Any]) -> Graph:
    """Register one task spec and validate the graph's references at once.

    The operation is atomic with respect to reference validation: if the
    check fails, the spec registered by this call is removed again before
    the error propagates. The graph therefore stays usable, and the same
    spec can be added again once its dependencies exist.

    Parameters
    ----------
    spec:
        The task to register.

    Returns
    -------
    ``self``, so calls can be chained.

    Raises
    ------
    Whatever ``_register`` raises for a duplicate name (nothing is modified
    in that case) and whatever ``_validate_references`` raises for an
    unknown dependency.
    """
    self._register(spec)
    try:
        self._validate_references()
    except Exception:
        # Undo this registration so one bad spec does not poison the graph.
        self.specs.pop(spec.name, None)
        self.deps.pop(spec.name, None)
        self._resolved_cache.clear()
        raise
    return self
|
||||
|
||||
def chain(self, *specs: TaskSpec[Any]) -> Graph:
    """Register ``specs`` in order, making each one depend on its predecessor.

    ``chain(a, b, c)`` registers ``a`` as given, ``b`` depending on ``a``,
    and ``c`` depending on ``b``. The predecessor's name is placed in front
    of any ``depends_on`` entries the spec already has, unless it is already
    listed there. Every spec is registered through :meth:`add`, so it is
    validated immediately.

    Returns ``self`` so calls can be chained.

    Examples
    --------
    >>> graph = px.Graph().chain(extract, transform, load)
    """
    predecessor: str | None = None
    for item in specs:
        linked = item
        if predecessor is not None:
            if predecessor in item.depends_on:
                wired = item.depends_on
            else:
                # The implicit link goes first, ahead of the explicit dependencies.
                wired = (predecessor, *item.depends_on)
            linked = replace(item, depends_on=wired)
        self.add(linked)
        predecessor = linked.name
    return self
|
||||
|
||||
def _register(self, spec: TaskSpec[Any]) -> None:
    """Store ``spec`` under its name without validating dependencies.

    Raises ``DuplicateTaskError`` when the name is already taken. On success
    the resolved-spec cache is emptied, since cached entries may be stale.
    """
    key = spec.name
    if key in self.specs:
        raise DuplicateTaskError(key)
    self.specs[key] = spec
    # Only hard dependencies feed the topological order; soft dependencies
    # are used for injection alone and do not affect layering.
    self.deps[key] = spec.depends_on
    self._resolved_cache.clear()
|
||||
|
||||
@classmethod
def from_specs(
    cls,
    specs: Iterable[TaskSpec[Any] | str],
    defaults: GraphDefaults | None = None,
    *,
    namespace: str | None = None,
) -> Graph:
    """Build a graph from an iterable of task specs and string references.

    All entries are collected first and validated afterwards, so a task may
    depend on one that appears later in ``specs``. String entries are not
    resolved here; they are stored on the graph as pending references
    (per the original documentation, for ``compose`` / ``GraphComposer``
    to expand).

    Parameters
    ----------
    specs:
        ``TaskSpec`` objects and/or string references.
    defaults:
        Graph-level defaults; a fresh ``GraphDefaults()`` is used when this
        is ``None`` (or otherwise falsy).
    namespace:
        Optional namespace recorded on the graph; ``add_subgraph`` uses it
        as the prefix when none is passed explicitly.

    Raises
    ------
    TypeError
        If an entry is neither a ``TaskSpec`` nor a ``str``.
    """
    graph = cls(defaults=defaults or GraphDefaults(), namespace=namespace)
    refs: list[str] = []
    for entry in specs:
        if isinstance(entry, str):
            refs.append(entry)
            continue
        if not isinstance(entry, TaskSpec):
            raise TypeError(f"from_specs 只接受 TaskSpec 或 str,收到: {type(entry)}")
        graph._register(entry)

    if refs:
        graph._pending_refs = refs

    graph._validate_references()
    graph.validate()
    return graph
|
||||
|
||||
@classmethod
def from_yaml(
    cls,
    path: str | Path,
    variables: Mapping[str, Any] | None = None,
) -> Graph:
    """Build a task graph from a YAML file.

    This is a thin wrapper that delegates to ``pyflowx.yaml_loader.load_yaml``.
    According to the original documentation, the schema is modelled on
    GitHub Actions (jobs / needs / strategy.matrix / if) and the loader
    raises ``YamlLoadError`` for a missing file, malformed YAML, a schema
    violation, or a dependency cycle. See that module for details.

    Parameters
    ----------
    path:
        Location of the YAML file.
    variables:
        Runtime values substituted for ``${VAR}`` placeholders.

    Returns
    -------
    The graph produced by the loader.
    """
    # Imported lazily, at call time, as in the original implementation.
    from .yaml_loader import load_yaml

    graph = load_yaml(path, variables=variables)
    return graph
|
||||
|
||||
def add_subgraph(self, sub: Graph, *, namespace: str | None = None) -> Graph:
    """Merge ``sub`` into this graph, prefixing its task names with a namespace.

    Every task of ``sub`` is registered here as ``f"{ns}:{original_name}"``.
    Dependencies on other tasks of ``sub`` are renamed the same way, and the
    task callable is wrapped so it still receives them under their original
    names. Dependencies on tasks that are not in ``sub`` keep their names.
    After all tasks are registered, references and cycles are validated.

    Parameters
    ----------
    sub:
        The graph to merge in.
    namespace:
        Prefix to use; falls back to ``sub.namespace`` when falsy.

    Returns
    -------
    ``self``, so calls can be chained.

    Raises
    ------
    ValueError
        If neither ``namespace`` nor ``sub.namespace`` provides a prefix.
    """
    ns = namespace or sub.namespace
    if not ns:
        raise ValueError("add_subgraph 需要 namespace 或子图自带 namespace")

    def _qualified(task_name: str) -> str:
        # Only names owned by the subgraph get the prefix.
        if task_name in sub.specs:
            return f"{ns}:{task_name}"
        return task_name

    members = set(sub.specs.keys())
    for original in sub.specs.values():
        # Dependencies that point inside the subgraph are the ones renamed,
        # so they are the ones the callable wrapper has to map back.
        inner = (set(original.depends_on) | set(original.soft_depends_on)) & members
        wrapped_fn = original.fn
        if original.fn:
            wrapped_fn = _make_namespaced_fn(original.fn, ns, inner)
        merged = replace(
            original,
            name=_qualified(original.name),
            fn=wrapped_fn,
            depends_on=tuple(_qualified(dep) for dep in original.depends_on),
            soft_depends_on=tuple(_qualified(dep) for dep in original.soft_depends_on),
        )
        self._register(merged)

    self._validate_references()
    self.validate()
    return self
|
||||
|
||||
# ------------------------------------------------------------------ #
|
||||
# 校验
|
||||
# ------------------------------------------------------------------ #
|
||||
def _validate_references(self) -> None:
    """Ensure every dependency name refers to a registered task.

    Hard and soft dependencies are both checked, hard ones first for each
    task. The first unknown name raises ``MissingDependencyError`` carrying
    the task name and the missing dependency.
    """
    known = self.specs
    for task_name, task in known.items():
        for dep in (*task.depends_on, *task.soft_depends_on):
            if dep not in known:
                raise MissingDependencyError(task_name, dep)
|
||||
|
||||
def validate(self) -> None:
    """Run the full DAG validation.

    Dependency names are checked first; then the hard-dependency map is fed
    to the topological sorter. A cycle raises :class:`CycleError` listing
    the offending nodes, chained to the underlying ``graphlib`` error.
    """
    self._validate_references()
    sorter = _TopologicalSorter(self.deps)
    try:
        # prepare() is called only for its cycle check; no ordering is used.
        sorter.prepare()
    except graphlib.CycleError as exc:  # type: ignore[name-defined]
        # The second argument, when present, is the list of nodes in the cycle.
        members: Sequence[str] = []
        if len(exc.args) > 1:
            members = exc.args[1]
        raise CycleError(list(members)) from exc
|
||||
|
||||
@@ -105,37 +334,81 @@ class Graph:
|
||||
# 内省
|
||||
# ------------------------------------------------------------------ #
|
||||
@property
def names(self) -> list[str]:
    """Names of all registered tasks, in insertion order, as a new list."""
    return [*self.specs.keys()]
|
||||
|
||||
def spec(self, name: str) -> TaskSpec[Any]:
    """Look up the spec registered as ``name``; raises ``KeyError`` if absent."""
    found = self.specs[name]
    return found
|
||||
|
||||
def dependencies(self, name: str) -> Tuple[str, ...]:
|
||||
"""``name`` 的直接前驱。"""
|
||||
return self._deps[name]
|
||||
def resolved_spec(self, name: str) -> TaskSpec[Any]:
    """Return the spec for ``name`` with graph-level defaults applied.

    A default is applied only when the task still holds its "unset" value
    and the graph supplies a meaningful one:

    * ``retry``: task equals ``RetryPolicy()`` and the default is not ``None``;
    * ``timeout`` / ``strategy`` / ``env`` / ``cwd`` / ``concurrency_key``:
      task value is ``None`` and the default is not ``None``;
    * ``priority``: task value is ``0`` and the default is non-zero;
    * ``continue_on_error`` / ``verbose``: task value is falsy and the
      default is truthy (the field is then set to ``True``);
    * ``tags``: task has none and the default is non-empty.

    The stored spec is never modified: a copy is made with
    :func:`dataclasses.replace` when at least one default applies, otherwise
    the stored spec itself is returned. Results are cached per name.

    Raises ``KeyError`` if ``name`` is not registered and not cached.
    """
    hit = self._resolved_cache.get(name)
    if hit is not None:
        return hit

    task = self.specs[name]
    fallback = self.defaults
    # Rows: (field, task leaves it unset, graph default is usable, value to apply)
    rules = (
        ("retry", task.retry == RetryPolicy(), fallback.retry is not None, fallback.retry),
        ("timeout", task.timeout is None, fallback.timeout is not None, fallback.timeout),
        ("strategy", task.strategy is None, fallback.strategy is not None, fallback.strategy),
        ("env", task.env is None, fallback.env is not None, fallback.env),
        ("cwd", task.cwd is None, fallback.cwd is not None, fallback.cwd),
        ("priority", task.priority == 0, fallback.priority != 0, fallback.priority),
        ("continue_on_error", not task.continue_on_error, fallback.continue_on_error, True),
        (
            "concurrency_key",
            task.concurrency_key is None,
            fallback.concurrency_key is not None,
            fallback.concurrency_key,
        ),
        ("verbose", not task.verbose, fallback.verbose, True),
        ("tags", not task.tags, fallback.tags, fallback.tags),
    )
    overrides: dict[str, Any] = {
        attr: value for attr, unset, usable, value in rules if unset and usable
    }

    resolved = replace(task, **overrides) if overrides else task
    self._resolved_cache[name] = resolved
    return resolved
|
||||
|
||||
def dependencies(self, name: str) -> tuple[str, ...]:
|
||||
"""``name`` 的直接硬依赖前驱。"""
|
||||
return self.deps[name]
|
||||
|
||||
def all_deps(self, name: str) -> tuple[str, ...]:
    """Return the hard dependencies of ``name`` followed by its soft dependencies.

    Raises ``KeyError`` when ``name`` is not a registered task.
    """
    task = self.specs[name]
    return (*task.depends_on, *task.soft_depends_on)
|
||||
|
||||
def all_specs(self) -> Mapping[str, TaskSpec[Any]]:
|
||||
"""name -> spec 的只读视图。"""
|
||||
return self._specs
|
||||
return self.specs
|
||||
|
||||
def layers(self) -> List[List[str]]:
|
||||
def layers(self) -> list[list[str]]:
|
||||
"""将任务分组为可并行执行的层(Kahn 算法)。
|
||||
|
||||
同层任务无相互依赖,可并发执行。层按执行顺序返回。
|
||||
同层任务无相互硬依赖,可并发执行。软依赖不参与分层。
|
||||
层按执行顺序返回。图有环时抛出 :class:`CycleError`。
|
||||
|
||||
图有环时抛出 :class:`~pyflowx.errors.CycleError`。
|
||||
.. note::
|
||||
本方法假定图已通过 :meth:`validate` 校验(由 :func:`pyflowx.run`
|
||||
在入口统一执行一次)。若直接调用本方法,需自行先校验。
|
||||
"""
|
||||
self.validate()
|
||||
sorter = _TopologicalSorter(self._deps)
|
||||
result: List[List[str]] = []
|
||||
# ``get_ready`` + ``done`` 每次给出一层,正好是并行执行所需的分组。
|
||||
sorter = _TopologicalSorter(self.deps)
|
||||
result: list[list[str]] = []
|
||||
sorter.prepare()
|
||||
while sorter.is_active():
|
||||
ready = list(sorter.get_ready())
|
||||
# 排序以保证确定性、可复现的执行计划。
|
||||
ready.sort()
|
||||
result.append(ready)
|
||||
for node in ready:
|
||||
@@ -145,81 +418,104 @@ class Graph:
|
||||
# ------------------------------------------------------------------ #
|
||||
# 子图 / 标签过滤
|
||||
# ------------------------------------------------------------------ #
|
||||
def subgraph(self, tags: Iterable[str]) -> "Graph":
|
||||
"""返回仅包含匹配任意标签的任务的新图。
|
||||
def subgraph(self, tags: Iterable[str]) -> Graph:
|
||||
"""返回仅包含匹配任意标签的任务的新图。依赖边被修剪。"""
|
||||
wanted: set[str] = set(tags)
|
||||
|
||||
依赖会被修剪,仅保留被保留任务之间的边;指向被丢弃任务的边
|
||||
会被移除(被保留的任务不再等待它们)。用于调试时运行大型
|
||||
DAG 的切片。
|
||||
"""
|
||||
wanted: Set[str] = set(tags)
|
||||
kept: List[TaskSpec[object]] = []
|
||||
for spec in self._specs.values():
|
||||
if wanted & set(spec.tags):
|
||||
pruned_deps = tuple(
|
||||
d
|
||||
for d in spec.depends_on
|
||||
if d in self._specs and (wanted & set(self._specs[d].tags))
|
||||
)
|
||||
kept.append(
|
||||
TaskSpec(
|
||||
name=spec.name,
|
||||
fn=spec.fn,
|
||||
depends_on=pruned_deps,
|
||||
args=spec.args,
|
||||
kwargs=spec.kwargs,
|
||||
retries=spec.retries,
|
||||
timeout=spec.timeout,
|
||||
tags=spec.tags,
|
||||
)
|
||||
)
|
||||
return Graph.from_specs(kept)
|
||||
def _dep_kept(dep: str) -> bool:
|
||||
return dep in self.specs and bool(wanted & set(self.specs[dep].tags))
|
||||
|
||||
def subgraph_by_names(self, names: Iterable[str]) -> "Graph":
|
||||
kept: list[TaskSpec[Any]] = [
|
||||
_prune_deps(spec, _dep_kept) for spec in self.specs.values() if wanted & set(spec.tags)
|
||||
]
|
||||
return Graph.from_specs(kept, defaults=self.defaults)
|
||||
|
||||
def subgraph_by_names(self, names: Iterable[str]) -> Graph:
|
||||
"""返回限定于 ``names`` 的新图(边已修剪)。"""
|
||||
wanted: Set[str] = set(names)
|
||||
wanted: set[str] = set(names)
|
||||
for n in wanted:
|
||||
if n not in self._specs:
|
||||
if n not in self.specs:
|
||||
raise KeyError(f"Unknown task name: {n!r}")
|
||||
kept: List[TaskSpec[object]] = []
|
||||
for spec in self._specs.values():
|
||||
if spec.name in wanted:
|
||||
pruned_deps = tuple(d for d in spec.depends_on if d in wanted)
|
||||
kept.append(
|
||||
TaskSpec(
|
||||
name=spec.name,
|
||||
fn=spec.fn,
|
||||
depends_on=pruned_deps,
|
||||
args=spec.args,
|
||||
kwargs=spec.kwargs,
|
||||
retries=spec.retries,
|
||||
timeout=spec.timeout,
|
||||
tags=spec.tags,
|
||||
)
|
||||
)
|
||||
return Graph.from_specs(kept)
|
||||
kept: list[TaskSpec[Any]] = [
|
||||
_prune_deps(spec, lambda d: d in wanted) for spec in self.specs.values() if spec.name in wanted
|
||||
]
|
||||
return Graph.from_specs(kept, defaults=self.defaults)
|
||||
|
||||
# ------------------------------------------------------------------ #
|
||||
# Fan-out / map-reduce
|
||||
# ------------------------------------------------------------------ #
|
||||
def map(
    self,
    name_fn: Callable[[int], str],
    spec: TaskSpec[Any],
    items: Sequence[Any],
    arg_factory: Callable[[Any], tuple[Any, ...]] | None = None,
    depends_on_per: Callable[[int], tuple[str, ...]] | None = None,
) -> list[TaskSpec[Any]]:
    """Create one task per element of ``items`` and add each to the graph.

    Intended for fan-out / map-reduce pipelines: the returned specs can be
    listed as dependencies of a later reduce task.

    Parameters
    ----------
    name_fn:
        Receives the element index ``i`` and returns the task name.
        Names must be unique.
    spec:
        Template spec. Its ``name``, ``args`` and ``depends_on`` are
        overridden in every generated copy.
    items:
        The data to fan out over.
    arg_factory:
        Receives one item and returns the positional-argument tuple for
        that task. When ``None`` the item itself is the only argument.
    depends_on_per:
        Receives the element index ``i`` and returns extra hard
        dependencies, appended after ``spec.depends_on``. When ``None``
        only ``spec.depends_on`` is used.

    Returns
    -------
    list[TaskSpec]
        The generated specs, in item order (already added to the graph).

    Examples
    --------
    >>> fetch_tmpl = px.TaskSpec("", fn=fetch_user)
    >>> specs = graph.map(lambda i: f"fetch_{i}", fetch_tmpl, [1, 2, 3])
    >>> reduce_spec = px.TaskSpec("reduce", fn=reduce_fn, depends_on=tuple(s.name for s in specs))
    """
    created: list[TaskSpec[Any]] = []
    for index, item in enumerate(items):
        task_name = name_fn(index)

        if arg_factory is None:
            positional = (item,)
        else:
            positional = arg_factory(item)

        if depends_on_per is None:
            extra = ()
        else:
            extra = depends_on_per(index)

        task = replace(
            spec,
            name=task_name,
            args=tuple(positional),
            depends_on=tuple(spec.depends_on) + tuple(extra),
        )
        # Register first so a rejected spec never appears in the returned list.
        self.add(task)
        created.append(task)
    return created
|
||||
|
||||
# ------------------------------------------------------------------ #
|
||||
# 可视化
|
||||
# ------------------------------------------------------------------ #
|
||||
def to_mermaid(self, orientation: str = "TD") -> str:
|
||||
"""将 DAG 渲染为 Mermaid ``graph`` 定义字符串。
|
||||
|
||||
无外部依赖;输出可粘贴到 Markdown、由 VS Code 的 Mermaid 预览
|
||||
渲染,或保存为文件。
|
||||
"""
|
||||
"""将 DAG 渲染为 Mermaid ``graph`` 定义字符串。"""
|
||||
valid = {"TD", "TB", "BT", "LR", "RL"}
|
||||
orientation = orientation.upper()
|
||||
if orientation not in valid:
|
||||
raise ValueError(
|
||||
f"Invalid orientation {orientation!r}; expected one of {sorted(valid)}."
|
||||
)
|
||||
lines: List[str] = [f"graph {orientation}"]
|
||||
for name in self._specs:
|
||||
raise ValueError(f"Invalid orientation {orientation!r}; expected one of {sorted(valid)}.")
|
||||
lines: list[str] = [f"graph {orientation}"]
|
||||
for name in self.specs:
|
||||
lines.append(f' {name}["{name}"]')
|
||||
for name, deps in self._deps.items():
|
||||
for name, deps in self.deps.items():
|
||||
for dep in deps:
|
||||
lines.append(f" {dep} --> {name}")
|
||||
# 软依赖用虚线
|
||||
for name, spec in self.specs.items():
|
||||
for dep in spec.soft_depends_on:
|
||||
lines.append(f" {dep} -.-> {name}")
|
||||
return "\n".join(lines) + "\n"
|
||||
|
||||
# ------------------------------------------------------------------ #
|
||||
@@ -227,16 +523,16 @@ class Graph:
|
||||
# ------------------------------------------------------------------ #
|
||||
def describe(self) -> str:
|
||||
"""用于调试的人类可读多行摘要。"""
|
||||
out: List[str] = [f"Graph(tasks={len(self._specs)})"]
|
||||
out: list[str] = [f"Graph(tasks={len(self.specs)})"]
|
||||
for layer_idx, layer in enumerate(self.layers(), 1):
|
||||
out.append(f" Layer {layer_idx}: {layer}")
|
||||
return "\n".join(out)
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"Graph(tasks={len(self._specs)})"
|
||||
return f"Graph(tasks={len(self.specs)})"
|
||||
|
||||
def __len__(self) -> int:
|
||||
return len(self._specs)
|
||||
return len(self.specs)
|
||||
|
||||
def __contains__(self, name: object) -> bool:
|
||||
return name in self._specs
|
||||
def __contains__(self, name: Any) -> bool:
|
||||
return name in self.specs
|
||||
|
||||
@@ -0,0 +1,20 @@
|
||||
"""工具函数模块.
|
||||
|
||||
按类别组织 CLI 工具中可复用的函数, 每个子模块使用 ``@px.register_fn`` 注册函数,
|
||||
供 YAML 任务编排通过 ``fn`` 字段引用.
|
||||
|
||||
子模块
|
||||
------
|
||||
- :mod:`files` —— 文件日期/等级/备份/压缩相关函数
|
||||
- :mod:`dev` —— 开发工具 (ruff/pip/git/envdev/dockercmd) 相关函数
|
||||
- :mod:`bumpversion` —— 版本号管理相关函数
|
||||
- :mod:`media` —— PDF/截图相关函数
|
||||
- :mod:`system` —— LS-DYNA/SSH/打包/清屏/进程终止相关函数
|
||||
- :mod:`llm` —— ModelScope 下载/SGLang 服务相关函数
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from . import bumpversion, dev, files, llm, media, system
|
||||
|
||||
__all__ = ["bumpversion", "dev", "files", "llm", "media", "system"]
|
||||
@@ -0,0 +1,233 @@
|
||||
"""版本号管理模块.
|
||||
|
||||
提供单文件版本号更新 (``bump_file_version``) 与项目级批量版本号同步
|
||||
(``bump_project_version``) 能力. 所有公共函数通过 ``@px.register_fn`` 注册,
|
||||
供 YAML 任务编排引用.
|
||||
|
||||
设计要点
|
||||
--------
|
||||
``bump_project_version`` 采用 "先读取基准、再统一写入" 的两阶段策略:
|
||||
先扫描所有 ``__init__.py`` / ``pyproject.toml`` 文件, 读取各自的版本号,
|
||||
取最大值作为基准版本计算新版本号, 然后把新版本号统一写入所有文件,
|
||||
避免文件间版本号不同步导致的跳号问题.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from typing import Literal
|
||||
|
||||
import pyflowx as px
|
||||
|
||||
__all__ = [
|
||||
"BumpVersionType",
|
||||
"bump_file_version",
|
||||
"bump_project_version",
|
||||
]
|
||||
|
||||
# ============================================================================
|
||||
# 配置
|
||||
# ============================================================================
|
||||
|
||||
# The semantic-version component a bump increments.
BumpVersionType = Literal["patch", "minor", "major"]

# Matches a ``version = "<semver>"`` assignment that starts a line (optionally
# indented). Named groups: major, minor, patch, prerelease, buildmetadata.
# The match may include the preceding newline and indentation, because the
# ``(?:^|\n)\s*`` prefix is part of the pattern.
_PYPROJECT_VERSION_PATTERN = re.compile(
    r'(?:^|\n)\s*version\s*=\s*["\']'
    r"(?P<major>0|[1-9]\d*)\.(?P<minor>0|[1-9]\d*)\.(?P<patch>0|[1-9]\d*)"
    r"(?:-(?P<prerelease>(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?"
    r"(?:\+(?P<buildmetadata>[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?"
    r'["\']',
    re.MULTILINE,
)

# Same as above, but for a ``__version__ = "<semver>"`` assignment.
_INIT_VERSION_PATTERN = re.compile(
    r'(?:^|\n)\s*__version__\s*=\s*["\']'
    r"(?P<major>0|[1-9]\d*)\.(?P<minor>0|[1-9]\d*)\.(?P<patch>0|[1-9]\d*)"
    r"(?:-(?P<prerelease>(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?"
    r"(?:\+(?P<buildmetadata>[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?"
    r'["\']',
    re.MULTILINE,
)

# Directory names whose contents are excluded from the project-wide version scan.
_IGNORE_DIRS = frozenset({".venv", "venv", ".git", "__pycache__", ".tox", "node_modules", "build", "dist", ".eggs"})
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# 私有辅助函数
|
||||
# ============================================================================
|
||||
|
||||
|
||||
def _get_pattern_for_file(file_name: str) -> re.Pattern[str] | None:
    """Return the version regex for ``file_name``, or ``None`` if the file type is unsupported."""
    patterns_by_name = {
        "pyproject.toml": _PYPROJECT_VERSION_PATTERN,
        "__init__.py": _INIT_VERSION_PATTERN,
    }
    return patterns_by_name.get(file_name)
|
||||
|
||||
|
||||
def _calculate_new_version(major: int, minor: int, patch: int, part: BumpVersionType) -> str:
|
||||
"""计算新版本号."""
|
||||
if part == "major":
|
||||
return f"{major + 1}.0.0"
|
||||
if part == "minor":
|
||||
return f"{major}.{minor + 1}.0"
|
||||
return f"{major}.{minor}.{patch + 1}"
|
||||
|
||||
|
||||
def _build_replacement_string(original_match: str, new_version: str, file_name: str) -> str:
|
||||
"""构建替换字符串, 保留原始格式."""
|
||||
quote_char = '"' if '"' in original_match else "'"
|
||||
key = "__version__" if file_name == "__init__.py" else "version"
|
||||
prefix_match = re.match(rf"(\s*{key}\s*=\s*)[\"']", original_match)
|
||||
prefix = prefix_match.group(1) if prefix_match else f"{key} = "
|
||||
return f"{prefix}{quote_char}{new_version}{quote_char}"
|
||||
|
||||
|
||||
def _read_version_tuple(file_path: Path) -> tuple[int, int, int] | None:
    """Read ``(major, minor, patch)`` from ``file_path``.

    Returns ``None`` when the file type is unsupported or no version
    assignment is found. ``OSError`` / ``UnicodeDecodeError`` raised while
    reading are left for the caller to handle.
    """
    pattern = _get_pattern_for_file(file_path.name)
    if pattern is None:
        return None

    found = pattern.search(file_path.read_text(encoding="utf-8"))
    if found is None:
        return None

    major, minor, patch = (int(found.group(part)) for part in ("major", "minor", "patch"))
    return major, minor, patch
|
||||
|
||||
|
||||
def _write_version_to_file(file_path: Path, new_version: str) -> bool:
    """Write ``new_version`` into ``file_path``.

    Returns ``True`` once the file is rewritten, ``False`` when the file
    type is unsupported or no version assignment is found. An ``OSError``
    raised while writing is reported on stdout and re-raised.
    """
    pattern = _get_pattern_for_file(file_path.name)
    if pattern is None:  # pragma: no cover - callers guarantee a supported file
        return False

    original_text = file_path.read_text(encoding="utf-8")
    found = pattern.search(original_text)
    if found is None:  # pragma: no cover - callers validated via _read_version_tuple
        return False

    old_assignment = found.group(0)
    new_assignment = _build_replacement_string(old_assignment, new_version, file_path.name)
    # NOTE(review): str.replace rewrites every textually identical occurrence
    # of the matched assignment, not only the span the regex located.
    updated_text = original_text.replace(old_assignment, new_assignment)

    try:
        file_path.write_text(updated_text, encoding="utf-8")
    except OSError as e:
        print(f"更新文件 {file_path} 版本号时出错: {e}")
        raise

    return True
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# 公共函数
|
||||
# ============================================================================
|
||||
|
||||
|
||||
@px.register_fn
|
||||
def bump_file_version(file_path: Path, part: BumpVersionType = "patch") -> str | None:
    """Bump the version stored in a single file.

    Reads the current version, increments the component named by ``part``
    and writes the result back.

    Parameters
    ----------
    file_path : Path
        File to update (``pyproject.toml`` or ``__init__.py``). A plain
        string path is accepted and converted.
    part : BumpVersionType
        Component to bump: ``patch``, ``minor`` or ``major``.

    Returns
    -------
    str | None
        The new version, or ``None`` when the file has no recognizable
        version or cannot be read.

    Raises
    ------
    OSError
        If writing the updated file fails.
    """
    # Task arguments coming from YAML arrive as plain strings.
    file_path = Path(file_path)

    try:
        version_tuple = _read_version_tuple(file_path)
    except (OSError, UnicodeDecodeError) as e:
        # The documented contract is "None on read failure", not a raised error.
        print(f"读取文件 {file_path} 失败: {e}")
        return None

    if version_tuple is None:
        print(f"文件 {file_path} 中未找到版本号模式")
        return None

    major, minor, patch = version_tuple
    new_version = _calculate_new_version(major, minor, patch, part)

    if not _write_version_to_file(file_path, new_version):  # pragma: no cover - validated by _read_version_tuple
        return None

    return new_version
|
||||
|
||||
|
||||
@px.register_fn
|
||||
def bump_project_version(part: BumpVersionType = "patch", no_tag: bool = False) -> str | None:
    """Synchronize every version file in the project, then commit and tag.

    Scans the current directory for ``__init__.py`` and ``pyproject.toml``
    files, skipping any path that has an ignored directory *below* the
    current directory. The highest version found is the baseline; the
    bumped version is written to every file that carries one. The rewritten
    files are then staged, committed and, unless ``no_tag`` is set, tagged.

    Reading all versions before writing any re-aligns files that have
    drifted apart in a single bump, without skipping version numbers.

    Parameters
    ----------
    part : BumpVersionType
        Component to bump: ``patch``, ``minor`` or ``major``.
    no_tag : bool
        Skip creating the git tag after committing.

    Returns
    -------
    str | None
        The new version, or ``None`` when no candidate file or no version
        is found.

    Raises
    ------
    OSError, UnicodeDecodeError
        If a candidate file cannot be read or written.
    subprocess.CalledProcessError
        If a git command fails.
    """
    cwd = Path.cwd()

    all_files: set[Path] = set()
    for pattern in ("__init__.py", "pyproject.toml"):
        for file in cwd.rglob(pattern):
            # Compare only the parts below cwd. Testing the absolute path
            # would reject every file of a project that itself lives under
            # a directory named e.g. "build" or "dist".
            if _IGNORE_DIRS.isdisjoint(file.relative_to(cwd).parts):
                all_files.add(file)

    if not all_files:
        print("未找到包含版本号的文件")
        return None

    ordered_files = sorted(all_files)

    print(f"找到 {len(all_files)} 个文件需要更新版本号")
    for file in ordered_files:
        print(f" - {file.relative_to(cwd)}")

    # Phase 1: read every version; the maximum is the baseline.
    versions: list[tuple[int, int, int]] = []
    for file in ordered_files:
        v = _read_version_tuple(file)
        if v is not None:
            versions.append(v)

    if not versions:
        print("未能从任何文件读取版本号")
        return None

    major, minor, patch = max(versions)
    new_version = _calculate_new_version(major, minor, patch, part)
    print(f"基准版本: {major}.{minor}.{patch} -> 新版本: {new_version}")

    # Phase 2: write the new version, remembering which files changed.
    updated_files = [file for file in ordered_files if _write_version_to_file(file, new_version)]

    # Phase 3: stage only the rewritten files, so version-less (possibly
    # untracked) __init__.py files are not pulled into the bump commit.
    relative_files = [str(file.relative_to(cwd)) for file in updated_files]
    subprocess.run(["git", "add", *relative_files], check=True)
    subprocess.run(["git", "commit", "-m", f"bump version to {new_version}"], check=True)

    if not no_tag:
        tag_name = f"v{new_version}"
        subprocess.run(["git", "tag", "-a", tag_name, "-m", f"Release {tag_name}"], check=True)
        print(f"已创建标签: {tag_name}")

    return new_version
|
||||
@@ -0,0 +1,823 @@
|
||||
"""开发工具类函数模块.
|
||||
|
||||
聚合自动格式化 (autofmt)、pip 包管理 (piptool)、git 工具 (gittool)、
|
||||
开发环境配置 (envdev)、docker 镜像登录 (dockercmd) 的可复用函数.
|
||||
版本号管理已抽离到 :mod:`pyflowx.ops.bumpversion`. 所有公共函数通过
|
||||
``@px.register_fn`` 注册, 供 YAML 任务编排引用.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import ast
|
||||
import fnmatch
|
||||
import getpass
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from typing import Literal
|
||||
|
||||
import pyflowx as px
|
||||
from pyflowx.conditions import Constants
|
||||
|
||||
__all__ = [
|
||||
"IGNORE_PATTERNS",
|
||||
"PACKAGE_DIR",
|
||||
"REQUIREMENTS_FILE",
|
||||
"_PROTECTED_PACKAGES",
|
||||
"add_docstring",
|
||||
"auto_add_docstrings",
|
||||
"docker_login_tencent",
|
||||
"download_rustup_script",
|
||||
"format_all",
|
||||
"format_with_ruff",
|
||||
"generate_module_docstring",
|
||||
"git_add_commit",
|
||||
"git_init_add_commit",
|
||||
"has_files",
|
||||
"init_sub_dirs",
|
||||
"install_linux_docker",
|
||||
"install_linux_fonts",
|
||||
"install_linux_qt_libs",
|
||||
"install_rust_toolchain",
|
||||
"lint_with_ruff",
|
||||
"not_has_git_repo",
|
||||
"pip_download",
|
||||
"pip_freeze",
|
||||
"pip_reinstall",
|
||||
"pip_uninstall",
|
||||
"setup_conda_mirror",
|
||||
"setup_linux_system_mirror",
|
||||
"setup_python_mirror",
|
||||
"setup_rust_mirror",
|
||||
"sync_pyproject_config",
|
||||
]
|
||||
|
||||
# ============================================================================
|
||||
# autofmt 配置
|
||||
# ============================================================================
|
||||
|
||||
# Path patterns skipped by the autofmt helpers. The list mixes plain
# directory names with glob-style entries ("*.pyc", "*.egg-info").
IGNORE_PATTERNS = [
    "__pycache__",
    "*.pyc",
    "*.pyo",
    ".git",
    ".venv",
    ".idea",
    ".vscode",
    "*.egg-info",
    "dist",
    "build",
    ".pytest_cache",
    ".tox",
    ".mypy_cache",
]
|
||||
|
||||
# ============================================================================
|
||||
# piptool 配置
|
||||
# ============================================================================
|
||||
|
||||
# Directory that ``pip download`` writes fetched distributions into.
PACKAGE_DIR = "packages"
# File that ``pip freeze`` output is written to.
REQUIREMENTS_FILE = "requirements.txt"

# Packages the uninstall / reinstall helpers refuse to remove
# (compared case-insensitively).
_PROTECTED_PACKAGES: frozenset[str] = frozenset(
    {
        "pyflowx",
        "bitool",
    }
)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# autofmt 私有辅助函数
|
||||
# ============================================================================
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# autofmt 函数
|
||||
# ============================================================================
|
||||
|
||||
|
||||
@px.register_fn
|
||||
def format_with_ruff(target: Path, fix: bool = True) -> None:
    """Format code with ``ruff format``.

    Parameters
    ----------
    target : Path
        File or directory to format.
    fix : bool
        When true (default), rewrite files in place. When false, run in
        ``--check`` mode: nothing is modified and ruff exits non-zero if a
        file would be reformatted. This mirrors how ``lint_with_ruff``
        treats ``fix=False``.

    Raises
    ------
    subprocess.CalledProcessError
        If ruff exits with a non-zero status.
    """
    cmd = ["ruff", "format"]
    if not fix:
        # ``ruff format`` has no ``--fix`` flag (that belongs to ``ruff check``);
        # rewriting in place is its default, and ``--check`` is the report-only mode.
        cmd.append("--check")
    cmd.append(str(target))

    subprocess.run(cmd, check=True)
    print(f"ruff format 完成: {target}")
|
||||
|
||||
|
||||
@px.register_fn
|
||||
def lint_with_ruff(target: Path, fix: bool = True) -> None:
    """Lint code with ``ruff check``.

    Parameters
    ----------
    target : Path
        File or directory to check.
    fix : bool
        When true (default), also pass ``--fix --unsafe-fixes`` so ruff
        applies its fixes.

    Raises
    ------
    subprocess.CalledProcessError
        If ruff exits with a non-zero status.
    """
    fix_flags = ["--fix", "--unsafe-fixes"] if fix else []
    subprocess.run(["ruff", "check", str(target), *fix_flags], check=True)
    print(f"ruff check 完成: {target}")
|
||||
|
||||
|
||||
@px.register_fn
|
||||
def add_docstring(file_path: Path, docstring: str) -> bool:
    """Insert a module docstring into a Python file that lacks one.

    The docstring goes after any shebang line and PEP 263 encoding cookie
    and before the rest of the file, followed by one blank line. The rest
    of the file, including its trailing newline, is kept verbatim.

    Parameters
    ----------
    file_path : Path
        Python source file to update.
    docstring : str
        Complete docstring text, including its triple quotes.

    Returns
    -------
    bool
        ``True`` if the docstring was added. ``False`` if the module
        already has one, or the file could not be read, parsed or written
        (the failure is printed).
    """
    try:
        content = file_path.read_text(encoding="utf-8")
        tree = ast.parse(content)

        # Only a leading *string* expression is a docstring. A leading
        # ``...`` or number must not be mistaken for one.
        if ast.get_docstring(tree, clean=False) is not None:
            return False

        lines = content.splitlines(keepends=True)

        # A shebang is only honoured on line 1 and an encoding cookie only
        # on line 1 or 2, so the docstring has to go below both.
        header_len = 0
        if lines and lines[0].startswith("#!"):
            header_len = 1
        if header_len < len(lines):
            candidate = lines[header_len].lstrip()
            if candidate.startswith("#") and ("coding:" in candidate or "coding=" in candidate):
                header_len += 1

        header = "".join(lines[:header_len])
        if header and not header.endswith("\n"):
            header += "\n"
        body = "".join(lines[header_len:])

        doc_text = "\n".join(docstring.splitlines()) + "\n"
        if body:
            # One blank line between the docstring and the existing code.
            doc_text += "\n"

        file_path.write_text(header + doc_text + body, encoding="utf-8")
        print(f"添加 docstring: {file_path}")
        return True

    except (OSError, UnicodeDecodeError, SyntaxError) as e:
        print(f"处理失败: {file_path} - {e}")
        return False
|
||||
|
||||
|
||||
@px.register_fn
|
||||
def generate_module_docstring(file_path: Path) -> str:
    """Build a one-line module docstring from a file's name and parent directory.

    If the lower-cased file stem contains one of the known keywords
    (``cli``, ``gui``, ``core``, ``util``, ``model``, ``test``), the
    docstring describes that role for the parent directory. Otherwise it
    is the title-cased stem followed by "module". Leading, trailing and
    repeated underscores in the stem produce no stray spaces.

    Parameters
    ----------
    file_path : Path
        Path of the module. The file is not read.

    Returns
    -------
    str
        The docstring text, including its triple quotes.
    """
    stem = file_path.stem
    parent = file_path.parent.name
    # A bare file name has no parent directory to describe.
    scope = f" for {parent}" if parent else ""

    keywords = {
        "cli": "Command-line interface",
        "gui": "Graphical user interface",
        "core": "Core functionality",
        "util": "Utility functions",
        "model": "Data models",
        "test": "Tests",
    }

    lowered = stem.lower()
    for key, desc in keywords.items():
        if key in lowered:
            return f'"""{desc}{scope}."""'

    # split() + join collapses the gaps left by dunder / underscore-prefixed
    # names such as "__init__" or "_private".
    title = " ".join(stem.replace("_", " ").split()).title()
    return f'"""{title} module."""'
|
||||
|
||||
|
||||
@px.register_fn
|
||||
def auto_add_docstrings(root_dir: Path) -> int:
    """Add a generated module docstring to every Python file under ``root_dir``.

    A file is skipped when any component of its path relative to
    ``root_dir`` matches an entry of ``IGNORE_PATTERNS``. Matching is per
    component and glob-aware, so ``build`` excludes a ``build/`` directory
    but not ``builder.py``, and ``*.egg-info`` excludes ``foo.egg-info/``.

    Parameters
    ----------
    root_dir : Path
        Directory to search recursively.

    Returns
    -------
    int
        Number of files that received a docstring.
    """
    count = 0
    for py_file in root_dir.rglob("*.py"):
        parts = py_file.relative_to(root_dir).parts
        # Substring tests on the whole path would also skip files such as
        # "distance.py" and would never match the glob entries.
        if any(fnmatch.fnmatchcase(part, pattern) for part in parts for pattern in IGNORE_PATTERNS):
            continue

        docstring = generate_module_docstring(py_file)
        if add_docstring(py_file, docstring):
            count += 1

    print(f"共添加 {count} 个 docstring")
    return count
|
||||
|
||||
|
||||
@px.register_fn
|
||||
def sync_pyproject_config(root_dir: Path) -> None:
    """Process the ``pyproject.toml`` of every sub-project under ``root_dir``.

    Returns early, with a message, when ``root_dir`` has no
    ``pyproject.toml`` or no nested one is found. Nested files whose path
    contains ``.venv`` are ignored.

    NOTE(review): despite the name, nothing is copied from the main config.
    Each nested file is only passed to ``ruff format``, and failures are
    ignored. Confirm the intended behaviour.

    Parameters
    ----------
    root_dir : Path
        Project root holding the main ``pyproject.toml``.
    """
    main_toml = root_dir / "pyproject.toml"
    if not main_toml.exists():
        print(f"主项目配置文件不存在: {main_toml}")
        return

    sub_tomls = []
    for candidate in root_dir.rglob("pyproject.toml"):
        if candidate == main_toml or ".venv" in str(candidate):
            continue
        sub_tomls.append(candidate)

    if len(sub_tomls) == 0:
        print("没有找到子项目的 pyproject.toml")
        return

    print(f"找到 {len(sub_tomls)} 个子项目配置文件")

    for sub_toml in sub_tomls:
        subprocess.run(["ruff", "format", str(sub_toml)], check=False)

    print("配置同步完成")
|
||||
|
||||
|
||||
@px.register_fn
|
||||
def format_all(root_dir: Path) -> None:
    """Format and then lint-fix everything under ``root_dir`` with ruff.

    Runs ``ruff format`` followed by ``ruff check --fix --unsafe-fixes``.
    The first failing command raises ``subprocess.CalledProcessError``.

    Parameters
    ----------
    root_dir : Path
        Directory to process.
    """
    target = str(root_dir)
    commands = (
        ["ruff", "format", target],
        ["ruff", "check", "--fix", "--unsafe-fixes", target],
    )
    for command in commands:
        subprocess.run(command, check=True)
    print(f"格式化完成: {root_dir}")
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# piptool 私有辅助函数
|
||||
# ============================================================================
|
||||
|
||||
|
||||
def _get_installed_packages() -> list[str]:
    """Return the names reported by ``pip list --format=freeze``.

    Only ``name==version`` lines contribute a name. An empty list is
    returned when pip cannot be run or exits with an error.
    """
    try:
        listing = subprocess.run(
            ["pip", "list", "--format=freeze"],
            capture_output=True,
            text=True,
            check=True,
        )
    except (subprocess.SubprocessError, OSError):
        return []

    return [
        row.split("==")[0].strip()
        for row in listing.stdout.strip().split("\n")
        if row and "==" in row
    ]
|
||||
|
||||
|
||||
def _expand_wildcard_packages(pattern: str) -> list[str]:
|
||||
"""展开通配符模式为实际的包名列表."""
|
||||
if not any(char in pattern for char in ["*", "?", "[", "]"]):
|
||||
return [pattern]
|
||||
|
||||
installed_packages = _get_installed_packages()
|
||||
matched = [pkg for pkg in installed_packages if fnmatch.fnmatchcase(pkg.lower(), pattern.lower())]
|
||||
return matched
|
||||
|
||||
|
||||
def _filter_protected_packages(packages: list[str]) -> list[str]:
    """Return ``packages`` without the protected ones, comparing case-insensitively.

    The names that were dropped are reported on stdout.
    """
    protected = {name.lower() for name in _PROTECTED_PACKAGES}

    allowed: list[str] = []
    skipped: list[str] = []
    for pkg in packages:
        bucket = skipped if pkg.lower() in protected else allowed
        bucket.append(pkg)

    if skipped:
        print(f"跳过受保护的包: {', '.join(skipped)}")
    return allowed
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# piptool 函数
|
||||
# ============================================================================
|
||||
|
||||
|
||||
@px.register_fn
|
||||
def pip_uninstall(pkg_names: list[str]) -> None:
    """Uninstall packages, expanding glob patterns and skipping protected names.

    Nothing is run when no package remains after expansion and filtering.
    Raises ``subprocess.CalledProcessError`` if ``pip uninstall`` fails.
    """
    expanded = [pkg for pattern in pkg_names for pkg in _expand_wildcard_packages(pattern)]
    targets = _filter_protected_packages(expanded)

    if targets:
        subprocess.run(["pip", "uninstall", "-y", *targets], check=True)
|
||||
|
||||
|
||||
@px.register_fn
|
||||
def pip_reinstall(pkg_names: list[str], offline: bool = False) -> None:
    """Uninstall and then install the given packages, skipping protected names.

    With ``offline`` set, installation uses ``--no-index --find-links .``,
    so distributions are looked up in the current directory only.
    Raises ``subprocess.CalledProcessError`` if either pip command fails.
    """
    targets = _filter_protected_packages(pkg_names)
    if len(targets) == 0:
        print("所有指定的包均为受保护包, 跳过重装")
        return

    subprocess.run(["pip", "uninstall", "-y", *targets], check=True)

    install_cmd = ["pip", "install"]
    if offline:
        install_cmd += ["--no-index", "--find-links", "."]
    install_cmd += targets
    subprocess.run(install_cmd, check=True)
|
||||
|
||||
|
||||
@px.register_fn
|
||||
def pip_download(pkg_names: list[str], offline: bool = False) -> None:
    """Download distributions for ``pkg_names`` into ``PACKAGE_DIR``.

    With ``offline`` set, pip resolves from the current directory only
    (``--no-index --find-links .``).
    Raises ``subprocess.CalledProcessError`` if ``pip download`` fails.
    """
    download_cmd = ["pip", "download", *pkg_names]
    if offline:
        download_cmd.extend(["--no-index", "--find-links", "."])
    download_cmd.extend(["-d", PACKAGE_DIR])
    subprocess.run(download_cmd, check=True)
|
||||
|
||||
|
||||
@px.register_fn
|
||||
def pip_freeze() -> None:
    """Write the output of ``pip freeze --exclude-editable`` to ``REQUIREMENTS_FILE``.

    The file is written as UTF-8, like every other text file this module
    writes, instead of the platform's locale encoding.

    Raises ``subprocess.CalledProcessError`` if ``pip freeze`` fails.
    """
    result = subprocess.run(
        ["pip", "freeze", "--exclude-editable"],
        capture_output=True,
        text=True,
        check=True,
    )
    Path(REQUIREMENTS_FILE).write_text(result.stdout, encoding="utf-8")
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# gittool 函数
|
||||
# ============================================================================
|
||||
|
||||
|
||||
@px.register_fn
|
||||
def init_sub_dirs() -> None:
    """Initialize a Git repository in each immediate subdirectory of the cwd.

    For every subdirectory this runs, inside that subdirectory, the chain
    ``git init`` (only when it has no ``.git`` entry yet), ``git add .``
    and ``git commit -m "init commit"``. The current directory's own
    ``.git`` directory is not treated as a project.
    """

    def _needs_init(repo_dir: Path):
        # Bind the directory per iteration so the condition checks the
        # subdirectory rather than the process working directory.
        return lambda _: not (repo_dir / ".git").exists()

    sub_dirs = [subdir for subdir in Path.cwd().iterdir() if subdir.is_dir() and subdir.name != ".git"]
    for subdir in sub_dirs:
        px.run(
            px.Graph().chain(
                px.cmd(["git", "init"], conditions=(_needs_init(subdir),), cwd=subdir),
                # Every step must run in the subdirectory; without cwd the
                # add/commit would act on the parent working directory.
                px.cmd(["git", "add", "."], cwd=subdir),
                px.cmd(["git", "commit", "-m", "init commit"], cwd=subdir),
            ),
        )
|
||||
|
||||
|
||||
@px.register_fn
|
||||
def not_has_git_repo() -> bool:
    """Return ``True`` when the current directory has no ``.git`` entry.

    Both a ``.git`` directory and a ``.git`` file count as a repository:
    linked worktrees and submodules store a ``.git`` file that points at
    the real git directory. Only the current directory is inspected, not
    its parents.
    """
    return not (Path.cwd() / ".git").exists()
|
||||
|
||||
|
||||
@px.register_fn
|
||||
def has_files() -> bool:
    """Return ``True`` when ``git status --porcelain`` prints anything.

    Returns ``False`` when the output is empty or git cannot be run.
    """
    try:
        status = subprocess.run(
            ["git", "status", "--porcelain"],
            capture_output=True,
            text=True,
            check=False,
        )
    except (subprocess.SubprocessError, OSError):
        return False
    return status.stdout.strip() != ""
|
||||
|
||||
|
||||
@px.register_fn
|
||||
def git_add_commit(message: str = "chore: update") -> None:
    """Stage everything and commit, but only when there are pending changes.

    Parameters
    ----------
    message : str
        Commit message.

    Raises
    ------
    subprocess.CalledProcessError
        If ``git add`` or ``git commit`` fails.
    """
    if has_files():
        for git_args in (["add", "."], ["commit", "-m", message]):
            subprocess.run(["git", *git_args], check=True)
    else:
        print("没有文件需要提交")
|
||||
|
||||
|
||||
@px.register_fn
def git_init_add_commit(message: str = "init commit") -> None:
    """Run ``git init`` when needed, then stage and commit any pending changes.

    Parameters
    ----------
    message : str
        Commit message passed to ``git commit -m``.
    """
    if not_has_git_repo():
        subprocess.run(["git", "init"], check=True)

    if not has_files():
        print("没有文件需要提交")
        return

    for git_args in (["add", "."], ["commit", "-m", message]):
        subprocess.run(["git", *git_args], check=True)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# envdev 配置 (Python / Conda / Rust 镜像源)
|
||||
# ============================================================================
|
||||
|
||||
# Literal aliases naming the mirrors offered for Python, Conda and Rust.
# NOTE(review): "bsfu" looks like a misspelling of "bfsu" — confirm before renaming.
PyMirrorType = Literal["tsinghua", "aliyun", "huaweicloud", "ustc", "zju"]
CondaMirrorType = Literal["tsinghua", "ustc", "bsfu", "aliyun"]
RustMirrorType = Literal["tsinghua", "ustc", "aliyun"]
|
||||
|
||||
# Mirror name -> package index URL (used for PIP_INDEX_URL / UV_INDEX_URL and pip's index-url).
_PIP_INDEX_URLS: dict[str, str] = {
    "tsinghua": "https://pypi.tuna.tsinghua.edu.cn/simple",
    "aliyun": "https://mirrors.aliyun.com/pypi/simple/",
    "huaweicloud": "https://mirrors.huaweicloud.com/repository/pypi/simple/",
    "ustc": "https://pypi.mirrors.ustc.edu.cn/simple/",
    "zju": "https://mirrors.zju.edu.cn/pypi/simple/",
}

# Mirror name -> host name written as pip's trusted-host; keys mirror _PIP_INDEX_URLS.
_PIP_TRUSTED_HOSTS: dict[str, str] = {
    "tsinghua": "pypi.tuna.tsinghua.edu.cn",
    "aliyun": "mirrors.aliyun.com",
    "huaweicloud": "mirrors.huaweicloud.com",
    "ustc": "pypi.mirrors.ustc.edu.cn",
    "zju": "mirrors.zju.edu.cn",
}

# Value exported as UV_PYTHON_INSTALL_MIRROR, independent of the chosen pip mirror.
_UV_PYTHON_INSTALL_MIRROR: str = "https://registry.npmmirror.com/-/binary/python-build-standalone"
|
||||
|
||||
def _conda_channels(base: str, *, with_dev: bool = True) -> list[str]:
    """Return the ``pkgs/*`` then ``cloud/*`` channel URLs below an anaconda mirror root."""
    pkgs = ["main", "free", "r", "msys2", "pro"]
    if with_dev:
        pkgs.append("dev")
    clouds = ["conda-forge", "bioconda", "menpo", "pytorch"]
    return [f"{base}/pkgs/{name}/" for name in pkgs] + [f"{base}/cloud/{name}/" for name in clouds]


# The BFSU mirror is served from mirrors.bfsu.edu.cn; the previous "bsfu" host name was a typo.
_BFSU_CONDA_CHANNELS: list[str] = _conda_channels("https://mirrors.bfsu.edu.cn/anaconda")

# Mirror name -> ordered channel URLs written to ~/.condarc.
_CONDA_MIRROR_URLS: dict[str, list[str]] = {
    # The tsinghua list has never included pkgs/dev.
    "tsinghua": _conda_channels("https://mirrors.tuna.tsinghua.edu.cn/anaconda", with_dev=False),
    "ustc": _conda_channels("https://mirrors.ustc.edu.cn/anaconda"),
    # "bsfu" is kept as a key for backward compatibility; "bfsu" is the correctly spelled alias.
    "bsfu": _BFSU_CONDA_CHANNELS,
    "bfsu": _BFSU_CONDA_CHANNELS,
    "aliyun": _conda_channels("https://mirrors.aliyun.com/anaconda"),
}
|
||||
|
||||
# Mirror name -> settings for rustup and cargo. RUSTUP_DIST_SERVER and
# RUSTUP_UPDATE_ROOT are exported as environment variables; TOML_REGISTRY is
# written into ~/.cargo/config.toml with a "sparse+" prefix.
_RUSTUP_MIRRORS: dict[str, dict[str, str]] = {
    "tsinghua": {
        "RUSTUP_DIST_SERVER": "https://mirrors.tuna.tsinghua.edu.cn/rustup",
        "RUSTUP_UPDATE_ROOT": "https://mirrors.tuna.tsinghua.edu.cn/rustup/rustup",
        "TOML_REGISTRY": "https://mirrors.tuna.tsinghua.edu.cn/crates.io-index/",
    },
    "aliyun": {
        "RUSTUP_DIST_SERVER": "https://mirrors.aliyun.com/rustup",
        "RUSTUP_UPDATE_ROOT": "https://mirrors.aliyun.com/rustup/rustup",
        "TOML_REGISTRY": "https://mirrors.aliyun.com/crates.io-index/",
    },
    "ustc": {
        "RUSTUP_DIST_SERVER": "https://mirrors.ustc.edu.cn/rust-static",
        "RUSTUP_UPDATE_ROOT": "https://mirrors.ustc.edu.cn/rust-static/rustup",
        "TOML_REGISTRY": "https://mirrors.ustc.edu.cn/crates.io-index/",
    },
}

# Directory created for the sccache cache and exported as RUST_SCCACHE_DIR.
_RUST_SCCACHE_DIR: Path = Path.home() / ".cargo" / "sccache"
# Value exported as RUST_SCCACHE_CACHE_SIZE.
_RUST_SCCACHE_CACHE_SIZE: str = "20G"
|
||||
|
||||
|
||||
def _pip_config_path() -> Path:
    """Return the legacy per-user pip configuration path for this platform.

    Windows uses ``~/pip/pip.ini``. Every other platform (Linux, macOS, ...)
    uses ``~/.pip/pip.conf``; previously only Linux did, so macOS received the
    Windows-style path that pip does not read there.
    """
    if Constants.IS_WINDOWS:
        return Path.home() / "pip" / "pip.ini"
    return Path.home() / ".pip" / "pip.conf"
|
||||
|
||||
|
||||
@px.register_fn
def setup_python_mirror(mirror: str) -> None:
    """Point pip and uv at a named package-index mirror.

    Exports ``PIP_INDEX_URL``, ``PIP_TRUSTED_HOST``, ``UV_INDEX_URL``,
    ``UV_PYTHON_INSTALL_MIRROR``, ``UV_HTTP_TIMEOUT`` and ``UV_LINK_MODE`` in
    the current process, then overwrites the per-user pip configuration file
    with the matching ``index-url`` / ``trusted-host``.

    Parameters
    ----------
    mirror : str
        Mirror name; must be a key of ``_PIP_INDEX_URLS``. An unknown name
        prints a message and changes nothing.
    """
    if mirror not in _PIP_INDEX_URLS:
        print(f"未知 Python 镜像源: {mirror}")
        return

    index_url = _PIP_INDEX_URLS[mirror]
    trusted_host = _PIP_TRUSTED_HOSTS[mirror]

    os.environ["PIP_INDEX_URL"] = index_url
    # pip derives variable names from long options: --trusted-host -> PIP_TRUSTED_HOST.
    os.environ["PIP_TRUSTED_HOST"] = trusted_host
    # Misspelled legacy name, kept only for callers that may still read it.
    os.environ["PIP_TRUSTED_HOSTS"] = trusted_host
    os.environ["UV_INDEX_URL"] = index_url
    os.environ["UV_PYTHON_INSTALL_MIRROR"] = _UV_PYTHON_INSTALL_MIRROR
    os.environ["UV_HTTP_TIMEOUT"] = "600"
    os.environ["UV_LINK_MODE"] = "copy"

    config_path = _pip_config_path()
    config_path.parent.mkdir(parents=True, exist_ok=True)
    content = f"[global]\nindex-url = {index_url}\ntrusted-host = {trusted_host}\n"
    config_path.write_text(content, encoding="utf-8")
    print(f"Python 镜像源已配置: {mirror} -> {config_path}")
|
||||
|
||||
|
||||
@px.register_fn
def setup_conda_mirror(mirror: str) -> None:
    """Overwrite ``~/.condarc`` with the channel list of a named Conda mirror.

    Parameters
    ----------
    mirror : str
        Mirror name; must be a key of ``_CONDA_MIRROR_URLS``. An unknown name
        prints a message and writes nothing.
    """
    channel_urls = _CONDA_MIRROR_URLS[mirror] if mirror in _CONDA_MIRROR_URLS else None
    if channel_urls is None:
        print(f"未知 Conda 镜像源: {mirror}")
        return

    condarc = Path.home() / ".condarc"
    condarc.parent.mkdir(parents=True, exist_ok=True)

    # Mirror channels first, "defaults" last.
    header = "show_channel_urls: true\nchannels:\n - "
    footer = "\n - defaults\n"
    condarc.write_text(header + "\n - ".join(channel_urls) + footer, encoding="utf-8")
    print(f"Conda 镜像源已配置: {mirror} -> {condarc}")
|
||||
|
||||
|
||||
@px.register_fn
def setup_rust_mirror(mirror: str, version: str = "stable") -> None:
    """Configure rustup and cargo for a named mirror and prepare the sccache directory.

    Exports ``RUSTUP_DIST_SERVER``, ``RUSTUP_UPDATE_ROOT``, ``RUST_SCCACHE_DIR``
    and ``RUST_SCCACHE_CACHE_SIZE``, creates the sccache directory, and
    overwrites ``~/.cargo/config.toml`` with a source replacement for crates.io.

    Parameters
    ----------
    mirror : str
        Mirror name; must be a key of ``_RUSTUP_MIRRORS``. An unknown name
        prints a message and changes nothing.
    version : str
        Ignored; accepted only so the signature matches the older envdev call.
    """
    del version  # accepted for compatibility only

    if mirror not in _RUSTUP_MIRRORS:
        print(f"未知 Rust 镜像源: {mirror}")
        return

    settings = _RUSTUP_MIRRORS[mirror]
    os.environ.update(
        {
            "RUSTUP_DIST_SERVER": settings["RUSTUP_DIST_SERVER"],
            "RUSTUP_UPDATE_ROOT": settings["RUSTUP_UPDATE_ROOT"],
            "RUST_SCCACHE_DIR": str(_RUST_SCCACHE_DIR),
            "RUST_SCCACHE_CACHE_SIZE": _RUST_SCCACHE_CACHE_SIZE,
        }
    )

    _RUST_SCCACHE_DIR.mkdir(parents=True, exist_ok=True)

    cargo_config = Path.home() / ".cargo" / "config.toml"
    cargo_config.parent.mkdir(parents=True, exist_ok=True)
    registry = settings["TOML_REGISTRY"]
    sections = [
        f"\n[source.crates-io]\nreplace-with = '{mirror}'\n\n",
        f'[source.{mirror}]\nregistry = "sparse+{registry}"\n\n',
        f'[registries.{mirror}]\nindex = "sparse+{registry}"\n',
    ]
    cargo_config.write_text("".join(sections), encoding="utf-8")
    print(f"Rust 镜像源已配置: {mirror} -> {cargo_config}")
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# dockercmd 函数
|
||||
# ============================================================================
|
||||
|
||||
_DOCKER_MIRROR_TENCENT: str = "ccr.ccs.tencentyun.com"


@px.register_fn
def docker_login_tencent(username: str = "") -> None:
    """Run ``docker login`` against the Tencent Cloud registry.

    Parameters
    ----------
    username : str
        Docker user name. When empty, the name returned by
        ``getpass.getuser()`` is used instead.

    The command's exit status is not checked; the final message only states
    that a login was attempted.
    """
    login_name = username if username else getpass.getuser()
    login_cmd = ["docker", "login", "--username", login_name, _DOCKER_MIRROR_TENCENT]
    subprocess.run(login_cmd, check=False)
    print(f"已尝试登录腾讯云镜像仓库 (用户: {login_name})")
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# envdev Linux 专用函数
|
||||
# ============================================================================
|
||||
|
||||
# apt packages installed by install_linux_qt_libs.
_QT_LIBS: list[str] = [
    "build-essential",
    "libgl1",
    "libegl1",
    "libglib2.0-0",
    "libfontconfig1",
    "libfreetype6",
    "libxkbcommon0",
    "libdbus-1-3",
    "libxcb-xinerama0",
    "libxcb-icccm4",
    "libxcb-image0",
    "libxcb-keysyms1",
    "libxcb-randr0",
    "libxcb-render-util0",
    "libxcb-shape0",
    "libxcb-xfixes0",
    "libxcb-cursor0",
]

# apt packages installed by install_linux_fonts.
_CHINESE_FONTS: list[str] = [
    "fonts-noto-cjk",
    "fonts-wqy-microhei",
    "fonts-wqy-zenhei",
    "fonts-noto-color-emoji",
]

# Shell commands run (with shell=True) by setup_linux_system_mirror:
# first download the script to /tmp, then execute it with sudo.
_DOWNLOAD_MIRROR_SCRIPT: str = "curl -sSL https://linuxmirrors.cn/main.sh -o /tmp/linuxmirrors.sh"
_INSTALL_MIRROR_SCRIPT: str = "sudo bash /tmp/linuxmirrors.sh"

# rustup installer locations used by download_rustup_script (non-Windows / Windows).
_RUSTUP_DOWNLOAD_URL_LINUX: str = "https://mirrors.aliyun.com/repo/rust/rustup-init.sh"
_RUSTUP_DOWNLOAD_URL_WINDOWS: str = "https://static.rust-lang.org/rustup/dist/x86_64-pc-windows-msvc/rustup-init.exe"
|
||||
|
||||
|
||||
@px.register_fn
def setup_linux_system_mirror() -> None:
    """Download and run the linuxmirrors script unless apt already uses a domestic mirror.

    Linux only. ``/etc/apt/sources.list`` and
    ``/etc/apt/sources.list.d/ubuntu.sources`` are searched for any mirror
    name from ``_PIP_INDEX_URLS``; a hit means the system is treated as
    configured and nothing is run. Otherwise the script is downloaded and
    executed with sudo. The installer is skipped when the download command
    fails, so a stale or missing ``/tmp/linuxmirrors.sh`` is never run as root.
    """
    if not Constants.IS_LINUX:
        print("setup_linux_system_mirror: 仅在 Linux 上执行")
        return

    apt_files = ["/etc/apt/sources.list", "/etc/apt/sources.list.d/ubuntu.sources"]
    already_configured = False
    for apt_file in apt_files:
        try:
            content = Path(apt_file).read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            # A missing or unreadable file simply does not count as configured.
            continue
        if any(mirror in content for mirror in _PIP_INDEX_URLS):
            already_configured = True
            break

    if already_configured:
        print("已配置国内镜像源, 跳过系统镜像配置")
        return

    print("下载 linuxmirrors 脚本...")
    download = subprocess.run(_DOWNLOAD_MIRROR_SCRIPT, shell=True, check=False)
    if download.returncode != 0:
        print(f"linuxmirrors 脚本下载失败 (退出码 {download.returncode}), 跳过安装")
        return

    print("安装 linuxmirrors...")
    subprocess.run(_INSTALL_MIRROR_SCRIPT, shell=True, check=False)
|
||||
|
||||
|
||||
@px.register_fn
def install_linux_qt_libs() -> None:
    """Install the ``_QT_LIBS`` packages with ``sudo apt install -y`` (Linux only).

    The apt exit status is not checked.
    """
    if Constants.IS_LINUX:
        apt_cmd = ["sudo", "apt", "install", "-y", *_QT_LIBS]
        subprocess.run(apt_cmd, check=False)
        print("Qt 依赖库安装完成")
    else:
        print("install_linux_qt_libs: 仅在 Linux 上执行")
|
||||
|
||||
|
||||
@px.register_fn
def install_linux_fonts() -> None:
    """Install the ``_CHINESE_FONTS`` packages with ``sudo apt install -y`` (Linux only).

    The apt exit status is not checked.
    """
    if Constants.IS_LINUX:
        apt_cmd = ["sudo", "apt", "install", "-y", *_CHINESE_FONTS]
        subprocess.run(apt_cmd, check=False)
        print("中文字体安装完成")
    else:
        print("install_linux_fonts: 仅在 Linux 上执行")
|
||||
|
||||
|
||||
@px.register_fn
def install_linux_docker() -> None:
    """Install ``docker-compose-v2`` via apt and add the current user to the ``docker`` group.

    Linux only. Neither command's exit status is checked.
    """
    if Constants.IS_LINUX:
        subprocess.run(["sudo", "apt", "install", "-y", "docker-compose-v2"], check=False)
        group_cmd = ["sudo", "usermod", "-aG", "docker", getpass.getuser()]
        subprocess.run(group_cmd, check=False)
        print("Docker 安装完成 (需重新登录以生效 docker 用户组)")
    else:
        print("install_linux_docker: 仅在 Linux 上执行")
|
||||
|
||||
|
||||
@px.register_fn
def download_rustup_script() -> None:
    """Fetch the rustup installer into the current directory unless rustup is on PATH.

    Windows downloads ``rustup-init.exe`` through PowerShell; every other
    platform downloads ``rustup-init.sh`` with curl. The download's exit
    status is not checked.
    """
    if shutil.which("rustup") is not None:
        print("rustup 已安装, 跳过下载")
        return

    if Constants.IS_WINDOWS:
        banner = "下载 rustup-init.exe..."
        fetch_cmd = [
            "powershell",
            "-Command",
            "Invoke-WebRequest",
            "-Uri",
            _RUSTUP_DOWNLOAD_URL_WINDOWS,
            "-OutFile",
            "rustup-init.exe",
        ]
    else:
        banner = "下载 rustup-init.sh..."
        fetch_cmd = ["curl", "-fsSL", _RUSTUP_DOWNLOAD_URL_LINUX, "-o", "rustup-init.sh"]

    print(banner)
    subprocess.run(fetch_cmd, check=False)
|
||||
|
||||
|
||||
@px.register_fn
def install_rust_toolchain(version: str = "stable") -> None:
    """Run ``rustup toolchain install <version>`` when rustup is on PATH.

    Parameters
    ----------
    version : str
        Toolchain name handed to rustup, e.g. ``stable``, ``nightly`` or
        ``beta`` (default ``stable``).

    The rustup exit status is not checked.
    """
    rustup_found = shutil.which("rustup") is not None
    if not rustup_found:
        print("rustup 未安装, 跳过工具链安装")
        return

    install_cmd = ["rustup", "toolchain", "install", version]
    subprocess.run(install_cmd, check=False)
    print(f"Rust 工具链 {version} 安装完成")
|
||||
@@ -0,0 +1,327 @@
|
||||
"""文件类函数模块.
|
||||
|
||||
聚合文件日期处理、文件等级重命名、文件夹备份、文件夹压缩工具的可复用函数.
|
||||
所有公共函数通过 ``@px.register_fn`` 注册, 供 YAML 任务编排引用.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import shutil
|
||||
import time
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
|
||||
import pyflowx as px
|
||||
|
||||
# Names exported by ``from <module> import *``.
# NOTE(review): the module-level ``IGNORE`` list is not exported here — confirm that is intended.
__all__ = [
    "BRACKETS",
    "DATE_PATTERN",
    "IGNORE_DIRS",
    "IGNORE_EXT",
    "IGNORE_FILES",
    "LEVELS",
    "SEP",
    "add_date_prefix",
    "archive_folder",
    "backup_folder",
    "folderback_default",
    "folderzip_default",
    "get_file_timestamp",
    "process_file_date",
    "process_file_level",
    "process_files_date",
    "process_files_level",
    "remove_date_prefix",
    "remove_dump",
    "remove_marks",
    "zip_folders",
    "zip_target",
]
|
||||
|
||||
# ============================================================================
# filedate configuration
# ============================================================================

# Matches a 19xx/20xx year, a month 01-12 and a day 01-31; each part may be
# followed by a single separator from "-_#.~". The pattern is not anchored.
DATE_PATTERN = re.compile(r"(20|19)\d{2}[-_#.~]?((0[1-9])|(1[012]))[-_#.~]?((0[1-9])|([12]\d)|(3[01]))[-_#.~]?")
# Separator inserted between the date prefix and the original stem.
SEP = "_"

# ============================================================================
# filelevel configuration
# ============================================================================

# Level number (as a string) -> comma-separated mark names. The first name of a
# level is the one appended to a file name; all names are recognised for removal.
LEVELS: dict[str, str] = {
    "0": "",
    "1": "PUB,NOR",
    "2": "INT",
    "3": "CON",
    "4": "CLA",
}

# Characters accepted immediately before (first string) and immediately after
# (second string) a mark for it to be removed.
BRACKETS: tuple[str, str] = (" ([_(【-", " )]_)】")

# ============================================================================
# folderzip configuration
# ============================================================================

# Directory names and file suffixes that zip_folders leaves alone.
IGNORE_DIRS: list[str] = [".git", ".idea", ".vscode", "__pycache__"]
IGNORE_FILES: list[str] = [".gitignore"]
IGNORE: list[str] = [*IGNORE_DIRS, *IGNORE_FILES]
IGNORE_EXT: list[str] = [".zip", ".rar", ".7z", ".tar", ".gz"]
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# filedate 函数
|
||||
# ============================================================================
|
||||
|
||||
|
||||
@px.register_fn
def get_file_timestamp(filepath: Path) -> str:
    """Return the later of the file's ``st_mtime`` / ``st_ctime`` as a local ``YYYYMMDD`` string."""
    info = filepath.stat()
    newest = max((info.st_mtime, info.st_ctime))
    return time.strftime("%Y%m%d", time.localtime(newest))
|
||||
|
||||
|
||||
@px.register_fn
def remove_date_prefix(filepath: Path) -> Path:
    """Rename the file so its stem no longer contains any ``DATE_PATTERN`` match.

    Returns the new path, or ``filepath`` unchanged when the stem holds no date.
    """
    original_stem = filepath.stem
    cleaned_stem = DATE_PATTERN.sub("", original_stem)
    if cleaned_stem == original_stem:
        return filepath

    renamed = filepath.with_name(cleaned_stem + filepath.suffix)
    filepath.rename(renamed)
    return renamed
|
||||
|
||||
|
||||
@px.register_fn
def add_date_prefix(filepath: Path) -> Path:
    """Rename the file to ``<YYYYMMDD><SEP><stem><suffix>`` and return the new path.

    The date comes from ``get_file_timestamp``. ``filepath`` is returned
    untouched only if the computed path equals the current one.
    """
    stamp = get_file_timestamp(filepath)
    dated_name = f"{stamp}{SEP}{filepath.stem}" + filepath.suffix
    dated_path = filepath.with_name(dated_name)
    if dated_path == filepath:
        return filepath

    filepath.rename(dated_path)
    return dated_path
|
||||
|
||||
|
||||
@px.register_fn
def process_file_date(filepath: Path, clear: bool = False) -> None:
    """Strip the date from one file name and, unless ``clear`` is set, add a fresh prefix.

    Parameters
    ----------
    filepath : Path
        File to rename.
    clear : bool
        When True the date is only removed; otherwise a new date prefix is
        added after removal.
    """
    stripped = remove_date_prefix(filepath)
    if not clear:
        add_date_prefix(stripped)
|
||||
|
||||
|
||||
@px.register_fn
def process_files_date(targets: list[Path], clear: bool = False) -> None:
    """Apply ``process_file_date`` to every existing, non-hidden path in ``targets``.

    Parameters
    ----------
    targets : list[Path]
        Paths to process; missing paths and names starting with "." are skipped.
    clear : bool
        Forwarded to ``process_file_date``.
    """
    for candidate in targets:
        if not candidate.exists():
            continue
        if candidate.name.startswith("."):
            continue
        process_file_date(candidate, clear)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# filelevel 函数
|
||||
# ============================================================================
|
||||
|
||||
|
||||
@px.register_fn
def remove_marks(stem: str, marks: list[str]) -> str:
    """Strip every delimited occurrence of the given marks from a file-name stem.

    An occurrence is removed, together with the single character on each side
    of it, only when the preceding character is in ``BRACKETS[0]`` and the
    following character is in ``BRACKETS[1]``. Other occurrences are kept.
    Empty marks are ignored; previously an empty mark made the search loop
    spin forever because the position never advanced.
    """
    left_brackets, right_brackets = BRACKETS
    for mark in marks:
        if not mark:
            continue
        pos = 0
        while True:
            pos = stem.find(mark, pos)
            if pos == -1:
                break
            # b / e index the characters immediately before and after the match.
            b, e = pos - 1, pos + len(mark)
            if b >= 0 and e < len(stem) and stem[b] in left_brackets and stem[e] in right_brackets:
                # Drop the mark and both delimiters; pos is kept so the search
                # resumes at the same index of the shortened string.
                stem = stem[:b] + stem[e + 1 :]
            else:
                # Not delimited: continue after this occurrence.
                pos = e
    return stem
|
||||
|
||||
|
||||
@px.register_fn
def process_file_level(filepath: Path, level: int = 0) -> None:
    """Replace the level mark in one file name.

    All known marks (every name in ``LEVELS`` plus the digits 1-9) are removed
    from the stem via ``remove_marks``; for a level above 0 the first name of
    that level is then appended as ``(NAME)``. The file is renamed only when
    the stem actually changed.

    Parameters
    ----------
    filepath : Path
        File to rename.
    level : int
        Level index, valid from 0 to ``len(LEVELS) - 1``; 0 only clears marks.
    """
    if level < 0 or level >= len(LEVELS):
        print(f"无效的等级 {level}, 必须在 0 和 {len(LEVELS) - 1} 之间")
        return

    if not filepath.exists():
        print(f"文件不存在: {filepath}")
        return

    original_stem = filepath.stem

    # Same removal order as before: level names first, then single digits.
    known_marks = [name for names in LEVELS.values() if names for name in names.split(",")]
    known_marks.extend(str(digit) for digit in range(1, 10))
    filestem = remove_marks(original_stem, known_marks)

    label = LEVELS.get(str(level), "").split(",")[0] if level > 0 else ""
    if label:
        filestem = f"{filestem}({label})"

    if filestem == original_stem:
        return

    new_path = filepath.with_name(filestem + filepath.suffix)
    filepath.rename(new_path)
    print(f"重命名: {filepath} -> {new_path}")
|
||||
|
||||
|
||||
@px.register_fn
def process_files_level(targets: list[Path], level: int = 0) -> None:
    """Apply ``process_file_level`` with the same level to every path in ``targets``.

    Parameters
    ----------
    targets : list[Path]
        Files to rename.
    level : int
        Level index forwarded to ``process_file_level``.
    """
    for path in targets:
        process_file_level(path, level=level)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# folderback 函数
|
||||
# ============================================================================
|
||||
|
||||
|
||||
@px.register_fn
def remove_dump(src: Path, dst: Path, max_zip: int) -> None:
    """Delete the oldest backup archives of ``src`` so at most ``max_zip`` remain.

    Only files below ``dst`` named ``<src.stem>_<YYYYMMDD>_<HHMMSS>.zip`` (the
    names produced by ``zip_target``) are considered. The previous substring
    test on the whole path could select unrelated archives, and every archive
    when ``src.stem`` is empty.

    Parameters
    ----------
    src : Path
        Backed-up source; its stem identifies the matching archives.
    dst : Path
        Directory searched recursively for archives.
    max_zip : int
        Number of newest archives to keep; negative values are treated as 0.
    """
    name_pattern = re.compile(re.escape(src.stem) + r"_\d{8}_\d{6}\.zip")
    backups = sorted(
        (path for path in dst.rglob("*.zip") if name_pattern.fullmatch(path.name)),
        # The 15 characters before ".zip" are the sortable timestamp.
        key=lambda path: path.name[-19:-4],
    )
    excess = len(backups) - max(max_zip, 0)
    for stale in backups[: max(excess, 0)]:
        stale.unlink()
|
||||
|
||||
|
||||
@px.register_fn
def zip_target(src: Path, dst: Path, max_zip: int) -> None:
    """Archive one file or directory into ``dst`` as ``<stem>_<YYYYMMDD>_<HHMMSS>.zip``.

    Fixes over the previous version:

    * archive member names come from ``relative_to`` instead of
      ``str.replace``, which removed every "." from the names whenever
      ``src`` was a bare relative path such as ``.`` or ``data``;
    * a plain file source is archived instead of producing an empty zip;
    * the archive being written is never added to itself, and when ``dst``
      lies inside ``src`` the older backups in it are not nested into the
      new one;
    * members are deflated rather than merely stored.

    Parameters
    ----------
    src : Path
        File or directory to archive.
    dst : Path
        Existing directory that receives the archive.
    max_zip : int
        Number of archives of this source to keep (see ``remove_dump``).
    """
    source = src.resolve()
    dest = dst.resolve()
    timestamp = time.strftime("_%Y%m%d_%H%M%S")
    target_path = dst / (source.stem + timestamp + ".zip")
    archive = target_path.resolve()

    # Only skip the destination tree when it is a proper sub-directory of the source.
    skip_dest = dest != source and source in dest.parents
    # Materialise the listing before the archive file appears in the tree.
    members = [source] if source.is_file() else list(source.rglob("*"))

    with zipfile.ZipFile(target_path, "w", compression=zipfile.ZIP_DEFLATED) as zip_file:
        for member in members:
            if member == archive:
                continue
            if skip_dest and (member == dest or dest in member.parents):
                continue
            zip_file.write(member, arcname=str(member.relative_to(source.parent)))

    remove_dump(source, dst, max_zip)
    print(f"备份完成: {target_path}")
|
||||
|
||||
|
||||
@px.register_fn
def backup_folder(src: str, dst: str, max_zip: int = 5) -> None:
    """Back up ``src`` into a timestamped zip under ``dst``.

    Parameters
    ----------
    src : str
        Source path; a message is printed and nothing happens when it is missing.
    dst : str
        Destination directory, created (with parents) when absent.
    max_zip : int
        Maximum number of archives to keep, forwarded to ``zip_target``.
    """
    src_path, dst_path = Path(src), Path(dst)

    if not src_path.exists():
        print(f"源文件夹不存在: {src_path}")
        return

    destination_missing = not dst_path.exists()
    if destination_missing:
        dst_path.mkdir(parents=True, exist_ok=True)
        print(f"创建目标文件夹: {dst_path}")

    zip_target(src_path, dst_path, max_zip)
|
||||
|
||||
|
||||
@px.register_fn("folderback_default")
def folderback_default() -> None:
    """Back up the current directory into ``./backup``, keeping at most five archives."""
    backup_folder(src=".", dst="./backup", max_zip=5)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# folderzip 函数
|
||||
# ============================================================================
|
||||
|
||||
|
||||
@px.register_fn
def archive_folder(folder: Path) -> None:
    """Create ``<folder>.zip`` next to ``folder`` containing just that folder.

    The archive is rooted at the folder's parent, so member names start with
    the folder name. Previously ``base_dir`` was the full path, which embedded
    every leading directory of an absolute or nested path in the archive.
    """
    shutil.make_archive(
        str(folder),
        format="zip",
        root_dir=str(folder.parent),
        base_dir=folder.name,
    )
    print(f"压缩完成: {folder.name}.zip")
|
||||
|
||||
|
||||
@px.register_fn
def zip_folders(cwd: str = ".") -> None:
    """Archive every eligible directory directly below ``cwd``.

    A directory is skipped when its name is in ``IGNORE_DIRS`` or its suffix
    is in ``IGNORE_EXT``.

    Parameters
    ----------
    cwd : str
        Directory whose sub-directories are archived; a message is printed
        when it does not exist.
    """
    base = Path(cwd)
    if not base.exists():
        print(f"目录不存在: {base}")
        return

    # Collect first, then archive, so new .zip files never affect the listing.
    selected: list[Path] = []
    for entry in base.iterdir():
        if entry.is_dir() and entry.name not in IGNORE_DIRS and entry.suffix not in IGNORE_EXT:
            selected.append(entry)

    for directory in selected:
        archive_folder(directory)
|
||||
|
||||
|
||||
@px.register_fn("folderzip_default")
def folderzip_default() -> None:
    """Archive every eligible directory below the current working directory."""
    zip_folders(cwd=".")
|
||||
@@ -0,0 +1,117 @@
|
||||
"""LLM 工具类函数模块.
|
||||
|
||||
聚合 ModelScope 下载 (msdownload) 与 SGLang 本地模型服务 (sglang) 的可复用函数.
|
||||
所有公共函数通过 ``@px.register_fn`` 注册, 供 YAML 任务编排引用.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import shutil
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
import pyflowx as px
|
||||
from pyflowx.conditions import Constants
|
||||
|
||||
# Names exported by ``from <module> import *``.
__all__ = [
    "install_sglang",
    "msdownload_run",
    "run_sglang",
]
|
||||
|
||||
|
||||
@px.register_fn
def msdownload_run(name: str, target_type: str = "model", download_dir: str | None = None) -> None:
    """Download a ModelScope target by running ``uvx modelscope download``.

    Parameters
    ----------
    name : str
        Target identifier, e.g. ``Qwen/Qwen2.5-Coder-32B-Instruct``; an empty
        value prints a message and does nothing.
    target_type : str
        Passed to the CLI as ``--<target_type>`` (default ``model``).
    download_dir : str | None
        Destination directory; when falsy, ``~/.models/<last segment of name>``
        is used. The directory is created if needed.

    The command's exit status is not checked.
    """
    if not name:
        print("msdownload: name 不能为空")
        return

    out_dir = Path(download_dir) if download_dir else Path.home() / ".models" / name.rpartition("/")[2]
    out_dir.mkdir(parents=True, exist_ok=True)

    cmd = ["uvx", "modelscope", "download"]
    cmd += [f"--{target_type}", name]
    cmd += ["--local_dir", str(out_dir)]
    print(f"下载 {target_type}: {name} -> {out_dir}")
    subprocess.run(cmd, check=False)
|
||||
|
||||
|
||||
@px.register_fn
def install_sglang() -> None:
    """Install ``sglang[all]`` with uv unless an ``sglang`` executable is on PATH.

    Runs ``uv pip install sglang[all]``. The earlier ``uv install`` form is
    not a uv subcommand, so the installation could never succeed. The
    command's exit status is not checked.
    """
    if shutil.which("sglang") is not None:
        print("sglang 已安装, 跳过安装步骤")
        return

    print("正在安装 sglang[all]...")
    subprocess.run(["uv", "pip", "install", "sglang[all]"], check=False)
|
||||
|
||||
|
||||
@px.register_fn
def run_sglang(
    model: str = "~/.models/Qwen2.5-Coder-32B-Instruct-AWQ",
    port: int = 8000,
    ctx_len: int = 32768,
    mem_fraction: float = 0.75,
    host: str = "0.0.0.0",
    log_level: str = "info",
) -> None:
    """Launch ``sglang.launch_server`` for a local model directory.

    Parameters
    ----------
    model : str
        Model path; ``~`` is expanded. A missing path prints a message and
        nothing is started.
    port : int
        Value for ``--port`` (default 8000).
    ctx_len : int
        Value for ``--context-length`` (default 32768).
    mem_fraction : float
        Value for ``--mem-fraction-static`` (default 0.75).
    host : str
        Value for ``--host`` (default ``0.0.0.0``).
    log_level : str
        Value for ``--log-level`` (default ``info``).

    The call blocks until the server process exits; its status is not checked.
    """
    model_dir = Path(model).expanduser()
    if not model_dir.exists():
        print(f"模型目录不存在: {model_dir}")
        return

    interpreter = "python" if Constants.IS_WINDOWS else "python3"
    # Insertion order of this mapping is the order of flags on the command line.
    options = {
        "--model-path": str(model_dir),
        "--host": host,
        "--port": str(port),
        "--mem-fraction-static": str(mem_fraction),
        "--context-length": str(ctx_len),
        "--tool-call-parser": "qwen",
        "--log-level": log_level,
    }
    cmd = [interpreter, "-m", "sglang.launch_server"]
    for flag, value in options.items():
        cmd.extend((flag, value))

    print(f"启动 SGLang: {model_dir} (port={port}, ctx={ctx_len}, mem={mem_fraction})")
    subprocess.run(cmd, check=False)
|
||||
@@ -0,0 +1,498 @@
|
||||
"""媒体类函数模块.
|
||||
|
||||
聚合 PDF 工具 (pdftool) 和截图工具 (screenshot) 的可复用函数.
|
||||
所有公共函数通过 ``@px.register_fn`` 注册, 供 YAML 任务编排引用.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import subprocess
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
import pyflowx as px
|
||||
from pyflowx.conditions import Constants
|
||||
|
||||
# Names exported by ``from <module> import *``.
__all__ = [
    "DEFAULT_PASSWORD",
    "DEFAULT_QUALITY",
    "PDF_SUFFIX",
    "get_screenshot_path",
    "pdf_add_watermark",
    "pdf_compress",
    "pdf_crop",
    "pdf_decrypt",
    "pdf_encrypt",
    "pdf_extract_images",
    "pdf_extract_text",
    "pdf_info",
    "pdf_merge",
    "pdf_ocr",
    "pdf_reorder",
    "pdf_repair",
    "pdf_rotate",
    "pdf_split",
    "pdf_to_images",
    "take_screenshot_area",
    "take_screenshot_full",
]
|
||||
|
||||
# Optional dependency: PyMuPDF. HAS_PYMUPDF records whether the import
# succeeded so functions can print an install hint instead of failing.
try:
    import fitz  # PyMuPDF

    HAS_PYMUPDF = True
except ImportError:
    HAS_PYMUPDF = False

# Optional dependency: pypdf, guarded the same way through HAS_PYPDF.
try:
    import pypdf

    HAS_PYPDF = True
except ImportError:
    HAS_PYPDF = False
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# 配置
|
||||
# ============================================================================
|
||||
|
||||
# Exported configuration values for the PDF helpers.
# NOTE(review): their consumers are outside the visible part of this module —
# presumably file suffix, default image quality and default password; confirm.
PDF_SUFFIX = ".pdf"
DEFAULT_QUALITY = 75
DEFAULT_PASSWORD = ""
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# PDF 函数
|
||||
# ============================================================================
|
||||
|
||||
|
||||
@px.register_fn
def pdf_merge(input_paths: list[Path], output_path: Path) -> None:
    """Concatenate the pages of several PDFs into ``output_path``.

    Input paths that do not exist are skipped silently. Requires pypdf; when
    it is missing an install hint is printed and nothing is written.
    """
    if not HAS_PYPDF:
        print("未安装 pypdf 库, 请安装: pip install pypdf")
        return

    merged = pypdf.PdfWriter()
    for source in input_paths:
        if not source.exists():
            continue
        source_reader = pypdf.PdfReader(str(source))
        for page in source_reader.pages:
            merged.add_page(page)

    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, "wb") as handle:
        merged.write(handle)

    print(f"合并完成: {output_path}")
|
||||
|
||||
|
||||
@px.register_fn
def pdf_split(input_path: Path, output_dir: Path) -> None:
    """Write every page of a PDF to its own file ``<stem>_page_<n>.pdf`` in ``output_dir``.

    Page numbers start at 1. Requires pypdf; when it is missing an install
    hint is printed and nothing is written.
    """
    if not HAS_PYPDF:
        print("未安装 pypdf 库, 请安装: pip install pypdf")
        return

    source_reader = pypdf.PdfReader(str(input_path))
    output_dir.mkdir(parents=True, exist_ok=True)

    for page_no, page in enumerate(source_reader.pages, start=1):
        single = pypdf.PdfWriter()
        single.add_page(page)
        page_file = output_dir / f"{input_path.stem}_page_{page_no}.pdf"
        with open(page_file, "wb") as handle:
            single.write(handle)

    print(f"拆分完成: {output_dir}")
|
||||
|
||||
|
||||
@px.register_fn
def pdf_compress(input_path: Path, output_path: Path) -> None:
    """Re-save a PDF with PyMuPDF's garbage collection, deflate and clean options.

    Prints the size reduction in percent afterwards. Requires PyMuPDF; when
    it is missing an install hint is printed and nothing is written.
    """
    if not HAS_PYMUPDF:
        print("未安装 PyMuPDF 库, 请安装: pip install PyMuPDF")
        return

    document = fitz.open(str(input_path))
    output_path.parent.mkdir(parents=True, exist_ok=True)
    save_options = {"garbage": 4, "deflate": True, "clean": True}
    document.save(str(output_path), **save_options)
    document.close()

    size_before = input_path.stat().st_size
    size_after = output_path.stat().st_size
    ratio = (1 - size_after / size_before) * 100
    print(f"压缩完成: {output_path} (缩小 {ratio:.1f}%)")
|
||||
|
||||
|
||||
@px.register_fn
def pdf_encrypt(input_path: Path, output_path: Path, password: str) -> None:
    """Copy a PDF to ``output_path`` protected with ``password``.

    The same password is used as user and owner password. Requires pypdf;
    when it is missing an install hint is printed and nothing is written.
    """
    if not HAS_PYPDF:
        print("未安装 pypdf 库, 请安装: pip install pypdf")
        return

    protected = pypdf.PdfWriter()
    for page in pypdf.PdfReader(str(input_path)).pages:
        protected.add_page(page)
    protected.encrypt(user_password=password, owner_password=password, use_128bit=True)

    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, "wb") as handle:
        protected.write(handle)

    print(f"加密完成: {output_path}")
|
||||
|
||||
|
||||
@px.register_fn
def pdf_decrypt(input_path: Path, output_path: Path, password: str) -> None:
    """Write an unencrypted copy of a PDF to ``output_path``.

    When the input is encrypted it is unlocked with ``password``. A rejected
    password is reported and nothing is written; previously the result of
    ``decrypt`` was ignored and the failure surfaced as an exception while
    copying pages. Requires pypdf; when it is missing an install hint is
    printed and nothing is written.
    """
    if not HAS_PYPDF:
        print("未安装 pypdf 库, 请安装: pip install pypdf")
        return

    reader = pypdf.PdfReader(str(input_path))
    # decrypt() returns a falsy value (0 / NOT_DECRYPTED) when the password is rejected.
    if reader.is_encrypted and not reader.decrypt(password):
        print(f"解密失败, 密码错误: {input_path}")
        return

    writer = pypdf.PdfWriter()
    for page in reader.pages:
        writer.add_page(page)

    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, "wb") as f:
        writer.write(f)

    print(f"解密完成: {output_path}")
|
||||
|
||||
|
||||
@px.register_fn
def pdf_extract_text(input_path: Path, output_path: Path) -> None:
    """Extract the text of every page into a UTF-8 text file.

    Each page's text is followed by a blank line. Does nothing except print
    an install hint when PyMuPDF is unavailable.

    Parameters
    ----------
    input_path : Path
        PDF file to read.
    output_path : Path
        Text file to write; missing parent directories are created.
    """
    if not HAS_PYMUPDF:
        print("未安装 PyMuPDF 库, 请安装: pip install PyMuPDF")
        return

    doc = fitz.open(str(input_path))
    try:
        # Join once instead of growing a string page by page.
        text = "".join(str(page.get_text()) + "\n\n" for page in doc)
    finally:
        # Release the document handle even when extraction fails.
        doc.close()

    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(text, encoding="utf-8")
    print(f"文本提取完成: {output_path}")
|
||||
|
||||
|
||||
@px.register_fn
def pdf_extract_images(input_path: Path, output_dir: Path) -> None:
    """Save every image referenced by each page into a directory.

    Files are named ``page_<page>_img_<index>.<ext>`` with 1-based numbers.
    Does nothing except print an install hint when PyMuPDF is unavailable.

    Parameters
    ----------
    input_path : Path
        PDF file to read.
    output_dir : Path
        Directory receiving the images; created when missing.
    """
    if not HAS_PYMUPDF:
        print("未安装 PyMuPDF 库, 请安装: pip install PyMuPDF")
        return

    doc = fitz.open(str(input_path))
    image_count = 0
    try:
        output_dir.mkdir(parents=True, exist_ok=True)
        # pyrefly: ignore [bad-argument-type]
        for page_num, page in enumerate(doc):
            images = page.get_images(full=True)
            for img_idx, img in enumerate(images):
                # The first field of each image entry is its xref number.
                base_image = doc.extract_image(img[0])
                image_ext = base_image["ext"]
                image_path = output_dir / f"page_{page_num + 1}_img_{img_idx + 1}.{image_ext}"
                image_path.write_bytes(base_image["image"])
                image_count += 1
    finally:
        # Release the document handle even when extraction fails midway.
        doc.close()

    print(f"图片提取完成: {output_dir} (共 {image_count} 张)")
|
||||
|
||||
|
||||
@px.register_fn
def pdf_add_watermark(input_path: Path, output_path: Path, text: str = "CONFIDENTIAL") -> None:
    """Stamp a 45-degree text watermark across the middle of every page.

    Does nothing except print an install hint when PyMuPDF is unavailable.

    Parameters
    ----------
    input_path : Path
        PDF file to read.
    output_path : Path
        Destination file; missing parent directories are created.
    text : str
        Watermark text, drawn in black at font size 48.
    """
    if not HAS_PYMUPDF:
        print("未安装 PyMuPDF 库, 请安装: pip install PyMuPDF")
        return

    doc = fitz.open(str(input_path))
    try:
        # The text width and rotation matrix do not depend on the page.
        text_width = fitz.get_text_length(text, fontsize=48)
        rotation = fitz.Matrix(45)
        for page in doc:
            rect = page.rect
            x = (rect.width - text_width) / 2
            y = rect.height / 2
            # insert_text() only accepts ``rotate`` in multiples of 90, so the
            # diagonal is produced by morphing around the page centre instead.
            pivot = fitz.Point(rect.width / 2, y)
            page.insert_text(
                (x, y),
                text,
                fontsize=48,
                color=(0, 0, 0),
                morph=(pivot, rotation),
            )

        output_path.parent.mkdir(parents=True, exist_ok=True)
        doc.save(str(output_path))
    finally:
        # Release the document handle even when drawing or saving fails.
        doc.close()

    print(f"水印添加完成: {output_path}")
|
||||
|
||||
|
||||
@px.register_fn
def pdf_rotate(input_path: Path, output_path: Path, rotation: int = 90) -> None:
    """Set the rotation of every page and save the result.

    Does nothing except print an install hint when PyMuPDF is unavailable.

    Parameters
    ----------
    input_path : Path
        PDF file to read.
    output_path : Path
        Destination file; missing parent directories are created.
    rotation : int
        Value passed to ``set_rotation`` for each page, default 90.
    """
    if not HAS_PYMUPDF:
        print("未安装 PyMuPDF 库, 请安装: pip install PyMuPDF")
        return

    doc = fitz.open(str(input_path))
    try:
        for page in doc:
            page.set_rotation(rotation)

        output_path.parent.mkdir(parents=True, exist_ok=True)
        doc.save(str(output_path))
    finally:
        # Release the document handle even when rotating or saving fails.
        doc.close()

    print(f"旋转完成: {output_path}")
|
||||
|
||||
|
||||
@px.register_fn
def pdf_crop(input_path: Path, output_path: Path, margins: tuple[int, int, int, int]) -> None:
    """Shrink the crop box of every page by the given margins.

    Does nothing except print an install hint when PyMuPDF is unavailable.

    Parameters
    ----------
    input_path : Path
        PDF file to read.
    output_path : Path
        Destination file; missing parent directories are created.
    margins : tuple[int, int, int, int]
        ``(left, top, right, bottom)`` amounts removed from the matching
        edges of ``page.rect`` (page coordinate units, presumably points).
    """
    if not HAS_PYMUPDF:
        print("未安装 PyMuPDF 库, 请安装: pip install PyMuPDF")
        return

    left, top, right, bottom = margins
    doc = fitz.open(str(input_path))
    try:
        for page in doc:
            rect = page.rect
            new_rect = fitz.Rect(
                rect.x0 + left,
                rect.y0 + top,
                rect.x1 - right,
                rect.y1 - bottom,
            )
            page.set_cropbox(new_rect)

        output_path.parent.mkdir(parents=True, exist_ok=True)
        doc.save(str(output_path))
    finally:
        # Release the document handle even when cropping or saving fails.
        doc.close()

    print(f"裁剪完成: {output_path}")
|
||||
|
||||
|
||||
@px.register_fn
def pdf_info(input_path: Path) -> None:
    """Print page count, basic metadata and file size of a PDF.

    Metadata fields that are missing or empty are shown as ``N/A``.
    Does nothing except print an install hint when PyMuPDF is unavailable.

    Parameters
    ----------
    input_path : Path
        PDF file to inspect.
    """
    if not HAS_PYMUPDF:
        print("未安装 PyMuPDF 库, 请安装: pip install PyMuPDF")
        return

    doc = fitz.open(str(input_path))
    try:
        # Fall back to an empty mapping so a document without metadata
        # cannot raise AttributeError on ``.get``.
        metadata = doc.metadata or {}
        print(f"文件: {input_path}")
        print(f"页数: {doc.page_count}")
        # ``or`` also covers keys that exist but hold an empty value.
        print(f"标题: {metadata.get('title') or 'N/A'}")
        print(f"作者: {metadata.get('author') or 'N/A'}")
        print(f"创建日期: {metadata.get('creationDate') or 'N/A'}")
        print(f"修改日期: {metadata.get('modDate') or 'N/A'}")
        print(f"文件大小: {input_path.stat().st_size / 1024:.1f} KB")
    finally:
        # Release the document handle even when printing fails.
        doc.close()
|
||||
|
||||
|
||||
@px.register_fn
def pdf_ocr(input_path: Path, output_path: Path, lang: str = "chi_sim+eng") -> None:
    """Run OCR on every page and write a new PDF with image plus text.

    Each page is rendered to a pixmap, recognised with pytesseract, and
    re-created in a new document as the rendered image with the recognised
    text inserted in a page-sized text box. Prints an install hint and
    returns when pytesseract/Pillow or PyMuPDF is unavailable.

    Parameters
    ----------
    input_path : Path
        PDF file to read.
    output_path : Path
        Destination file; missing parent directories are created.
    lang : str
        Language spec passed to ``pytesseract.image_to_string``.
    """
    try:
        import pytesseract
        from PIL import Image
    except ImportError:
        print("未安装 OCR 相关库, 请安装: pip install pytesseract pillow")
        return

    if not HAS_PYMUPDF:
        print("未安装 PyMuPDF 库, 请安装: pip install PyMuPDF")
        return

    doc = fitz.open(str(input_path))
    try:
        new_doc = fitz.open()
        try:
            for page in doc:
                pix = page.get_pixmap()
                img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
                ocr_text = pytesseract.image_to_string(img, lang=lang)

                new_page = new_doc.new_page(width=page.rect.width, height=page.rect.height)
                new_page.insert_image(new_page.rect, pixmap=pix)
                text_rect = fitz.Rect(0, 0, page.rect.width, page.rect.height)
                # pyrefly: ignore [bad-argument-type]
                new_page.insert_textbox(text_rect, ocr_text, fontname="china-ss", fontsize=11)

            output_path.parent.mkdir(parents=True, exist_ok=True)
            new_doc.save(str(output_path))
        finally:
            # Close the output document even when OCR or saving fails.
            new_doc.close()
    finally:
        doc.close()

    print(f"OCR 识别完成: {output_path}")
|
||||
|
||||
|
||||
@px.register_fn
def pdf_reorder(input_path: Path, output_path: Path, order: list[int]) -> None:
    """Write a PDF whose pages follow the given index sequence.

    Indices are 0-based; values outside the document are skipped silently.
    Does nothing except print an install hint when pypdf is unavailable.

    Parameters
    ----------
    input_path : Path
        PDF file to read.
    output_path : Path
        Destination file; missing parent directories are created.
    order : list[int]
        Page indices in the desired output order.
    """
    if not HAS_PYPDF:
        print("未安装 pypdf 库, 请安装: pip install pypdf")
        return

    source = pypdf.PdfReader(str(input_path))
    reordered = pypdf.PdfWriter()

    page_total = len(source.pages)
    for index in order:
        if index < 0 or index >= page_total:
            continue
        reordered.add_page(source.pages[index])

    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, "wb") as stream:
        reordered.write(stream)

    print(f"重排完成: {output_path}")
|
||||
|
||||
|
||||
@px.register_fn
def pdf_to_images(input_path: Path, output_dir: Path, dpi: int = 300) -> None:
    """Render every page to a PNG file.

    Files are named ``<input stem>_page_<n>.png`` with 1-based numbers.
    Does nothing except print an install hint when PyMuPDF is unavailable.

    Parameters
    ----------
    input_path : Path
        PDF file to read.
    output_dir : Path
        Directory receiving the images; created when missing.
    dpi : int
        Rendering resolution passed to ``get_pixmap``, default 300.
    """
    if not HAS_PYMUPDF:
        print("未安装 PyMuPDF 库, 请安装: pip install PyMuPDF")
        return

    doc = fitz.open(str(input_path))
    try:
        output_dir.mkdir(parents=True, exist_ok=True)
        # pyrefly: ignore [bad-argument-type]
        for page_num, page in enumerate(doc):
            pix = page.get_pixmap(dpi=dpi)
            image_path = output_dir / f"{input_path.stem}_page_{page_num + 1}.png"
            pix.save(str(image_path))
    finally:
        # Release the document handle even when rendering fails midway.
        doc.close()

    print(f"转换完成: {output_dir}")
|
||||
|
||||
|
||||
@px.register_fn
def pdf_repair(input_path: Path, output_path: Path) -> None:
    """Re-save a PDF with garbage collection, compression and cleaning.

    Does nothing except print an install hint when PyMuPDF is unavailable.

    Parameters
    ----------
    input_path : Path
        PDF file to read.
    output_path : Path
        Destination file; missing parent directories are created.
    """
    if not HAS_PYMUPDF:
        print("未安装 PyMuPDF 库, 请安装: pip install PyMuPDF")
        return

    doc = fitz.open(str(input_path))
    try:
        output_path.parent.mkdir(parents=True, exist_ok=True)
        doc.save(str(output_path), garbage=4, deflate=True, clean=True)
    finally:
        # Release the document handle even when saving fails.
        doc.close()

    print(f"修复完成: {output_path}")
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# screenshot 函数
|
||||
# ============================================================================
|
||||
|
||||
|
||||
@px.register_fn
def get_screenshot_path(filename: str | None = None) -> Path:
    """Return the path a screenshot should be saved to.

    The ``~/Pictures/screenshots`` directory is created when missing.

    Parameters
    ----------
    filename : str | None
        File name to use. When ``None`` a name of the form
        ``screenshot_<YYYYmmdd_HHMMSS>.png`` is generated from the current time.

    Returns
    -------
    Path
        Full path of the screenshot file inside the screenshots directory.
    """
    name = filename
    if name is None:
        stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        name = f"screenshot_{stamp}.png"

    target_dir = Path.home().joinpath("Pictures", "screenshots")
    target_dir.mkdir(parents=True, exist_ok=True)
    return target_dir / name
|
||||
|
||||
|
||||
@px.register_fn
def take_screenshot_full(filename: str | None = None) -> None:
    """Capture the screen into the screenshots directory.

    Windows runs a PowerShell script that copies the primary screen into a
    bitmap, macOS calls ``screencapture -x``, and other platforms try
    ``gnome-screenshot`` and fall back to ``scrot`` when it is not installed.

    Parameters
    ----------
    filename : str | None
        File name to use; forwarded to ``get_screenshot_path``.
    """
    output_path = get_screenshot_path(filename)
    target = str(output_path)

    if Constants.IS_WINDOWS:
        ps_script = f"""
Add-Type -AssemblyName System.Windows.Forms
Add-Type -AssemblyName System.Drawing
$screen = [System.Windows.Forms.Screen]::PrimaryScreen
$bounds = $screen.Bounds
$bitmap = New-Object System.Drawing.Bitmap $bounds.Width, $bounds.Height
$graphics = [System.Drawing.Graphics]::FromImage($bitmap)
$graphics.CopyFromScreen($bounds.Location, [System.Drawing.Point]::Empty, $bounds.Size)
$bitmap.Save('{output_path.as_posix()}')
$graphics.Dispose()
$bitmap.Dispose()
"""
        subprocess.run(["powershell", "-Command", ps_script], check=True)
    elif Constants.IS_MACOS:
        subprocess.run(["screencapture", "-x", target], check=True)
    else:
        try:
            subprocess.run(["gnome-screenshot", "-f", target], check=True)
        except FileNotFoundError:
            # gnome-screenshot is not installed; use scrot instead.
            subprocess.run(["scrot", target], check=True)

    print(f"截图已保存: {output_path}")
|
||||
|
||||
|
||||
@px.register_fn
def take_screenshot_area(filename: str | None = None) -> None:
    """Capture a screen region into the screenshots directory.

    macOS calls ``screencapture -i``; other non-Windows platforms try
    ``gnome-screenshot -a`` and fall back to ``scrot -s`` when it is not
    installed.

    NOTE(review): the Windows branch shows a maximised half-transparent form
    and then copies the whole primary screen; no region is selected there.

    Parameters
    ----------
    filename : str | None
        File name to use; forwarded to ``get_screenshot_path``.
    """
    output_path = get_screenshot_path(filename)
    target = str(output_path)

    if Constants.IS_WINDOWS:
        ps_script = f"""
Add-Type -AssemblyName System.Windows.Forms
Add-Type -AssemblyName System.Drawing
$form = New-Object System.Windows.Forms.Form
$form.WindowState = 'Maximized'
$form.FormBorderStyle = 'None'
$form.BackColor = [System.Drawing.Color]::FromArgb(1, 0, 0)
$form.Opacity = 0.5
$form.TopMost = $true
$form.Show()
Start-Sleep -Milliseconds 100
$screen = [System.Windows.Forms.Screen]::PrimaryScreen
$bounds = $screen.Bounds
$bitmap = New-Object System.Drawing.Bitmap $bounds.Width, $bounds.Height
$graphics = [System.Drawing.Graphics]::FromImage($bitmap)
$graphics.CopyFromScreen($bounds.Location, [System.Drawing.Point]::Empty, $bounds.Size)
$form.Close()
$bitmap.Save('{output_path.as_posix()}')
$graphics.Dispose()
$bitmap.Dispose()
"""
        subprocess.run(["powershell", "-Command", ps_script], check=True)
    elif Constants.IS_MACOS:
        subprocess.run(["screencapture", "-i", target], check=True)
    else:
        try:
            subprocess.run(["gnome-screenshot", "-a", "-f", target], check=True)
        except FileNotFoundError:
            # gnome-screenshot is not installed; use scrot instead.
            subprocess.run(["scrot", "-s", target], check=True)

    print(f"截图已保存: {output_path}")
|
||||
@@ -0,0 +1,568 @@
|
||||
"""系统类函数模块.
|
||||
|
||||
聚合 LS-DYNA 计算 (lscalc)、SSH 密钥部署 (sshcopyid)、Python 打包 (packtool)、
|
||||
重置图标缓存 (reset_icon_cache) 的可复用函数. 所有公共函数通过 ``@px.register_fn``
|
||||
注册, 供 YAML 任务编排引用.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import platform
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import urllib.request
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
|
||||
import pyflowx as px
|
||||
from pyflowx.conditions import Constants
|
||||
|
||||
__all__ = [
|
||||
"DEFAULT_BUILD_DIR",
|
||||
"DEFAULT_CACHE_DIR",
|
||||
"DEFAULT_DIST_DIR",
|
||||
"DEFAULT_INPUT_FILE",
|
||||
"DEFAULT_LIB_DIR",
|
||||
"DEFAULT_NCPU",
|
||||
"IGNORE_PATTERNS",
|
||||
"LS_DYNA_COMMANDS",
|
||||
"check_ls_dyna_status",
|
||||
"clean_build_dir",
|
||||
"clear_screen_run",
|
||||
"create_zip_package",
|
||||
"get_ls_dyna_command",
|
||||
"install_embed_python",
|
||||
"pack_dependencies",
|
||||
"pack_source",
|
||||
"pack_wheel",
|
||||
"reset_icon_cache_run",
|
||||
"run_ls_dyna",
|
||||
"run_ls_dyna_mpi",
|
||||
"ssh_copy_id",
|
||||
"taskkill_run",
|
||||
"which_run",
|
||||
]
|
||||
|
||||
# ============================================================================
|
||||
# lscalc 配置
|
||||
# ============================================================================
|
||||
|
||||
# Example LS-DYNA command line per platform key.
LS_DYNA_COMMANDS: dict[str, list[str]] = {
    "windows": ["ls-dyna_mpp", "i=input.k", "ncpu=4"],
    "linux": ["ls-dyna_mpp", "i=input.k", "ncpu=8"],
    "macos": ["ls-dyna_mpp", "i=input.k", "ncpu=4"],
}

# Default input deck name and CPU count.
DEFAULT_INPUT_FILE: str = "input.k"
DEFAULT_NCPU: int = 4

# ============================================================================
# packtool configuration
# ============================================================================

# Default directory names used by the packaging helpers.
DEFAULT_BUILD_DIR: str = ".pypack"
DEFAULT_DIST_DIR: str = "dist"
DEFAULT_LIB_DIR: str = "libs"
DEFAULT_CACHE_DIR: str = ".cache/pypack"

# Names and glob patterns excluded when project sources are copied.
IGNORE_PATTERNS: list[str] = [
    # Python byte-code artefacts
    "__pycache__",
    "*.pyc",
    "*.pyo",
    # Version control, virtualenv and editor folders
    ".git",
    ".venv",
    ".idea",
    ".vscode",
    # Build outputs
    "*.egg-info",
    "dist",
    "build",
    # Tool caches
    ".pytest_cache",
    ".tox",
    ".mypy_cache",
]
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# lscalc 函数
|
||||
# ============================================================================
|
||||
|
||||
|
||||
@px.register_fn
def get_ls_dyna_command(input_file: str, ncpu: int) -> list[str]:
    """Build the LS-DYNA command line.

    Parameters
    ----------
    input_file : str
        Path of the input deck.
    ncpu : int
        Number of CPU cores.

    Returns
    -------
    list[str]
        Argument list ``["ls-dyna_mpp", "i=<input_file>", "ncpu=<ncpu>"]``.
    """
    # The command is the same on every platform, so no platform check is needed.
    return ["ls-dyna_mpp", f"i={input_file}", f"ncpu={ncpu}"]
|
||||
|
||||
|
||||
@px.register_fn
def run_ls_dyna(input_file: str, ncpu: int = DEFAULT_NCPU) -> None:
    """Run an LS-DYNA calculation and report the outcome.

    A missing input file, a missing executable and a non-zero exit status are
    all reported with a printed message rather than an exception.

    Parameters
    ----------
    input_file : str
        Path of the input deck.
    ncpu : int
        Number of CPU cores.
    """
    deck = Path(input_file)
    if not deck.exists():
        print(f"输入文件不存在: {deck}")
        return

    command = get_ls_dyna_command(input_file, ncpu)
    try:
        subprocess.run(command, check=True)
    except FileNotFoundError:
        print("未找到 ls-dyna_mpp 命令")
    except subprocess.CalledProcessError as e:
        print(f"LS-DYNA 计算失败: {e}")
    else:
        print(f"LS-DYNA 计算完成: {input_file}")
|
||||
|
||||
|
||||
@px.register_fn
def run_ls_dyna_mpi(input_file: str, ncpu: int = DEFAULT_NCPU) -> None:
    """Run an LS-DYNA calculation through ``mpirun`` and report the outcome.

    A missing input file, a missing executable and a non-zero exit status are
    all reported with a printed message rather than an exception.

    Parameters
    ----------
    input_file : str
        Path of the input deck.
    ncpu : int
        Number of MPI processes passed to ``mpirun -np``.
    """
    deck = Path(input_file)
    if not deck.exists():
        print(f"输入文件不存在: {deck}")
        return

    command = ["mpirun", "-np", str(ncpu), "ls-dyna_mpp", f"i={input_file}"]
    try:
        subprocess.run(command, check=True)
    except FileNotFoundError:
        print("未找到 mpirun 或 ls-dyna_mpp 命令")
    except subprocess.CalledProcessError as e:
        print(f"LS-DYNA MPI 计算失败: {e}")
    else:
        print(f"LS-DYNA MPI 计算完成: {input_file}")
|
||||
|
||||
|
||||
@px.register_fn
def check_ls_dyna_status() -> None:
    """Print whether any LS-DYNA process is currently running.

    Windows prints the ``tasklist`` output filtered on ``ls-dyna_mpp.exe``;
    other platforms print the PIDs returned by ``pgrep -f ls-dyna`` or a
    message saying that nothing is running.
    """
    if Constants.IS_WINDOWS:
        try:
            listing = subprocess.run(
                ["tasklist", "/fi", "imagename eq ls-dyna_mpp.exe"],
                capture_output=True,
                text=True,
                check=True,
            )
        except subprocess.CalledProcessError as e:
            print(f"检查进程状态失败: {e}")
            return
        print(listing.stdout)
        return

    # pgrep exits non-zero when nothing matches, hence check=False.
    lookup = subprocess.run(
        ["pgrep", "-f", "ls-dyna"],
        capture_output=True,
        text=True,
        check=False,
    )
    pids = lookup.stdout.strip()
    if pids:
        print(f"运行中的 LS-DYNA 进程 PID: {pids}")
    else:
        print("没有运行中的 LS-DYNA 进程")
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# sshcopyid 函数
|
||||
# ============================================================================
|
||||
|
||||
|
||||
@px.register_fn
def ssh_copy_id(
    hostname: str,
    username: str,
    password: str,
    port: int = 22,
    keypath: str = "~/.ssh/id_rsa.pub",
    timeout: int = 30,
) -> None:
    """Append a local SSH public key to a remote ``authorized_keys`` file.

    The key is only appended when its key body is not already present.
    Every failure prints a message and exits the process with status 1.

    Parameters
    ----------
    hostname : str
        Remote host name or IP address.
    username : str
        Remote user name.
    password : str
        Remote user's password, handed to ``sshpass``.
    port : int
        SSH port, default 22.
    keypath : str
        Public key file, default ``~/.ssh/id_rsa.pub``.
    timeout : int
        Timeout in seconds for both the connection and the whole command,
        default 30.
    """
    import shlex

    pub_key_path = Path(keypath).expanduser()
    if not pub_key_path.exists():
        print(f"公钥文件不存在: {pub_key_path}")
        sys.exit(1)

    pub_key = pub_key_path.read_text(encoding="utf-8").strip()
    fields = pub_key.split()
    # A public key line is "<type> <base64 body> [comment]"; reject anything
    # shorter instead of failing with an IndexError below.
    if len(fields) < 2:
        print(f"公钥文件格式无效: {pub_key_path}")
        sys.exit(1)

    # Quote both values: the key comment is free text and may contain quotes
    # or other shell metacharacters that would break the remote command.
    quoted_body = shlex.quote(fields[1])
    quoted_key = shlex.quote(pub_key)
    script = (
        "mkdir -p ~/.ssh && chmod 700 ~/.ssh\n"
        "cd ~/.ssh && touch authorized_keys && chmod 600 authorized_keys\n"
        f"grep -qF {quoted_body} authorized_keys 2>/dev/null"
        f" || echo {quoted_key} >> authorized_keys"
    )

    # Pass the password through SSHPASS (sshpass -e) so it does not show up
    # in the process list the way a "-p <password>" argument does.
    env = {**os.environ, "SSHPASS": password}

    try:
        subprocess.run(
            [
                "sshpass",
                "-e",
                "ssh",
                "-p",
                str(port),
                # NOTE(review): host key checking is disabled, so the remote
                # host's identity is not verified on this connection.
                "-o",
                "StrictHostKeyChecking=no",
                "-o",
                "UserKnownHostsFile=/dev/null",
                "-o",
                f"ConnectTimeout={timeout}",
                f"{username}@{hostname}",
                script,
            ],
            check=True,
            timeout=timeout,
            env=env,
        )
        print(f"SSH 密钥已部署到 {username}@{hostname}:{port}")
    except FileNotFoundError:
        print(f"未找到 sshpass 工具, 请手动执行: ssh-copy-id -p {port} {username}@{hostname}")
        sys.exit(1)
    except subprocess.TimeoutExpired:
        print("SSH 连接超时")
        sys.exit(1)
    except subprocess.CalledProcessError as e:
        print(f"SSH 执行失败: {e}")
        sys.exit(1)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# packtool 函数
|
||||
# ============================================================================
|
||||
|
||||
|
||||
@px.register_fn
def pack_source(project_dir: Path, output_dir: Path) -> None:
    """Copy a project's sources into ``<output_dir>/src/<project name>``.

    The project name comes from ``[project].name`` in ``pyproject.toml`` when
    that file exists and ``tomllib`` is importable; otherwise the directory
    name is used. When the project has a ``src`` directory only that tree is
    copied; otherwise every top-level item is copied except hidden items and
    anything matching ``IGNORE_PATTERNS``.

    Parameters
    ----------
    project_dir : Path
        Project directory.
    output_dir : Path
        Output directory; created when missing.
    """
    import fnmatch

    def _is_ignored(name: str) -> bool:
        """Return True for hidden names and names matching an ignore pattern."""
        if name.startswith("."):
            return True
        # IGNORE_PATTERNS holds globs such as "*.pyc", so match with fnmatch
        # exactly like shutil.ignore_patterns does for nested entries.
        return any(fnmatch.fnmatch(name, pattern) for pattern in IGNORE_PATTERNS)

    output_dir.mkdir(parents=True, exist_ok=True)

    pyproject_file = project_dir / "pyproject.toml"
    project_name = project_dir.name

    if pyproject_file.exists():
        try:
            import tomllib
        except ImportError:
            # tomllib is missing on older interpreters; keep the directory name.
            tomllib = None
        if tomllib is not None:
            data = tomllib.loads(pyproject_file.read_text(encoding="utf-8"))
            project_name = data.get("project", {}).get("name", project_name)

    source_dir = output_dir / "src" / project_name
    source_dir.mkdir(parents=True, exist_ok=True)

    ignore = shutil.ignore_patterns(*IGNORE_PATTERNS)
    src_subdir = project_dir / "src"
    if src_subdir.exists():
        shutil.copytree(src_subdir, source_dir / "src", ignore=ignore, dirs_exist_ok=True)
    else:
        for item in project_dir.iterdir():
            if _is_ignored(item.name):
                continue
            dst_item = source_dir / item.name
            if item.is_dir():
                shutil.copytree(item, dst_item, ignore=ignore, dirs_exist_ok=True)
            else:
                shutil.copy2(item, dst_item)

    print(f"源码打包完成: {source_dir}")
|
||||
|
||||
|
||||
@px.register_fn
def pack_dependencies(lib_dir: Path, dependencies: list[str]) -> None:
    """Install the given requirements into a target directory with pip.

    Parameters
    ----------
    lib_dir : Path
        Directory passed to ``pip install --target``; created when missing.
    dependencies : list[str]
        Requirement specifiers to install. An empty list only prints a
        message.
    """
    lib_dir.mkdir(parents=True, exist_ok=True)

    if len(dependencies) == 0:
        print("没有依赖需要打包")
        return

    pip_command = [
        "pip",
        "install",
        "--target",
        str(lib_dir),
        "--no-compile",
        "--no-warn-script-location",
        *dependencies,
    ]
    subprocess.run(pip_command, check=True)
    print(f"依赖打包完成: {lib_dir}")
|
||||
|
||||
|
||||
@px.register_fn
def pack_wheel(project_dir: Path, output_dir: Path) -> None:
    """Build a wheel for the project with ``pip wheel --no-deps``.

    Parameters
    ----------
    project_dir : Path
        Project directory handed to pip.
    output_dir : Path
        Directory receiving the wheel; created when missing.
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    wheel_dir = str(output_dir)
    project = str(project_dir)
    subprocess.run(
        ["pip", "wheel", "--no-deps", "--wheel-dir", wheel_dir, project],
        check=True,
    )
    print(f"Wheel 打包完成: {output_dir}")
|
||||
|
||||
|
||||
@px.register_fn
def install_embed_python(version: str, output_dir: Path) -> None:
    """Download (with caching) and unpack an embeddable Python distribution.

    The archive is fetched from python.org into ``DEFAULT_CACHE_DIR`` unless
    it is already cached, then extracted into ``output_dir``.

    Parameters
    ----------
    version : str
        Python version. A known ``major.minor`` value (e.g. ``3.11``) maps
        to a pinned patch release; a full ``major.minor.patch`` value is
        used as given; any other value gets ``.0`` appended.
    output_dir : Path
        Directory receiving the extracted files; created when missing.
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    arch = platform.machine().lower()
    if arch in ["x86_64", "amd64"]:
        arch = "amd64"
    elif arch in ["arm64", "aarch64"]:
        arch = "arm64"

    version_map = {
        "3.8": "3.8.10",
        "3.9": "3.9.13",
        "3.10": "3.10.11",
        "3.11": "3.11.9",
        "3.12": "3.12.4",
    }
    full_version = version_map.get(version)
    if full_version is None:
        # Accept an explicit patch release such as "3.11.9" unchanged instead
        # of turning it into the non-existent "3.11.9.0".
        full_version = version if version.count(".") >= 2 else f"{version}.0"

    url = f"https://www.python.org/ftp/python/{full_version}/python-{full_version}-embed-{arch}.zip"

    cache_file = Path(DEFAULT_CACHE_DIR) / f"python-{full_version}-embed-{arch}.zip"
    cache_file.parent.mkdir(parents=True, exist_ok=True)

    if not cache_file.exists():
        print(f"正在下载嵌入式 Python {full_version}...")
        # Download to a side file and rename on success, so an interrupted
        # transfer never leaves a truncated archive that looks like a valid
        # cache entry on the next run.
        partial_file = cache_file.with_name(cache_file.name + ".part")
        try:
            urllib.request.urlretrieve(url, partial_file)
            partial_file.replace(cache_file)
        finally:
            partial_file.unlink(missing_ok=True)
        print(f"下载完成: {cache_file}")

    with zipfile.ZipFile(cache_file, "r") as zf:
        zf.extractall(output_dir)

    print(f"嵌入式 Python 安装完成: {output_dir}")
|
||||
|
||||
|
||||
@px.register_fn
def create_zip_package(source_dir: Path, output_file: Path) -> None:
    """Pack every file under a directory into a deflate-compressed ZIP.

    Archive member names are relative to ``source_dir``. The archive itself
    is skipped when ``output_file`` lies inside ``source_dir``.

    Parameters
    ----------
    source_dir : Path
        Directory whose files are archived recursively.
    output_file : Path
        ZIP file to create; missing parent directories are created.
    """
    output_file.parent.mkdir(parents=True, exist_ok=True)
    archive_path = output_file.resolve()

    with zipfile.ZipFile(output_file, "w", zipfile.ZIP_DEFLATED) as zf:
        for file in source_dir.rglob("*"):
            if not file.is_file():
                continue
            # The archive exists as soon as ZipFile opens it; never add the
            # half-written archive to itself.
            if file.resolve() == archive_path:
                continue
            zf.write(file, file.relative_to(source_dir))

    print(f"ZIP 打包完成: {output_file}")
|
||||
|
||||
|
||||
@px.register_fn
def clean_build_dir(build_dir: Path) -> None:
    """Delete a build directory tree if it exists.

    Parameters
    ----------
    build_dir : Path
        Build directory to remove. A missing directory only prints a message.
    """
    if not build_dir.exists():
        print(f"目录不存在: {build_dir}")
        return

    shutil.rmtree(build_dir)
    print(f"清理完成: {build_dir}")
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# reseticoncache 函数
|
||||
# ============================================================================
|
||||
|
||||
|
||||
@px.register_fn
def reset_icon_cache_run() -> None:
    """Reset the Windows icon cache.

    Steps: kill explorer, delete ``IconCache.db``, delete ``iconcache*`` in
    the Explorer cache directory, restart explorer. On other platforms, or
    when ``LOCALAPPDATA`` is unset, a message is printed and nothing is done.
    Every external command runs with ``check=False``.
    """
    if not Constants.IS_WINDOWS:
        print("reset_icon_cache: 仅在 Windows 上支持")
        return

    local_app_data = os.environ.get("LOCALAPPDATA", "")
    if not local_app_data:
        print("reset_icon_cache: LOCALAPPDATA 环境变量未设置")
        return

    app_data_root = Path(local_app_data)
    icon_cache_db = app_data_root / "IconCache.db"
    explorer_cache_dir = app_data_root / "Microsoft" / "Windows" / "Explorer"
    delete_prefix = ["cmd", "/c", "del", "/a", "/q"]

    print("正在终止 explorer 进程...")
    subprocess.run(["taskkill", "/f", "/im", "explorer.exe"], check=False)

    if icon_cache_db.exists():
        print(f"删除图标缓存: {icon_cache_db}")
        subprocess.run([*delete_prefix, str(icon_cache_db)], check=False)

    if explorer_cache_dir.exists():
        print(f"清理 Explorer 缓存: {explorer_cache_dir}")
        wildcard = str(explorer_cache_dir / "iconcache*")
        subprocess.run([*delete_prefix, wildcard], check=False)

    print("重启 explorer...")
    subprocess.run(["cmd", "/c", "start", "explorer.exe"], check=False)
    print("图标缓存已重置")
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# clearscreen / taskkill / which 函数
|
||||
# ============================================================================
|
||||
|
||||
|
||||
@px.register_fn
def clear_screen_run() -> None:
    """Clear the terminal screen (cross-platform).

    Windows runs ``cls`` through ``cmd /c``; Linux/macOS run ``clear``.
    """
    # ``cls`` is a cmd.exe builtin, not an executable, so it has to be run
    # through the command interpreter.
    cmd = ["cmd", "/c", "cls"] if Constants.IS_WINDOWS else ["clear"]
    subprocess.run(cmd, check=False)
|
||||
|
||||
|
||||
@px.register_fn
def taskkill_run(process_names: list[str]) -> None:
    """Terminate processes by name (cross-platform).

    Windows runs ``taskkill /f /im <name>*``; Linux/macOS run
    ``pkill -f <name>*``. Failures of either command are ignored.

    NOTE(review): pkill presumably treats ``<name>*`` as a regular
    expression rather than a glob — confirm the intended matching.

    Parameters
    ----------
    process_names : list[str]
        Process names to terminate (e.g. ``["chrome.exe", "python"]``).
    """
    kill_prefix: list[str] = (
        ["taskkill", "/f", "/im"] if Constants.IS_WINDOWS else ["pkill", "-f"]
    )

    for name in process_names:
        print(f"终止进程: {name}")
        pattern = f"{name}*"
        subprocess.run(kill_prefix + [pattern], check=False)
|
||||
|
||||
|
||||
@px.register_fn
def which_run(commands: list[str]) -> None:
    """Print where each command resolves on the current system.

    Windows uses ``where``; Linux/macOS use ``which``. Each command prints
    ``<cmd> -> <path>``, or ``<cmd> -> 未找到`` when the lookup exits non-zero.

    Parameters
    ----------
    commands : list[str]
        Command names to look up.
    """
    locator = "where" if Constants.IS_WINDOWS else "which"

    for cmd in commands:
        lookup = subprocess.run([locator, cmd], capture_output=True, text=True, check=False)
        if lookup.returncode != 0:
            print(f"{cmd} -> 未找到")
            continue
        # ``where`` may list several matches, one per line; keep the first.
        first_hit = lookup.stdout.strip().split("\n")[0].strip()
        print(f"{cmd} -> {first_hit}")
|
||||
@@ -0,0 +1,705 @@
|
||||
"""工作流执行性能评估。
|
||||
|
||||
基于 :class:`~pyflowx.report.RunReport` 中已有的 ``started_at`` /
|
||||
``finished_at`` 时间戳进行离线分析,**零运行时开销**——不修改执行流程,
|
||||
不注册回调,不引入额外计时器。
|
||||
|
||||
核心指标
|
||||
--------
|
||||
* **任务级**:每个任务的 wall-clock 耗时、状态、重试次数、等待时间
|
||||
(从最晚完成的依赖结束到本任务开始)。
|
||||
* **图级**:总耗时(wall-clock)、关键路径耗时(理论最短耗时)、
|
||||
并行度效率(关键路径耗时 / 总耗时)。
|
||||
* **关键路径**:从源点到汇点的最长依赖路径,识别真正的串行瓶颈。
|
||||
* **并行度**:基于时间线重叠计算瞬时并行度,给出平均并行度与峰值并行度。
|
||||
* **瓶颈识别**:按耗时排序的 Top-N 任务。
|
||||
|
||||
设计原则
|
||||
--------
|
||||
* 数据来源于 ``RunReport`` + ``Graph``,无副作用。
|
||||
* 计算复杂度 O(V+E):拓扑排序 + 单次松弛,适合大规模图。
|
||||
* 所有时间戳用 ``datetime``,与 :class:`TaskResult` 保持一致。
|
||||
|
||||
快速上手
|
||||
--------
|
||||
import pyflowx as px
|
||||
|
||||
report = px.run(graph)
|
||||
profile = px.ProfileReport.from_report(report, graph)
|
||||
print(profile.describe())
|
||||
bottlenecks = profile.top_bottlenecks(3)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
__all__ = [
|
||||
"ProfileReport",
|
||||
"TaskProfile",
|
||||
]
|
||||
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from .graph import Graph
|
||||
from .report import RunReport
|
||||
from .task import TaskResult, TaskStatus
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class TaskProfile:
    """Performance profile of a single task.

    Attributes
    ----------
    name:
        Task name.
    status:
        Final status of the task.
    duration:
        Wall-clock execution time in seconds.
    attempts:
        Number of attempts recorded for the task.
    wait_time:
        Seconds between the latest finish among the task's dependencies and
        the task's own start; 0.0 when that cannot be measured.
    is_on_critical_path:
        Whether the task lies on the critical path.
    deps:
        Names of the tasks this task depends on.
    """

    name: str
    status: TaskStatus
    duration: float
    attempts: int
    wait_time: float
    is_on_critical_path: bool
    deps: tuple[str, ...]

    def to_dict(self) -> dict[str, Any]:
        """Return a JSON-friendly dictionary; times are rounded to 6 places."""
        duration_seconds = round(self.duration, 6)
        wait_time_seconds = round(self.wait_time, 6)
        return dict(
            name=self.name,
            status=self.status.value,
            duration_seconds=duration_seconds,
            attempts=self.attempts,
            wait_time_seconds=wait_time_seconds,
            is_on_critical_path=self.is_on_critical_path,
            deps=list(self.deps),
        )
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class ProfileReport:
|
||||
"""工作流执行的性能剖面报告。
|
||||
|
||||
通过 :meth:`from_report` 从 :class:`RunReport` + :class:`Graph` 构建。
|
||||
所有字段在构造时一次性计算完毕,后续访问为 O(1)。
|
||||
"""
|
||||
|
||||
tasks: tuple[TaskProfile, ...]
|
||||
"""所有任务的性能剖面(按拓扑序)。"""
|
||||
|
||||
total_duration: float
|
||||
"""整次运行的 wall-clock 耗时(秒)。"""
|
||||
|
||||
critical_path_duration: float
|
||||
"""关键路径耗时(秒):从最早任务开始到最晚任务结束的最长依赖路径。"""
|
||||
|
||||
critical_path: tuple[str, ...]
|
||||
"""关键路径上的任务名序列(按执行顺序)。"""
|
||||
|
||||
avg_parallelism: float
|
||||
"""平均并行度 = 任务总耗时 / wall-clock 总耗时。"""
|
||||
|
||||
peak_parallelism: int
|
||||
"""峰值并行度:任一时刻同时运行的任务数最大值。"""
|
||||
|
||||
parallelism_efficiency: float
|
||||
"""并行度效率 = 关键路径耗时 / wall-clock 总耗时。``1.0`` 表示完全串行,
|
||||
越大表示并行化收益越低(瓶颈在关键路径上)。"""
|
||||
|
||||
# ------------------------------------------------------------------ #
|
||||
# 构建
|
||||
# ------------------------------------------------------------------ #
|
||||
@classmethod
def from_report(cls, report: RunReport, graph: Graph) -> ProfileReport:
    """Build a performance profile from a run report and its graph.

    Parameters
    ----------
    report:
        Finished :class:`RunReport` whose results carry
        ``started_at``/``finished_at`` timestamps.
    graph:
        Matching :class:`Graph`, used for dependencies and critical-path
        analysis.

    Returns
    -------
    ProfileReport
        Report with per-task profiles, total and critical-path durations,
        and parallelism figures. Efficiency is 0.0 when the total duration
        is not positive.
    """
    unmarked = cls._build_task_profiles(report, graph)
    wall_clock = cls._calc_total_duration(report)
    path_names, path_seconds = cls._calc_critical_path(graph, report)
    mean_parallel, max_parallel = cls._calc_parallelism(report)

    if wall_clock > 0:
        efficiency = path_seconds / wall_clock
    else:
        efficiency = 0.0

    # TaskProfile is frozen, so rebuild each profile with the flag set.
    on_path = set(path_names)
    marked: list[TaskProfile] = []
    for profile in unmarked:
        marked.append(
            TaskProfile(
                name=profile.name,
                status=profile.status,
                duration=profile.duration,
                attempts=profile.attempts,
                wait_time=profile.wait_time,
                is_on_critical_path=profile.name in on_path,
                deps=profile.deps,
            )
        )

    return cls(
        tasks=tuple(marked),
        total_duration=wall_clock,
        critical_path_duration=path_seconds,
        critical_path=path_names,
        avg_parallelism=mean_parallel,
        peak_parallelism=max_parallel,
        parallelism_efficiency=efficiency,
    )
|
||||
|
||||
@staticmethod
def _build_task_profiles(report: RunReport, graph: Graph) -> tuple[TaskProfile, ...]:
    """Build one profile per result in the report, in the report's order.

    Every profile starts with ``is_on_critical_path=False``; the caller sets
    the flag afterwards. A task with no spec in the graph gets empty deps.
    """

    def _profile_for(task_name: str, outcome: TaskResult[Any]) -> TaskProfile:
        spec = graph.specs.get(task_name)
        hard_deps = () if spec is None else tuple(spec.depends_on)
        # A missing (or zero) duration is reported as 0.0.
        seconds = outcome.duration or 0.0
        waited = ProfileReport._calc_wait_time(outcome, hard_deps, report)
        return TaskProfile(
            name=task_name,
            status=outcome.status,
            duration=seconds,
            attempts=outcome.attempts,
            wait_time=waited,
            is_on_critical_path=False,
            deps=hard_deps,
        )

    return tuple(
        _profile_for(task_name, outcome)
        for task_name, outcome in report.results.items()
    )
|
||||
|
||||
@staticmethod
def _calc_wait_time(
    result: TaskResult[Any],
    deps: tuple[str, ...],
    report: RunReport,
) -> float:
    """Return the seconds between the latest dependency finishing and this task starting.

    Returns ``0.0`` when the task has no dependencies, has no ``started_at``,
    is ``SKIPPED``, or when none of its dependencies has a recorded
    ``finished_at``. A negative gap is clamped to ``0.0``.
    """
    if not deps:
        return 0.0
    if result.started_at is None:
        return 0.0
    if result.status == TaskStatus.SKIPPED:
        return 0.0

    # Only dependencies that are present in the report and have finished count.
    parent_outcomes = (report.results.get(dep) for dep in deps)
    finished_at = [
        outcome.finished_at
        for outcome in parent_outcomes
        if outcome is not None and outcome.finished_at is not None
    ]
    if not finished_at:
        return 0.0

    gap = (result.started_at - max(finished_at)).total_seconds()
    return max(0.0, gap)
|
||||
|
||||
@staticmethod
def _calc_total_duration(report: RunReport) -> float:
    """Return the wall-clock span in seconds from the earliest start to the latest finish.

    Results without a ``started_at`` or ``finished_at`` are ignored for the
    respective end of the span. Returns ``0.0`` when no start or no finish
    timestamp is available at all.
    """
    outcomes = list(report.results.values())
    began = [o.started_at for o in outcomes if o.started_at is not None]
    ended = [o.finished_at for o in outcomes if o.finished_at is not None]
    if not (began and ended):
        return 0.0
    return (max(ended) - min(began)).total_seconds()
|
||||
|
||||
@staticmethod
def _calc_critical_path(graph: Graph, report: RunReport) -> tuple[tuple[str, ...], float]:
    """Find the longest dependency chain, weighted by recorded task durations.

    Tasks are visited in the order given by ``graph.layers()``. Each task's
    finish value is its own duration plus the largest finish value among its
    dependencies. A task with no recorded (or a falsy) duration counts as
    ``0.0``.

    Returns
    -------
    tuple[tuple[str, ...], float]
        The task names along the chain in execution order and the chain's
        total duration. ``((), 0.0)`` is returned when ``graph.layers()``
        raises or yields no tasks.
    """
    cost: dict[str, float] = {
        task: (outcome.duration or 0.0) for task, outcome in report.results.items()
    }

    # Deliberate best-effort: a graph that cannot be layered yields an empty path.
    try:
        ordered_layers = graph.layers()
    except Exception:
        return (), 0.0

    finish_at: dict[str, float] = {}
    came_from: dict[str, str | None] = {}

    for layer in ordered_layers:
        for task in layer:
            spec = graph.specs.get(task)
            parents = () if spec is None else spec.depends_on
            if not parents:
                finish_at[task] = cost.get(task, 0.0)
                came_from[task] = None
                continue
            chosen: str | None = None
            base = 0.0
            for parent in parents:
                candidate = finish_at.get(parent, 0.0)
                # ``>=`` means that on a tie the later-listed dependency wins.
                if candidate >= base:
                    chosen, base = parent, candidate
            finish_at[task] = base + cost.get(task, 0.0)
            came_from[task] = chosen

    if not finish_at:
        return (), 0.0

    # The chain ends at the task with the largest finish value.
    tail = max(finish_at, key=finish_at.__getitem__)

    # Walk predecessor links back to a root, then flip into execution order.
    trail: list[str] = []
    cursor: str | None = tail
    while cursor is not None:
        trail.append(cursor)
        cursor = came_from.get(cursor)

    return tuple(reversed(trail)), finish_at[tail]
|
||||
|
||||
@staticmethod
def _calc_parallelism(report: RunReport) -> tuple[float, int]:
    """Compute average and peak parallelism with a sweep over start/finish events.

    Every task with both timestamps, a status other than ``SKIPPED`` and a
    finish strictly after its start contributes a ``+1`` event at its start
    and a ``-1`` event at its finish. The events are swept in time order.

    Returns
    -------
    tuple[float, int]
        ``(avg_parallelism, peak_parallelism)``. The average is the summed
        running time divided by the span between the first and last event.
        ``(0.0, 0)`` is returned when no task qualifies.
    """
    ticks: list[tuple[float, int]] = []  # (timestamp, +1 / -1)
    for outcome in report.results.values():
        if (
            outcome.started_at is None
            or outcome.finished_at is None
            or outcome.status == TaskStatus.SKIPPED
        ):
            continue
        began = outcome.started_at.timestamp()
        ended = outcome.finished_at.timestamp()
        if ended > began:
            ticks.append((began, 1))
            ticks.append((ended, -1))

    if not ticks:
        return 0.0, 0

    # Natural tuple order puts -1 before +1 at the same instant, so a task that
    # ends exactly when another starts does not create a false peak.
    ticks.sort()

    running = 0
    busiest = 0
    busy_seconds = 0.0  # area under the "running tasks" curve
    cursor = ticks[0][0]
    for moment, step in ticks:
        if moment > cursor:
            busy_seconds += running * (moment - cursor)
        running += step
        busiest = max(busiest, running)
        cursor = moment

    span = ticks[-1][0] - ticks[0][0]
    mean = busy_seconds / span if span > 0 else 0.0
    return mean, busiest
|
||||
|
||||
# ------------------------------------------------------------------ #
|
||||
# 查询
|
||||
# ------------------------------------------------------------------ #
|
||||
def task(self, name: str) -> TaskProfile:
    """Return the profile whose ``name`` matches; raise ``KeyError(name)`` if none does."""
    found = next((profile for profile in self.tasks if profile.name == name), None)
    if found is None:
        raise KeyError(name)
    return found
|
||||
|
||||
def top_bottlenecks(self, n: int = 5) -> tuple[TaskProfile, ...]:
    """Return the ``n`` longest-running tasks, longest first.

    Parameters
    ----------
    n:
        How many profiles to return. ``n <= 0`` yields an empty tuple.
    """
    if n > 0:
        ranked = sorted(self.tasks, key=lambda profile: profile.duration, reverse=True)
        return tuple(ranked[:n])
    return ()
|
||||
|
||||
def critical_tasks(self) -> tuple[TaskProfile, ...]:
    """Return the profiles of tasks on the critical path, in path order."""
    # Position of each name within the path; doubles as the membership test.
    position = {task_name: index for index, task_name in enumerate(self.critical_path)}
    hits = [profile for profile in self.tasks if profile.name in position]
    hits.sort(key=lambda profile: position[profile.name])
    return tuple(hits)
|
||||
|
||||
def failed_tasks(self) -> tuple[TaskProfile, ...]:
    """Return the profiles whose status is ``TaskStatus.FAILED``."""
    return tuple(filter(lambda profile: profile.status == TaskStatus.FAILED, self.tasks))
|
||||
|
||||
def skipped_tasks(self) -> tuple[TaskProfile, ...]:
    """Return the profiles whose status is ``TaskStatus.SKIPPED``."""
    return tuple(filter(lambda profile: profile.status == TaskStatus.SKIPPED, self.tasks))
|
||||
|
||||
# ------------------------------------------------------------------ #
|
||||
# 输出
|
||||
# ------------------------------------------------------------------ #
|
||||
def to_dict(self) -> dict[str, Any]:
    """Return the profile as a dictionary of plain values.

    Durations are rounded to 6 decimal places and the parallelism figures to
    4. ``bottlenecks`` holds the ``to_dict()`` form of ``top_bottlenecks(5)``.
    """
    payload: dict[str, Any] = {}
    payload["tasks"] = [profile.to_dict() for profile in self.tasks]
    payload["total_duration_seconds"] = round(self.total_duration, 6)
    payload["critical_path_duration_seconds"] = round(self.critical_path_duration, 6)
    payload["critical_path"] = list(self.critical_path)
    payload["avg_parallelism"] = round(self.avg_parallelism, 4)
    payload["peak_parallelism"] = self.peak_parallelism
    payload["parallelism_efficiency"] = round(self.parallelism_efficiency, 4)
    payload["bottlenecks"] = [profile.to_dict() for profile in self.top_bottlenecks(5)]
    return payload
|
||||
|
||||
def to_html(self) -> str:
    """Return the profile rendered as an HTML document.

    Delegates to the module-level ``_render_html``, which fills the page
    template with the metric cards, the critical path, the timeline (Gantt)
    rows, the top-bottleneck table and the all-tasks table.
    """
    return _render_html(self)
|
||||
|
||||
def describe(self) -> str:
    """Return a plain-text, multi-section summary of the profile.

    The sections are, in order: graph-level metrics, the critical path, the
    top-5 bottleneck table and the table of all tasks. An empty path or an
    empty table is rendered as a single placeholder line.
    """
    rule = "=" * 70
    header = f" {'任务':<30} {'耗时':>10} {'等待':>10} {'尝试':>6} {'关键路径':>8} {'状态':>8}"
    divider = f" {'-' * 30} {'-' * 10} {'-' * 10} {'-' * 6} {'-' * 8} {'-' * 8}"

    def _table(profiles: tuple[TaskProfile, ...]) -> list[str]:
        # Shared by both tables: header + divider + one row per profile.
        if not profiles:
            return [" (无)"]
        block_lines = [header, divider]
        for t in profiles:
            mark = "✓" if t.is_on_critical_path else ""
            block_lines.append(
                f" {t.name:<30} {t.duration:>9.3f}s {t.wait_time:>9.3f}s {t.attempts:>6} "
                f"{mark:>8} {t.status.value:>8}",
            )
        return block_lines

    if self.critical_path:
        path_line = f" {' -> '.join(self.critical_path)}"
    else:
        path_line = " (无)"

    top = self.top_bottlenecks(5)

    report_lines: list[str] = [
        rule,
        "PyFlowX 性能剖面报告",
        rule,
        "",
        "【图级指标】",
        f" 总耗时 (wall-clock): {self.total_duration:.3f}s",
        f" 关键路径耗时: {self.critical_path_duration:.3f}s",
        f" 平均并行度: {self.avg_parallelism:.2f}",
        f" 峰值并行度: {self.peak_parallelism}",
        f" 并行度效率: {self.parallelism_efficiency:.2%}",
        f" 任务总数: {len(self.tasks)}",
        "",
        "【关键路径】",
        path_line,
        "",
        f"【Top {len(top)} 瓶颈任务】",
        *_table(top),
        "",
        "【全部任务】",
        *_table(self.tasks),
        "",
        rule,
    ]
    return "\n".join(report_lines)
|
||||
|
||||
def __repr__(self) -> str:
    """Return a one-line summary: task count, durations and parallelism figures."""
    fields = ", ".join(
        [
            f"tasks={len(self.tasks)}",
            f"total={self.total_duration:.3f}s",
            f"critical={self.critical_path_duration:.3f}s",
            f"avg_par={self.avg_parallelism:.2f}",
            f"peak_par={self.peak_parallelism}",
        ]
    )
    return f"ProfileReport({fields})"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# HTML 渲染(私有,零依赖)
|
||||
# ---------------------------------------------------------------------- #
|
||||
_HTML_TEMPLATE = """<!DOCTYPE html>
|
||||
<html lang="zh-CN">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<title>PyFlowX 性能剖面报告</title>
|
||||
<style>
|
||||
:root {{
|
||||
--bg: #f5f5f7;
|
||||
--card: #ffffff;
|
||||
--border: #d2d2d7;
|
||||
--text: #1d1d1f;
|
||||
--muted: #6e6e73;
|
||||
--accent: #0071e3;
|
||||
--success: #34c759;
|
||||
--warning: #ff9f0a;
|
||||
--danger: #ff3b30;
|
||||
--critical: #af52de;
|
||||
}}
|
||||
* {{ box-sizing: border-box; }}
|
||||
body {{
|
||||
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
|
||||
margin: 0;
|
||||
padding: 24px;
|
||||
background: var(--bg);
|
||||
color: var(--text);
|
||||
line-height: 1.5;
|
||||
}}
|
||||
h1 {{ margin: 0 0 8px; font-size: 28px; }}
|
||||
h2 {{ margin: 32px 0 12px; font-size: 20px; border-bottom: 1px solid var(--border); padding-bottom: 6px; }}
|
||||
.subtitle {{ color: var(--muted); margin: 0 0 24px; font-size: 14px; }}
|
||||
.cards {{ display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 12px; margin-bottom: 8px; }}
|
||||
.card {{
|
||||
background: var(--card);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: 10px;
|
||||
padding: 16px;
|
||||
}}
|
||||
.card .label {{ font-size: 12px; color: var(--muted); margin-bottom: 4px; text-transform: uppercase; letter-spacing: 0.5px; }}
|
||||
.card .value {{ font-size: 22px; font-weight: 600; }}
|
||||
.card .unit {{ font-size: 13px; color: var(--muted); margin-left: 2px; }}
|
||||
.critical-path {{
|
||||
background: var(--card);
|
||||
border: 1px solid var(--border);
|
||||
border-left: 4px solid var(--critical);
|
||||
border-radius: 10px;
|
||||
padding: 16px;
|
||||
margin-bottom: 8px;
|
||||
}}
|
||||
.critical-path .label {{ font-size: 12px; color: var(--muted); margin-bottom: 8px; text-transform: uppercase; letter-spacing: 0.5px; }}
|
||||
.critical-path .chain {{ font-family: ui-monospace, "SF Mono", Menlo, monospace; font-size: 13px; word-break: break-all; }}
|
||||
.critical-path .arrow {{ color: var(--critical); margin: 0 6px; font-weight: 600; }}
|
||||
/* 甘特图 */
|
||||
.gantt {{
|
||||
background: var(--card);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: 10px;
|
||||
padding: 16px;
|
||||
overflow-x: auto;
|
||||
}}
|
||||
.gantt-row {{ display: flex; align-items: center; margin-bottom: 6px; min-width: 600px; }}
|
||||
.gantt-label {{ width: 200px; flex-shrink: 0; font-size: 13px; font-family: ui-monospace, monospace; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }}
|
||||
.gantt-track {{ flex: 1; height: 22px; background: #f0f0f3; border-radius: 4px; position: relative; }}
|
||||
.gantt-bar {{ position: absolute; height: 100%; border-radius: 4px; min-width: 2px; }}
|
||||
.gantt-bar.success {{ background: var(--success); }}
|
||||
.gantt-bar.failed {{ background: var(--danger); }}
|
||||
.gantt-bar.skipped {{ background: var(--muted); }}
|
||||
.gantt-bar.critical {{ box-shadow: 0 0 0 2px var(--critical) inset; }}
|
||||
.gantt-bar:hover {{ opacity: 0.85; }}
|
||||
.gantt-tooltip {{ position: absolute; bottom: 100%; left: 50%; transform: translateX(-50%); background: #1d1d1f; color: #fff; padding: 4px 8px; border-radius: 4px; font-size: 11px; white-space: nowrap; opacity: 0; pointer-events: none; transition: opacity 0.15s; }}
|
||||
.gantt-bar:hover .gantt-tooltip {{ opacity: 1; }}
|
||||
/* 表格 */
|
||||
table {{ width: 100%; border-collapse: collapse; background: var(--card); border-radius: 10px; overflow: hidden; border: 1px solid var(--border); }}
|
||||
th, td {{ padding: 10px 12px; text-align: left; font-size: 13px; }}
|
||||
th {{ background: #fafafa; font-weight: 600; color: var(--muted); text-transform: uppercase; font-size: 11px; letter-spacing: 0.5px; }}
|
||||
tbody tr {{ border-top: 1px solid var(--border); }}
|
||||
tbody tr:hover {{ background: #fafafa; }}
|
||||
td.num {{ font-family: ui-monospace, monospace; text-align: right; }}
|
||||
.badge {{ display: inline-block; padding: 2px 8px; border-radius: 10px; font-size: 11px; font-weight: 500; }}
|
||||
.badge.success {{ background: rgba(52,199,89,0.15); color: var(--success); }}
|
||||
.badge.failed {{ background: rgba(255,59,48,0.15); color: var(--danger); }}
|
||||
.badge.skipped {{ background: rgba(110,110,115,0.15); color: var(--muted); }}
|
||||
.star {{ color: var(--critical); font-weight: 700; }}
|
||||
.footer {{ margin-top: 32px; color: var(--muted); font-size: 12px; text-align: center; }}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<h1>PyFlowX 性能剖面报告</h1>
|
||||
<p class="subtitle">由 <code>pxp</code> 生成 · {generated_at}</p>
|
||||
|
||||
<h2>图级指标</h2>
|
||||
<div class="cards">
|
||||
<div class="card"><div class="label">总耗时</div><div class="value">{total_duration:.3f}<span class="unit">s</span></div></div>
|
||||
<div class="card"><div class="label">关键路径耗时</div><div class="value">{critical_duration:.3f}<span class="unit">s</span></div></div>
|
||||
<div class="card"><div class="label">平均并行度</div><div class="value">{avg_par:.2f}</div></div>
|
||||
<div class="card"><div class="label">峰值并行度</div><div class="value">{peak_par}</div></div>
|
||||
<div class="card"><div class="label">并行度效率</div><div class="value">{efficiency:.1f}<span class="unit">%</span></div></div>
|
||||
<div class="card"><div class="label">任务总数</div><div class="value">{task_count}</div></div>
|
||||
</div>
|
||||
|
||||
<h2>关键路径</h2>
|
||||
<div class="critical-path">
|
||||
<div class="label">最长依赖路径(串行瓶颈)</div>
|
||||
<div class="chain">{critical_chain}</div>
|
||||
</div>
|
||||
|
||||
<h2>任务时间线</h2>
|
||||
<div class="gantt">
|
||||
{gantt_rows}
|
||||
</div>
|
||||
|
||||
<h2>Top 瓶颈任务</h2>
|
||||
<table>
|
||||
<thead><tr><th>任务</th><th class="num">耗时</th><th class="num">等待</th><th class="num">尝试</th><th>关键路径</th><th>状态</th></tr></thead>
|
||||
<tbody>
|
||||
{bottleneck_rows}
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
<h2>全部任务</h2>
|
||||
<table>
|
||||
<thead><tr><th>任务</th><th class="num">耗时</th><th class="num">等待</th><th class="num">尝试</th><th>关键路径</th><th>状态</th><th>依赖</th></tr></thead>
|
||||
<tbody>
|
||||
{all_task_rows}
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
<div class="footer">由 PyFlowX · pxp 生成</div>
|
||||
</body>
|
||||
</html>"""
|
||||
|
||||
|
||||
def _status_badge(status: TaskStatus) -> str:
    """Return a ``<span>`` badge that uses ``status.value`` as both CSS class and label."""
    label = status.value
    return '<span class="badge {0}">{0}</span>'.format(label)
|
||||
|
||||
|
||||
def _format_critical_chain(path: tuple[str, ...]) -> str:
    """Render the critical path as an HTML chain of bold names joined by arrows.

    Task names are HTML-escaped before interpolation, so a name containing
    ``<``, ``>``, ``&`` or quotes cannot break or inject markup. Names
    without such characters render exactly as before.

    Parameters
    ----------
    path:
        Task names in execution order.

    Returns
    -------
    str
        The HTML fragment, or a muted placeholder when ``path`` is empty.
    """
    from html import escape  # local import, as _render_html does for datetime

    if not path:
        return '<em style="color:var(--muted)">(无)</em>'
    arrow = '<span class="arrow">→</span>'
    return arrow.join(f"<strong>{escape(name)}</strong>" for name in path)
|
||||
|
||||
|
||||
def _render_gantt(profile: ProfileReport) -> str:
    """Render the timeline (Gantt) rows as HTML, one row per visible task.

    Start offsets are rebuilt from the profiles rather than read from
    timestamps: ``start = max(end of dependencies) + wait_time``. Offsets are
    then normalised so the earliest visible start is 0% and the latest
    visible end is 100%. ``SKIPPED`` tasks and tasks with a non-positive
    duration are not drawn.

    Task names are HTML-escaped wherever they are interpolated (the ``title``
    attribute, the label text and the tooltip), so names containing quotes or
    angle brackets cannot break the markup.

    Returns
    -------
    str
        The rows joined by newlines, or a muted placeholder ``<div>`` when no
        task is visible.
    """
    from html import escape  # local import, as _render_html does for datetime

    visible = [t for t in profile.tasks if t.status != TaskStatus.SKIPPED and t.duration > 0]
    if not visible:
        return '<div style="color:var(--muted);padding:12px;">(无时间线数据)</div>'

    # Rebuild relative start times: start[name] = max(end[dep]) + wait_time.
    # NOTE(review): this relies on profile.tasks listing dependencies before
    # their dependents; a dependency not seen yet contributes 0.0.
    start: dict[str, float] = {}
    end: dict[str, float] = {}
    for t in profile.tasks:
        if t.status == TaskStatus.SKIPPED:
            continue
        dep_end = 0.0
        for dep in t.deps:
            dep_end = max(dep_end, end.get(dep, 0.0))
        s = dep_end + t.wait_time
        start[t.name] = s
        end[t.name] = s + t.duration

    # Normalise: earliest visible start maps to 0, latest visible end to 100%.
    min_start = min(start.get(t.name, 0.0) for t in visible)
    max_end = max(end.get(t.name, 0.0) for t in visible)
    span = max_end - min_start
    if span <= 0:
        span = 1.0  # avoid dividing by zero for a degenerate timeline

    rows: list[str] = []
    for t in visible:
        s = start.get(t.name, 0.0) - min_start
        left_pct = (s / span) * 100
        width_pct = (t.duration / span) * 100
        cls = t.status.value
        critical_cls = " critical" if t.is_on_critical_path else ""
        safe_name = escape(t.name)
        tooltip = escape(f"{t.name}: {t.duration:.3f}s @ +{s:.3f}s ({t.status.value})")
        rows.append(
            f' <div class="gantt-row">'
            f'<div class="gantt-label" title="{safe_name}">{safe_name}</div>'
            f'<div class="gantt-track">'
            f'<div class="gantt-bar {cls}{critical_cls}" style="left:{left_pct:.2f}%;width:{width_pct:.2f}%">'
            f'<span class="gantt-tooltip">{tooltip}</span>'
            f"</div></div></div>"
        )
    return "\n".join(rows)
|
||||
|
||||
|
||||
def _render_task_row(t: TaskProfile, show_deps: bool = False) -> str:
    """Render one ``<tr>`` of a task table.

    The task name and dependency names are HTML-escaped before interpolation
    so that names containing ``<``, ``>`` or ``&`` cannot break the table
    markup. Names without such characters render exactly as before.

    Parameters
    ----------
    t:
        The task profile to render.
    show_deps:
        When true, append a trailing cell listing the task's dependencies
        (comma-separated; empty when there are none).

    Returns
    -------
    str
        The HTML for the row.
    """
    from html import escape  # local import, as _render_html does for datetime

    star = '<span class="star">★</span>' if t.is_on_critical_path else ""
    deps = escape(", ".join(t.deps)) if show_deps and t.deps else ""
    deps_cell = f"<td>{deps}</td>" if show_deps else ""
    return (
        f" <tr>"
        f"<td><code>{escape(t.name)}</code></td>"
        f'<td class="num">{t.duration:.3f}s</td>'
        f'<td class="num">{t.wait_time:.3f}s</td>'
        f'<td class="num">{t.attempts}</td>'
        f"<td>{star}</td>"
        f"<td>{_status_badge(t.status)}</td>"
        f"{deps_cell}"
        f"</tr>"
    )
|
||||
|
||||
|
||||
def _render_html(profile: ProfileReport) -> str:
    """Fill ``_HTML_TEMPLATE`` with the values of ``profile`` and return the page.

    Both tables fall back to a single placeholder row when they would
    otherwise be empty. The "generated at" stamp is the current local time.
    """
    from datetime import datetime as _dt

    top_rows = [_render_task_row(t) for t in profile.top_bottlenecks(5)]
    if top_rows:
        bottleneck_html = "\n".join(top_rows)
    else:
        bottleneck_html = ' <tr><td colspan="6" style="color:var(--muted);">(无)</td></tr>'

    every_row = [_render_task_row(t, show_deps=True) for t in profile.tasks]
    if every_row:
        all_tasks_html = "\n".join(every_row)
    else:
        all_tasks_html = ' <tr><td colspan="7" style="color:var(--muted);">(无)</td></tr>'

    fields = {
        "generated_at": _dt.now().strftime("%Y-%m-%d %H:%M:%S"),
        "total_duration": profile.total_duration,
        "critical_duration": profile.critical_path_duration,
        "avg_par": profile.avg_parallelism,
        "peak_par": profile.peak_parallelism,
        # The template prints this with a "%" unit, so convert the ratio.
        "efficiency": profile.parallelism_efficiency * 100,
        "task_count": len(profile.tasks),
        "critical_chain": _format_critical_chain(profile.critical_path),
        "gantt_rows": _render_gantt(profile),
        "bottleneck_rows": bottleneck_html,
        "all_task_rows": all_tasks_html,
    }
    return _HTML_TEMPLATE.format(**fields)
|
||||
@@ -0,0 +1,159 @@
|
||||
"""函数注册表.
|
||||
|
||||
提供全局函数注册机制, 供 YAML 任务编排通过 ``fn`` 字段引用 Python 函数.
|
||||
|
||||
使用方式
|
||||
--------
|
||||
import pyflowx as px
|
||||
|
||||
@px.register_fn("pack_source")
|
||||
def pack_source(project_dir, output_dir):
|
||||
...
|
||||
|
||||
# YAML 中引用:
|
||||
# jobs:
|
||||
# pack:
|
||||
# fn: pack_source
|
||||
# args: ["./project", "./dist"]
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from typing import Any, Callable, TypeVar, overload
|
||||
|
||||
if sys.version_info >= (3, 10):
|
||||
from typing import ParamSpec
|
||||
else:
|
||||
from typing_extensions import ParamSpec # pragma: no cover
|
||||
|
||||
# Names exported by ``from <module> import *``.
__all__ = ["FnRegistry", "get_fn", "has_fn", "register_fn"]

# Type variables used in the decorator signatures so that type checkers see a
# decorated callable with its original parameters and return type.
P = ParamSpec("P")
T = TypeVar("T")

# Module-wide mapping from registered name to callable; every function and
# ``FnRegistry`` method in this module reads or writes this one dict.
_REGISTRY: dict[str, Callable[..., Any]] = {}
|
||||
|
||||
|
||||
@overload
def register_fn(name: Callable[P, T]) -> Callable[P, T]: ...


@overload
def register_fn(name: str | None = None) -> Callable[[Callable[P, T]], Callable[P, T]]: ...


def register_fn(name: str | Callable[..., Any] | None = None) -> Callable[..., Any]:
    """Register a function in the module-wide registry; usable as a decorator.

    Two forms are supported::

        @register_fn            # registered under the function's __name__
        def my_func(): ...

        @register_fn("custom")  # registered under an explicit name
        def my_func(): ...

    Parameters
    ----------
    name : str | Callable | None
        The registration name, or the function itself when used as a bare
        decorator. ``None`` means "use the function's ``__name__``".

    Returns
    -------
    Callable
        The function unchanged (bare form), or a decorator that registers and
        returns the function it is applied to.

    Raises
    ------
    ValueError
        If the name is already registered, or no name was given and the
        callable has no ``__name__``.
    """

    def _store(fn: Callable[..., Any], explicit: str | None) -> Callable[..., Any]:
        # Single registration path shared by both decorator forms.
        key = explicit if explicit is not None else getattr(fn, "__name__", None)
        if key is None:
            raise ValueError("无法推断函数名, 请显式提供 name 参数")
        if key in _REGISTRY:
            raise ValueError(f"函数 {key!r} 已注册")
        _REGISTRY[key] = fn
        return fn

    # Bare ``@register_fn``: ``name`` is the decorated callable itself.
    if callable(name):
        return _store(name, None)

    def decorator(fn: Callable[P, T]) -> Callable[P, T]:
        return _store(fn, name)

    return decorator
|
||||
|
||||
|
||||
def get_fn(name: str) -> Callable[..., Any]:
    """Look up a registered function by name.

    Parameters
    ----------
    name : str
        The name the function was registered under.

    Returns
    -------
    Callable
        The registered function.

    Raises
    ------
    KeyError
        If nothing is registered under ``name``.
    """
    if name in _REGISTRY:
        return _REGISTRY[name]
    raise KeyError(f"函数 {name!r} 未注册")
|
||||
|
||||
|
||||
def has_fn(name: str) -> bool:
    """Report whether a function is registered under ``name``.

    Parameters
    ----------
    name : str
        The registration name to look for.

    Returns
    -------
    bool
        ``True`` if ``name`` is a key of the registry, ``False`` otherwise.
    """
    return name in _REGISTRY
|
||||
|
||||
|
||||
class FnRegistry:
    """Object-oriented access to the module-wide function registry."""

    @staticmethod
    def register(name: str | None = None) -> Callable[[Callable[..., T]], Callable[..., T]]:
        """Return a registration decorator; delegates to :func:`register_fn`."""
        return register_fn(name)

    @staticmethod
    def get(name: str) -> Callable[..., Any]:
        """Return the function registered under ``name``; delegates to :func:`get_fn`."""
        return get_fn(name)

    @staticmethod
    def has(name: str) -> bool:
        """Report whether ``name`` is registered; delegates to :func:`has_fn`."""
        return has_fn(name)

    @staticmethod
    def clear() -> None:
        """Remove every entry from the registry."""
        _REGISTRY.clear()

    @staticmethod
    def names() -> list[str]:
        """Return the names of all registered functions."""
        return list(_REGISTRY.keys())
|
||||
+26
-14
@@ -7,7 +7,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, Iterator, List
|
||||
from typing import Any, Iterator
|
||||
|
||||
from .task import TaskResult, TaskStatus
|
||||
|
||||
@@ -24,7 +24,7 @@ class RunReport:
|
||||
当且仅当所有非跳过任务都以 ``SUCCESS`` 结束时为 ``True``。
|
||||
"""
|
||||
|
||||
results: Dict[str, TaskResult[object]] = field(default_factory=dict)
|
||||
results: dict[str, TaskResult[Any]] = field(default_factory=dict)
|
||||
success: bool = True
|
||||
|
||||
# ---- 类型化访问 --------------------------------------------------- #
|
||||
@@ -36,11 +36,11 @@ class RunReport:
|
||||
"""
|
||||
return self.results[name].value
|
||||
|
||||
def result_of(self, name: str) -> TaskResult[object]:
|
||||
def result_of(self, name: str) -> TaskResult[Any]:
|
||||
"""返回 ``name`` 的完整 :class:`TaskResult`。"""
|
||||
return self.results[name]
|
||||
|
||||
def __contains__(self, name: object) -> bool:
|
||||
def __contains__(self, name: Any) -> bool:
|
||||
return name in self.results
|
||||
|
||||
def __iter__(self) -> Iterator[str]:
|
||||
@@ -50,9 +50,9 @@ class RunReport:
|
||||
return len(self.results)
|
||||
|
||||
# ---- 汇总 --------------------------------------------------------- #
|
||||
def summary(self) -> Dict[str, Any]:
|
||||
def summary(self) -> dict[str, Any]:
|
||||
"""用于日志/仪表盘的紧凑统计字典。"""
|
||||
counts: Dict[str, int] = {}
|
||||
counts: dict[str, int] = {}
|
||||
total_duration = 0.0
|
||||
for r in self.results.values():
|
||||
counts[r.status.value] = counts.get(r.status.value, 0) + 1
|
||||
@@ -65,19 +65,31 @@ class RunReport:
|
||||
"total_duration_seconds": round(total_duration, 6),
|
||||
}
|
||||
|
||||
def failed_tasks(self) -> List[str]:
|
||||
def failed_tasks(self) -> list[str]:
|
||||
"""以 FAILED 状态结束的任务名列表。"""
|
||||
return [
|
||||
name for name, r in self.results.items() if r.status == TaskStatus.FAILED
|
||||
]
|
||||
return [name for name, r in self.results.items() if r.status == TaskStatus.FAILED]
|
||||
|
||||
def succeeded_tasks(self) -> list[str]:
    """Return the names of tasks whose status is ``TaskStatus.SUCCESS``."""
    winners: list[str] = []
    for task_name, outcome in self.results.items():
        if outcome.status == TaskStatus.SUCCESS:
            winners.append(task_name)
    return winners
|
||||
|
||||
def skipped_tasks(self) -> list[str]:
    """Return the names of tasks whose status is ``TaskStatus.SKIPPED``."""
    skipped: list[str] = []
    for task_name, outcome in self.results.items():
        if outcome.status == TaskStatus.SKIPPED:
            skipped.append(task_name)
    return skipped
|
||||
|
||||
def tasks_by_status(self, status: TaskStatus) -> list[str]:
    """Return the names of tasks whose status equals ``status``, in result order."""
    matching: list[str] = []
    for task_name, outcome in self.results.items():
        if outcome.status == status:
            matching.append(task_name)
    return matching
|
||||
|
||||
def durations(self) -> dict[str, float]:
    """Map each task name to its duration in seconds; a missing duration becomes ``0.0``."""
    seconds_by_task: dict[str, float] = {}
    for task_name, outcome in self.results.items():
        seconds_by_task[task_name] = outcome.duration or 0.0
    return seconds_by_task
|
||||
|
||||
def describe(self) -> str:
|
||||
"""用于调试的人类可读多行报告。"""
|
||||
lines: List[str] = [f"RunReport(success={self.success})"]
|
||||
lines: list[str] = [f"RunReport(success={self.success})"]
|
||||
for name, r in self.results.items():
|
||||
dur = f"{r.duration:.3f}s" if r.duration is not None else "-"
|
||||
err = f" error={r.error!r}" if r.error else ""
|
||||
lines.append(
|
||||
f" {name}: {r.status.value} ({dur} attempts={r.attempts}){err}"
|
||||
)
|
||||
lines.append(f" {name}: {r.status.value} ({dur} attempts={r.attempts}){err}")
|
||||
return "\n".join(lines)
|
||||
|
||||
@@ -0,0 +1,293 @@
|
||||
"""命令行运行器:根据用户输入执行对应的任务流图.
|
||||
|
||||
verbose 模式
|
||||
------------
|
||||
``CliRunner`` 默认 ``verbose=True``, 会:
|
||||
1. 打印任务生命周期 (开始/成功/失败/跳过) 到 stdout
|
||||
2. 对 ``cmd`` 类任务, 显示执行的命令及其标准输出/标准错误
|
||||
|
||||
可通过构造参数 ``verbose=False`` 或命令行 ``--quiet`` 关闭.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import enum
|
||||
import sys
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any, Sequence, get_args
|
||||
|
||||
from .compose import GraphComposer
|
||||
from .errors import PyFlowXError
|
||||
from .executors import Strategy, run
|
||||
from .graph import Graph
|
||||
from .task import TaskSpec
|
||||
|
||||
__all__ = ["CliExitCode", "CliRunner"]
|
||||
|
||||
|
||||
class CliExitCode(enum.IntEnum):
    """Exit codes used by CliRunner."""

    SUCCESS = 0
    FAILURE = 1
    INTERRUPTED = 130  # consistent with the POSIX convention for interruption by signal
|
||||
|
||||
|
||||
@dataclass
|
||||
class CliRunner:
|
||||
"""命令行运行器: 根据用户输入执行对应的任务流图.
|
||||
|
||||
将命令别名映射到 Graph 实例. 通过 ``sys.argv`` 解析用户输入的命令,
|
||||
执行对应的图.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
aliases : dict[str, str | list[str] | Graph]
|
||||
命令别名到任务引用的映射. 每个值可以是:
|
||||
* ``str`` —— 单个任务名 (引用 ``tasks`` 中注册的任务),
|
||||
生成单任务图.
|
||||
* ``list[str]`` —— 任务名列表, 自动 :meth:`Graph.chain` 建立链式依赖,
|
||||
即后一个任务依赖前一个.
|
||||
* :class:`~pyflowx.graph.Graph` —— 直接使用该图 (用于复杂场景, 如
|
||||
自定义 ``conditions``、并行分支等).
|
||||
tasks : list[TaskSpec]
|
||||
扁平注册的任务列表. ``aliases`` 中的字符串引用这些任务名.
|
||||
未被任何 alias 引用的任务不会被执行.
|
||||
strategy : str | Strategy
|
||||
默认执行策略. 可被命令行 ``--strategy`` 覆盖.
|
||||
description : str
|
||||
CLI 帮助文本.
|
||||
verbose : bool
|
||||
是否显示详细执行过程. 默认 ``True``, 可被命令行 ``--quiet`` 关闭.
|
||||
|
||||
Examples
|
||||
--------
|
||||
简单场景 (tasks + aliases)::
|
||||
|
||||
runner = px.CliRunner(
|
||||
tasks=[
|
||||
px.cmd(["uv", "build"]), # name="uv_build"
|
||||
px.cmd(["maturin", "build"], name="maturin_build"),
|
||||
px.cmd(["ruff", "check", "--fix"], name="lint"),
|
||||
],
|
||||
aliases={
|
||||
"b": "uv_build",
|
||||
"ba": ["uv_build", "maturin_build"], # chain: maturin 依赖 uv
|
||||
"lint": "lint",
|
||||
},
|
||||
)
|
||||
runner.run()
|
||||
|
||||
复杂场景 (直接用 Graph)::
|
||||
|
||||
runner = px.CliRunner(
|
||||
aliases={
|
||||
"a": px.Graph.from_specs([
|
||||
px.TaskSpec("add", cmd=["git", "add", "."], conditions=(...)),
|
||||
px.TaskSpec("commit", cmd=["git", "commit"], depends_on=("add",)),
|
||||
]),
|
||||
},
|
||||
)
|
||||
"""
|
||||
|
||||
aliases: dict[str, str | list[str | TaskSpec[Any]] | TaskSpec[Any] | Graph] = field(default_factory=dict)
|
||||
tasks: list[TaskSpec[Any]] = field(default_factory=list)
|
||||
strategy: Strategy = field(default="dependency")
|
||||
description: str = field(default_factory=str)
|
||||
verbose: bool = field(default_factory=lambda: True)
|
||||
# 解析后的命令→图映射,__post_init__ 填充
|
||||
graphs: dict[str, Graph] = field(default_factory=dict, init=False)
|
||||
|
||||
def __post_init__(self) -> None:
    """Validate the configuration and build the command-to-graph mapping.

    Raises
    ------
    ValueError
        If ``aliases`` is empty or two entries of ``tasks`` share a name.
    """
    if not self.aliases:
        raise ValueError("CliRunner 至少需要一个别名 (通过 aliases= 提供)")

    # Step 1: every registered task becomes a one-task graph keyed by its
    # name, so that string references to it can be resolved by GraphComposer.
    pending: dict[str, Graph] = {}
    for spec in self.tasks:
        task_name = spec.name
        if task_name in pending:
            raise ValueError(f"任务名重复: {spec.name!r}")
        pending[task_name] = Graph.from_specs([spec])

    # Step 2: convert each alias to a graph. An alias that shares its name
    # with a task replaces that task's entry.
    pending.update(
        (alias, self._alias_to_graph(alias, target)) for alias, target in self.aliases.items()
    )

    # Step 3: let GraphComposer resolve the string references between graphs.
    self.graphs = GraphComposer(pending).resolve_all()
|
||||
|
||||
@staticmethod
def _alias_to_graph(
    alias: str,
    value: str | list[str | TaskSpec[Any]] | TaskSpec[Any] | Graph,
) -> Graph:
    """Convert the value of one alias into a :class:`Graph`.

    * ``Graph`` -- returned as is.
    * ``TaskSpec`` or ``str`` -- wrapped in a one-element ``Graph.from_specs``
      call. A string is a reference to another alias or registered task that
      is left for GraphComposer to expand.
    * ``list[str | TaskSpec]`` -- passed to ``Graph.from_specs`` after
      checking that it is non-empty and holds only strings and task specs.

    Raises
    ------
    ValueError
        If ``value`` is an empty list.
    TypeError
        If ``value`` or one of its list elements has an unsupported type.
    """
    if isinstance(value, Graph):
        return value
    if isinstance(value, (TaskSpec, str)):
        # A str is only a placeholder reference at this point; GraphComposer
        # expands it later.
        return Graph.from_specs([value])  # type: ignore[arg-type]
    if not isinstance(value, list):
        raise TypeError(
            f"别名 {alias!r} 的值类型无效: {type(value).__name__}, 预期 str/TaskSpec/list[str|TaskSpec]/Graph"
        )

    if not value:
        raise ValueError(f"别名 {alias!r} 的任务列表为空")
    for item in value:
        if isinstance(item, (str, TaskSpec)):
            continue
        raise TypeError(f"别名 {alias!r} 的列表元素类型无效: {type(item).__name__}, 预期 str 或 TaskSpec")
    # Mixed str/TaskSpec list; expansion is left to GraphComposer.
    return Graph.from_specs(value)
|
||||
|
||||
# ------------------------------------------------------------------ #
|
||||
# 内省
|
||||
# ------------------------------------------------------------------ #
|
||||
@property
def commands(self) -> list[str]:
    """Names of the runnable commands, in the order ``aliases`` defines them.

    Only alias names are returned; entries of ``tasks`` are not included.
    """
    return list(self.aliases)
|
||||
|
||||
# ------------------------------------------------------------------ #
|
||||
# 参数解析
|
||||
# ------------------------------------------------------------------ #
|
||||
def _prog_name(self) -> str:
|
||||
"""从 sys.argv[0] 推导程序名."""
|
||||
return Path(sys.argv[0]).name if sys.argv else "pyflowx"
|
||||
|
||||
def create_parser(self) -> argparse.ArgumentParser:
    """Build the command-line argument parser.

    Subclasses may override this to add their own arguments. An override
    should keep the ``command`` positional and the ``--strategy`` /
    ``--dry-run`` / ``--list`` / ``--quiet`` options, because :meth:`run`
    reads all of them from the parsed namespace.

    Returns
    -------
    argparse.ArgumentParser
        A newly created parser.
    """
    parser = argparse.ArgumentParser(
        prog=self._prog_name(),
        description=self.description or "PyFlowX CLI Runner",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=self._format_commands_help(),
    )

    # Optional positional: run() prints the help text when it is missing.
    _ = parser.add_argument("command", nargs="?", help="要执行的命令")
    _ = parser.add_argument(
        "--strategy",
        choices=list(get_args(Strategy)),
        default=self.strategy,
        help="执行策略 (默认: %(default)s)",
    )

    # The remaining options are plain boolean switches.
    switches = (
        ("--dry-run", "只打印执行计划, 不实际运行"),
        ("--list", "列出所有可用命令"),
        ("--quiet", "静默模式, 不显示执行过程 (覆盖默认 verbose)"),
    )
    for flag, help_text in switches:
        _ = parser.add_argument(flag, action="store_true", help=help_text)

    return parser
|
||||
|
||||
def _format_commands_help(self) -> str:
|
||||
"""格式化命令帮助文本."""
|
||||
return "可用命令:\n" + " | ".join(self.graphs.keys())
|
||||
|
||||
# ------------------------------------------------------------------ #
|
||||
# 执行
|
||||
# ------------------------------------------------------------------ #
|
||||
def run(self, args: Sequence[str] | None = None) -> int:
    """Parse the arguments and execute the graph of the selected command.

    Parameters
    ----------
    args : Sequence[str] | None
        Argument list; ``None`` lets argparse use ``sys.argv[1:]``.

    Returns
    -------
    int
        Exit code taken from ``CliExitCode``: ``SUCCESS`` after ``--list``
        or a successful run, ``INTERRUPTED`` on ``KeyboardInterrupt``,
        ``FAILURE`` otherwise (no command, unknown command, failed run,
        ``PyFlowXError``).

    Raises
    ------
    SystemExit
        Raised by argparse itself when the arguments cannot be parsed.
    """
    parser = self.create_parser()
    options = parser.parse_args(args)

    # --list short-circuits everything else.
    if options.list:
        print(self._format_commands_help())
        return CliExitCode.SUCCESS.value

    command = options.command
    if not command:
        # No command given: show the help text and report failure.
        parser.print_help()
        return CliExitCode.FAILURE.value

    # Only registered aliases are accepted; bare task names are not.
    if command not in self.aliases:
        available = ", ".join(self.commands)
        print(
            f"错误: 未知命令 {command!r} (可用命令: {available})",
            file=sys.stderr,
        )
        return CliExitCode.FAILURE.value

    # --quiet overrides the runner's default verbosity.
    verbose = self.verbose and not options.quiet
    graph = self.graphs[command]

    try:
        report = run(
            graph,
            strategy=options.strategy,
            dry_run=options.dry_run,
            verbose=verbose,
        )
        if report.success:
            return CliExitCode.SUCCESS.value
        return CliExitCode.FAILURE.value
    except KeyboardInterrupt:
        print("\n操作已取消", file=sys.stderr)
        return CliExitCode.INTERRUPTED.value
    except PyFlowXError as e:
        print(f"错误: {e}", file=sys.stderr)
        return CliExitCode.FAILURE.value
|
||||
|
||||
def run_cli(self, args: Sequence[str] | None = None) -> None:
    """Run the CLI and terminate the process with the resulting exit code.

    Entry point for use as a command-line tool; equivalent to
    ``sys.exit(self.run(args))``.

    Parameters
    ----------
    args : Sequence[str] | None
        Argument list, forwarded unchanged to :meth:`run`.
    """
    exit_code = self.run(args)
    sys.exit(exit_code)
|
||||
+206
-57
@@ -4,93 +4,213 @@
|
||||
执行器向后端查询某任务是否已有存储结果;若有则跳过该任务,并将其
|
||||
存储值注入下游任务。
|
||||
|
||||
本模块刻意保持最小化:仅持久化*成功*结果(失败任务会重跑),存储
|
||||
形态为扁平的 ``{task_name: result}`` 映射。内置两个后端:
|
||||
存储键由 :meth:`TaskSpec.storage_key` 计算,默认为任务名;若任务配置
|
||||
了 ``cache_key``,则键为 ``"name:cache_key_value"``,使不同输入产生
|
||||
独立缓存条目。
|
||||
|
||||
* :class:`MemoryBackend` —— 快速、进程内、无 I/O。默认。
|
||||
* :class:`JSONBackend` —— 持久化到 JSON 文件,支持跨进程续跑。
|
||||
|
||||
两者均零依赖(``json`` 为标准库)。用户可子类化
|
||||
:class:`StateBackend` 接入 SQLite、Redis 等。
|
||||
支持 TTL:``has`` 在条目过期时返回 ``False``。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any, Dict, Mapping, Optional
|
||||
from collections.abc import Iterator
|
||||
from contextlib import contextmanager, nullcontext
|
||||
from pathlib import Path
|
||||
from typing import Any, ContextManager, Mapping
|
||||
|
||||
if sys.version_info >= (3, 12):
|
||||
from typing import override
|
||||
else:
|
||||
from typing_extensions import override # pragma: no cover
|
||||
|
||||
from .errors import StorageError
|
||||
|
||||
|
||||
class StateBackend(ABC):
    """Abstract base class of resumable state stores.

    Every method is keyed by ``key`` (typically the task name, or
    ``name:cache_key``).
    """

    @abstractmethod
    def load(self) -> Mapping[str, Any]:
        """Return the complete stored mapping (possibly empty)."""

    @abstractmethod
    def save(self, key: str, value: Any) -> None:
        """Persist the successful result of a single task."""

    @abstractmethod
    def has(self, key: str) -> bool:
        """Return whether ``key`` has a stored, non-expired result."""

    @abstractmethod
    def get(self, key: str) -> Any:
        """Return the stored result for ``key``; raise ``KeyError`` if absent."""

    @abstractmethod
    def clear(self) -> None:
        """Remove all stored state."""

    def flush(self) -> None:  # noqa: B027
        """Persist state that is still buffered in memory.

        The default implementation does nothing; backends that delay their
        writes override it.
        """

    def batch(self) -> ContextManager[None]:
        """Return a context manager during which :meth:`save` may postpone :meth:`flush`.

        The default implementation is a no-op context; backends that write
        to an external medium can override it to flush once on exit instead
        of once per saved task.
        """
        return nullcontext()
|
||||
|
||||
|
||||
class _TTLStateBackendMixin(StateBackend):
    """Shared TTL logic for state backends.

    ``has`` / ``get`` / ``load`` / ``save`` / ``clear`` are implemented once
    here on top of four storage primitives supplied by the subclass:
    :meth:`_get_raw`, :meth:`_put_raw`, :meth:`_iter_raw` and
    :meth:`_clear_raw`. Expiry is decided by :meth:`_is_expired`, which
    compares :meth:`_now` against ``self._ttl``.

    Subclasses must set ``self._ttl`` and implement the four primitives;
    they may override :meth:`_now` to use another clock.
    """

    _ttl: float | None

    # ---- primitives: implemented by subclasses ---- #
    @abstractmethod
    def _get_raw(self, key: str) -> tuple[Any, float] | None:
        """Return ``(value, ts)`` for ``key``, or ``None`` if it is absent."""

    @abstractmethod
    def _put_raw(self, key: str, value: Any, ts: float) -> None:
        """Store one record."""

    @abstractmethod
    def _iter_raw(self) -> Iterator[tuple[str, Any, float]]:
        """Yield ``(key, value, ts)`` for every record, without expiry filtering."""

    @abstractmethod
    def _clear_raw(self) -> None:
        """Remove every record."""

    # ---- shared implementation ---- #
    def _now(self) -> float:
        """Return the current timestamp; wall-clock seconds by default."""
        return time.time()

    def _is_expired(self, ts: float) -> bool:
        """Return whether a record stamped ``ts`` is older than the TTL."""
        ttl = self._ttl
        # No TTL configured: records never expire (the clock is not read).
        return ttl is not None and (self._now() - ts) > ttl

    @override
    def load(self) -> Mapping[str, Any]:
        live: dict[str, Any] = {}
        for key, value, ts in self._iter_raw():
            if not self._is_expired(ts):
                live[key] = value
        return live

    @override
    def save(self, key: str, value: Any) -> None:
        # Records are stamped at save time; the stamp drives expiry later.
        self._put_raw(key, value, self._now())

    @override
    def has(self, key: str) -> bool:
        record = self._get_raw(key)
        if record is None:
            return False
        return not self._is_expired(record[1])

    @override
    def get(self, key: str) -> Any:
        record = self._get_raw(key)
        # An expired record is reported exactly like a missing one.
        if record is not None and not self._is_expired(record[1]):
            return record[0]
        raise KeyError(key)

    @override
    def clear(self) -> None:
        self._clear_raw()
|
||||
|
||||
|
||||
class MemoryBackend(_TTLStateBackendMixin):
    """In-process dict backend; its contents live only as long as the object.

    Parameters
    ----------
    ttl:
        Lifetime of an entry in seconds; ``None`` means entries never
        expire. Expired entries are not deleted from the dict by this
        class; they stay until overwritten or cleared.
    """

    def __init__(self, ttl: float | None = None) -> None:
        # key -> (value, timestamp taken from _now())
        self._store: dict[str, tuple[Any, float]] = {}
        self._ttl = ttl

    @override
    def _now(self) -> float:
        # Timestamps never leave the process, so the monotonic clock is used.
        return time.monotonic()

    @override
    def _get_raw(self, key: str) -> tuple[Any, float] | None:
        return self._store.get(key)

    @override
    def _put_raw(self, key: str, value: Any, ts: float) -> None:
        self._store[key] = (value, ts)

    @override
    def _iter_raw(self) -> Iterator[tuple[str, Any, float]]:
        yield from ((key, value, ts) for key, (value, ts) in self._store.items())

    @override
    def _clear_raw(self) -> None:
        self._store.clear()
|
||||
|
||||
|
||||
class JSONBackend(StateBackend):
|
||||
class JSONBackend(_TTLStateBackendMixin):
|
||||
"""基于文件的 JSON 存储,用于跨进程续跑。
|
||||
|
||||
结果必须可 JSON 序列化。不可序列化的值会抛出
|
||||
:class:`~pyflowx.errors.StorageError`(运行本身不会中止;仅该条
|
||||
结果的持久化失败)。
|
||||
存储格式:``{key: {"value": v, "ts": epoch_seconds}}``。
|
||||
``ts`` 用于 TTL 判断。结果必须可 JSON 序列化。
|
||||
|
||||
Parameters
|
||||
----------
|
||||
path:
|
||||
JSON 文件路径。
|
||||
ttl:
|
||||
条目存活秒数。``None`` 表示永不过期。
|
||||
"""
|
||||
|
||||
def __init__(self, path: str) -> None:
|
||||
self._path = path
|
||||
self._store: Dict[str, Any] = {}
|
||||
def __init__(self, path: str, ttl: float | None = None) -> None:
|
||||
self._path: str = path
|
||||
self._ttl = ttl
|
||||
self._store: dict[str, dict[str, Any]] = {}
|
||||
self._defer_flush: bool = False
|
||||
self._load()
|
||||
|
||||
def _load(self) -> None:
|
||||
if not os.path.exists(self._path):
|
||||
if not Path(self._path).exists():
|
||||
return
|
||||
try:
|
||||
with open(self._path, "r", encoding="utf-8") as fh:
|
||||
data = json.load(fh)
|
||||
with open(self._path, encoding="utf-8") as fh:
|
||||
data: Any = json.load(fh)
|
||||
if isinstance(data, dict):
|
||||
self._store = data
|
||||
# 兼容纯值格式与带元数据格式
|
||||
self._store = {}
|
||||
for k, v in data.items():
|
||||
if isinstance(v, dict) and "value" in v and "ts" in v:
|
||||
self._store[k] = v
|
||||
else:
|
||||
self._store[k] = {"value": v, "ts": time.time()}
|
||||
except (OSError, json.JSONDecodeError) as exc:
|
||||
raise StorageError(f"cannot read state file {self._path!r}", exc) from exc
|
||||
|
||||
@@ -99,35 +219,64 @@ class JSONBackend(StateBackend):
|
||||
try:
|
||||
with open(tmp, "w", encoding="utf-8") as fh:
|
||||
json.dump(self._store, fh, ensure_ascii=False, indent=2)
|
||||
os.replace(tmp, self._path)
|
||||
_ = Path(tmp).replace(Path(self._path))
|
||||
except (OSError, TypeError) as exc:
|
||||
raise StorageError(f"cannot write state file {self._path!r}", exc) from exc
|
||||
|
||||
def load(self) -> Mapping[str, Any]:
|
||||
return dict(self._store)
|
||||
@override
|
||||
def _get_raw(self, key: str) -> tuple[Any, float] | None:
|
||||
entry = self._store.get(key)
|
||||
if entry is None:
|
||||
return None
|
||||
return entry["value"], float(entry.get("ts", 0))
|
||||
|
||||
def save(self, name: str, value: Any) -> None:
|
||||
# 在修改内存状态前先校验可序列化性。
|
||||
try:
|
||||
json.dumps(value)
|
||||
except (TypeError, ValueError) as exc:
|
||||
raise StorageError(
|
||||
f"result of task {name!r} is not JSON-serialisable", exc
|
||||
) from exc
|
||||
self._store[name] = value
|
||||
self._flush()
|
||||
@override
|
||||
def _put_raw(self, key: str, value: Any, ts: float) -> None:
|
||||
self._store[key] = {"value": value, "ts": ts}
|
||||
|
||||
def has(self, name: str) -> bool:
|
||||
return name in self._store
|
||||
@override
|
||||
def _iter_raw(self) -> Iterator[tuple[str, Any, float]]:
|
||||
for k, entry in self._store.items():
|
||||
yield k, entry["value"], float(entry.get("ts", 0))
|
||||
|
||||
def get(self, name: str) -> Any:
|
||||
return self._store[name]
|
||||
|
||||
def clear(self) -> None:
|
||||
@override
|
||||
def _clear_raw(self) -> None:
|
||||
self._store.clear()
|
||||
|
||||
@override
def clear(self) -> None:
    """Remove every record and immediately rewrite the state file."""
    super().clear()
    # Written unconditionally, i.e. also while a batch is deferring saves.
    self._flush()
|
||||
|
||||
@override
def save(self, key: str, value: Any) -> None:
    """Store ``value`` under ``key`` and write the file unless a batch is active.

    Raises
    ------
    StorageError
        If ``value`` cannot be serialised to JSON. The check runs before
        the in-memory store is modified.
    """
    # Probe serialisability first; the encoded text itself is discarded.
    try:
        _ = json.dumps(value)
    except (TypeError, ValueError) as exc:
        raise StorageError(f"result of key {key!r} is not JSON-serialisable", exc) from exc

    super().save(key, value)

    if self._defer_flush:
        # Inside batch(): the write happens once when the batch ends.
        return
    self._flush()
|
||||
|
||||
def resolve_backend(backend: Optional[StateBackend]) -> StateBackend:
|
||||
@override
def flush(self) -> None:
    """Write the current in-memory state to the state file."""
    self._flush()
|
||||
|
||||
@override
@contextmanager
def batch(self) -> Iterator[None]:
    """Enter batch mode: ``save`` skips the disk write until the batch ends.

    The whole state file is written once when the outermost ``batch()``
    block exits (also when it exits through an exception), instead of once
    per saved task.

    Batches may be nested. An inner batch restores the deferral state it
    found on entry, so leaving it neither re-enables per-save writes for
    the enclosing batch nor triggers an extra write.
    """
    was_deferring = self._defer_flush
    self._defer_flush = True
    try:
        yield
    finally:
        self._defer_flush = was_deferring
        # Only the outermost batch performs the single deferred write.
        if not was_deferring:
            self._flush()
|
||||
|
||||
|
||||
def resolve_backend(backend: StateBackend | None) -> StateBackend:
    """Return ``backend``, or a new :class:`MemoryBackend` when it is ``None``."""
    if backend is None:
        return MemoryBackend()
    return backend
|
||||
|
||||
+525
-39
@@ -17,22 +17,36 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
import sys
|
||||
import threading
|
||||
from contextlib import contextmanager
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from functools import cached_property
|
||||
from pathlib import Path
|
||||
from typing import (
|
||||
Any,
|
||||
Callable,
|
||||
ContextManager,
|
||||
Coroutine,
|
||||
Generator,
|
||||
Generic,
|
||||
List,
|
||||
Mapping,
|
||||
Optional,
|
||||
Tuple,
|
||||
TypeVar,
|
||||
Union,
|
||||
cast,
|
||||
)
|
||||
|
||||
T = TypeVar("T")
|
||||
if sys.version_info >= (3, 13):
|
||||
from typing import TypeVar
|
||||
else:
|
||||
from typing_extensions import TypeVar # pragma: no cover
|
||||
|
||||
T = TypeVar("T", default=Any)
|
||||
|
||||
# 任务可调用对象可以是同步或异步的。显式保留联合类型,让 mypy 理解两种形态。
|
||||
TaskFn = Union[
|
||||
@@ -44,6 +58,112 @@ TaskFn = Union[
|
||||
# 单任务类型由函数签名本身保留。
|
||||
Context = Mapping[str, Any]
|
||||
|
||||
# Supported command forms.
TaskCmd = Union[
    List[str],  # argv-style list, e.g. ["ls", "-la"]
    str,  # shell command string
    Callable[..., Any],  # Python callable
]

# Execution strategy: sequential/thread/async follow a layer-barrier model,
# dependency is dependency-driven.
Strategy = Union[str, "StrategyKind"]
StrategyKind = Any  # placeholder to avoid an import cycle; the executors module narrows it with a Literal

logger = logging.getLogger(__name__)

# Condition predicate: receives the dependency context (possibly an empty
# mapping) and returns whether the task should run.
Condition = Callable[[Context], bool]

# Cache-key function: computes a stable string key from the dependency context.
CacheKeyFn = Callable[[Context], str]
|
||||
|
||||
|
||||
def _format_skip_reason(failed_conditions: list[str]) -> str:
|
||||
"""格式化跳过原因:≤2 个全展示,>2 个仅展示前 2 个并附总数。"""
|
||||
if len(failed_conditions) <= 2:
|
||||
return f"条件不满足: {', '.join(failed_conditions)}"
|
||||
return f"条件不满足: {', '.join(failed_conditions[:2])} 等{len(failed_conditions)}个条件"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# 重试策略
|
||||
# ---------------------------------------------------------------------- #
|
||||
@dataclass(frozen=True)
class RetryPolicy:
    """Retry policy applied when a task fails.

    Parameters
    ----------
    max_attempts:
        Maximum number of attempts, including the first one. ``1`` means a
        single attempt and no retry.
    delay:
        Base wait in seconds between two attempts.
    backoff:
        Back-off multiplier: after the n-th failure the wait is
        ``delay * backoff ** (n - 1)``.
    jitter:
        Upper bound in seconds of a random amount added to every wait
        (drawn with ``random.uniform(0, jitter)``).
    retry_on:
        Exception types that qualify for a retry. Defaults to
        ``(Exception,)``; an empty tuple matches nothing.

    Note
    ----
    Replaces the older ``retries: int`` setting: ``retries=2`` corresponds
    to ``RetryPolicy(max_attempts=3)``.
    """

    max_attempts: int = 1
    delay: float = 0.0
    backoff: float = 1.0
    jitter: float = 0.0
    retry_on: tuple[type[BaseException], ...] = (Exception,)

    def __post_init__(self) -> None:
        if self.max_attempts < 1:
            raise ValueError(f"RetryPolicy.max_attempts must be >= 1, got {self.max_attempts}.")
        # The three timing knobs share one rule: they must not be negative.
        for attr in ("delay", "backoff", "jitter"):
            current = getattr(self, attr)
            if current < 0:
                raise ValueError(f"RetryPolicy.{attr} must be >= 0, got {current}.")

    @property
    def retries(self) -> int:
        """Number of retries excluding the first attempt (``max_attempts - 1``)."""
        return self.max_attempts - 1

    def should_retry(self, exc: BaseException) -> bool:
        """Return whether ``exc`` is an instance of one of the ``retry_on`` types."""
        return isinstance(exc, self.retry_on)

    def wait_seconds(self, attempt: int) -> float:
        """Return the wait in seconds after the ``attempt``-th failure (1-based).

        Values of ``attempt`` below 1 yield ``0.0``.
        """
        if attempt < 1:
            return 0.0
        import random

        # attempt >= 1 here, so the exponent is never negative.
        wait = self.delay * (self.backoff ** (attempt - 1))
        if self.jitter > 0:
            wait_jitter = random.uniform(0, self.jitter)
        else:
            wait_jitter = 0.0
        return wait + wait_jitter
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# 任务钩子
|
||||
# ---------------------------------------------------------------------- #
|
||||
@dataclass(frozen=True)
class TaskHooks:
    """Optional lifecycle callbacks of a task.

    ``pre_run`` receives the spec, ``post_run`` the spec and the task's
    return value, ``on_failure`` the spec and the exception. Each one may
    be left as ``None``.

    NOTE(review): when the hooks are called, and how an exception raised by
    a hook is handled, is decided by the executor and is not visible in
    this class — confirm against the executor code.
    """

    # Called with the spec before the task runs.
    pre_run: Callable[[TaskSpec[Any]], None] | None = None
    # Called with the spec and the return value after a successful run.
    post_run: Callable[[TaskSpec[Any], Any], None] | None = None
    # Called with the spec and the exception after a failure.
    on_failure: Callable[[TaskSpec[Any], BaseException], None] | None = None
|
||||
|
||||
|
||||
class TaskStatus(Enum):
|
||||
"""任务在单次运行内的生命周期状态。"""
|
||||
@@ -66,63 +186,432 @@ class TaskSpec(Generic[T]):
|
||||
fn:
|
||||
待执行的可调用对象,可为同步或异步。其参数名驱动自动上下文
|
||||
注入(见 :mod:`pyflowx.context`)。
|
||||
若提供 ``cmd`` 参数,则此参数会被忽略。
|
||||
cmd:
|
||||
命令列表或 shell 字符串,支持三种形态:
|
||||
- ``list[str]``: 命令及参数列表,如 ``["ls", "-la"]``
|
||||
- ``str``: shell 命令字符串,如 ``"pip freeze > requirements.txt"``
|
||||
- ``Callable``: Python 函数,与 ``fn`` 参数等效
|
||||
depends_on:
|
||||
必须先完成才能运行本任务的任务名列表。顺序无关;框架会做
|
||||
拓扑排序。
|
||||
硬依赖任务名。必须全部成功完成才会运行本任务。
|
||||
上游被 SKIPPED 时,本任务也会被 SKIPPED(除非
|
||||
``allow_upstream_skip=True``)。
|
||||
soft_depends_on:
|
||||
软依赖任务名。会等待其完成,但其结果不影响本任务是否执行:
|
||||
- 上游成功:注入其返回值
|
||||
- 上游 SKIPPED 或失败:注入 :attr:`defaults` 中提供的默认值
|
||||
适用于"可选输入"场景。
|
||||
defaults:
|
||||
软依赖的默认值映射 ``{dep_name: default_value}``。
|
||||
软依赖未提供结果时使用。未在 defaults 中出现的软依赖默认为 ``None``。
|
||||
args:
|
||||
静态位置参数,追加在注入参数*之后*。适用于参数化任务
|
||||
(如 ``fetch_user(uid)``)。
|
||||
静态位置参数,追加在注入参数*之后*。
|
||||
kwargs:
|
||||
静态关键字参数。若与注入名冲突则抛出
|
||||
:class:`~pyflowx.errors.InjectionError`。
|
||||
retries:
|
||||
失败后的重试次数。``0`` 表示仅尝试一次。
|
||||
retry:
|
||||
:class:`RetryPolicy` 重试策略。默认仅尝试一次。
|
||||
timeout:
|
||||
最大执行时长(秒)。``None`` 表示不限制。异步任务使用
|
||||
:func:`asyncio.wait_for`;线程/异步执行器中的同步任务会
|
||||
取消 worker future。
|
||||
:func:`asyncio.wait_for`;同步任务通过线程 future 取消。
|
||||
tags:
|
||||
自由标签,供 :meth:`Graph.subgraph` 做选择性执行与调试。
|
||||
自由标签,供 :meth:`Graph.subgraph` 做选择性执行与调试,
|
||||
也可用于并发限制分组。
|
||||
conditions:
|
||||
条件判断函数列表,接收依赖上下文,全部返回 ``True`` 时才执行任务。
|
||||
任一返回 ``False`` 则任务被标记为 SKIPPED。
|
||||
cwd:
|
||||
工作目录。对 ``cmd`` 任务作为子进程工作目录;对 ``fn`` 任务
|
||||
通过临时切换当前目录生效。
|
||||
env:
|
||||
环境变量覆盖映射。对 ``cmd`` 任务合并到子进程环境;对 ``fn``
|
||||
任务在执行期间临时设置。
|
||||
verbose:
|
||||
是否打印详细输出。``True`` 时打印执行的命令、返回码与输出
|
||||
(仅 ``cmd``),以及任务生命周期。
|
||||
skip_if_missing:
|
||||
仅对 ``cmd`` 为 ``list[str]`` 有效。``True`` 时通过
|
||||
:func:`shutil.which` 检查命令是否存在,不存在则跳过。
|
||||
allow_upstream_skip:
|
||||
若为 ``True``,硬依赖被 SKIPPED 时本任务仍执行(软依赖不影响)。
|
||||
适用于清理类任务。
|
||||
strategy:
|
||||
单任务执行策略覆盖。``None`` 表示继承图级策略。
|
||||
``"sequential"`` 同步直接调用;``"thread"``/``"async"`` 将同步
|
||||
任务卸载到线程池,异步任务跑在事件循环上。
|
||||
priority:
|
||||
同层任务调度优先级。数值越大越先启动。仅影响同层内启动顺序,
|
||||
不打破层屏障。默认 ``0``。
|
||||
concurrency_key:
|
||||
并发限制分组键。具有相同键的任务共享一个信号量,限制同时
|
||||
运行的实例数。具体限额由 :func:`run` 的 ``concurrency_limits``
|
||||
参数提供 ``{key: limit}`` 映射。``None`` 表示不限制。
|
||||
continue_on_error:
|
||||
若为 ``True``,任务最终失败时不中止整图,仅标记本任务 FAILED,
|
||||
其硬依赖下游被 SKIPPED,其余任务继续。默认 ``False``。
|
||||
cache_key:
|
||||
缓存键计算函数。若提供,则用其基于依赖上下文计算的字符串键
|
||||
存取状态后端,使不同输入产生独立缓存条目。``None`` 表示用任务名。
|
||||
hooks:
|
||||
:class:`TaskHooks` 生命周期钩子。
|
||||
executor:
|
||||
同步任务的执行器:``"thread"``(默认,线程池)/ ``"process"``
|
||||
(进程池,绕过 GIL,适合 CPU 密集型;``fn`` 须可 pickle)/
|
||||
``"inline"``(直接在事件循环线程调用,最快但会阻塞循环)。
|
||||
"""
|
||||
|
||||
name: str
|
||||
fn: TaskFn[T]
|
||||
depends_on: Tuple[str, ...] = ()
|
||||
args: Tuple[Any, ...] = ()
|
||||
fn: TaskFn[T] | None = None
|
||||
cmd: TaskCmd | None = None
|
||||
depends_on: tuple[str, ...] = ()
|
||||
soft_depends_on: tuple[str, ...] = ()
|
||||
defaults: Mapping[str, Any] = field(default_factory=dict)
|
||||
args: tuple[Any, ...] = ()
|
||||
kwargs: Mapping[str, Any] = field(default_factory=dict)
|
||||
retries: int = 0
|
||||
timeout: Optional[float] = None
|
||||
tags: Tuple[str, ...] = ()
|
||||
retry: RetryPolicy = field(default_factory=RetryPolicy)
|
||||
timeout: float | None = None
|
||||
tags: tuple[str, ...] = ()
|
||||
conditions: tuple[Condition, ...] = ()
|
||||
cwd: Path | None = None
|
||||
env: Mapping[str, str] | None = None
|
||||
verbose: bool = False
|
||||
skip_if_missing: bool = False
|
||||
allow_upstream_skip: bool = False
|
||||
strategy: str | None = None
|
||||
priority: int = 0
|
||||
concurrency_key: str | None = None
|
||||
continue_on_error: bool = False
|
||||
cache_key: CacheKeyFn | None = None
|
||||
hooks: TaskHooks = field(default_factory=TaskHooks)
|
||||
executor: str = "thread" # "thread" | "process" | "inline"
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
if not self.name:
|
||||
raise ValueError("TaskSpec.name must be a non-empty string.")
|
||||
if self.retries < 0:
|
||||
raise ValueError(f"TaskSpec '{self.name}': retries must be >= 0.")
|
||||
if self.retry.max_attempts < 1:
|
||||
raise ValueError(f"TaskSpec '{self.name}': retry.max_attempts must be >= 1.")
|
||||
if self.timeout is not None and self.timeout <= 0:
|
||||
raise ValueError(f"TaskSpec '{self.name}': timeout must be > 0.")
|
||||
if self.name in self.depends_on:
|
||||
if self.name in self.depends_on or self.name in self.soft_depends_on:
|
||||
raise ValueError(f"TaskSpec '{self.name}' cannot depend on itself.")
|
||||
overlap = set(self.depends_on) & set(self.soft_depends_on)
|
||||
if overlap:
|
||||
raise ValueError(f"TaskSpec '{self.name}': depends_on 与 soft_depends_on 不能重叠: {sorted(overlap)}")
|
||||
if self.fn is None and self.cmd is None:
|
||||
raise ValueError(f"TaskSpec '{self.name}': 必须提供 fn 或 cmd 参数。")
|
||||
|
||||
@cached_property
def effective_fn(self) -> TaskFn[T]:
    """Return the callable that actually runs this task.

    When ``cmd`` is set, the wrapper produced by :meth:`_wrap_cmd` is
    returned; otherwise ``fn`` is returned. ``cmd`` takes precedence when
    both are given.

    The result is cached per instance through
    :func:`functools.cached_property`, so the wrapper is built at most once.
    """
    if self.cmd is None:
        if self.fn is None:
            raise ValueError(f"TaskSpec '{self.name}': 没有可执行的函数或命令。")  # pragma: no cover
        return self.fn
    return self._wrap_cmd()
|
||||
|
||||
def _wrap_cmd(self) -> TaskFn[Any]:
    """Wrap ``cmd`` into a zero-argument callable.

    The execution logic lives in the sibling ``command`` module, keeping
    this class a plain data structure. The wrapper passes the spec itself
    to ``run_command`` on every call, so it captures no individual field
    values.
    """
    # Imported at call time rather than at module import.
    from .command import run_command

    def _run() -> T:
        return cast(T, run_command(self))

    # Give the wrapper the task's name.
    _run.__name__ = self.name
    return _run  # type: ignore[return-value]
|
||||
|
||||
def should_execute(self, context: Context) -> tuple[bool, str | None]:
    """Decide whether the task should run for the given dependency context.

    Every condition is evaluated (no short-circuit). A condition that
    raises counts as failed. A failed condition is described by its
    ``_reason`` attribute when it has one (a list is joined with ``", "``),
    else by its ``__name__``, else by a generic label.

    Returns
    -------
    (should_run, skip_reason)
        ``skip_reason`` is ``None`` when ``should_run`` is ``True``;
        otherwise it describes why the task is skipped.
    """
    failures: list[str] = []
    for check in self.conditions:
        try:
            passed = check(context)
        except Exception:
            # A crashing condition is treated as "not satisfied".
            failures.append("匿名条件(执行错误)")
            continue
        if passed:
            continue
        reason = getattr(check, "_reason", None)
        if reason is None:
            failures.append(getattr(check, "__name__", None) or "匿名条件")
        elif isinstance(reason, list):
            failures.append(", ".join(str(r) for r in reason))
        else:
            failures.append(str(reason))

    if failures:
        return False, _format_skip_reason(failures)

    # The executable lookup only runs once all conditions have passed.
    if self.skip_if_missing and not self._is_cmd_available():
        if isinstance(self.cmd, list) and self.cmd:
            cmd_name = self.cmd[0]
        else:
            cmd_name = "unknown"
        return False, f"命令不存在: {cmd_name}"

    return True, None
|
||||
|
||||
def _is_cmd_available(self) -> bool:
|
||||
"""检查 ``cmd`` 是否可用(仅 list[str])。"""
|
||||
cmd = self.cmd
|
||||
if isinstance(cmd, list) and cmd:
|
||||
return shutil.which(cmd[0]) is not None
|
||||
return True
|
||||
|
||||
def env_context(self) -> ContextManager[None]:
    """Return a context manager that temporarily applies ``env`` and ``cwd``.

    The work is delegated to the module-level ``_env_and_cwd`` helper with
    this spec's ``env`` and ``cwd``. Intended for ``fn`` tasks.

    NOTE(review): ``cmd`` tasks are expected to pass ``env``/``cwd`` to the
    subprocess directly; that code is not visible here — confirm.
    """
    env, cwd = self.env, self.cwd
    return _env_and_cwd(env, cwd)
|
||||
|
||||
def storage_key(self, context: Context) -> str:
    """Compute the key under which this task's result is stored.

    Without ``cache_key`` the key is the task name. With ``cache_key`` it
    is ``"<name>:<cache_key(context)>"``. If ``cache_key`` raises
    ``TypeError``, ``ValueError``, ``KeyError`` or ``AttributeError``, a
    warning is logged and the task name is used; any other exception
    propagates.
    """
    key_fn = self.cache_key
    if key_fn is None:
        return self.name
    try:
        suffix = key_fn(context)
        return f"{self.name}:{suffix}"
    except (TypeError, ValueError, KeyError, AttributeError) as exc:
        # Fall back to the plain name, but log it so that a bug in the
        # cache_key implementation does not go unnoticed.
        logger.warning(
            "task %r: cache_key 回退到 name(%s: %s)",
            self.name,
            type(exc).__name__,
            exc,
        )
        return self.name
|
||||
|
||||
|
||||
# 全局锁:序列化对进程级状态(os.environ / os.chdir)的临时修改。
|
||||
# ``fn`` 任务在 thread/async 策略下并发执行时,若各自配置了不同的
|
||||
# ``cwd``/``env``,会相互覆盖(os.chdir 与 os.environ 均为进程全局)。
|
||||
# 该锁仅包裹"切换→执行→恢复"区间,保证正确性;不使用 cwd/env 的任务不受影响。
|
||||
_env_cwd_lock = threading.RLock()
|
||||
|
||||
|
||||
@contextmanager
|
||||
def _env_and_cwd(
|
||||
env: Mapping[str, str] | None,
|
||||
cwd: Path | None,
|
||||
) -> Generator[None, None, None]:
|
||||
"""临时设置环境变量与工作目录。
|
||||
|
||||
``os.environ`` 与 ``os.chdir`` 是进程级全局状态,在 thread/async 策略下
|
||||
并发执行多个带 ``env``/``cwd`` 的 ``fn`` 任务时会相互覆盖。本函数通过
|
||||
模块级 :data:`_env_cwd_lock` 串行化"切换→执行→恢复"区间,确保正确性。
|
||||
无 ``env`` 且无 ``cwd`` 时直接 yield,不获取锁。
|
||||
"""
|
||||
if not env and cwd is None:
|
||||
yield
|
||||
return
|
||||
with _env_cwd_lock:
|
||||
saved_env: dict[str, str] = {}
|
||||
saved_cwd: str | None = None
|
||||
if env:
|
||||
for k, v in env.items():
|
||||
if k in os.environ:
|
||||
saved_env[k] = os.environ[k]
|
||||
os.environ[k] = v
|
||||
if cwd is not None:
|
||||
saved_cwd = str(Path.cwd())
|
||||
os.chdir(cwd)
|
||||
try:
|
||||
yield
|
||||
finally:
|
||||
if saved_cwd is not None:
|
||||
os.chdir(saved_cwd)
|
||||
# 恢复环境变量
|
||||
if env:
|
||||
for k in env:
|
||||
if k in saved_env:
|
||||
os.environ[k] = saved_env[k]
|
||||
else:
|
||||
os.environ.pop(k, None)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# 任务模板:批量生成相似 TaskSpec 的工厂
|
||||
# ---------------------------------------------------------------------- #
|
||||
def _task_noop() -> None:
|
||||
"""task(cmd=...) 形式下的占位 fn(cmd 任务执行期不调用 fn)。"""
|
||||
return None
|
||||
|
||||
|
||||
def task(
    fn: TaskFn[Any] | None = None,
    *,
    cmd: TaskCmd | None = None,
    depends_on: tuple[str, ...] = (),
    soft_depends_on: tuple[str, ...] = (),
    defaults: Mapping[str, Any] | None = None,
    args: tuple[Any, ...] = (),
    kwargs: Mapping[str, Any] | None = None,
    retry: RetryPolicy | None = None,
    timeout: float | None = None,
    tags: tuple[str, ...] = (),
    conditions: tuple[Condition, ...] = (),
    cwd: str | Path | None = None,
    env: Mapping[str, str] | None = None,
    verbose: bool = False,
    skip_if_missing: bool = False,
    allow_upstream_skip: bool = False,
    strategy: str | None = None,
    priority: int = 0,
    concurrency_key: str | None = None,
    continue_on_error: bool = False,
    cache_key: CacheKeyFn | None = None,
    hooks: TaskHooks | None = None,
    name: str | None = None,
) -> Any:
    """Decorator that turns a function into a :class:`TaskSpec`.

    Three call forms are supported:

    * ``@task`` -- decorate a function directly; returns the spec.
    * ``@task(depends_on=...)`` -- called with options only; returns a
      decorator that produces the spec.
    * ``task(cmd=..., name=...)`` -- no function at all; builds the spec
      immediately, with a no-op placeholder as ``fn``. ``name`` is required
      in this form.

    ``name`` defaults to ``fn.__name__``. A string ``cwd`` is converted to
    :class:`~pathlib.Path`; ``defaults``, ``kwargs`` and ``env`` are copied
    into new dicts.

    Raises
    ------
    ValueError
        If ``cmd`` is given without ``fn`` and without ``name``.

    Examples
    --------
    >>> @px.task
    ... def extract(): return [1, 2, 3]
    >>> @px.task(depends_on=("extract",))
    ... def double(extract): return [x * 2 for x in extract]
    >>> graph = px.Graph.from_specs([extract, double])
    """

    def _decorate(func: TaskFn[Any]) -> TaskSpec[Any]:
        # Optional/mutable arguments are normalised on every call, so specs
        # built from the same decorator never share a container.
        return TaskSpec(
            name=name or func.__name__,
            fn=func,
            cmd=cmd,
            depends_on=depends_on,
            soft_depends_on=soft_depends_on,
            defaults=dict(defaults) if defaults else {},
            args=args,
            kwargs=dict(kwargs) if kwargs else {},
            retry=RetryPolicy() if retry is None else retry,
            timeout=timeout,
            tags=tags,
            conditions=conditions,
            cwd=Path(cwd) if isinstance(cwd, str) else cwd,
            env=dict(env) if env else None,
            verbose=verbose,
            skip_if_missing=skip_if_missing,
            allow_upstream_skip=allow_upstream_skip,
            strategy=strategy,
            priority=priority,
            concurrency_key=concurrency_key,
            continue_on_error=continue_on_error,
            cache_key=cache_key,
            hooks=TaskHooks() if hooks is None else hooks,
        )

    # Direct decoration, or an explicit function argument.
    if fn is not None:
        return _decorate(fn)

    # Called with options only: hand back the decorator.
    if cmd is None:
        return _decorate

    # task(cmd=...): no function to take a name from.
    if name is None:
        raise ValueError("task(cmd=...) 需要显式提供 name")
    return _decorate(_task_noop)
|
||||
|
||||
|
||||
def cmd(
    command: list[str],
    *,
    name: str | None = None,
    depends_on: tuple[str, ...] = (),
    **kwargs: Any,
) -> TaskSpec[Any]:
    """Create a :class:`TaskSpec` from a command list.

    Parameters
    ----------
    command:
        The command and its arguments, e.g. ``["uv", "build"]``.
    name:
        Task name. When omitted (or empty) it is derived from the first two
        elements of ``command`` joined by ``"_"`` (``["uv", "build"]`` ->
        ``"uv_build"``); a shorter command uses all of its elements.
    depends_on:
        Names of upstream tasks.
    **kwargs:
        Passed through to :class:`TaskSpec` unchanged (e.g. ``tags``).

    Returns
    -------
    TaskSpec
        A spec whose ``cmd`` is ``command``.

    Examples
    --------
    >>> uv_build = px.cmd(["uv", "build"])
    >>> uv_build.name
    'uv_build'
    >>> lint = px.cmd(["ruff", "check", "--fix"], name="lint")
    >>> lint.name
    'lint'
    >>> px.cmd(["pytest"], name="tests").name
    'tests'
    """
    # ``command[:2]`` already covers commands shorter than two elements.
    # The previous ``name or A if cond else B`` form parsed as
    # ``(name or A) if cond else B`` and silently discarded an explicit
    # ``name`` whenever the command had fewer than two elements.
    spec_name = name or "_".join(command[:2])
    return TaskSpec(
        name=spec_name,
        cmd=command,
        depends_on=depends_on,
        **kwargs,
    )
|
||||
|
||||
|
||||
def task_template(
    fn: TaskFn[Any] | None = None,
    cmd: TaskCmd | None = None,
    **defaults: Any,
) -> Callable[..., TaskSpec[Any]]:
    """Create a factory that produces similar :class:`TaskSpec` objects.

    The returned factory takes a ``name`` plus any overriding fields and
    builds a :class:`TaskSpec` from the template values merged with those
    overrides (overrides win). Useful for creating batches of similar tasks,
    e.g. fan-out.

    Examples
    --------
    >>> Fetch = px.task_template(fn=fetch_user, retry=px.RetryPolicy(max_attempts=3))
    >>> specs = [Fetch(f"fetch_{uid}", args=(uid,)) for uid in range(5)]
    """
    template: dict[str, Any] = {**defaults}
    # ``fn`` / ``cmd`` only join the template when actually supplied.
    for field_name, field_value in (("fn", fn), ("cmd", cmd)):
        if field_value is not None:
            template[field_name] = field_value

    def _factory(name: str, **overrides: Any) -> TaskSpec[Any]:
        return TaskSpec(name, **{**template, **overrides})

    _factory.__name__ = "task_template_factory"
    return _factory
|
||||
|
||||
|
||||
@dataclass
|
||||
class TaskResult(Generic[T]):
|
||||
"""运行期间产生的可变单任务记录。
|
||||
|
||||
每次运行都会创建全新的 :class:`TaskResult`;spec 本身保持不可变。
|
||||
这让同一个图可以安全地重复运行。
|
||||
"""
|
||||
"""运行期间产生的可变单任务记录。"""
|
||||
|
||||
spec: TaskSpec[T]
|
||||
status: TaskStatus = TaskStatus.PENDING
|
||||
value: Optional[T] = None
|
||||
error: Optional[BaseException] = None
|
||||
value: T | None = None
|
||||
error: BaseException | None = None
|
||||
attempts: int = 0
|
||||
started_at: Optional[datetime] = None
|
||||
finished_at: Optional[datetime] = None
|
||||
started_at: datetime | None = None
|
||||
finished_at: datetime | None = None
|
||||
reason: str | None = None # 跳过原因
|
||||
|
||||
@property
|
||||
def duration(self) -> Optional[float]:
|
||||
def duration(self) -> float | None:
|
||||
"""从开始到结束的耗时(秒),未开始/未结束则为 ``None``。"""
|
||||
if self.started_at is None or self.finished_at is None:
|
||||
return None
|
||||
@@ -131,14 +620,11 @@ class TaskResult(Generic[T]):
|
||||
|
||||
@dataclass(frozen=True)
class TaskEvent:
    """Immutable event emitted to observers during execution.

    Passed to the ``on_event`` callback of :func:`pyflowx.run`, so callers can
    build progress bars, metrics or structured logs without coupling to the
    executor internals.
    """

    # Each field is declared exactly once; the duplicated ``Optional[...]``
    # declarations and the second docstring statement were redundant.
    task: str
    status: TaskStatus
    attempts: int = 0
    error: str | None = None
    duration: float | None = None
    reason: str | None = None
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
|
||||
@@ -0,0 +1,119 @@
|
||||
"""系统操作任务模块.
|
||||
|
||||
提供常用的系统操作任务封装, 包括清屏、环境变量设置、命令查找等.
|
||||
遵循实用主义原则, 仅提供核心功能, 无过度设计.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
__all__ = [
|
||||
"clr",
|
||||
"reset_icon_cache",
|
||||
"setenv",
|
||||
"setenv_group",
|
||||
"which",
|
||||
"write_file",
|
||||
]
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
import pyflowx as px
|
||||
from pyflowx import BuiltinConditions
|
||||
from pyflowx.conditions import Constants
|
||||
|
||||
|
||||
def clr():
    """Build the clear-screen task.

    Returns a ``clear_screen`` :class:`px.TaskSpec` whose function runs the
    platform's clear command with ``check=False``, so a non-zero exit status
    does not raise.
    """
    # ``cls`` is a cmd.exe built-in, not an executable: launching it directly
    # without a shell raises FileNotFoundError. Route it through ``cmd /c``,
    # the same way the other Windows tasks in this module invoke built-ins.
    clear_cmd = ["cmd", "/c", "cls"] if Constants.IS_WINDOWS else ["clear"]

    def _clear_screen():
        return subprocess.run(clear_cmd, check=False)

    return px.TaskSpec("clear_screen", fn=_clear_screen)
|
||||
|
||||
|
||||
def reset_icon_cache() -> list[px.TaskSpec]:
    """Build the task chain that resets the Windows icon cache.

    On non-Windows platforms a notice is printed and an empty list is
    returned. Otherwise four specs are returned: kill Explorer, delete
    ``IconCache.db``, delete the ``iconcache*`` files in the Explorer cache
    directory, then restart Explorer.
    """
    if not Constants.IS_WINDOWS:
        print("reset_icon_cache: 仅在 Windows 上支持")
        return []

    explorer_exe = "explorer.exe"
    app_data_root = Path(os.environ.get("LOCALAPPDATA", ""))
    icon_cache_db = app_data_root / "IconCache.db"
    explorer_cache_dir = app_data_root / "Microsoft" / "Windows" / "Explorer"

    def _delete_command(target: object) -> list[str]:
        # ``del`` is a cmd.exe built-in, hence the ``cmd /c`` wrapper.
        return ["cmd", "/c", "del", "/a", "/q", str(target)]

    kill_explorer = px.TaskSpec(
        "kill_explorer",
        cmd=["taskkill", "/f", "/im", explorer_exe],
        conditions=(BuiltinConditions.IS_RUNNING(explorer_exe),),
        verbose=True,
    )
    # NOTE(review): ``icon_cache_db`` is a file path but is checked with
    # DIR_EXISTS; confirm this condition can ever be true for a file.
    delete_db = px.TaskSpec(
        "delete_icon_cache",
        cmd=_delete_command(icon_cache_db),
        conditions=(BuiltinConditions.DIR_EXISTS(icon_cache_db),),
        depends_on=("kill_explorer",),
        verbose=True,
    )
    delete_all = px.TaskSpec(
        "delete_icon_cache_all",
        cmd=_delete_command(explorer_cache_dir / "iconcache*"),
        conditions=(BuiltinConditions.DIR_EXISTS(explorer_cache_dir),),
        depends_on=("kill_explorer",),
        verbose=True,
    )
    restart_explorer = px.TaskSpec(
        "restart_explorer",
        cmd=["cmd", "/c", "start", explorer_exe],
        conditions=(
            BuiltinConditions.HAS_INSTALLED(explorer_exe),
            BuiltinConditions.NOT(BuiltinConditions.IS_RUNNING(explorer_exe)),
        ),
        depends_on=("delete_icon_cache", "delete_icon_cache_all"),
        allow_upstream_skip=True,
        verbose=True,
    )
    return [kill_explorer, delete_db, delete_all, restart_explorer]
|
||||
|
||||
|
||||
def setenv(name: str, value: str, default: bool = False) -> px.TaskSpec:
    """Build a task that sets one environment variable.

    With ``default=True`` the variable is only set when it is not already
    present; otherwise it is overwritten. The task is named
    ``setenv_<lowercased name>``.
    """

    def set_env():
        if not default:
            os.environ[name] = value
            return
        os.environ.setdefault(name, value)

    task_name = f"setenv_{name.lower()}"
    return px.TaskSpec(task_name, fn=set_env, verbose=True)
|
||||
|
||||
|
||||
def setenv_group(envs: dict[str, str], default: bool = False) -> list[px.TaskSpec]:
    """Build one :func:`setenv` task per entry of ``envs``, in mapping order."""
    specs = []
    for var_name, var_value in envs.items():
        specs.append(setenv(var_name, var_value, default))
    return specs
|
||||
|
||||
|
||||
def which(cmd: str) -> px.TaskSpec:
    """Build a task that looks up and prints the path of ``cmd``.

    Uses ``where`` on Windows and ``which`` elsewhere. The task prints
    ``<cmd> -> <path>`` on success, or a "not found" message when the lookup
    exits non-zero.
    """
    locator = "where" if Constants.IS_WINDOWS else "which"

    def find_command():
        proc = subprocess.run([locator, cmd], capture_output=True, text=True, check=False)
        if proc.returncode != 0:
            print(f"{cmd} -> 未找到")
            return

        # ``where`` on Windows may list several matches; keep the first line.
        first_line, _, _ = proc.stdout.strip().partition("\n")
        path = first_line.strip()
        print(f"{cmd} -> {path}")

    return px.TaskSpec(f"which_{cmd}", fn=find_command)
|
||||
|
||||
|
||||
def write_file(path: str, content: str, encoding: str = "utf-8") -> px.TaskSpec:
    """Build a task that writes ``content`` to ``path`` with ``encoding``.

    The task is named ``write_file_<path>``.
    """

    def write():
        Path(path).write_text(content, encoding=encoding)

    task_name = f"write_file_{path}"
    return px.TaskSpec(task_name, fn=write, verbose=True)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,26 @@
|
||||
"""进程池测试辅助:模块级函数(须可 pickle)。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import time
|
||||
|
||||
|
||||
def cpu_heavy(n: int) -> int:
    """CPU-bound work: return the sum of squares of ``0 .. n-1``."""
    total = 0
    for i in range(n):
        total += i * i
    return total
|
||||
|
||||
|
||||
def add(a: int, b: int) -> int:
    """Return ``a + b``."""
    total = a + b
    return total
|
||||
|
||||
|
||||
def sub(a: int, b: int) -> int:
    """Return ``a - b``."""
    difference = a - b
    return difference
|
||||
|
||||
|
||||
def slow_sleep(seconds: float) -> int:
    """Sleep for ``seconds`` and return it truncated to ``int`` (used by timeout tests)."""
    time.sleep(seconds)
    whole_seconds = int(seconds)
    return whole_seconds
|
||||
@@ -0,0 +1,218 @@
|
||||
"""Tests for ops.dev module."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from pyflowx.ops import dev
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# format_with_ruff
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestFormatWithRuff:
    """Exercise ``dev.format_with_ruff`` with ``subprocess.run`` patched out."""

    def test_format_with_ruff(self, tmp_path: Path) -> None:
        """With ``fix=True`` the function invokes ``subprocess.run``."""
        with patch("subprocess.run", return_value=MagicMock(returncode=0)) as run_mock:
            dev.format_with_ruff(tmp_path, fix=True)
        assert run_mock.called

    def test_format_with_ruff_no_fix(self, tmp_path: Path) -> None:
        """With ``fix=False`` the command line must not contain ``--fix``."""
        with patch("subprocess.run", return_value=MagicMock(returncode=0)) as run_mock:
            dev.format_with_ruff(tmp_path, fix=False)
        # First positional argument of the last call is the command list.
        argv = run_mock.call_args.args[0]
        assert "--fix" not in argv
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# lint_with_ruff
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestLintWithRuff:
    """Exercise ``dev.lint_with_ruff`` with ``subprocess.run`` patched out."""

    def test_lint_with_ruff(self, tmp_path: Path) -> None:
        """With ``fix=True`` the function invokes ``subprocess.run``."""
        with patch("subprocess.run", return_value=MagicMock(returncode=0)) as run_mock:
            dev.lint_with_ruff(tmp_path, fix=True)
        assert run_mock.called

    def test_lint_with_ruff_no_fix(self, tmp_path: Path) -> None:
        """With ``fix=False`` the command line must not contain ``--fix``."""
        with patch("subprocess.run", return_value=MagicMock(returncode=0)) as run_mock:
            dev.lint_with_ruff(tmp_path, fix=False)
        # First positional argument of the last call is the command list.
        argv = run_mock.call_args.args[0]
        assert "--fix" not in argv
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# add_docstring
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestAddDocstring:
    """Exercise ``dev.add_docstring`` against small files on disk."""

    def test_add_docstring_to_file(self, tmp_path: Path) -> None:
        """A file without a docstring reports ``True``."""
        target = tmp_path / "test.py"
        target.write_text("def test():\n pass\n")

        added = dev.add_docstring(target, '"""Test module."""')

        assert added is True

    def test_add_docstring_skips_files_with_docstring(self, tmp_path: Path) -> None:
        """A file that already starts with a docstring reports ``False``."""
        target = tmp_path / "test.py"
        target.write_text('"""Existing docstring."""\ndef test():\n pass\n')

        added = dev.add_docstring(target, '"""New docstring."""')

        assert added is False

    def test_add_docstring_empty_file(self, tmp_path: Path) -> None:
        """An empty file is handled and reports ``True``."""
        target = tmp_path / "test.py"
        target.write_text("")

        added = dev.add_docstring(target, '"""Test module."""')

        assert added is True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# generate_module_docstring
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestGenerateModuleDocstring:
    """Exercise ``dev.generate_module_docstring`` for several file names."""

    @staticmethod
    def _generate(py_file: Path) -> str:
        """Write a stub function to ``py_file`` and return its generated docstring."""
        py_file.write_text("def test():\n pass\n")
        return dev.generate_module_docstring(py_file)

    def test_generate_module_docstring_basic(self, tmp_path: Path) -> None:
        """A stem containing "test" yields a "Tests for" docstring."""
        generated = self._generate(tmp_path / "test.py")
        assert "Tests for" in generated

    def test_generate_module_docstring_with_package(self, tmp_path: Path) -> None:
        """The parent package name appears in the docstring."""
        module_path = tmp_path / "mypackage" / "test.py"
        module_path.parent.mkdir(parents=True)

        generated = self._generate(module_path)

        assert "mypackage" in generated

    def test_generate_module_docstring_cli(self, tmp_path: Path) -> None:
        """``cli.py`` yields a command-line-interface docstring."""
        generated = self._generate(tmp_path / "cli.py")
        assert "Command-line interface" in generated

    def test_generate_module_docstring_util(self, tmp_path: Path) -> None:
        """``utils.py`` yields a utility-functions docstring."""
        generated = self._generate(tmp_path / "utils.py")
        assert "Utility functions" in generated
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# auto_add_docstrings
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestAutoAddDocstrings:
    """Exercise ``dev.auto_add_docstrings`` over temporary directory trees."""

    def test_auto_add_docstrings(self, tmp_path: Path) -> None:
        """With ``add_docstring`` stubbed to succeed, the count is non-negative."""
        source = tmp_path / "test.py"
        source.write_text("def test():\n pass\n")

        with patch.object(dev, "add_docstring", return_value=True):
            processed = dev.auto_add_docstrings(tmp_path)

        assert processed >= 0

    def test_auto_add_docstrings_skips_ignored(self, tmp_path: Path) -> None:
        """Files under ``__pycache__`` are not counted."""
        cached = tmp_path / "__pycache__" / "test.py"
        cached.parent.mkdir()
        cached.write_text("def test():\n pass\n")

        processed = dev.auto_add_docstrings(tmp_path)

        assert processed == 0

    def test_auto_add_docstrings_no_files(self, tmp_path: Path) -> None:
        """A directory with no Python files yields a count of zero."""
        note = tmp_path / "test.txt"
        note.write_text("test content")

        processed = dev.auto_add_docstrings(tmp_path)

        assert processed == 0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# sync_pyproject_config
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestSyncPyprojectConfig:
    """Exercise ``dev.sync_pyproject_config`` with ``subprocess.run`` patched out."""

    @staticmethod
    def _write_project_layout(root: Path) -> None:
        """Create a root ``pyproject.toml`` plus one in ``subproject/``."""
        (root / "pyproject.toml").write_text("[tool.ruff]\n")
        nested = root / "subproject"
        nested.mkdir()
        (nested / "pyproject.toml").write_text("[tool.ruff]\n")

    def test_sync_pyproject_config_creates_file(self, tmp_path: Path) -> None:
        """Syncing a root + sub-project layout invokes ``subprocess.run``."""
        self._write_project_layout(tmp_path)

        with patch("subprocess.run", return_value=MagicMock(returncode=0)) as run_mock:
            dev.sync_pyproject_config(tmp_path)

        assert run_mock.called

    def test_sync_pyproject_config_updates_file(self, tmp_path: Path) -> None:
        """Syncing over an existing sub-project config invokes ``subprocess.run``."""
        self._write_project_layout(tmp_path)

        with patch("subprocess.run", return_value=MagicMock(returncode=0)) as run_mock:
            dev.sync_pyproject_config(tmp_path)

        assert run_mock.called
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# format_all
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestFormatAll:
    """Exercise ``dev.format_all`` with ``subprocess.run`` patched out."""

    def test_format_all_runs_ruff_format(self, tmp_path: Path) -> None:
        """``format_all`` invokes ``subprocess.run`` at least once."""
        with patch("subprocess.run", return_value=MagicMock(returncode=0)) as run_mock:
            dev.format_all(tmp_path)
        assert run_mock.called

    def test_format_all_runs_ruff_check(self, tmp_path: Path) -> None:
        """``format_all`` makes exactly two subprocess calls (format and check)."""
        with patch("subprocess.run", return_value=MagicMock(returncode=0)) as run_mock:
            dev.format_all(tmp_path)
        assert run_mock.call_count == 2
|
||||
@@ -0,0 +1,192 @@
|
||||
"""Tests for ops.bumpversion module."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from pyflowx.ops import bumpversion
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def auto_use_tmp_path(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
    """Run every test in this module with ``tmp_path`` as the working directory.

    ``monkeypatch.chdir`` restores the previous directory after the test.
    """
    monkeypatch.chdir(tmp_path)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# bump_file_version
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestBumpFileVersion:
    """Exercise ``bumpversion.bump_file_version`` on individual files."""

    # (part, expected version) pairs when starting from 1.2.3.
    _BUMP_CASES = [("patch", "1.2.4"), ("minor", "1.3.0"), ("major", "2.0.0")]

    @pytest.mark.parametrize(("part", "expected"), _BUMP_CASES)
    def test_bump_pyproject(self, tmp_path: Path, part: str, expected: str) -> None:
        """``pyproject.toml`` is bumped for each of the three parts."""
        target = tmp_path / "pyproject.toml"
        target.write_text('version = "1.2.3"', encoding="utf-8")

        bumped = bumpversion.bump_file_version(target, part)  # type: ignore[arg-type]

        assert bumped == expected
        assert f'version = "{expected}"' in target.read_text(encoding="utf-8")

    @pytest.mark.parametrize(("part", "expected"), _BUMP_CASES)
    def test_bump_init_py(self, tmp_path: Path, part: str, expected: str) -> None:
        """``__init__.py`` is bumped for each of the three parts."""
        target = tmp_path / "__init__.py"
        target.write_text('__version__ = "1.2.3"', encoding="utf-8")

        bumped = bumpversion.bump_file_version(target, part)  # type: ignore[arg-type]

        assert bumped == expected
        assert f'__version__ = "{expected}"' in target.read_text(encoding="utf-8")

    def test_prerelease_and_build_metadata_stripped(self, tmp_path: Path) -> None:
        """Pre-release and build metadata are dropped from the bumped version."""
        target = tmp_path / "pyproject.toml"
        target.write_text('version = "1.2.3-alpha.1+build.123"', encoding="utf-8")

        assert bumpversion.bump_file_version(target, "patch") == "1.2.4"

        rewritten = target.read_text(encoding="utf-8")
        assert "alpha" not in rewritten
        assert "build" not in rewritten

    def test_dependencies_not_modified(self, tmp_path: Path) -> None:
        """Only the project version changes; dependency version pins stay untouched."""
        target = tmp_path / "pyproject.toml"
        target.write_text(
            '[project]\nversion = "1.0.0"\ndependencies = ["lib >= 2.0.0", "other >= 3.0.0"]\n',
            encoding="utf-8",
        )

        assert bumpversion.bump_file_version(target, "patch") == "1.0.1"

        rewritten = target.read_text(encoding="utf-8")
        assert 'version = "1.0.1"' in rewritten
        assert "lib >= 2.0.0" in rewritten
        assert "other >= 3.0.0" in rewritten

    def test_no_version_pattern_returns_none(self, tmp_path: Path, capsys: pytest.CaptureFixture[str]) -> None:
        """``None`` is returned when no version pattern matches.

        Covers a supported file type without a version and an unsupported
        file type.
        """
        versionless_init = tmp_path / "__init__.py"
        versionless_init.write_text("# no version here", encoding="utf-8")
        assert bumpversion.bump_file_version(versionless_init, "patch") is None
        assert "未找到版本号模式" in capsys.readouterr().out

        unsupported = tmp_path / "test.txt"
        unsupported.write_text("no version here", encoding="utf-8")
        assert bumpversion.bump_file_version(unsupported, "patch") is None

    def test_read_directory_raises(self, tmp_path: Path) -> None:
        """Bumping a directory that is named ``__init__.py`` raises."""
        fake_file = tmp_path / "__init__.py"
        fake_file.mkdir()

        with pytest.raises(Exception):  # noqa: B017
            bumpversion.bump_file_version(fake_file, "patch")

    def test_write_failure_raises(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
        """An ``OSError`` raised while writing propagates to the caller."""
        target = tmp_path / "__init__.py"
        target.write_text('__version__ = "1.0.0"', encoding="utf-8")

        def raise_oserror(*_args: object, **_kwargs: object) -> None:
            raise OSError("write failed")

        # Patched only after the fixture file has been written.
        monkeypatch.setattr(Path, "write_text", raise_oserror)

        with pytest.raises(OSError, match="write failed"):
            bumpversion.bump_file_version(target, "patch")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# bump_project_version (核心 bug 修复: 不同步文件统一同步)
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestBumpProjectVersion:
    """Exercise ``bumpversion.bump_project_version`` with git calls stubbed out."""

    @staticmethod
    def _mock_subprocess(monkeypatch: pytest.MonkeyPatch) -> list[list[str]]:
        """Replace ``subprocess.run`` with a recorder and return the list of recorded commands."""
        recorded: list[list[str]] = []

        def fake_run(cmd: list[str], **_kwargs: object) -> subprocess.CompletedProcess[bytes]:
            recorded.append(cmd)
            return subprocess.CompletedProcess(cmd, 0, b"", b"")

        monkeypatch.setattr(subprocess, "run", fake_run)
        return recorded

    def test_unsynced_files_synchronized(
        self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]
    ) -> None:
        """Regression test: out-of-sync files all end up on the same new version.

        Scenario: ``__init__.py`` is at 0.4.5 and ``pyproject.toml`` at 0.3.5.
        Expected: after a patch bump both read 0.4.6, i.e. the highest
        existing version (0.4.5) is used as the base.
        """
        pkg_init = tmp_path / "src" / "pkg" / "__init__.py"
        pkg_init.parent.mkdir(parents=True)
        pkg_init.write_text('__version__ = "0.4.5"', encoding="utf-8")
        pyproject = tmp_path / "pyproject.toml"
        pyproject.write_text('version = "0.3.5"', encoding="utf-8")

        recorded = self._mock_subprocess(monkeypatch)

        new_version = bumpversion.bump_project_version("patch")

        assert new_version == "0.4.6"
        assert '__version__ = "0.4.6"' in pkg_init.read_text(encoding="utf-8")
        assert 'version = "0.4.6"' in pyproject.read_text(encoding="utf-8")
        captured = capsys.readouterr().out
        assert "基准版本: 0.4.5" in captured
        assert "新版本: 0.4.6" in captured

        git_adds = [call for call in recorded if call[:2] == ["git", "add"]]
        assert len(git_adds) == 1
        add_argv = git_adds[0]
        # Cross-platform: Windows paths use backslashes, so compare with forward slashes.
        relative_init = str(pkg_init.relative_to(tmp_path)).replace("\\", "/")
        assert relative_init in [arg.replace("\\", "/") for arg in add_argv]
        assert "pyproject.toml" in add_argv
        assert "." not in add_argv[2:]

        git_tags = [call for call in recorded if call[:2] == ["git", "tag"]]
        assert len(git_tags) == 1
        assert "v0.4.6" in git_tags[0]

    def test_no_files_returns_none(self, capsys: pytest.CaptureFixture[str]) -> None:
        """With no version-bearing files present the result is ``None``."""
        assert bumpversion.bump_project_version("patch") is None
        assert "未找到包含版本号的文件" in capsys.readouterr().out

    def test_files_without_version_returns_none(
        self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]
    ) -> None:
        """Candidate files exist but none contains a version: the result is ``None``."""
        versionless = tmp_path / "__init__.py"
        versionless.write_text("# no version here", encoding="utf-8")
        self._mock_subprocess(monkeypatch)

        assert bumpversion.bump_project_version("patch") is None
        assert "未能从任何文件读取版本号" in capsys.readouterr().out

    def test_no_tag_skips_tag_creation(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
        """``no_tag=True`` skips the ``git tag`` call."""
        pyproject = tmp_path / "pyproject.toml"
        pyproject.write_text('version = "1.0.0"', encoding="utf-8")

        recorded = self._mock_subprocess(monkeypatch)

        assert bumpversion.bump_project_version("patch", no_tag=True) == "1.0.1"
        assert not any(call[:2] == ["git", "tag"] for call in recorded)

    def test_ignored_dirs_excluded(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
        """Version files inside ignored directories such as ``.venv`` are left untouched."""
        venv_init = tmp_path / ".venv" / "lib" / "pkg" / "__init__.py"
        venv_init.parent.mkdir(parents=True)
        venv_init.write_text('__version__ = "0.1.0"', encoding="utf-8")
        pyproject = tmp_path / "pyproject.toml"
        pyproject.write_text('version = "1.0.0"', encoding="utf-8")

        self._mock_subprocess(monkeypatch)

        assert bumpversion.bump_project_version("patch") == "1.0.1"
        assert venv_init.read_text(encoding="utf-8") == '__version__ = "0.1.0"'
|
||||
@@ -0,0 +1,955 @@
|
||||
"""Tests for cli.emlmanager module."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import email
|
||||
from io import BytesIO
|
||||
from pathlib import Path
|
||||
from unittest.mock import Mock, patch
|
||||
|
||||
from pyflowx.cli import emlmanager
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# EmailDatabase Tests
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestEmailDatabase:
|
||||
"""Test EmailDatabase class."""
|
||||
|
||||
def test_init_database(self, tmp_path: Path) -> None:
|
||||
"""Should initialize database successfully."""
|
||||
db_path = tmp_path / "test.db"
|
||||
db = emlmanager.EmailDatabase(db_path)
|
||||
|
||||
assert db.db_path == db_path
|
||||
assert db.conn is not None
|
||||
db.close()
|
||||
|
||||
def test_init_database_creates_table(self, tmp_path: Path) -> None:
|
||||
"""Should create emails table with correct schema."""
|
||||
db_path = tmp_path / "test.db"
|
||||
db = emlmanager.EmailDatabase(db_path)
|
||||
|
||||
assert db.conn is not None
|
||||
|
||||
cursor = db.conn.cursor()
|
||||
cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='emails'")
|
||||
result = cursor.fetchone()
|
||||
assert result is not None
|
||||
db.close()
|
||||
|
||||
def test_init_database_creates_indexes(self, tmp_path: Path) -> None:
|
||||
"""Should create indexes for better query performance."""
|
||||
db_path = tmp_path / "test.db"
|
||||
db = emlmanager.EmailDatabase(db_path)
|
||||
|
||||
assert db.conn is not None
|
||||
|
||||
cursor = db.conn.cursor()
|
||||
cursor.execute("SELECT name FROM sqlite_master WHERE type='index' AND name='idx_subject'")
|
||||
result = cursor.fetchone()
|
||||
assert result is not None
|
||||
db.close()
|
||||
|
||||
def test_insert_email_success(self, tmp_path: Path) -> None:
|
||||
"""Should insert email data successfully."""
|
||||
db_path = tmp_path / "test.db"
|
||||
db = emlmanager.EmailDatabase(db_path)
|
||||
|
||||
email_data = {
|
||||
"file_path": "/test/path.eml",
|
||||
"file_hash": "abc123",
|
||||
"subject": "Test Subject",
|
||||
"sender": "sender@example.com",
|
||||
"recipients": "recipient@example.com",
|
||||
"date": "Mon, 1 Jan 2024 12:00:00 +0000",
|
||||
"date_parsed": "2024-01-01T12:00:00",
|
||||
"body_text": "Test body",
|
||||
"body_html": "<p>Test body</p>",
|
||||
"has_attachments": 0,
|
||||
"file_size": 1024,
|
||||
}
|
||||
|
||||
result = db.insert_email(email_data)
|
||||
assert result is True
|
||||
assert db.conn is not None
|
||||
|
||||
cursor = db.conn.cursor()
|
||||
cursor.execute("SELECT COUNT(*) FROM emails")
|
||||
count = cursor.fetchone()[0]
|
||||
assert count == 1
|
||||
db.close()
|
||||
|
||||
def test_insert_email_replace_existing(self, tmp_path: Path) -> None:
|
||||
"""Should replace existing email with same file_path."""
|
||||
db_path = tmp_path / "test.db"
|
||||
db = emlmanager.EmailDatabase(db_path)
|
||||
|
||||
email_data = {
|
||||
"file_path": "/test/path.eml",
|
||||
"file_hash": "abc123",
|
||||
"subject": "Original Subject",
|
||||
"sender": "sender@example.com",
|
||||
"recipients": "recipient@example.com",
|
||||
"date": "Mon, 1 Jan 2024 12:00:00 +0000",
|
||||
"date_parsed": "2024-01-01T12:00:00",
|
||||
"body_text": "Original body",
|
||||
"body_html": "<p>Original body</p>",
|
||||
"has_attachments": 0,
|
||||
"file_size": 1024,
|
||||
}
|
||||
|
||||
db.insert_email(email_data)
|
||||
|
||||
# Insert same file_path with different content
|
||||
email_data["subject"] = "Updated Subject"
|
||||
email_data["file_hash"] = "xyz789"
|
||||
db.insert_email(email_data)
|
||||
|
||||
assert db.conn is not None
|
||||
|
||||
cursor = db.conn.cursor()
|
||||
cursor.execute("SELECT COUNT(*) FROM emails")
|
||||
count = cursor.fetchone()[0]
|
||||
assert count == 1
|
||||
|
||||
cursor.execute("SELECT subject FROM emails WHERE file_path = ?", ("/test/path.eml",))
|
||||
subject = cursor.fetchone()[0]
|
||||
assert subject == "Updated Subject"
|
||||
db.close()
|
||||
|
||||
def test_search_emails_no_keyword(self, tmp_path: Path) -> None:
|
||||
"""Should return all emails when no keyword provided."""
|
||||
db_path = tmp_path / "test.db"
|
||||
db = emlmanager.EmailDatabase(db_path)
|
||||
|
||||
# Insert test emails
|
||||
for i in range(5):
|
||||
db.insert_email(
|
||||
{
|
||||
"file_path": f"/test/path{i}.eml",
|
||||
"file_hash": f"hash{i}",
|
||||
"subject": f"Subject {i}",
|
||||
"sender": f"sender{i}@example.com",
|
||||
"recipients": "recipient@example.com",
|
||||
"date": f"Mon, {i + 1} Jan 2024 12:00:00 +0000",
|
||||
"date_parsed": f"2024-01-0{i + 1}T12:00:00",
|
||||
"body_text": f"Body {i}",
|
||||
"body_html": f"<p>Body {i}</p>",
|
||||
"has_attachments": 0,
|
||||
"file_size": 1024,
|
||||
}
|
||||
)
|
||||
|
||||
results = db.search_emails(limit=3)
|
||||
assert len(results) == 3
|
||||
db.close()
|
||||
|
||||
def test_search_emails_by_subject(self, tmp_path: Path) -> None:
|
||||
"""Should search emails by subject."""
|
||||
db_path = tmp_path / "test.db"
|
||||
db = emlmanager.EmailDatabase(db_path)
|
||||
|
||||
db.insert_email(
|
||||
{
|
||||
"file_path": "/test/path1.eml",
|
||||
"file_hash": "hash1",
|
||||
"subject": "Important Meeting",
|
||||
"sender": "sender1@example.com",
|
||||
"recipients": "recipient@example.com",
|
||||
"date": "Mon, 1 Jan 2024 12:00:00 +0000",
|
||||
"date_parsed": "2024-01-01T12:00:00",
|
||||
"body_text": "Meeting body",
|
||||
"body_html": "<p>Meeting body</p>",
|
||||
"has_attachments": 0,
|
||||
"file_size": 1024,
|
||||
}
|
||||
)
|
||||
|
||||
db.insert_email(
|
||||
{
|
||||
"file_path": "/test/path2.eml",
|
||||
"file_hash": "hash2",
|
||||
"subject": "Casual Chat",
|
||||
"sender": "sender2@example.com",
|
||||
"recipients": "recipient@example.com",
|
||||
"date": "Tue, 2 Jan 2024 12:00:00 +0000",
|
||||
"date_parsed": "2024-01-02T12:00:00",
|
||||
"body_text": "Chat body",
|
||||
"body_html": "<p>Chat body</p>",
|
||||
"has_attachments": 0,
|
||||
"file_size": 1024,
|
||||
}
|
||||
)
|
||||
|
||||
results = db.search_emails(keyword="Meeting", field="subject")
|
||||
assert len(results) == 1
|
||||
assert results[0]["subject"] == "Important Meeting"
|
||||
db.close()
|
||||
|
||||
def test_search_emails_by_sender(self, tmp_path: Path) -> None:
    """Check that a sender-scoped keyword search returns only the matching email.

    Two emails that differ only in sender (and date/path/hash) are inserted;
    searching for ``"alice"`` with ``field="sender"`` must return exactly the
    record sent by ``alice@example.com``.
    """
    db = emlmanager.EmailDatabase(tmp_path / "test.db")
    try:
        fixtures = [
            ("Mon", "alice@example.com"),
            ("Tue", "bob@example.com"),
        ]
        for day, (weekday, sender) in enumerate(fixtures, start=1):
            db.insert_email(
                {
                    "file_path": f"/test/path{day}.eml",
                    "file_hash": f"hash{day}",
                    "subject": "Test",
                    "sender": sender,
                    "recipients": "recipient@example.com",
                    "date": f"{weekday}, {day} Jan 2024 12:00:00 +0000",
                    "date_parsed": f"2024-01-0{day}T12:00:00",
                    "body_text": "Body",
                    "body_html": "<p>Body</p>",
                    "has_attachments": 0,
                    "file_size": 1024,
                }
            )

        results = db.search_emails(keyword="alice", field="sender")
        assert len(results) == 1
        assert results[0]["sender"] == "alice@example.com"
    finally:
        # Close the database even when an assertion above fails.
        db.close()
|
||||
|
||||
def test_search_emails_all_fields(self, tmp_path: Path) -> None:
    """Check that ``field="all"`` matches a keyword in the subject or the body.

    A single email is inserted; one keyword that appears only in its subject
    and one that appears only in its body must each return that email.
    """
    db = emlmanager.EmailDatabase(tmp_path / "test.db")
    try:
        db.insert_email(
            {
                "file_path": "/test/path1.eml",
                "file_hash": "hash1",
                "subject": "Project Update",
                "sender": "manager@example.com",
                "recipients": "team@example.com",
                "date": "Mon, 1 Jan 2024 12:00:00 +0000",
                "date_parsed": "2024-01-01T12:00:00",
                "body_text": "Please review the quarterly report",
                "body_html": "<p>Please review the quarterly report</p>",
                "has_attachments": 0,
                "file_size": 1024,
            }
        )

        # "Project" occurs in the subject, "quarterly" in the body.
        for keyword in ("Project", "quarterly"):
            results = db.search_emails(keyword=keyword, field="all")
            assert len(results) == 1
    finally:
        # Close the database even when an assertion above fails.
        db.close()
|
||||
|
||||
def test_get_grouped_emails(self, tmp_path: Path) -> None:
    """Check that emails are grouped under their normalized subject.

    "Meeting Tomorrow" and "Re: Meeting Tomorrow" must land in the same
    group, while "Different Topic" forms a second group.
    """
    db = emlmanager.EmailDatabase(tmp_path / "test.db")
    try:
        fixtures = [
            ("Mon", "Meeting Tomorrow"),
            ("Tue", "Re: Meeting Tomorrow"),
            ("Wed", "Different Topic"),
        ]
        for day, (weekday, subject) in enumerate(fixtures, start=1):
            db.insert_email(
                {
                    "file_path": f"/test/path{day}.eml",
                    "file_hash": f"hash{day}",
                    "subject": subject,
                    "sender": f"sender{day}@example.com",
                    "recipients": "recipient@example.com",
                    "date": f"{weekday}, {day} Jan 2024 12:00:00 +0000",
                    "date_parsed": f"2024-01-0{day}T12:00:00",
                    "body_text": f"Body {day}",
                    "body_html": f"<p>Body {day}</p>",
                    "has_attachments": 0,
                    "file_size": 1024,
                }
            )

        grouped = db.get_grouped_emails()
        # Expect two groups: "Meeting Tomorrow" (2 emails) and "Different Topic".
        assert len(grouped) == 2
        assert "Meeting Tomorrow" in grouped
        assert len(grouped["Meeting Tomorrow"]) == 2
    finally:
        # Close the database even when an assertion above fails.
        db.close()
|
||||
|
||||
def test_normalize_subject(self, tmp_path: Path) -> None:
    """Check that ``_normalize_subject`` strips a leading Re/Fwd/FW prefix.

    The "Re: Fwd: Meeting" case pins that only the first prefix is removed.
    """
    db = emlmanager.EmailDatabase(tmp_path / "test.db")
    try:
        expectations = [
            ("Re: Meeting", "Meeting"),
            ("Fwd: Meeting", "Meeting"),
            ("FW: Meeting", "Meeting"),
            ("Re: Fwd: Meeting", "Fwd: Meeting"),
            ("Meeting", "Meeting"),
        ]
        for raw_subject, expected in expectations:
            assert db._normalize_subject(raw_subject) == expected
    finally:
        # Close the database even when an assertion above fails.
        db.close()
|
||||
|
||||
def test_get_email_count(self, tmp_path: Path) -> None:
    """Check that ``get_email_count`` reports 0 when empty and 3 after three inserts."""
    db = emlmanager.EmailDatabase(tmp_path / "test.db")
    try:
        assert db.get_email_count() == 0

        for idx in range(3):
            db.insert_email(
                {
                    "file_path": f"/test/path{idx}.eml",
                    "file_hash": f"hash{idx}",
                    "subject": f"Subject {idx}",
                    "sender": f"sender{idx}@example.com",
                    "recipients": "recipient@example.com",
                    "date": f"Mon, {idx + 1} Jan 2024 12:00:00 +0000",
                    "date_parsed": f"2024-01-0{idx + 1}T12:00:00",
                    "body_text": f"Body {idx}",
                    "body_html": f"<p>Body {idx}</p>",
                    "has_attachments": 0,
                    "file_size": 1024,
                }
            )

        assert db.get_email_count() == 3
    finally:
        # Close the database even when an assertion above fails.
        db.close()
|
||||
|
||||
def test_clear_all(self, tmp_path: Path) -> None:
    """Check that ``clear_all`` empties a database that holds three emails."""
    db = emlmanager.EmailDatabase(tmp_path / "test.db")
    try:
        for idx in range(3):
            db.insert_email(
                {
                    "file_path": f"/test/path{idx}.eml",
                    "file_hash": f"hash{idx}",
                    "subject": f"Subject {idx}",
                    "sender": f"sender{idx}@example.com",
                    "recipients": "recipient@example.com",
                    "date": f"Mon, {idx + 1} Jan 2024 12:00:00 +0000",
                    "date_parsed": f"2024-01-0{idx + 1}T12:00:00",
                    "body_text": f"Body {idx}",
                    "body_html": f"<p>Body {idx}</p>",
                    "has_attachments": 0,
                    "file_size": 1024,
                }
            )

        # Confirm the precondition so a failure in clear_all is unambiguous.
        assert db.get_email_count() == 3

        db.clear_all()
        assert db.get_email_count() == 0
    finally:
        # Close the database even when an assertion above fails.
        db.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# Email Parsing Tests
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestDecodeMimeWords:
    """Exercise ``emlmanager.decode_mime_words`` on plain and encoded-word input."""

    def test_decode_simple_text(self) -> None:
        """Plain ASCII text is returned unchanged."""
        assert emlmanager.decode_mime_words("Simple text") == "Simple text"

    def test_decode_utf8_encoded(self) -> None:
        """A UTF-8 Base64 encoded-word is decoded to its text."""
        # "5Lit5paH" is the Base64 form of the UTF-8 bytes of "中文".
        encoded = "=?utf-8?b?5Lit5paH?="
        assert emlmanager.decode_mime_words(encoded) == "中文"

    def test_decode_qp_encoded(self) -> None:
        """A Quoted-Printable encoded-word is decoded ("=20" becomes a space)."""
        encoded = "=?utf-8?Q?Hello=20World?="
        assert emlmanager.decode_mime_words(encoded) == "Hello World"

    def test_decode_empty_string(self) -> None:
        """An empty string decodes to an empty string."""
        assert emlmanager.decode_mime_words("") == ""

    def test_decode_none(self) -> None:
        """Should handle None input."""
        # NOTE(review): despite the name, this passes "" rather than None, so
        # it currently duplicates test_decode_empty_string -- confirm whether
        # decode_mime_words is meant to accept None.
        assert emlmanager.decode_mime_words("") == ""

    def test_decode_mixed_encoding(self) -> None:
        """Plain text around an encoded-word survives alongside the decoded part."""
        decoded = emlmanager.decode_mime_words("Hello =?utf-8?b?5Lit5paH?= World")
        for fragment in ("Hello", "中文", "World"):
            assert fragment in decoded
|
||||
|
||||
|
||||
class TestParseEmailDate:
    """Exercise ``emlmanager._parse_email_date`` on valid, empty and invalid input."""

    def test_parse_valid_date(self) -> None:
        """A well-formed email date is converted to an ISO-8601 string with offset."""
        parsed = emlmanager._parse_email_date("Mon, 1 Jan 2024 12:00:00 +0000")
        assert parsed == "2024-01-01T12:00:00+00:00"

    def test_parse_empty_date(self) -> None:
        """An empty date string yields an empty string."""
        assert emlmanager._parse_email_date("") == ""

    def test_parse_invalid_date(self) -> None:
        """An unparseable date string is returned as-is."""
        unparseable = "Invalid Date"
        assert emlmanager._parse_email_date(unparseable) == "Invalid Date"
|
||||
|
||||
|
||||
class TestExtractEmailBodyPart:
    """Exercise ``emlmanager._extract_email_body_part`` on text/plain messages."""

    @staticmethod
    def _plain_message(body: str):
        """Build a UTF-8 ``text/plain`` message whose payload is *body*."""
        return email.message_from_string("Content-Type: text/plain; charset=utf-8\n\n" + body)

    def test_extract_text_plain(self) -> None:
        """The plain-text payload is returned verbatim."""
        message = self._plain_message("Test body content")
        assert emlmanager._extract_email_body_part(message) == "Test body content"

    def test_extract_text_with_charset(self) -> None:
        """A payload containing non-ASCII characters still yields the ASCII part."""
        message = self._plain_message("Hello 世界")
        extracted = emlmanager._extract_email_body_part(message)
        assert "Hello" in extracted

    def test_extract_empty_body(self) -> None:
        """A message with no payload yields an empty string."""
        message = self._plain_message("")
        assert emlmanager._extract_email_body_part(message) == ""

    def test_extract_body_with_max_length(self) -> None:
        """A 10000-character payload comes back with length ``MAX_BODY_LENGTH``."""
        message = self._plain_message("A" * 10000)
        extracted = emlmanager._extract_email_body_part(message)
        assert len(extracted) == emlmanager.MAX_BODY_LENGTH
|
||||
|
||||
|
||||
class TestProcessMultipartEmail:
    """Exercise ``emlmanager._process_multipart_email`` on multipart messages."""

    # Header lines shared by both fixtures (everything before Content-Type).
    _COMMON_HEADERS = [
        "From: sender@example.com",
        "To: recipient@example.com",
        "Subject: Test",
        "MIME-Version: 1.0",
    ]

    def test_process_multipart_with_attachments(self) -> None:
        """A multipart/mixed message with a PDF part reports ``has_attachments == 1``."""
        raw_lines = self._COMMON_HEADERS + [
            "Content-Type: multipart/mixed; boundary=boundary",
            "",
            "--boundary",
            "Content-Type: text/plain; charset=utf-8",
            "",
            "Test body",
            "",
            "--boundary",
            'Content-Type: application/pdf; name="test.pdf"',
            'Content-Disposition: attachment; filename="test.pdf"',
            "",
            "PDF content here",
            "",
            "--boundary--",
            "",
        ]
        message = email.message_from_string("\n".join(raw_lines))

        text_part, _html_part, attachment_flag = emlmanager._process_multipart_email(message)

        assert text_part.strip() == "Test body"
        assert attachment_flag == 1

    def test_process_multipart_text_and_html(self) -> None:
        """A multipart/alternative message yields both bodies and no attachment flag."""
        raw_lines = self._COMMON_HEADERS + [
            "Content-Type: multipart/alternative; boundary=boundary",
            "",
            "--boundary",
            "Content-Type: text/plain; charset=utf-8",
            "",
            "Plain text body",
            "",
            "--boundary",
            "Content-Type: text/html; charset=utf-8",
            "",
            "<html><body>HTML body</body></html>",
            "",
            "--boundary--",
            "",
        ]
        message = email.message_from_string("\n".join(raw_lines))

        text_part, html_part, attachment_flag = emlmanager._process_multipart_email(message)

        assert "Plain text body" in text_part
        assert "HTML body" in html_part
        assert attachment_flag == 0
|
||||
|
||||
|
||||
class TestProcessSinglepartEmail:
    """Exercise ``emlmanager._process_singlepart_email`` on text and HTML messages."""

    def test_process_text_plain(self) -> None:
        """A text/plain message fills ``body_text`` and leaves ``body_html`` empty."""
        raw = "Content-Type: text/plain; charset=utf-8\n\nPlain text content"
        text_part, html_part = emlmanager._process_singlepart_email(email.message_from_string(raw))
        assert text_part == "Plain text content"
        assert html_part == ""

    def test_process_text_html(self) -> None:
        """A text/html message fills ``body_html`` and leaves ``body_text`` empty."""
        raw = "Content-Type: text/html; charset=utf-8\n\n<html><body>HTML content</body></html>"
        text_part, html_part = emlmanager._process_singlepart_email(email.message_from_string(raw))
        assert text_part == ""
        assert "HTML content" in html_part
|
||||
|
||||
|
||||
class TestParseEmlFile:
    """Exercise ``emlmanager.parse_eml_file`` on EML files written to ``tmp_path``."""

    @staticmethod
    def _write_eml(tmp_path: Path, lines: list) -> Path:
        """Write *lines* joined by newlines to ``tmp_path / "test.eml"`` and return the path."""
        target = tmp_path / "test.eml"
        target.write_text("\n".join(lines))
        return target

    def test_parse_simple_eml(self, tmp_path: Path) -> None:
        """A single-part message yields its headers, body text and no attachments."""
        eml_path = self._write_eml(
            tmp_path,
            [
                "From: sender@example.com",
                "To: recipient@example.com",
                "Subject: Test Subject",
                "Date: Mon, 1 Jan 2024 12:00:00 +0000",
                "",
                "This is the email body.",
                "",
            ],
        )

        parsed = emlmanager.parse_eml_file(eml_path)

        assert parsed is not None
        assert parsed["subject"] == "Test Subject"
        assert parsed["sender"] == "sender@example.com"
        assert parsed["recipients"] == "recipient@example.com"
        assert "This is the email body" in parsed["body_text"]
        assert parsed["has_attachments"] == 0

    def test_parse_eml_with_mime_subject(self, tmp_path: Path) -> None:
        """A Base64 encoded-word subject is decoded in the parsed result."""
        eml_path = self._write_eml(
            tmp_path,
            [
                "From: sender@example.com",
                "To: recipient@example.com",
                "Subject: =?utf-8?b?5Lit5paHIEhlbGxv?=",
                "Date: Mon, 1 Jan 2024 12:00:00 +0000",
                "",
                "Email body",
                "",
            ],
        )

        parsed = emlmanager.parse_eml_file(eml_path)

        assert parsed is not None
        for fragment in ("中文", "Hello"):
            assert fragment in parsed["subject"]

    def test_parse_multipart_eml(self, tmp_path: Path) -> None:
        """A multipart/alternative file yields both the text and the HTML body."""
        eml_path = self._write_eml(
            tmp_path,
            [
                "From: sender@example.com",
                "To: recipient@example.com",
                "Subject: Multipart Test",
                "Date: Mon, 1 Jan 2024 12:00:00 +0000",
                "MIME-Version: 1.0",
                "Content-Type: multipart/alternative; boundary=boundary",
                "",
                "--boundary",
                "Content-Type: text/plain; charset=utf-8",
                "",
                "Plain text version",
                "",
                "--boundary",
                "Content-Type: text/html; charset=utf-8",
                "",
                "<html><body>HTML version</body></html>",
                "",
                "--boundary--",
                "",
            ],
        )

        parsed = emlmanager.parse_eml_file(eml_path)

        assert parsed is not None
        assert "Plain text version" in parsed["body_text"]
        assert "HTML version" in parsed["body_html"]

    def test_parse_eml_with_attachment(self, tmp_path: Path) -> None:
        """A multipart/mixed file with a Base64 PDF part reports an attachment."""
        eml_path = self._write_eml(
            tmp_path,
            [
                "From: sender@example.com",
                "To: recipient@example.com",
                "Subject: Email with attachment",
                "Date: Mon, 1 Jan 2024 12:00:00 +0000",
                "MIME-Version: 1.0",
                "Content-Type: multipart/mixed; boundary=boundary",
                "",
                "--boundary",
                "Content-Type: text/plain; charset=utf-8",
                "",
                "Email body",
                "",
                "--boundary",
                'Content-Type: application/pdf; name="test.pdf"',
                'Content-Disposition: attachment; filename="test.pdf"',
                "Content-Transfer-Encoding: base64",
                "",
                "JVBERi0xLjQK",
                "",
                "--boundary--",
                "",
            ],
        )

        parsed = emlmanager.parse_eml_file(eml_path)

        assert parsed is not None
        assert parsed["has_attachments"] == 1

    def test_parse_nonexistent_file(self, tmp_path: Path) -> None:
        """A path that does not exist yields ``None``."""
        missing = tmp_path / "nonexistent.eml"
        assert emlmanager.parse_eml_file(missing) is None

    def test_parse_invalid_eml(self, tmp_path: Path) -> None:
        """Content without any headers is still parsed into a non-``None`` result."""
        bogus = tmp_path / "invalid.eml"
        bogus.write_text("This is not a valid EML file")

        # Only the absence of a failure is pinned here; field values are not checked.
        assert emlmanager.parse_eml_file(bogus) is not None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# Web Server Tests
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestEmlManagerHandler:
    """Test ``EmlManagerHandler`` API methods by calling them on a spec'd mock.

    The handler's ``__init__`` is never run: each test builds
    ``Mock(spec=emlmanager.EmlManagerHandler)``, attaches a real
    ``EmailDatabase`` and stubbed collaborators, then invokes the unbound
    method with the mock as ``self``. Every test closes its database in a
    ``finally`` clause so a failing assertion does not leave the handle open.
    """

    @staticmethod
    def _single_email() -> dict:
        """Return the one-off email record shared by the list and clear tests."""
        return {
            "file_path": "/test/path.eml",
            "file_hash": "hash",
            "subject": "Test Subject",
            "sender": "sender@example.com",
            "recipients": "recipient@example.com",
            "date": "Mon, 1 Jan 2024 12:00:00 +0000",
            "date_parsed": "2024-01-01T12:00:00",
            "body_text": "Test body",
            "body_html": "<p>Test body</p>",
            "has_attachments": 0,
            "file_size": 1024,
        }

    @staticmethod
    def _api_handler(db):
        """Build a mock handler bound to *db* whose ``_send_json_response`` is a stub."""
        handler = Mock(spec=emlmanager.EmlManagerHandler)
        handler.db = db
        handler._send_json_response = Mock()
        return handler

    @staticmethod
    def _response_handler(db, accept_encoding: str):
        """Build a mock handler with the attributes the real ``_send_json_response`` is given here."""
        handler = Mock(spec=emlmanager.EmlManagerHandler)
        handler.db = db
        handler.headers = {"Accept-Encoding": accept_encoding}
        handler.send_response = Mock()
        handler.send_header = Mock()
        handler.end_headers = Mock()
        handler.wfile = BytesIO()
        return handler

    def test_api_get_status(self, tmp_path: Path) -> None:
        """``_api_get_status`` reports ``initialized`` and the working directory."""
        db = emlmanager.EmailDatabase(tmp_path / "test.db")
        try:
            handler = Mock(spec=emlmanager.EmlManagerHandler)
            handler.db = db
            handler.work_dir = tmp_path
            handler._send_json_response = Mock()

            emlmanager.EmlManagerHandler._api_get_status(handler)

            handler._send_json_response.assert_called_once()
            payload = handler._send_json_response.call_args[0][0]
            assert payload["initialized"] is True
            assert str(tmp_path) in payload["work_dir"]
        finally:
            db.close()

    def test_api_get_count(self, tmp_path: Path) -> None:
        """``_api_get_count`` reports the number of stored emails."""
        db = emlmanager.EmailDatabase(tmp_path / "test.db")
        try:
            for idx in range(3):
                db.insert_email(
                    {
                        "file_path": f"/test/path{idx}.eml",
                        "file_hash": f"hash{idx}",
                        "subject": f"Subject {idx}",
                        "sender": f"sender{idx}@example.com",
                        "recipients": "recipient@example.com",
                        "date": f"Mon, {idx + 1} Jan 2024 12:00:00 +0000",
                        "date_parsed": f"2024-01-0{idx + 1}T12:00:00",
                        "body_text": f"Body {idx}",
                        "body_html": f"<p>Body {idx}</p>",
                        "has_attachments": 0,
                        "file_size": 1024,
                    }
                )

            handler = self._api_handler(db)
            emlmanager.EmlManagerHandler._api_get_count(handler)

            handler._send_json_response.assert_called_once()
            payload = handler._send_json_response.call_args[0][0]
            assert payload["count"] == 3
        finally:
            db.close()

    def test_api_get_emails(self, tmp_path: Path) -> None:
        """``_api_get_emails`` with empty query parameters returns the stored email."""
        db = emlmanager.EmailDatabase(tmp_path / "test.db")
        try:
            db.insert_email(self._single_email())

            handler = self._api_handler(db)
            emlmanager.EmlManagerHandler._api_get_emails(handler, {})

            handler._send_json_response.assert_called_once()
            payload = handler._send_json_response.call_args[0][0]
            assert len(payload["emails"]) == 1
            assert payload["emails"][0]["subject"] == "Test Subject"
        finally:
            db.close()

    def test_api_clear_database(self, tmp_path: Path) -> None:
        """``_api_clear_database`` empties the database and sends one response."""
        db = emlmanager.EmailDatabase(tmp_path / "test.db")
        try:
            db.insert_email(self._single_email())
            # Confirm the precondition so a failure of the clear is unambiguous.
            assert db.get_email_count() == 1

            handler = self._api_handler(db)
            emlmanager.EmlManagerHandler._api_clear_database(handler)

            handler._send_json_response.assert_called_once()
            assert db.get_email_count() == 0
        finally:
            db.close()

    def test_send_json_response_with_gzip(self, tmp_path: Path) -> None:
        """With ``Accept-Encoding: gzip, deflate`` a gzip Content-Encoding header is sent."""
        db = emlmanager.EmailDatabase(tmp_path / "test.db")
        try:
            handler = self._response_handler(db, "gzip, deflate")

            # Call the real implementation with the mock standing in for self.
            emlmanager.EmlManagerHandler._send_json_response(handler, {"test": "data"})

            handler.send_response.assert_called_once_with(200)
            assert any(
                call[0][0] == "Content-Encoding" and call[0][1] == "gzip"
                for call in handler.send_header.call_args_list
            )
        finally:
            db.close()

    def test_send_json_response_without_gzip(self, tmp_path: Path) -> None:
        """With ``Accept-Encoding: identity`` no Content-Encoding header is sent."""
        db = emlmanager.EmailDatabase(tmp_path / "test.db")
        try:
            handler = self._response_handler(db, "identity")

            # Call the real implementation with the mock standing in for self.
            emlmanager.EmlManagerHandler._send_json_response(handler, {"test": "data"})

            handler.send_response.assert_called_once_with(200)
            assert not any(call[0][0] == "Content-Encoding" for call in handler.send_header.call_args_list)
        finally:
            db.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# Main Function Tests
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestMain:
    """Test scaffolding around the ``emlmanager`` entry point."""

    def test_main_with_dir_argument(self, tmp_path: Path) -> None:
        """Create two EML files, set up the patches for a ``--dir`` run, and count the files.

        NOTE(review): the entry point itself is never invoked here -- the
        patches are entered and left without calling it, so the only thing
        asserted is that the two fixture files exist. Confirm whether this
        test was meant to call ``main``.
        """
        # Write two minimal single-part messages into the working directory.
        for idx in range(2):
            payload = (
                f"From: sender{idx}@example.com\n"
                "To: recipient@example.com\n"
                f"Subject: Test {idx}\n"
                f"Date: Mon, {idx + 1} Jan 2024 12:00:00 +0000\n"
                "\n"
                f"Body {idx}\n"
            )
            (tmp_path / f"test{idx}.eml").write_text(payload)

        argv = ["emlmanager", "--dir", str(tmp_path), "--port", "8080"]
        with patch("sys.argv", argv):
            with patch.object(emlmanager, "ThreadingHTTPServer") as mock_server:
                with patch("threading.Thread"):
                    # Replace the server so nothing would actually listen.
                    mock_server_instance = Mock()
                    mock_server.return_value = mock_server_instance

                    # The handler's class-level ``db`` is reset for the duration
                    # of the (currently empty) body.
                    with patch.object(emlmanager.EmlManagerHandler, "db", None):
                        pass

        # Both fixture files must be discoverable by the "*.eml" glob.
        found = list(tmp_path.glob("*.eml"))
        assert len(found) == 2
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# Integration Tests
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestIntegration:
    """End-to-end test that chains parsing, storage, search, grouping and clearing."""

    def test_full_workflow(self, tmp_path: Path) -> None:
        """Run parse -> store -> search -> group -> clear on three EML files.

        The database is closed in a ``finally`` clause so that a failing
        assertion midway through does not leave the handle open.
        """
        db = emlmanager.EmailDatabase(tmp_path / "test.db")
        try:
            # Create three single-part EML files next to the database.
            eml_files = []
            for idx in range(3):
                eml_file = tmp_path / f"email{idx}.eml"
                eml_file.write_text(
                    f"From: sender{idx}@example.com\n"
                    "To: recipient@example.com\n"
                    f"Subject: Test Email {idx}\n"
                    f"Date: Mon, {idx + 1} Jan 2024 12:00:00 +0000\n"
                    "\n"
                    f"This is email body {idx}.\n"
                )
                eml_files.append(eml_file)

            # Parse each file and store whatever parsed successfully.
            for eml_file in eml_files:
                email_data = emlmanager.parse_eml_file(eml_file)
                if email_data:
                    db.insert_email(email_data)

            assert db.get_email_count() == 3

            # Keyword search with the default field: every subject contains "Email".
            results = db.search_emails(keyword="Email")
            assert len(results) == 3

            # Sender-scoped search must single out sender1.
            results = db.search_emails(keyword="sender1", field="sender")
            assert len(results) == 1
            assert results[0]["sender"] == "sender1@example.com"

            grouped = db.get_grouped_emails()
            assert len(grouped) > 0

            db.clear_all()
            assert db.get_email_count() == 0
        finally:
            db.close()
|
||||
@@ -0,0 +1,389 @@
|
||||
"""Tests for ops.dev 模块 envdev/dockercmd 函数 (镜像源配置/Docker 登录)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from pyflowx.conditions import Constants
|
||||
from pyflowx.ops.dev import (
|
||||
docker_login_tencent,
|
||||
download_rustup_script,
|
||||
install_linux_docker,
|
||||
install_linux_fonts,
|
||||
install_linux_qt_libs,
|
||||
install_rust_toolchain,
|
||||
setup_conda_mirror,
|
||||
setup_linux_system_mirror,
|
||||
setup_python_mirror,
|
||||
setup_rust_mirror,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# setup_python_mirror
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestSetupPythonMirror:
    """Tests for the ``setup_python_mirror`` function."""

    def test_unknown_mirror_skips(self, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]) -> None:
        """An unknown mirror name prints a notice to stdout."""

        def fake_run(*_, **__):
            return MagicMock()

        monkeypatch.setattr(subprocess, "run", fake_run)
        setup_python_mirror("unknown_mirror")

        printed = capsys.readouterr().out
        assert "未知 Python 镜像源" in printed

    def test_known_mirror_writes_config(self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
        """A known mirror name writes a pip config file that contains the mirror host."""
        monkeypatch.setattr(Path, "home", lambda: tmp_path)
        setup_python_mirror("tsinghua")

        # Prefer the Linux location; fall back to the pip.ini location otherwise.
        candidates = [tmp_path / ".pip" / "pip.conf", tmp_path / "pip" / "pip.ini"]
        config_path = next((path for path in candidates if path.exists()), candidates[-1])

        assert config_path.exists()
        assert "pypi.tuna.tsinghua.edu.cn" in config_path.read_text(encoding="utf-8")

    def test_linux_uses_pip_conf(self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
        """With ``Constants.IS_LINUX`` true the config is written to ~/.pip/pip.conf."""
        monkeypatch.setattr(Constants, "IS_LINUX", True)
        monkeypatch.setattr(Path, "home", lambda: tmp_path)

        setup_python_mirror("tsinghua")

        assert (tmp_path / ".pip" / "pip.conf").exists()

    def test_non_linux_uses_pip_ini(self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
        """With ``Constants.IS_LINUX`` false the config is written to ~/pip/pip.ini."""
        monkeypatch.setattr(Constants, "IS_LINUX", False)
        monkeypatch.setattr(Path, "home", lambda: tmp_path)

        setup_python_mirror("tsinghua")

        assert (tmp_path / "pip" / "pip.ini").exists()

    def test_sets_env_vars(self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
        """``PIP_INDEX_URL`` is set and points at the chosen mirror."""
        import os

        monkeypatch.setattr(Path, "home", lambda: tmp_path)
        # Swap in an empty mapping so the test starts without PIP_INDEX_URL.
        monkeypatch.setattr(os, "environ", {})

        setup_python_mirror("aliyun")

        assert "PIP_INDEX_URL" in os.environ
        assert "mirrors.aliyun.com" in os.environ["PIP_INDEX_URL"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# setup_conda_mirror
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestSetupCondaMirror:
    """Tests for the ``setup_conda_mirror`` function."""

    def test_unknown_mirror_skips(self, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]) -> None:
        """An unknown mirror name prints a notice to stdout."""
        setup_conda_mirror("unknown")

        printed = capsys.readouterr().out
        assert "未知 Conda 镜像源" in printed

    def test_known_mirror_writes_condarc(self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
        """A known mirror name writes ~/.condarc with a channels section."""
        monkeypatch.setattr(Path, "home", lambda: tmp_path)

        setup_conda_mirror("tsinghua")

        condarc_path = tmp_path / ".condarc"
        assert condarc_path.exists()

        written = condarc_path.read_text(encoding="utf-8")
        for expected in ("tsinghua", "channels:"):
            assert expected in written
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# setup_rust_mirror
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestSetupRustMirror:
    """Tests for the ``setup_rust_mirror`` function."""

    def test_unknown_mirror_skips(self, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]) -> None:
        """An unknown mirror name prints a notice to stdout."""
        setup_rust_mirror("unknown")

        printed = capsys.readouterr().out
        assert "未知 Rust 镜像源" in printed

    def test_known_mirror_writes_config(self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
        """A known mirror name writes ~/.cargo/config.toml mentioning the mirror."""
        monkeypatch.setattr(Path, "home", lambda: tmp_path)

        setup_rust_mirror("ustc", "nightly")

        cargo_config = tmp_path / ".cargo" / "config.toml"
        assert cargo_config.exists()
        assert "ustc" in cargo_config.read_text(encoding="utf-8")

    def test_creates_sccache_dir(self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
        """The directory named by ``_RUST_SCCACHE_DIR`` exists after the call."""
        from pyflowx.ops import dev as dev_module

        # Redirect the module-level cache directory into the temp tree.
        sccache_dir = tmp_path / ".cargo" / "sccache"
        monkeypatch.setattr(dev_module, "_RUST_SCCACHE_DIR", sccache_dir)
        monkeypatch.setattr(Path, "home", lambda: tmp_path)

        setup_rust_mirror("tsinghua")

        assert sccache_dir.exists()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# docker_login_tencent
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestDockerLoginTencent:
    """Tests for the ``docker_login_tencent`` function."""

    def test_default_username_uses_getpass(self, monkeypatch: pytest.MonkeyPatch) -> None:
        """Without an explicit username, the value of ``getpass.getuser`` should be used."""
        monkeypatch.setattr("getpass.getuser", lambda: "testuser")
        recorded: list[list[str]] = []

        def fake_run(cmd: list[str], **_: object) -> MagicMock:
            # Capture the command instead of spawning a process.
            recorded.append(cmd)
            return MagicMock()

        monkeypatch.setattr(subprocess, "run", fake_run)

        docker_login_tencent()

        login_cmd = recorded[0]
        assert login_cmd[0] == "docker"
        assert login_cmd[1] == "login"
        assert "testuser" in login_cmd
        assert "ccr.ccs.tencentyun.com" in login_cmd

    def test_custom_username(self, monkeypatch: pytest.MonkeyPatch) -> None:
        """An explicitly supplied username should appear in the command."""
        recorded: list[list[str]] = []

        def fake_run(cmd: list[str], **_: object) -> MagicMock:
            # Capture the command instead of spawning a process.
            recorded.append(cmd)
            return MagicMock()

        monkeypatch.setattr(subprocess, "run", fake_run)

        docker_login_tencent("myuser")

        assert "myuser" in recorded[0]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# setup_linux_system_mirror
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestSetupLinuxSystemMirror:
    """Tests for the ``setup_linux_system_mirror`` function."""

    def test_non_linux_skips(self, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]) -> None:
        """On a non-Linux platform nothing should be executed."""
        monkeypatch.setattr(Constants, "IS_LINUX", False)
        invocations: list[str] = []

        def fake_run(cmd: str, **_: object) -> None:
            invocations.append(cmd)

        monkeypatch.setattr(subprocess, "run", fake_run)

        setup_linux_system_mirror()

        stdout = capsys.readouterr().out
        assert "仅在 Linux 上执行" in stdout
        assert invocations == []

    def test_linux_already_configured_skips(
        self, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]
    ) -> None:
        """On Linux with a mirror keyword already present, nothing should be executed."""
        monkeypatch.setattr(Constants, "IS_LINUX", True)

        def read_configured(self: Path, encoding: str = "utf-8") -> str:
            # Every Path.read_text call reports content mentioning a mirror.
            return "tsinghua mirror configured"

        monkeypatch.setattr(Path, "read_text", read_configured)
        invocations: list[str] = []

        def fake_run(cmd: str, **_: object) -> None:
            invocations.append(cmd)

        monkeypatch.setattr(subprocess, "run", fake_run)

        setup_linux_system_mirror()

        stdout = capsys.readouterr().out
        assert "已配置" in stdout
        assert invocations == []

    def test_linux_not_configured_runs_script(
        self, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]
    ) -> None:
        """On Linux, when reading the file fails, the download and install steps should run."""
        monkeypatch.setattr(Constants, "IS_LINUX", True)

        def read_fails(self: Path, encoding: str = "utf-8") -> str:
            # Simulate an unreadable or missing file.
            raise OSError("file not found")

        monkeypatch.setattr(Path, "read_text", read_fails)
        recorded: list[str] = []

        def fake_run(cmd: str, **_: object) -> MagicMock:
            recorded.append(cmd)
            return MagicMock()

        monkeypatch.setattr(subprocess, "run", fake_run)

        setup_linux_system_mirror()

        stdout = capsys.readouterr().out
        assert "下载" in stdout
        assert "安装" in stdout
        assert len(recorded) == 2

    def test_linux_content_without_mirror_runs_script(
        self, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]
    ) -> None:
        """On Linux, file content lacking a mirror keyword should still trigger the script."""
        monkeypatch.setattr(Constants, "IS_LINUX", True)

        def read_default_sources(self: Path, encoding: str = "utf-8") -> str:
            # Content that points at the upstream Ubuntu archive only.
            return "deb http://archive.ubuntu.com/ubuntu/ jammy main"

        monkeypatch.setattr(Path, "read_text", read_default_sources)
        recorded: list[str] = []

        def fake_run(cmd: str, **_: object) -> MagicMock:
            recorded.append(cmd)
            return MagicMock()

        monkeypatch.setattr(subprocess, "run", fake_run)

        setup_linux_system_mirror()

        stdout = capsys.readouterr().out
        assert "下载" in stdout
        assert len(recorded) == 2
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# install_linux_qt_libs / install_linux_fonts / install_linux_docker
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestLinuxInstallers:
    """Tests for the Linux-only installer functions."""

    def test_qt_libs_non_linux_skips(self, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]) -> None:
        """``install_linux_qt_libs`` should skip on a non-Linux platform."""
        monkeypatch.setattr(Constants, "IS_LINUX", False)
        invocations: list[list[str]] = []

        def fake_run(cmd: list[str], **_: object) -> None:
            invocations.append(cmd)

        monkeypatch.setattr(subprocess, "run", fake_run)

        install_linux_qt_libs()

        stdout = capsys.readouterr().out
        assert "仅在 Linux" in stdout
        assert invocations == []

    def test_qt_libs_linux_runs_apt(self, monkeypatch: pytest.MonkeyPatch) -> None:
        """On Linux the first command should be a ``sudo`` apt install."""
        monkeypatch.setattr(Constants, "IS_LINUX", True)
        recorded: list[list[str]] = []

        def fake_run(cmd: list[str], **_: object) -> MagicMock:
            recorded.append(cmd)
            return MagicMock()

        monkeypatch.setattr(subprocess, "run", fake_run)

        install_linux_qt_libs()

        first_cmd = recorded[0]
        assert first_cmd[0] == "sudo"
        assert "apt" in first_cmd
        assert "install" in first_cmd

    def test_fonts_non_linux_skips(self, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]) -> None:
        """``install_linux_fonts`` should skip on a non-Linux platform."""
        monkeypatch.setattr(Constants, "IS_LINUX", False)
        invocations: list[list[str]] = []

        def fake_run(cmd: list[str], **_: object) -> None:
            invocations.append(cmd)

        monkeypatch.setattr(subprocess, "run", fake_run)

        install_linux_fonts()

        stdout = capsys.readouterr().out
        assert "仅在 Linux" in stdout
        assert invocations == []

    def test_fonts_linux_runs_apt(self, monkeypatch: pytest.MonkeyPatch) -> None:
        """On Linux the first command should include the CJK font package."""
        monkeypatch.setattr(Constants, "IS_LINUX", True)
        recorded: list[list[str]] = []

        def fake_run(cmd: list[str], **_: object) -> MagicMock:
            recorded.append(cmd)
            return MagicMock()

        monkeypatch.setattr(subprocess, "run", fake_run)

        install_linux_fonts()

        assert "fonts-noto-cjk" in recorded[0]

    def test_docker_non_linux_skips(self, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]) -> None:
        """``install_linux_docker`` should skip on a non-Linux platform."""
        monkeypatch.setattr(Constants, "IS_LINUX", False)
        invocations: list[list[str]] = []

        def fake_run(cmd: list[str], **_: object) -> None:
            invocations.append(cmd)

        monkeypatch.setattr(subprocess, "run", fake_run)

        install_linux_docker()

        stdout = capsys.readouterr().out
        assert "仅在 Linux" in stdout
        assert invocations == []

    def test_docker_linux_runs_install_and_usermod(self, monkeypatch: pytest.MonkeyPatch) -> None:
        """On Linux both the docker-compose-v2 install and a ``usermod`` command should run."""
        monkeypatch.setattr(Constants, "IS_LINUX", True)
        monkeypatch.setattr("getpass.getuser", lambda: "testuser")
        recorded: list[list[str]] = []

        def fake_run(cmd: list[str], **_: object) -> MagicMock:
            recorded.append(cmd)
            return MagicMock()

        monkeypatch.setattr(subprocess, "run", fake_run)

        install_linux_docker()

        assert any("docker-compose-v2" in cmd for cmd in recorded)
        assert any("usermod" in cmd and "docker" in cmd for cmd in recorded)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# download_rustup_script
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestDownloadRustupScript:
    """Tests for the ``download_rustup_script`` function."""

    def test_rustup_installed_skips(self, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]) -> None:
        """When ``shutil.which`` finds rustup, no command should be executed."""
        monkeypatch.setattr("shutil.which", lambda _cmd: "/usr/bin/rustup")
        invocations: list[list[str]] = []

        def fake_run(cmd: list[str], **_: object) -> None:
            invocations.append(cmd)

        monkeypatch.setattr(subprocess, "run", fake_run)

        download_rustup_script()

        stdout = capsys.readouterr().out
        assert "已安装" in stdout
        assert invocations == []

    def test_windows_downloads_exe(self, monkeypatch: pytest.MonkeyPatch) -> None:
        """On Windows the first command should fetch ``rustup-init.exe`` via powershell."""
        monkeypatch.setattr("shutil.which", lambda _cmd: None)
        monkeypatch.setattr(Constants, "IS_WINDOWS", True)
        recorded: list[list[str]] = []

        def fake_run(cmd: list[str], **_: object) -> MagicMock:
            recorded.append(cmd)
            return MagicMock()

        monkeypatch.setattr(subprocess, "run", fake_run)

        download_rustup_script()

        first_cmd = recorded[0]
        assert "powershell" in first_cmd
        assert "rustup-init.exe" in first_cmd

    def test_non_windows_downloads_sh(self, monkeypatch: pytest.MonkeyPatch) -> None:
        """Off Windows the first command should fetch ``rustup-init.sh`` via curl."""
        monkeypatch.setattr("shutil.which", lambda _cmd: None)
        monkeypatch.setattr(Constants, "IS_WINDOWS", False)
        recorded: list[list[str]] = []

        def fake_run(cmd: list[str], **_: object) -> MagicMock:
            recorded.append(cmd)
            return MagicMock()

        monkeypatch.setattr(subprocess, "run", fake_run)

        download_rustup_script()

        first_cmd = recorded[0]
        assert "curl" in first_cmd
        assert "rustup-init.sh" in first_cmd
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# install_rust_toolchain
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestInstallRustToolchain:
    """Tests for the ``install_rust_toolchain`` function."""

    def test_rustup_not_installed_skips(
        self, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]
    ) -> None:
        """When ``shutil.which`` finds nothing, no command should be executed."""
        monkeypatch.setattr("shutil.which", lambda _cmd: None)
        invocations: list[list[str]] = []

        def fake_run(cmd: list[str], **_: object) -> None:
            invocations.append(cmd)

        monkeypatch.setattr(subprocess, "run", fake_run)

        install_rust_toolchain()

        stdout = capsys.readouterr().out
        assert "未安装" in stdout
        assert invocations == []

    def test_rustup_installed_runs_toolchain_install(self, monkeypatch: pytest.MonkeyPatch) -> None:
        """With rustup present, exactly one ``rustup toolchain install`` command should run."""
        monkeypatch.setattr("shutil.which", lambda _cmd: "/usr/bin/rustup")
        recorded: list[list[str]] = []

        def fake_run(cmd: list[str], **_: object) -> MagicMock:
            recorded.append(cmd)
            return MagicMock()

        monkeypatch.setattr(subprocess, "run", fake_run)

        install_rust_toolchain("nightly")

        expected = [["rustup", "toolchain", "install", "nightly"]]
        assert recorded == expected

    def test_default_version_stable(self, monkeypatch: pytest.MonkeyPatch) -> None:
        """Without an explicit version, ``stable`` should appear in the first command."""
        monkeypatch.setattr("shutil.which", lambda _cmd: "/usr/bin/rustup")
        recorded: list[list[str]] = []

        def fake_run(cmd: list[str], **_: object) -> MagicMock:
            recorded.append(cmd)
            return MagicMock()

        monkeypatch.setattr(subprocess, "run", fake_run)

        install_rust_toolchain()

        assert "stable" in recorded[0]
|
||||
@@ -0,0 +1,103 @@
|
||||
"""Tests for cli.filedate module."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from pyflowx.ops import files
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# get_file_timestamp
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestGetFileTimestamp:
    """Tests for ``files.get_file_timestamp``."""

    def test_get_file_timestamp(self, tmp_path: Path) -> None:
        """The timestamp of a freshly written file should be eight digits."""
        sample = tmp_path / "test.txt"
        sample.write_text("test content")

        stamp = files.get_file_timestamp(sample)

        # Eight characters, all digits: the YYYYMMDD layout.
        assert len(stamp) == 8
        assert stamp.isdigit()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# remove_date_prefix
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestRemoveDatePrefix:
    """Tests for ``files.remove_date_prefix``."""

    def test_remove_date_prefix_with_date(self, tmp_path: Path) -> None:
        """A leading ``YYYYMMDD_`` prefix should be stripped from the returned name."""
        dated = tmp_path / "20240101_test.txt"
        dated.write_text("test content")

        result = files.remove_date_prefix(dated)

        assert result.name == "test.txt"

    def test_remove_date_prefix_without_date(self, tmp_path: Path) -> None:
        """A file without a date prefix should come back as the same path."""
        plain = tmp_path / "test.txt"
        plain.write_text("test content")

        result = files.remove_date_prefix(plain)

        assert result == plain
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# add_date_prefix
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestAddDatePrefix:
    """Tests for ``files.add_date_prefix``."""

    def test_add_date_prefix(self, tmp_path: Path) -> None:
        """The returned name should start with a year and keep the original name."""
        sample = tmp_path / "test.txt"
        sample.write_text("test content")

        renamed = files.add_date_prefix(sample).name

        # The prefix begins with the century digits of the year.
        assert renamed.startswith("20")
        assert "_test.txt" in renamed
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# process_file_date
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestProcessFileDate:
    """Tests for ``files.process_file_date``."""

    def test_process_file_date_add(self, tmp_path: Path) -> None:
        """Calling with ``clear=False`` on a plain file should not raise."""
        sample = tmp_path / "test.txt"
        sample.write_text("test content")

        # Intended outcome: the file is renamed with a date prefix.
        # NOTE(review): nothing is asserted here; this only checks the call completes.
        files.process_file_date(sample, clear=False)

    def test_process_file_date_clear(self, tmp_path: Path) -> None:
        """Calling with ``clear=True`` on a date-prefixed file should not raise."""
        dated = tmp_path / "20240101_test.txt"
        dated.write_text("test content")

        # Intended outcome: the file is renamed without its date prefix.
        # NOTE(review): nothing is asserted here; this only checks the call completes.
        files.process_file_date(dated, clear=True)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# process_files_date
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestProcessFilesDate:
    """Tests for ``files.process_files_date``."""

    def test_process_files_date_batch(self, tmp_path: Path) -> None:
        """Processing a list of three files should not raise."""
        batch = [tmp_path / f"test{index}.txt" for index in range(3)]
        for index, path in enumerate(batch):
            path.write_text(f"content{index}")

        # Intended outcome: every file in the batch is processed.
        # NOTE(review): nothing is asserted here; this only checks the call completes.
        files.process_files_date(batch, clear=False)
|
||||
@@ -0,0 +1,96 @@
|
||||
"""Tests for cli.filelevel module."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from pyflowx.ops import files
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# remove_marks
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestRemoveMarks:
    """Tests for ``files.remove_marks``."""

    def test_remove_marks_single_mark(self) -> None:
        """A single parenthesised mark should be stripped from the stem."""
        assert files.remove_marks("filename(PUB)", ["PUB"]) == "filename"

    def test_remove_marks_multiple_marks(self) -> None:
        """Several consecutive marks should all be stripped."""
        marks = ["PUB", "NOR"]
        assert files.remove_marks("filename(PUB)(NOR)", marks) == "filename"

    def test_remove_marks_no_marks(self) -> None:
        """A stem carrying no mark should be returned unchanged."""
        assert files.remove_marks("filename", ["PUB"]) == "filename"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# process_file_level
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestProcessFileLevel:
    """Tests for ``files.process_file_level``."""

    def test_process_file_level_set_pub(self, tmp_path: Path) -> None:
        """Calling with ``level=1`` on an existing file should not raise."""
        sample = tmp_path / "test.txt"
        sample.write_text("test content")

        # Intended outcome: the file is renamed with the PUB level.
        # NOTE(review): nothing is asserted here; this only checks the call completes.
        files.process_file_level(sample, level=1)

    def test_process_file_level_set_int(self, tmp_path: Path) -> None:
        """Calling with ``level=2`` on an existing file should not raise."""
        sample = tmp_path / "test.txt"
        sample.write_text("test content")

        # Intended outcome: the file is renamed with the INT level.
        # NOTE(review): nothing is asserted here; this only checks the call completes.
        files.process_file_level(sample, level=2)

    def test_process_file_level_clear(self, tmp_path: Path) -> None:
        """Calling with ``level=0`` on a marked file should not raise."""
        marked = tmp_path / "test(PUB).txt"
        marked.write_text("test content")

        # Intended outcome: the file is renamed without its level mark.
        # NOTE(review): nothing is asserted here; this only checks the call completes.
        files.process_file_level(marked, level=0)

    def test_process_file_level_invalid_level(self, tmp_path: Path) -> None:
        """Calling with an out-of-range level should not raise."""
        sample = tmp_path / "test.txt"
        sample.write_text("test content")

        # Intended outcome: an error message is printed.
        # NOTE(review): the output is not captured or asserted.
        files.process_file_level(sample, level=5)

    def test_process_file_level_nonexistent_file(self, tmp_path: Path) -> None:
        """Calling on a path that does not exist should not raise."""
        missing = tmp_path / "nonexistent.txt"

        # Intended outcome: an error message is printed.
        # NOTE(review): the output is not captured or asserted.
        files.process_file_level(missing, level=1)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# process_files_level
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestProcessFilesLevel:
    """Tests for ``files.process_files_level``."""

    def test_process_files_level_batch(self, tmp_path: Path) -> None:
        """Processing a list of three files should not raise."""
        batch = [tmp_path / f"test{index}.txt" for index in range(3)]
        for index, path in enumerate(batch):
            path.write_text(f"content{index}")

        # Intended outcome: every file in the batch is processed.
        # NOTE(review): nothing is asserted here; this only checks the call completes.
        files.process_files_level(batch, level=1)
|
||||
@@ -0,0 +1,159 @@
|
||||
"""Tests for cli.folderback module."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
from pyflowx.ops import files
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# remove_dump
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestRemoveDump:
    """Tests for ``files.remove_dump``."""

    def test_remove_dump_no_files(self, tmp_path: Path) -> None:
        """An empty backup directory should be handled without raising."""
        source_dir = tmp_path / "source"
        backup_dir = tmp_path / "backup"
        source_dir.mkdir()
        backup_dir.mkdir()

        # Only checks that the call completes; nothing else is asserted.
        files.remove_dump(source_dir, backup_dir, 5)

    def test_remove_dump_within_limit(self, tmp_path: Path) -> None:
        """With fewer archives than the limit, none should be removed."""
        source_dir = tmp_path / "source"
        backup_dir = tmp_path / "backup"
        source_dir.mkdir()
        backup_dir.mkdir()

        # Three archives, below the limit of five.
        for index in range(3):
            (backup_dir / f"source_20240101_12000{index}.zip").write_bytes(b"ZIP content")

        files.remove_dump(source_dir, backup_dir, 5)

        remaining = list(backup_dir.glob("*.zip"))
        assert len(remaining) == 3

    def test_remove_dump_exceeds_limit(self, tmp_path: Path) -> None:
        """With more archives than the limit, only the limit count should remain."""
        source_dir = tmp_path / "source"
        backup_dir = tmp_path / "backup"
        source_dir.mkdir()
        backup_dir.mkdir()

        # Seven archives, above the limit of five.
        for index in range(7):
            (backup_dir / f"source_20240101_12000{index}.zip").write_bytes(b"ZIP content")

        files.remove_dump(source_dir, backup_dir, 5)

        remaining = list(backup_dir.glob("*.zip"))
        assert len(remaining) == 5
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# zip_target
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestZipTarget:
    """Tests for ``files.zip_target``."""

    def test_zip_target_creates_zip(self, tmp_path: Path) -> None:
        """Zipping a folder with one file should leave exactly one archive in the destination."""
        source_dir = tmp_path / "source"
        source_dir.mkdir()
        (source_dir / "test.txt").write_text("test content")
        backup_dir = tmp_path / "backup"
        backup_dir.mkdir()

        # Freeze time.strftime so its return value is fixed for this call.
        with patch("time.strftime", return_value="_20240101_120000"):
            files.zip_target(source_dir, backup_dir, 5)

        archives = list(backup_dir.glob("*.zip"))
        assert len(archives) == 1

    def test_zip_target_with_subdirectories(self, tmp_path: Path) -> None:
        """Zipping a folder with a nested directory should leave exactly one archive."""
        source_dir = tmp_path / "source"
        source_dir.mkdir()
        nested_dir = source_dir / "subdir"
        nested_dir.mkdir()
        (source_dir / "test.txt").write_text("test content")
        (nested_dir / "nested.txt").write_text("nested content")
        backup_dir = tmp_path / "backup"
        backup_dir.mkdir()

        # Freeze time.strftime so its return value is fixed for this call.
        with patch("time.strftime", return_value="_20240101_120000"):
            files.zip_target(source_dir, backup_dir, 5)

        archives = list(backup_dir.glob("*.zip"))
        assert len(archives) == 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# backup_folder
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestBackupFolder:
    """Tests for ``files.backup_folder``."""

    def test_backup_folder_with_source_and_backup(self, tmp_path: Path) -> None:
        """With an existing source, ``zip_target`` should be invoked."""
        src = tmp_path / "source"
        src.mkdir()
        (src / "test.txt").write_text("test content")
        dst = tmp_path / "backup"

        with patch.object(files, "zip_target") as zip_mock:
            files.backup_folder(str(src), str(dst), 5)

        assert zip_mock.called

    def test_backup_folder_with_max_backups(self, tmp_path: Path) -> None:
        """A different backup limit should still lead to ``zip_target`` being invoked."""
        src = tmp_path / "source"
        src.mkdir()
        (src / "test.txt").write_text("test content")
        dst = tmp_path / "backup"

        with patch.object(files, "zip_target") as zip_mock:
            files.backup_folder(str(src), str(dst), 10)

        assert zip_mock.called

    def test_backup_folder_source_not_exists(self, tmp_path: Path) -> None:
        """A missing source folder should be handled without raising."""
        src = tmp_path / "nonexistent"
        dst = tmp_path / "backup"
        dst.mkdir()

        # Intended outcome: an error message is printed and the call returns.
        # NOTE(review): the output is not captured or asserted.
        files.backup_folder(str(src), str(dst), 5)

    def test_backup_folder_creates_dst(self, tmp_path: Path) -> None:
        """The destination directory should exist after the call."""
        src = tmp_path / "source"
        src.mkdir()
        (src / "test.txt").write_text("test content")
        dst = tmp_path / "backup"

        # zip_target is mocked, so the directory must come from backup_folder itself.
        with patch.object(files, "zip_target") as zip_mock:
            files.backup_folder(str(src), str(dst), 5)

        assert dst.exists()
        assert zip_mock.called
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# 函数注册
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestRegisteredFunctions:
    """Checks that the folderback entry point is exposed on ``files``."""

    def test_folderback_default_spec(self) -> None:
        """``files.folderback_default`` should be callable."""
        # folderback_default is now a plain function registered through
        # @px.register_fn, not a TaskSpec.
        entry_point = files.folderback_default
        assert callable(entry_point)
|
||||
@@ -0,0 +1,61 @@
|
||||
"""Tests for cli.folderzip module."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
from pyflowx.ops import files
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# archive_folder
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestArchiveFolder:
    """Tests for ``files.archive_folder``."""

    def test_archive_folder(self, tmp_path: Path) -> None:
        """Archiving a folder should call ``shutil.make_archive``."""
        target = tmp_path / "test_folder"
        target.mkdir()
        (target / "test.txt").write_text("test content")

        # Mock the archiver so no archive is actually written.
        with patch("shutil.make_archive") as make_archive_mock:
            files.archive_folder(target)

        assert make_archive_mock.called
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# zip_folders
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestZipFolders:
    """Tests for ``files.zip_folders``."""

    def test_zip_folders_with_cwd(self, tmp_path: Path) -> None:
        """Each regular folder in the given directory should be archived once."""
        # Two regular folders plus a .git folder that is expected to be ignored.
        for folder_name in ("folder1", "folder2", ".git"):
            (tmp_path / folder_name).mkdir()

        with patch.object(files, "archive_folder") as archive_mock:
            files.zip_folders(str(tmp_path))

        # folder1 and folder2 are archived; .git is not.
        assert archive_mock.call_count == 2

    def test_zip_folders_nonexistent_cwd(self, tmp_path: Path) -> None:
        """A directory that does not exist should be handled without raising."""
        missing_dir = tmp_path / "nonexistent"

        # Intended outcome: an error message is printed and the call returns.
        # NOTE(review): the output is not captured or asserted.
        files.zip_folders(str(missing_dir))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# 函数注册
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestRegisteredFunctions:
    """Checks that the folderzip entry point is exposed on ``files``."""

    def test_folderzip_default_spec(self) -> None:
        """``files.folderzip_default`` should be callable."""
        # folderzip_default is now a plain function registered through
        # @px.register_fn, not a TaskSpec.
        entry_point = files.folderzip_default
        assert callable(entry_point)
|
||||
@@ -0,0 +1,94 @@
|
||||
"""Tests for cli.gittool module."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
import pyflowx as px
|
||||
from pyflowx.ops import dev
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# not_has_git_repo
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestNotHasGitRepo:
    """Tests for ``dev.not_has_git_repo``."""

    def test_not_has_git_repo_true(self, tmp_path: Path) -> None:
        """Without a ``.git`` directory in the cwd, the result should be ``True``."""
        with patch.object(Path, "cwd", return_value=tmp_path):
            outcome = dev.not_has_git_repo()

        assert outcome is True

    def test_not_has_git_repo_false(self, tmp_path: Path) -> None:
        """With a ``.git`` directory in the cwd, the result should be ``False``."""
        (tmp_path / ".git").mkdir()

        with patch.object(Path, "cwd", return_value=tmp_path):
            outcome = dev.not_has_git_repo()

        assert outcome is False

    def test_not_has_git_repo_cwd_not_exists(self, tmp_path: Path) -> None:
        """When the cwd itself does not exist, the result should be ``True``."""
        missing_dir = tmp_path / "nonexistent"

        with patch.object(Path, "cwd", return_value=missing_dir):
            outcome = dev.not_has_git_repo()

        assert outcome is True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# has_files
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestHasFiles:
    """Tests for ``dev.has_files``."""

    def test_has_files_true(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
        """Non-empty stdout from the patched ``subprocess.run`` should yield ``True``."""

        class _DirtyStatus:
            # Stand-in for the object returned by subprocess.run; only stdout is provided.
            stdout = " M test.txt\n"

        def fake_run(*_: object, **__: object) -> _DirtyStatus:
            return _DirtyStatus()

        monkeypatch.setattr("subprocess.run", fake_run)

        assert dev.has_files() is True

    def test_has_files_false(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
        """Empty stdout from the patched ``subprocess.run`` should yield ``False``."""

        class _CleanStatus:
            # Stand-in for the object returned by subprocess.run; only stdout is provided.
            stdout = ""

        def fake_run(*_: object, **__: object) -> _CleanStatus:
            return _CleanStatus()

        monkeypatch.setattr("subprocess.run", fake_run)

        assert dev.has_files() is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# init_sub_dirs
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestInitSubDirs:
    """Tests for ``dev.init_sub_dirs``."""

    def test_init_sub_dirs_with_subdirectories(self, tmp_path: Path) -> None:
        """``px.run`` should be called once per subdirectory of the cwd."""
        for folder_name in ("subdir1", "subdir2"):
            (tmp_path / folder_name).mkdir()

        with patch.object(Path, "cwd", return_value=tmp_path), patch.object(px, "run") as run_mock:
            dev.init_sub_dirs()

        # One px.run call for each of the two subdirectories.
        assert run_mock.call_count == 2

    def test_init_sub_dirs_no_subdirectories(self, tmp_path: Path) -> None:
        """With no subdirectories in the cwd, ``px.run`` should not be called."""
        with patch.object(Path, "cwd", return_value=tmp_path), patch.object(px, "run") as run_mock:
            dev.init_sub_dirs()

        assert run_mock.call_count == 0
|
||||
@@ -0,0 +1,168 @@
|
||||
"""Tests for the ops.llm module (msdownload/sglang)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from pyflowx.conditions import Constants
|
||||
from pyflowx.ops.llm import install_sglang, msdownload_run, run_sglang
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# msdownload_run
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestMsdownloadRun:
    """Tests for the ``msdownload_run`` function."""

    def test_empty_name_does_nothing(self, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]) -> None:
        """An empty name should be reported and no command executed."""
        invocations: list[list[str]] = []

        def fake_run(cmd: list[str], **_: object) -> None:
            invocations.append(cmd)

        monkeypatch.setattr(subprocess, "run", fake_run)

        msdownload_run("")

        stdout = capsys.readouterr().out
        assert "name 不能为空" in stdout
        assert invocations == []

    def test_default_download_dir(self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
        """Without a download_dir, ``~/.models/<last segment of name>`` should be used."""
        # Point Path.home() at the temporary directory so the real home is untouched.
        monkeypatch.setattr(Path, "home", lambda: tmp_path)
        recorded: list[list[str]] = []

        def fake_run(cmd: list[str], **_: object) -> MagicMock:
            recorded.append(cmd)
            return MagicMock()

        monkeypatch.setattr(subprocess, "run", fake_run)

        msdownload_run("Qwen/Qwen2.5-Coder")

        model_dir = tmp_path / ".models" / "Qwen2.5-Coder"
        assert model_dir.exists()

        expected_cmd = ["uvx", "modelscope", "download", "--model", "Qwen/Qwen2.5-Coder", "--local_dir", str(model_dir)]
        assert recorded[0] == expected_cmd

    def test_custom_download_dir(self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
        """An explicit download_dir should be created and passed in the command."""
        target_dir = tmp_path / "custom"
        recorded: list[list[str]] = []

        def fake_run(cmd: list[str], **_: object) -> MagicMock:
            recorded.append(cmd)
            return MagicMock()

        monkeypatch.setattr(subprocess, "run", fake_run)

        msdownload_run("Qwen/Qwen2.5", "dataset", str(target_dir))

        assert target_dir.exists()
        first_cmd = recorded[0]
        # The type flag sits at index 3 of the command.
        assert first_cmd[3] == "--dataset"
        assert str(target_dir) in first_cmd

    def test_dataset_type(self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
        """Passing ``dataset`` as the target type should add ``--dataset`` to the command."""
        monkeypatch.setattr(Path, "home", lambda: tmp_path)
        recorded: list[list[str]] = []

        def fake_run(cmd: list[str], **_: object) -> MagicMock:
            recorded.append(cmd)
            return MagicMock()

        monkeypatch.setattr(subprocess, "run", fake_run)

        msdownload_run("foo/bar", "dataset")

        assert "--dataset" in recorded[0]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# install_sglang
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestInstallSglang:
    """Tests for the ``install_sglang`` function."""

    def test_already_installed_skips(self, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]) -> None:
        """When ``shutil.which`` finds sglang, no install command is run."""
        invocations: list[list[str]] = []

        def fake_run(cmd, **_):
            invocations.append(cmd)

        monkeypatch.setattr("shutil.which", lambda _cmd: "/usr/bin/sglang")
        monkeypatch.setattr(subprocess, "run", fake_run)

        install_sglang()

        stdout = capsys.readouterr().out
        assert "已安装" in stdout
        assert invocations == []

    def test_not_installed_runs_uv_install(self, monkeypatch: pytest.MonkeyPatch) -> None:
        """When sglang is missing, ``uv install sglang[all]`` is executed once."""
        invocations: list[list[str]] = []

        def fake_run(cmd, **_):
            invocations.append(cmd)
            return MagicMock()

        monkeypatch.setattr("shutil.which", lambda _cmd: None)
        monkeypatch.setattr(subprocess, "run", fake_run)

        install_sglang()

        expected = [["uv", "install", "sglang[all]"]]
        assert invocations == expected
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# run_sglang
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestRunSglang:
    """Tests for the ``run_sglang`` function."""

    @staticmethod
    def _arrange_existing_model(
        monkeypatch: pytest.MonkeyPatch, model_dir: Path, *, windows: bool
    ) -> list[list[str]]:
        """Pretend the model directory exists and record ``subprocess.run`` calls.

        Patches ``Constants.IS_WINDOWS``, ``Path.expanduser`` and ``Path.exists``,
        then returns the list that collects every command passed to the stub.
        """
        recorded: list[list[str]] = []

        def fake_run(cmd, **_):
            recorded.append(cmd)
            return MagicMock()

        monkeypatch.setattr(Constants, "IS_WINDOWS", windows)
        monkeypatch.setattr(Path, "expanduser", lambda _self: model_dir)
        monkeypatch.setattr(Path, "exists", lambda _self: True)
        monkeypatch.setattr(subprocess, "run", fake_run)
        return recorded

    def test_model_dir_not_exist_skips(
        self, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]
    ) -> None:
        """A missing model directory should be reported and nothing launched."""
        invocations: list[list[str]] = []

        def fake_run(cmd, **_):
            invocations.append(cmd)

        monkeypatch.setattr(Path, "exists", lambda _self: False)
        monkeypatch.setattr(subprocess, "run", fake_run)

        run_sglang(model="/nonexistent/path")

        stdout = capsys.readouterr().out
        assert "模型目录不存在" in stdout
        assert invocations == []

    def test_windows_uses_python(self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
        """On Windows the launcher executable should be ``python``."""
        invocations = self._arrange_existing_model(monkeypatch, tmp_path, windows=True)

        run_sglang(model=str(tmp_path))

        launched = invocations[0]
        assert launched[0] == "python"
        for token in ("-m", "sglang.launch_server"):
            assert token in launched

    def test_non_windows_uses_python3(self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
        """Off Windows the launcher is ``python3`` and options are forwarded."""
        invocations = self._arrange_existing_model(monkeypatch, tmp_path, windows=False)

        run_sglang(model=str(tmp_path), port=9000, ctx_len=4096, mem_fraction=0.5, host="127.0.0.1", log_level="debug")

        launched = invocations[0]
        assert launched[0] == "python3"
        for token in ("--port", "9000", "4096", "0.5", "127.0.0.1", "debug"):
            assert token in launched

    def test_command_includes_qwen_parser(self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
        """The command should carry ``--tool-call-parser qwen``."""
        invocations = self._arrange_existing_model(monkeypatch, tmp_path, windows=True)

        run_sglang(model=str(tmp_path))

        launched = invocations[0]
        assert "--tool-call-parser" in launched
        # The parser name must directly follow its flag.
        flag_position = launched.index("--tool-call-parser")
        assert launched[flag_position + 1] == "qwen"
|
||||
@@ -0,0 +1,110 @@
|
||||
"""Tests for cli.lscalc module."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from pyflowx.conditions import Constants
|
||||
from pyflowx.ops import system
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# get_ls_dyna_command
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestGetLsDynaCommand:
    """Check the command produced by ``system.get_ls_dyna_command``."""

    def test_get_ls_dyna_command_windows(self) -> None:
        """With the Windows flag set, the command names the solver, input and CPUs."""
        with patch.object(Constants, "IS_WINDOWS", True):
            with patch.object(Constants, "IS_MACOS", False):
                built = system.get_ls_dyna_command("input.k", 4)

        for fragment in ("ls-dyna_mpp", "i=input.k", "ncpu=4"):
            assert fragment in built

    def test_get_ls_dyna_command_linux(self) -> None:
        """With both platform flags cleared, the same fragments are present."""
        with patch.object(Constants, "IS_WINDOWS", False):
            with patch.object(Constants, "IS_MACOS", False):
                built = system.get_ls_dyna_command("input.k", 8)

        for fragment in ("ls-dyna_mpp", "i=input.k", "ncpu=8"):
            assert fragment in built
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# run_ls_dyna
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestRunLsDyna:
    """Exercise ``system.run_ls_dyna`` with ``subprocess.run`` stubbed where needed."""

    def test_run_ls_dyna_success(self, tmp_path: Path) -> None:
        """An existing input file should lead to a ``subprocess.run`` call."""
        deck = tmp_path / "input.k"
        deck.write_text("LS-DYNA input")

        with patch("subprocess.run", return_value=MagicMock(returncode=0)) as run_spy:
            system.run_ls_dyna(str(deck), ncpu=4)

        assert run_spy.called

    def test_run_ls_dyna_file_not_found(self, tmp_path: Path) -> None:
        """A missing input file must not raise."""
        missing_deck = tmp_path / "nonexistent.k"

        # Only checks that the call returns; the printed message is not asserted.
        system.run_ls_dyna(str(missing_deck), ncpu=4)

    def test_run_ls_dyna_command_not_found(self, tmp_path: Path) -> None:
        """A ``FileNotFoundError`` from ``subprocess.run`` must not propagate."""
        deck = tmp_path / "input.k"
        deck.write_text("LS-DYNA input")

        with patch("subprocess.run", side_effect=FileNotFoundError):
            # Only checks that the call returns; the printed message is not asserted.
            system.run_ls_dyna(str(deck), ncpu=4)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# run_ls_dyna_mpi
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestRunLsDynaMpi:
    """Exercise ``system.run_ls_dyna_mpi``."""

    def test_run_ls_dyna_mpi_success(self, tmp_path: Path) -> None:
        """An existing input file should lead to a ``subprocess.run`` call."""
        deck = tmp_path / "input.k"
        deck.write_text("LS-DYNA input")

        with patch("subprocess.run", return_value=MagicMock(returncode=0)) as run_spy:
            system.run_ls_dyna_mpi(str(deck), ncpu=8)

        assert run_spy.called

    def test_run_ls_dyna_mpi_file_not_found(self, tmp_path: Path) -> None:
        """A missing input file must not raise."""
        missing_deck = tmp_path / "nonexistent.k"

        # Only checks that the call returns; the printed message is not asserted.
        system.run_ls_dyna_mpi(str(missing_deck), ncpu=8)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# check_ls_dyna_status
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestCheckLsDynaStatus:
    """Exercise ``system.check_ls_dyna_status`` on both platform branches."""

    def test_check_ls_dyna_status_windows(self) -> None:
        """With the Windows flag set, the status check should shell out."""
        fake_result = MagicMock(stdout="ls-dyna_mpp.exe", returncode=0)

        with patch.object(Constants, "IS_WINDOWS", True):
            with patch("subprocess.run", return_value=fake_result) as run_spy:
                system.check_ls_dyna_status()

        assert run_spy.called

    def test_check_ls_dyna_status_linux(self) -> None:
        """With the Windows flag cleared, the status check should shell out."""
        fake_result = MagicMock(stdout="1234", returncode=0)

        with patch.object(Constants, "IS_WINDOWS", False):
            with patch("subprocess.run", return_value=fake_result) as run_spy:
                system.check_ls_dyna_status()

        assert run_spy.called
|
||||
@@ -0,0 +1,259 @@
|
||||
"""Tests for cli.packtool module."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from pyflowx.ops import system
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def packtool_tmp_workdir(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
    """Redirect ``system.DEFAULT_CACHE_DIR`` into the per-test temporary directory.

    Applied automatically to every test in this module so that cache files are
    written under ``tmp_path`` instead of the real cache location.

    Args:
        tmp_path: Temporary directory provided by pytest.
        monkeypatch: pytest's monkeypatch helper; the override is undone after the test.
    """
    isolated_cache = tmp_path / ".cache" / "pypack"
    monkeypatch.setattr(system, "DEFAULT_CACHE_DIR", str(isolated_cache))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# pack_source
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestPackSource:
    """Exercise ``system.pack_source``."""

    def test_pack_source_basic(self, tmp_path: Path) -> None:
        """Packing a project with a single module should create the output directory."""
        src_root = tmp_path / "project"
        src_root.mkdir()
        src_root.joinpath("main.py").write_text("print('hello')")
        dest_root = tmp_path / "output"

        system.pack_source(src_root, dest_root)

        assert dest_root.exists()

    def test_pack_source_with_pyproject(self, tmp_path: Path) -> None:
        """A project that also has ``pyproject.toml`` should pack the same way."""
        src_root = tmp_path / "project"
        src_root.mkdir()
        src_root.joinpath("pyproject.toml").write_text("[project]\nname = 'test'")
        src_root.joinpath("main.py").write_text("print('hello')")
        dest_root = tmp_path / "output"

        system.pack_source(src_root, dest_root)

        assert dest_root.exists()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# pack_dependencies
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestPackDependencies:
    """Exercise ``system.pack_dependencies``."""

    def test_pack_dependencies_empty(self, tmp_path: Path) -> None:
        """An empty dependency list must not raise."""
        libs_root = tmp_path / "libs"

        # Only checks that the call returns; any printed message is not asserted.
        system.pack_dependencies(libs_root, [])

    def test_pack_dependencies_with_deps(self, tmp_path: Path) -> None:
        """A non-empty dependency list should lead to a ``subprocess.run`` call."""
        libs_root = tmp_path / "libs"

        with patch("subprocess.run", return_value=MagicMock(returncode=0)) as run_spy:
            system.pack_dependencies(libs_root, ["numpy", "pandas"])

        assert run_spy.called
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# pack_wheel
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestPackWheel:
    """Exercise ``system.pack_wheel``."""

    def test_pack_wheel(self, tmp_path: Path) -> None:
        """Building a wheel should shell out through ``subprocess.run``."""
        src_root = tmp_path / "project"
        src_root.mkdir()
        src_root.joinpath("pyproject.toml").write_text("[project]\nname = 'test'")
        dist_root = tmp_path / "dist"

        with patch("subprocess.run", return_value=MagicMock(returncode=0)) as run_spy:
            system.pack_wheel(src_root, dist_root)

        assert run_spy.called
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# install_embed_python
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestInstallEmbedPython:
    """Test install_embed_python function.

    Most tests patch ``platform.machine``, ``urllib.request.urlretrieve`` and
    ``zipfile.ZipFile`` so that no network access or real extraction happens.
    """

    def test_install_embed_python_basic(self, tmp_path: Path) -> None:
        """Should download and extract embedded Python (mocked for speed)."""
        output_dir = tmp_path / "python"

        # No cache file is prepared here, so the download path is exercised.
        with patch("platform.machine", return_value="x86_64"), patch(
            "urllib.request.urlretrieve"
        ) as mock_urlretrieve, patch("zipfile.ZipFile") as mock_zipfile:
            # Mock successful download
            mock_urlretrieve.return_value = None
            mock_zip_instance = MagicMock()
            mock_zipfile.return_value.__enter__.return_value = mock_zip_instance

            system.install_embed_python("3.10", output_dir)

            # Verify download was called
            assert mock_urlretrieve.called
            # Verify extraction was called
            assert mock_zip_instance.extractall.called
            # Verify output directory was created
            assert output_dir.exists()

    def test_install_embed_python_with_cache(self, tmp_path: Path) -> None:
        """Should use cached Python if available."""
        output_dir = tmp_path / "python"
        cache_dir = tmp_path / ".cache" / "pypack"
        cache_dir.mkdir(parents=True)

        # Create a fake cached zip file
        cache_file = cache_dir / "python-3.10.11-embed-amd64.zip"
        cache_file.write_bytes(b"PK\x03\x04" + b"\x00" * 100)  # Minimal ZIP header

        with patch("platform.machine", return_value="x86_64"), patch("zipfile.ZipFile") as mock_zipfile:
            mock_zip_instance = MagicMock()
            mock_zipfile.return_value.__enter__.return_value = mock_zip_instance

            system.install_embed_python("3.10", output_dir)

            # Verify extraction was called (using cache)
            assert mock_zip_instance.extractall.called
            # Verify output directory was created
            assert output_dir.exists()

    def test_install_embed_python_real_download(self, tmp_path: Path) -> None:
        """Should actually download and extract embedded Python (requires network).

        This test performs a real download to verify the entire workflow.
        It is skipped on non-Windows platforms. No marker guards network
        availability, so on Windows it needs a working connection.
        """
        import platform
        import zipfile

        # Embed Python is Windows-specific. Report a skip instead of returning
        # early, so the run does not show a pass for a test that never executed.
        if platform.system() != "Windows":
            pytest.skip("embedded Python distribution is Windows-only")

        output_dir = tmp_path / "python_real"

        # Perform real installation
        system.install_embed_python("3.10", output_dir)

        # Verify installation succeeded
        assert output_dir.exists()

        # Verify key files are present
        expected_files = [
            "python.exe",
            "python310.dll",
            "python310.zip",
        ]

        for expected_file in expected_files:
            file_path = output_dir / expected_file
            assert file_path.exists(), f"Expected file {expected_file} not found"
            assert file_path.stat().st_size > 0, f"File {expected_file} is empty"

        # Verify python.exe is a regular file
        python_exe = output_dir / "python.exe"
        assert python_exe.is_file()

        # Verify the bundled standard library archive is a readable zip
        python_zip = output_dir / "python310.zip"
        assert zipfile.is_zipfile(python_zip)

        print(f"✅ Successfully downloaded and installed embed Python to {output_dir}")
        print(f"   Files: {list(output_dir.iterdir())}")

    def test_install_embed_python_different_versions(self, tmp_path: Path) -> None:
        """Should handle different Python versions."""
        output_dir = tmp_path / "python"

        with patch("platform.machine", return_value="x86_64"), patch(
            "urllib.request.urlretrieve"
        ) as mock_urlretrieve, patch("zipfile.ZipFile") as mock_zipfile:
            mock_zip_instance = MagicMock()
            mock_zipfile.return_value.__enter__.return_value = mock_zip_instance

            # Test different versions
            for version in ["3.8", "3.9", "3.10", "3.11", "3.12"]:
                system.install_embed_python(version, output_dir)
                # NOTE(review): ``called`` stays true after the first download, so
                # for later versions this only proves the call did not raise.
                # Consider resetting the mock per version once it is confirmed
                # that every version triggers its own download.
                assert mock_urlretrieve.called

    def test_install_embed_python_creates_cache(self, tmp_path: Path) -> None:
        """Should go through the download path when the cache is empty."""
        output_dir = tmp_path / "python"

        with patch("platform.machine", return_value="x86_64"), patch(
            "urllib.request.urlretrieve"
        ) as mock_urlretrieve, patch("zipfile.ZipFile") as mock_zipfile:
            mock_urlretrieve.return_value = None
            mock_zip_instance = MagicMock()
            mock_zipfile.return_value.__enter__.return_value = mock_zip_instance

            system.install_embed_python("3.10", output_dir)

            # The download itself is mocked, so no cache file can appear on
            # disk; the observable effect is that a download was requested.
            assert mock_urlretrieve.called
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# create_zip_package
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestCreateZipPackage:
    """Exercise ``system.create_zip_package``."""

    def test_create_zip_package(self, tmp_path: Path) -> None:
        """Zipping a directory with one file should produce the archive file."""
        payload_dir = tmp_path / "source"
        payload_dir.mkdir()
        payload_dir.joinpath("test.txt").write_text("test content")
        archive_path = tmp_path / "package.zip"

        system.create_zip_package(payload_dir, archive_path)

        assert archive_path.exists()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# clean_build_dir
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestCleanBuildDir:
    """Exercise ``system.clean_build_dir``."""

    def test_clean_build_dir_exists(self, tmp_path: Path) -> None:
        """A populated build directory should be gone after cleaning."""
        stale_dir = tmp_path / "build"
        stale_dir.mkdir()
        stale_dir.joinpath("test.txt").write_text("test")

        system.clean_build_dir(stale_dir)

        assert not stale_dir.exists()

    def test_clean_build_dir_not_exists(self, tmp_path: Path) -> None:
        """Cleaning a directory that was never created must not raise."""
        absent_dir = tmp_path / "nonexistent"

        # Only checks that the call returns; any printed message is not asserted.
        system.clean_build_dir(absent_dir)
|
||||
@@ -0,0 +1,487 @@
|
||||
"""Tests for cli.pdftool module."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from pyflowx.ops import media
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# pdf_merge
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestPdfMerge:
    """Exercise ``media.pdf_merge`` with pypdf's reader and writer mocked."""

    def test_pdf_merge_files(self, tmp_path: Path) -> None:
        """Merging two inputs should end with a write on the ``PdfWriter``."""
        pytest.importorskip("pypdf")
        sources = [tmp_path / name for name in ("input1.pdf", "input2.pdf")]
        for source in sources:
            source.write_bytes(b"PDF content")
        merged_path = tmp_path / "merged.pdf"
        writer = MagicMock()

        with patch("pypdf.PdfReader"):
            with patch("pypdf.PdfWriter", return_value=writer):
                media.pdf_merge(sources, merged_path)

        assert writer.write.called
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# pdf_split
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestPdfSplit:
    """Exercise ``media.pdf_split`` with pypdf's reader and writer mocked."""

    def test_pdf_split_file(self, tmp_path: Path) -> None:
        """Splitting a one-page document should create the output directory."""
        pytest.importorskip("pypdf")
        source = tmp_path / "input.pdf"
        source.write_bytes(b"PDF content")
        pages_dir = tmp_path / "split"

        reader = MagicMock()
        reader.pages = [MagicMock()]

        with patch("pypdf.PdfReader", return_value=reader):
            with patch("pypdf.PdfWriter"):
                media.pdf_split(source, pages_dir)

        assert pages_dir.exists()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# pdf_compress
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestPdfCompress:
    """Exercise ``media.pdf_compress`` with ``fitz.open`` mocked."""

    def test_pdf_compress_file(self, tmp_path: Path) -> None:
        """Compressing should leave a file at the requested output path."""
        pytest.importorskip("fitz")
        source = tmp_path / "input.pdf"
        source.write_bytes(b"PDF content")
        output_file = tmp_path / "compressed.pdf"

        def write_fake_output(*args: Any, **kwargs: Any) -> None:
            # Stand in for the document's save so a file really appears on disk.
            output_file.write_bytes(b"Compressed PDF")

        document = MagicMock()
        document.save = write_fake_output

        with patch("fitz.open", return_value=document):
            media.pdf_compress(source, output_file)

        assert output_file.exists()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# pdf_extract_text
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestPdfExtractText:
    """Exercise ``media.pdf_extract_text`` with ``fitz.open`` mocked."""

    def test_pdf_extract_text_file(self, tmp_path: Path) -> None:
        """Extracting text from a one-page document should write the output file."""
        pytest.importorskip("fitz")
        source = tmp_path / "input.pdf"
        source.write_bytes(b"PDF content")
        text_path = tmp_path / "output.txt"

        page = MagicMock()
        page.get_text.return_value = "Test text"
        document = MagicMock()
        # Iterating the document yields the single fake page.
        document.__iter__ = MagicMock(return_value=iter([page]))

        with patch("fitz.open", return_value=document):
            media.pdf_extract_text(source, text_path)

        assert text_path.exists()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# pdf_extract_images
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestPdfExtractImages:
    """Exercise ``media.pdf_extract_images`` with ``fitz.open`` mocked."""

    def test_pdf_extract_images_file(self, tmp_path: Path) -> None:
        """Extracting images from a one-page document should create the output directory."""
        pytest.importorskip("fitz")
        source = tmp_path / "input.pdf"
        source.write_bytes(b"PDF content")
        images_dir = tmp_path / "images"

        page = MagicMock()
        page.get_images.return_value = [[0]]
        document = MagicMock()
        # Iterating the document yields the single fake page.
        document.__iter__ = MagicMock(return_value=iter([page]))
        document.extract_image.return_value = {"image": b"image data", "ext": "png"}

        with patch("fitz.open", return_value=document):
            media.pdf_extract_images(source, images_dir)

        assert images_dir.exists()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# pdf_add_watermark
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestPdfAddWatermark:
    """Exercise ``media.pdf_add_watermark`` with fitz mocked."""

    def test_pdf_add_watermark_file(self, tmp_path: Path) -> None:
        """Watermarking a one-page document should end with a save on the document."""
        pytest.importorskip("fitz")
        source = tmp_path / "input.pdf"
        source.write_bytes(b"PDF content")
        stamped_path = tmp_path / "watermarked.pdf"

        page = MagicMock()
        page.rect = MagicMock(width=800, height=600)
        document = MagicMock()
        # Iterating the document yields the single fake page.
        document.__iter__ = MagicMock(return_value=iter([page]))

        with patch("fitz.open", return_value=document):
            with patch("fitz.get_text_length", return_value=100):
                media.pdf_add_watermark(source, stamped_path)

        assert document.save.called
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# pdf_rotate
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestPdfRotate:
    """Exercise ``media.pdf_rotate`` with ``fitz.open`` mocked."""

    def test_pdf_rotate_file_90(self, tmp_path: Path) -> None:
        """Rotating by 90 degrees should end with a save on the document."""
        pytest.importorskip("fitz")
        source = tmp_path / "input.pdf"
        source.write_bytes(b"PDF content")
        rotated_path = tmp_path / "rotated.pdf"

        page = MagicMock()
        document = MagicMock()
        # Iterating the document yields the single fake page.
        document.__iter__ = MagicMock(return_value=iter([page]))

        with patch("fitz.open", return_value=document):
            media.pdf_rotate(source, rotated_path, rotation=90)

        assert document.save.called

    def test_pdf_rotate_file_180(self, tmp_path: Path) -> None:
        """Rotating by 180 degrees should end with a save on the document."""
        pytest.importorskip("fitz")
        source = tmp_path / "input.pdf"
        source.write_bytes(b"PDF content")
        rotated_path = tmp_path / "rotated.pdf"

        page = MagicMock()
        document = MagicMock()
        # Iterating the document yields the single fake page.
        document.__iter__ = MagicMock(return_value=iter([page]))

        with patch("fitz.open", return_value=document):
            media.pdf_rotate(source, rotated_path, rotation=180)

        assert document.save.called
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# pdf_crop
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestPdfCrop:
    """Exercise ``media.pdf_crop`` with fitz mocked."""

    def test_pdf_crop_file(self, tmp_path: Path) -> None:
        """Cropping a one-page document should end with a save on the document."""
        pytest.importorskip("fitz")
        source = tmp_path / "input.pdf"
        source.write_bytes(b"PDF content")
        cropped_path = tmp_path / "cropped.pdf"

        page = MagicMock()
        page.rect = MagicMock(x0=0, y0=0, x1=800, y1=600)
        document = MagicMock()
        # Iterating the document yields the single fake page.
        document.__iter__ = MagicMock(return_value=iter([page]))

        with patch("fitz.open", return_value=document):
            with patch("fitz.Rect"):
                media.pdf_crop(source, cropped_path, margins=(10, 10, 10, 10))

        assert document.save.called
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# pdf_info
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestPdfInfo:
    """Exercise ``media.pdf_info`` with ``fitz.open`` mocked."""

    def test_pdf_info_file(self, tmp_path: Path) -> None:
        """Requesting info should open the document through ``fitz.open``."""
        pytest.importorskip("fitz")
        source = tmp_path / "input.pdf"
        source.write_bytes(b"PDF content")

        document = MagicMock()
        document.page_count = 10
        document.metadata = {"title": "Test", "author": "Author"}

        with patch("fitz.open", return_value=document) as open_spy:
            media.pdf_info(source)

        assert open_spy.called
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# pdf_ocr
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestPdfOcr:
    """Exercise ``media.pdf_ocr`` with fitz, PIL and pytesseract mocked."""

    @pytest.mark.slow
    def test_pdf_ocr_file(self, tmp_path: Path) -> None:
        """Running OCR on a one-page document must not raise."""
        for required_module in ("fitz", "pytesseract", "PIL"):
            pytest.importorskip(required_module)
        source = tmp_path / "input.pdf"
        source.write_bytes(b"PDF content")
        ocr_path = tmp_path / "ocr.pdf"

        page = MagicMock()
        page.rect = MagicMock(width=800, height=600)
        document = MagicMock()
        # Iterating the document yields the single fake page.
        document.__iter__ = MagicMock(return_value=iter([page]))

        with patch("fitz.open", return_value=document), patch("PIL.Image.frombytes"):
            with patch("pytesseract.image_to_string", return_value="OCR text"):
                # Only checks that the call returns; the result is not asserted.
                media.pdf_ocr(source, ocr_path)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# pdf_repair
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestPdfRepair:
    """Exercise ``media.pdf_repair`` with ``fitz.open`` mocked."""

    def test_pdf_repair_file(self, tmp_path: Path) -> None:
        """Repairing should end with a save on the opened document."""
        pytest.importorskip("fitz")
        source = tmp_path / "input.pdf"
        source.write_bytes(b"PDF content")
        repaired_path = tmp_path / "repaired.pdf"
        document = MagicMock()

        with patch("fitz.open", return_value=document):
            media.pdf_repair(source, repaired_path)

        assert document.save.called
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# pdf_encrypt
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestPdfEncrypt:
    """Exercise ``media.pdf_encrypt`` with pypdf's reader and writer mocked."""

    def test_pdf_encrypt_file(self, tmp_path: Path) -> None:
        """The writer instance must be both encrypted and written."""
        pytest.importorskip("pypdf")

        source_pdf = tmp_path / "input.pdf"
        source_pdf.write_bytes(b"PDF content")
        target_pdf = tmp_path / "encrypted.pdf"

        # Reader exposes a single page; writer is a blank mock we inspect afterwards.
        reader = MagicMock(pages=[MagicMock()])
        writer = MagicMock()

        with patch("pypdf.PdfReader", return_value=reader):
            with patch("pypdf.PdfWriter", return_value=writer):
                media.pdf_encrypt(source_pdf, target_pdf, "secret")

        assert writer.encrypt.called
        assert writer.write.called

    def test_pdf_encrypt_not_installed(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
        """Call pdf_encrypt with ``HAS_PYPDF`` forced to False; must not raise."""
        monkeypatch.setattr(media, "HAS_PYPDF", False)

        source_pdf = tmp_path / "in.pdf"
        target_pdf = tmp_path / "out.pdf"
        media.pdf_encrypt(source_pdf, target_pdf, "pw")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# pdf_decrypt
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestPdfDecrypt:
    """Exercise ``media.pdf_decrypt`` with pypdf's reader and writer mocked."""

    @staticmethod
    def _decrypt_with_mocks(tmp_path: Path, *, encrypted: bool) -> tuple:
        """Run pdf_decrypt under patched pypdf and return ``(reader, writer)`` mocks.

        ``encrypted`` is the value the mocked reader reports via ``is_encrypted``.
        """
        source_pdf = tmp_path / "input.pdf"
        source_pdf.write_bytes(b"PDF content")
        target_pdf = tmp_path / "decrypted.pdf"

        reader = MagicMock(is_encrypted=encrypted, pages=[MagicMock()])
        writer = MagicMock()

        with patch("pypdf.PdfReader", return_value=reader):
            with patch("pypdf.PdfWriter", return_value=writer):
                media.pdf_decrypt(source_pdf, target_pdf, "secret")

        return reader, writer

    def test_pdf_decrypt_encrypted(self, tmp_path: Path) -> None:
        """An encrypted reader must be decrypted and the writer written."""
        pytest.importorskip("pypdf")

        reader, writer = self._decrypt_with_mocks(tmp_path, encrypted=True)

        assert reader.decrypt.called
        assert writer.write.called

    def test_pdf_decrypt_not_encrypted(self, tmp_path: Path) -> None:
        """A reader reporting ``is_encrypted = False`` must not be decrypted."""
        pytest.importorskip("pypdf")

        reader, _writer = self._decrypt_with_mocks(tmp_path, encrypted=False)

        assert not reader.decrypt.called

    def test_pdf_decrypt_not_installed(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
        """Call pdf_decrypt with ``HAS_PYPDF`` forced to False; must not raise."""
        monkeypatch.setattr(media, "HAS_PYPDF", False)

        source_pdf = tmp_path / "in.pdf"
        target_pdf = tmp_path / "out.pdf"
        media.pdf_decrypt(source_pdf, target_pdf, "pw")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# pdf_reorder
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestPdfReorder:
    """Exercise ``media.pdf_reorder`` with pypdf's reader and writer mocked."""

    def test_pdf_reorder_pages(self, tmp_path: Path) -> None:
        """Three valid indices on a three-page reader yield three ``add_page`` calls."""
        pytest.importorskip("pypdf")

        source_pdf = tmp_path / "input.pdf"
        source_pdf.write_bytes(b"PDF content")
        target_pdf = tmp_path / "reordered.pdf"

        reader = MagicMock(pages=[MagicMock() for _ in range(3)])
        writer = MagicMock()

        with patch("pypdf.PdfReader", return_value=reader):
            with patch("pypdf.PdfWriter", return_value=writer):
                media.pdf_reorder(source_pdf, target_pdf, [2, 0, 1])

        assert writer.add_page.call_count == 3

    def test_pdf_reorder_out_of_range(self, tmp_path: Path) -> None:
        """On a one-page reader, only index 0 of ``[0, 5, -1]`` is added."""
        pytest.importorskip("pypdf")

        source_pdf = tmp_path / "input.pdf"
        source_pdf.write_bytes(b"PDF content")
        target_pdf = tmp_path / "reordered.pdf"

        reader = MagicMock(pages=[MagicMock()])
        writer = MagicMock()

        with patch("pypdf.PdfReader", return_value=reader):
            with patch("pypdf.PdfWriter", return_value=writer):
                media.pdf_reorder(source_pdf, target_pdf, [0, 5, -1])

        assert writer.add_page.call_count == 1

    def test_pdf_reorder_not_installed(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
        """Call pdf_reorder with ``HAS_PYPDF`` forced to False; must not raise."""
        monkeypatch.setattr(media, "HAS_PYPDF", False)

        source_pdf = tmp_path / "in.pdf"
        target_pdf = tmp_path / "out.pdf"
        media.pdf_reorder(source_pdf, target_pdf, [0])
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# pdf_to_images
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestPdfToImages:
    """Exercise ``media.pdf_to_images`` against a mocked ``fitz.open``."""

    def test_pdf_to_images_convert(self, tmp_path: Path) -> None:
        """The output directory must exist and the page pixmap must be saved."""
        pytest.importorskip("fitz")

        source_pdf = tmp_path / "input.pdf"
        source_pdf.write_bytes(b"PDF content")
        image_dir = tmp_path / "images"

        # Single fake page whose get_pixmap() hands back an inspectable pixmap.
        pixmap = MagicMock()
        page = MagicMock()
        page.get_pixmap.return_value = pixmap
        document = MagicMock()
        # A one-shot iterator: the page is yielded only on the first iteration.
        document.__iter__.return_value = iter([page])

        with patch("fitz.open", return_value=document):
            media.pdf_to_images(source_pdf, image_dir, dpi=150)

        assert image_dir.exists()
        assert pixmap.save.called

    def test_pdf_to_images_not_installed(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
        """Call pdf_to_images with ``HAS_PYMUPDF`` forced to False; must not raise."""
        monkeypatch.setattr(media, "HAS_PYMUPDF", False)

        source_pdf = tmp_path / "in.pdf"
        image_dir = tmp_path / "out"
        media.pdf_to_images(source_pdf, image_dir)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# Not-installed branches for already-tested functions
|
||||
# ---------------------------------------------------------------------- #
|
||||
class TestNotInstalledBranches:
    """Call PDF functions with their dependency flag forced to False.

    Each test only checks that the call returns without raising; no output
    is asserted.
    """

    @staticmethod
    def _without_pypdf(monkeypatch: pytest.MonkeyPatch) -> None:
        """Force ``media.HAS_PYPDF`` to False for the duration of the test."""
        monkeypatch.setattr(media, "HAS_PYPDF", False)

    @staticmethod
    def _without_pymupdf(monkeypatch: pytest.MonkeyPatch) -> None:
        """Force ``media.HAS_PYMUPDF`` to False for the duration of the test."""
        monkeypatch.setattr(media, "HAS_PYMUPDF", False)

    def test_pdf_merge_not_installed(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
        """pdf_merge with pypdf flagged as missing."""
        self._without_pypdf(monkeypatch)
        sources = [tmp_path / "a.pdf"]
        media.pdf_merge(sources, tmp_path / "out.pdf")

    def test_pdf_split_not_installed(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
        """pdf_split with pypdf flagged as missing."""
        self._without_pypdf(monkeypatch)
        source_pdf = tmp_path / "in.pdf"
        media.pdf_split(source_pdf, tmp_path / "out")

    def test_pdf_compress_not_installed(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
        """pdf_compress with PyMuPDF flagged as missing."""
        self._without_pymupdf(monkeypatch)
        source_pdf = tmp_path / "in.pdf"
        media.pdf_compress(source_pdf, tmp_path / "out.pdf")

    def test_pdf_extract_text_not_installed(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
        """pdf_extract_text with PyMuPDF flagged as missing."""
        self._without_pymupdf(monkeypatch)
        source_pdf = tmp_path / "in.pdf"
        media.pdf_extract_text(source_pdf, tmp_path / "out.txt")

    def test_pdf_extract_images_not_installed(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
        """pdf_extract_images with PyMuPDF flagged as missing."""
        self._without_pymupdf(monkeypatch)
        source_pdf = tmp_path / "in.pdf"
        media.pdf_extract_images(source_pdf, tmp_path / "out")

    def test_pdf_add_watermark_not_installed(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
        """pdf_add_watermark with PyMuPDF flagged as missing."""
        self._without_pymupdf(monkeypatch)
        source_pdf = tmp_path / "in.pdf"
        media.pdf_add_watermark(source_pdf, tmp_path / "out.pdf")

    def test_pdf_rotate_not_installed(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
        """pdf_rotate with PyMuPDF flagged as missing."""
        self._without_pymupdf(monkeypatch)
        source_pdf = tmp_path / "in.pdf"
        media.pdf_rotate(source_pdf, tmp_path / "out.pdf")

    def test_pdf_crop_not_installed(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
        """pdf_crop with PyMuPDF flagged as missing."""
        self._without_pymupdf(monkeypatch)
        source_pdf = tmp_path / "in.pdf"
        media.pdf_crop(source_pdf, tmp_path / "out.pdf", (1, 1, 1, 1))

    def test_pdf_info_not_installed(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
        """pdf_info with PyMuPDF flagged as missing."""
        self._without_pymupdf(monkeypatch)
        source_pdf = tmp_path / "in.pdf"
        media.pdf_info(source_pdf)

    def test_pdf_repair_not_installed(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
        """pdf_repair with PyMuPDF flagged as missing."""
        self._without_pymupdf(monkeypatch)
        source_pdf = tmp_path / "in.pdf"
        media.pdf_repair(source_pdf, tmp_path / "out.pdf")
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user