Files
pyflowx/tests/cli/test_pdftool.py
T
zhou de368ea810
CI / Lint, Typecheck & Test (push) Successful in 1m11s
refactor: 删除冗余 cli 入口脚本, gittool 用数组配置 clean excludes
1. 删除 13 个已有 YAML 配置的 cli .py 入口脚本, 统一通过 pf 调用
2. gittool.yaml 用 CLEAN_EXCLUDES 数组变量配置 git clean 的 -e 参数,
   保留 .venv/.tox/node_modules/.idea 等目录避免误删
3. run_cli 执行前打印调用信息: [gittool] 执行: c
4. 更新 pyproject.toml 移除 13 个冗余 entry points, 仅保留 pf
5. 清理测试文件中的 TestMain 类 (测 _ops 模块的测试保留)
2026-07-05 08:39:20 +08:00

281 lines
11 KiB
Python

"""Tests for cli.pdftool module."""
from __future__ import annotations
from pathlib import Path
from typing import Any
from unittest.mock import MagicMock, patch
import pytest
from pyflowx.cli._ops import media
# ---------------------------------------------------------------------- #
# pdf_merge
# ---------------------------------------------------------------------- #
class TestPdfMerge:
    """Exercise ``media.pdf_merge`` with pypdf's reader and writer patched out."""

    def test_pdf_merge_files(self, tmp_path: Path) -> None:
        """Merging two inputs should call ``write`` on the PdfWriter instance."""
        pytest.importorskip("pypdf")

        sources = [tmp_path / name for name in ("input1.pdf", "input2.pdf")]
        for source in sources:
            source.write_bytes(b"PDF content")
        merged_path = tmp_path / "merged.pdf"

        # The object handed back by the patched PdfWriter() call.
        writer = MagicMock()
        with patch("pypdf.PdfReader"), patch("pypdf.PdfWriter", return_value=writer):
            media.pdf_merge(sources, merged_path)

        assert writer.write.called
# ---------------------------------------------------------------------- #
# pdf_split
# ---------------------------------------------------------------------- #
class TestPdfSplit:
    """Exercise ``media.pdf_split`` with pypdf's reader and writer patched out."""

    def test_pdf_split_file(self, tmp_path: Path) -> None:
        """Splitting a mocked one-page PDF should leave the output directory existing."""
        pytest.importorskip("pypdf")

        source = tmp_path / "input.pdf"
        source.write_bytes(b"PDF content")
        target_dir = tmp_path / "split"

        # The patched PdfReader() returns a reader exposing a single mocked page.
        reader = MagicMock()
        reader.pages = [MagicMock()]
        with patch("pypdf.PdfReader", return_value=reader), patch("pypdf.PdfWriter"):
            media.pdf_split(source, target_dir)

        assert target_dir.exists()
# ---------------------------------------------------------------------- #
# pdf_compress
# ---------------------------------------------------------------------- #
class TestPdfCompress:
    """Exercise ``media.pdf_compress`` against a mocked ``fitz`` document."""

    def test_pdf_compress_file(self, tmp_path: Path) -> None:
        """The output file should exist after compressing.

        The stand-in ``save`` writes the output file itself; the test then
        checks that the file is present.
        """
        pytest.importorskip("fitz")

        source = tmp_path / "input.pdf"
        source.write_bytes(b"PDF content")
        compressed = tmp_path / "compressed.pdf"

        def fake_save(*args: Any, **kwargs: Any) -> None:
            # Ignore whatever arguments the caller passes and create the file.
            compressed.write_bytes(b"Compressed PDF")

        document = MagicMock()
        document.save = fake_save
        with patch("fitz.open", return_value=document):
            media.pdf_compress(source, compressed)

        assert compressed.exists()
# ---------------------------------------------------------------------- #
# pdf_extract_text
# ---------------------------------------------------------------------- #
class TestPdfExtractText:
    """Exercise ``media.pdf_extract_text`` against a mocked ``fitz`` document."""

    def test_pdf_extract_text_file(self, tmp_path: Path) -> None:
        """The text output file should exist after extraction."""
        pytest.importorskip("fitz")

        source = tmp_path / "input.pdf"
        source.write_bytes(b"PDF content")
        text_path = tmp_path / "output.txt"

        page = MagicMock()
        page.get_text.return_value = "Test text"

        # Iterating the document yields the single mocked page (one pass only,
        # because the same iterator object is returned on every call).
        document = MagicMock()
        document.__iter__.return_value = iter([page])

        with patch("fitz.open", return_value=document):
            media.pdf_extract_text(source, text_path)

        assert text_path.exists()
# ---------------------------------------------------------------------- #
# pdf_extract_images
# ---------------------------------------------------------------------- #
class TestPdfExtractImages:
    """Exercise ``media.pdf_extract_images`` against a mocked ``fitz`` document."""

    def test_pdf_extract_images_file(self, tmp_path: Path) -> None:
        """The image output directory should exist after extraction."""
        pytest.importorskip("fitz")

        source = tmp_path / "input.pdf"
        source.write_bytes(b"PDF content")
        images_dir = tmp_path / "images"

        # One page reporting a single image entry.
        page = MagicMock()
        page.get_images.return_value = [[0]]

        document = MagicMock()
        document.__iter__.return_value = iter([page])
        document.extract_image.return_value = {"image": b"image data", "ext": "png"}

        with patch("fitz.open", return_value=document):
            media.pdf_extract_images(source, images_dir)

        assert images_dir.exists()
# ---------------------------------------------------------------------- #
# pdf_add_watermark
# ---------------------------------------------------------------------- #
class TestPdfAddWatermark:
    """Exercise ``media.pdf_add_watermark`` against a mocked ``fitz`` document."""

    def test_pdf_add_watermark_file(self, tmp_path: Path) -> None:
        """Watermarking should end with ``save`` being called on the document."""
        pytest.importorskip("fitz")

        source = tmp_path / "input.pdf"
        source.write_bytes(b"PDF content")
        watermarked = tmp_path / "watermarked.pdf"

        page = MagicMock()
        page.rect = MagicMock(width=800, height=600)

        document = MagicMock()
        document.__iter__.return_value = iter([page])

        # Patchers are only activated once the ``with`` statement enters them.
        open_patch = patch("fitz.open", return_value=document)
        length_patch = patch("fitz.get_text_length", return_value=100)
        with open_patch, length_patch:
            media.pdf_add_watermark(source, watermarked)

        assert document.save.called
# ---------------------------------------------------------------------- #
# pdf_rotate
# ---------------------------------------------------------------------- #
class TestPdfRotate:
    """Exercise ``media.pdf_rotate`` against a mocked ``fitz`` document."""

    def _assert_rotation_saves(self, tmp_path: Path, rotation: int) -> None:
        """Run ``media.pdf_rotate`` with *rotation* and assert ``save`` was called."""
        pytest.importorskip("fitz")

        source = tmp_path / "input.pdf"
        source.write_bytes(b"PDF content")
        rotated = tmp_path / "rotated.pdf"

        document = MagicMock()
        document.__iter__.return_value = iter([MagicMock()])

        with patch("fitz.open", return_value=document):
            media.pdf_rotate(source, rotated, rotation=rotation)

        assert document.save.called

    def test_pdf_rotate_file_90(self, tmp_path: Path) -> None:
        """Rotating by 90 degrees should save the document."""
        self._assert_rotation_saves(tmp_path, 90)

    def test_pdf_rotate_file_180(self, tmp_path: Path) -> None:
        """Rotating by 180 degrees should save the document."""
        self._assert_rotation_saves(tmp_path, 180)
# ---------------------------------------------------------------------- #
# pdf_crop
# ---------------------------------------------------------------------- #
class TestPdfCrop:
    """Exercise ``media.pdf_crop`` against a mocked ``fitz`` document."""

    def test_pdf_crop_file(self, tmp_path: Path) -> None:
        """Cropping with 10-unit margins should end with ``save`` being called."""
        pytest.importorskip("fitz")

        source = tmp_path / "input.pdf"
        source.write_bytes(b"PDF content")
        cropped = tmp_path / "cropped.pdf"

        page = MagicMock()
        page.rect = MagicMock(x0=0, y0=0, x1=800, y1=600)

        document = MagicMock()
        document.__iter__.return_value = iter([page])

        # fitz.Rect is patched as well, so no real rectangle is constructed.
        with patch("fitz.open", return_value=document), patch("fitz.Rect"):
            media.pdf_crop(source, cropped, margins=(10, 10, 10, 10))

        assert document.save.called
# ---------------------------------------------------------------------- #
# pdf_info
# ---------------------------------------------------------------------- #
class TestPdfInfo:
    """Exercise ``media.pdf_info`` against a mocked ``fitz`` document."""

    def test_pdf_info_file(self, tmp_path: Path) -> None:
        """Requesting info should call ``fitz.open``."""
        pytest.importorskip("fitz")

        source = tmp_path / "input.pdf"
        source.write_bytes(b"PDF content")

        # Document attributes are configured directly through the constructor.
        document = MagicMock(
            page_count=10,
            metadata={"title": "Test", "author": "Author"},
        )
        with patch("fitz.open", return_value=document) as open_mock:
            media.pdf_info(source)

        assert open_mock.called
# ---------------------------------------------------------------------- #
# pdf_ocr
# ---------------------------------------------------------------------- #
class TestPdfOcr:
    """Test pdf_ocr function."""

    @pytest.mark.slow
    def test_pdf_ocr_file(self, tmp_path: Path) -> None:
        """Should run OCR on a mocked PDF and open the input through ``fitz.open``.

        Skipped unless ``fitz``, ``pytesseract`` and ``PIL`` are importable.
        """
        pytest.importorskip("fitz")
        pytest.importorskip("pytesseract")
        pytest.importorskip("PIL")
        input_file = tmp_path / "input.pdf"
        input_file.write_bytes(b"PDF content")
        output_file = tmp_path / "ocr.pdf"
        with patch("fitz.open") as mock_fitz_open, patch("PIL.Image.frombytes"), patch(
            "pytesseract.image_to_string"
        ) as mock_ocr:
            mock_doc = MagicMock()
            mock_page = MagicMock()
            mock_page.rect = MagicMock(width=800, height=600)
            mock_doc.__iter__ = MagicMock(return_value=iter([mock_page]))
            mock_fitz_open.return_value = mock_doc
            mock_ocr.return_value = "OCR text"
            media.pdf_ocr(input_file, output_file)
            # The test previously asserted nothing, so it could only fail if
            # pdf_ocr raised. Check at least that the patched fitz.open was used.
            # NOTE(review): presumes pdf_ocr opens the input via fitz.open, as the
            # patch above implies — confirm against the implementation.
            assert mock_fitz_open.called
# ---------------------------------------------------------------------- #
# pdf_repair
# ---------------------------------------------------------------------- #
class TestPdfRepair:
    """Exercise ``media.pdf_repair`` against a mocked ``fitz`` document."""

    def test_pdf_repair_file(self, tmp_path: Path) -> None:
        """Repairing should end with ``save`` being called on the document."""
        pytest.importorskip("fitz")

        source = tmp_path / "input.pdf"
        source.write_bytes(b"PDF content")
        repaired = tmp_path / "repaired.pdf"

        document = MagicMock()
        with patch("fitz.open", return_value=document):
            media.pdf_repair(source, repaired)

        assert document.save.called