From f69ddc513354aec623c4fec25c81a033c2876c2a Mon Sep 17 00:00:00 2001
From: gooker_young
Date: Wed, 24 Jun 2026 21:36:47 +0800
Subject: [PATCH] +hfdownload

---
 pyproject.toml                |   1 +
 src/pyflowx/cli/hfdownload.py | 114 ++++++++++++++++++++++++++++++++++
 uv.lock                       |   2 +-
 3 files changed, 116 insertions(+), 1 deletion(-)
 create mode 100644 src/pyflowx/cli/hfdownload.py

diff --git a/pyproject.toml b/pyproject.toml
index 44fec19..26f30e8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -31,6 +31,7 @@ filelvl = "pyflowx.cli.filelevel:main"
 foldback = "pyflowx.cli.folderback:main"
 foldzip = "pyflowx.cli.folderzip:main"
 gitt = "pyflowx.cli.gittool:main"
+hfdown = "pyflowx.cli.hfdownload:main"
 lscalc = "pyflowx.cli.lscalc:main"
 packtool = "pyflowx.cli.packtool:main"
 pdftool = "pyflowx.cli.pdftool:main"
diff --git a/src/pyflowx/cli/hfdownload.py b/src/pyflowx/cli/hfdownload.py
new file mode 100644
index 0000000..61df6e2
--- /dev/null
+++ b/src/pyflowx/cli/hfdownload.py
@@ -0,0 +1,114 @@
+"""Download HuggingFace models, datasets or spaces through the hf-mirror endpoint."""
+
+import argparse
+import os
+from pathlib import Path
+from typing import Literal, get_args
+
+import pyflowx as px
+
+HFDownloadType = Literal["model", "dataset", "space"]
+
+# Mirror used both as HF_ENDPOINT and as the host serving the hfd.sh helper script.
+HF_MIRROR_ENDPOINT = "https://hf-mirror.com"
+HFD_SCRIPT_NAME = "hfd.sh"
+
+
+def setenvs() -> None:
+    """Point HuggingFace tooling at the mirror by setting ``HF_ENDPOINT``."""
+    os.environ["HF_ENDPOINT"] = HF_MIRROR_ENDPOINT
+
+
+def _hfd_specs(repo_id: str, repo_type: str, download_dir: Path) -> list:
+    """Build the task chain that fetches ``hfd.sh``, makes it executable and runs it."""
+    hfd_cmd = ["./" + HFD_SCRIPT_NAME, repo_id, "--local-dir", str(download_dir)]
+    if repo_type == "dataset":
+        # hfd.sh selects dataset mode with the --dataset flag (models are its default).
+        hfd_cmd.append("--dataset")
+    return [
+        px.TaskSpec(name="setenvs", fn=setenvs, verbose=True),
+        px.TaskSpec(
+            name="download_hfd",
+            # -O overwrites an existing copy; plain wget would save hfd.sh.1 and leave
+            # the stale hfd.sh to be executed.
+            cmd=["wget", "-O", HFD_SCRIPT_NAME, HF_MIRROR_ENDPOINT + "/hfd/hfd.sh"],
+            depends_on=["setenvs"],
+            verbose=True,
+        ),
+        px.TaskSpec(
+            name="chmod_hfd",
+            cmd=["chmod", "a+x", HFD_SCRIPT_NAME],
+            depends_on=["download_hfd"],
+            verbose=True,
+        ),
+        px.TaskSpec(
+            name="run_hfd",
+            cmd=hfd_cmd,
+            depends_on=["chmod_hfd"],
+            verbose=True,
+        ),
+    ]
+
+
+def _hf_cli_specs(repo_id: str, repo_type: str, download_dir: Path) -> list:
+    """Build the task chain that downloads through the ``hf`` CLI launched by ``uvx``."""
+    return [
+        px.TaskSpec(name="setenvs", fn=setenvs, verbose=True),
+        px.TaskSpec(
+            name="download",
+            cmd=[
+                "uvx",
+                "hf",
+                "download",
+                "--repo-type",
+                repo_type,
+                "--force-download",
+                repo_id,
+                "--local-dir",
+                str(download_dir),
+            ],
+            depends_on=["setenvs"],
+            verbose=True,
+        ),
+    ]
+
+
+def main() -> None:
+    """Parse the command line and run the matching download task graph.
+
+    The repository is stored in ``<cwd>/<dataset_name>``. The ``hf`` CLI (run via
+    ``uvx``) is used by default; ``--use-hfd`` switches to the ``hfd.sh`` script.
+    """
+    parser = argparse.ArgumentParser(
+        description="Download a model, dataset or space from HuggingFace.",
+    )
+    parser.add_argument("dataset_name", type=str, help="HuggingFace dataset name.")
+    parser.add_argument(
+        "--type",
+        type=str,
+        # No nargs="?": a bare --type would store None, which then breaks the command.
+        default="dataset",
+        choices=get_args(HFDownloadType),
+        help="HuggingFace repo type.",
+    )
+    parser.add_argument("--use-hfd", action="store_true", help="Use HFD tool to download dataset.")
+    args = parser.parse_args()
+
+    # A positional can still be passed as an empty string, so reject it explicitly.
+    if not args.dataset_name:
+        parser.error("dataset_name is required")
+    if args.use_hfd and args.type == "space":
+        parser.error("--use-hfd supports only the 'model' and 'dataset' types")
+
+    dataset_name = args.dataset_name
+
+    # Create the download directory up front; ids such as "org/name" need nested folders.
+    download_dir = Path.cwd() / dataset_name
+    download_dir.mkdir(parents=True, exist_ok=True)
+
+    if args.use_hfd:
+        specs = _hfd_specs(dataset_name, args.type, download_dir)
+    else:
+        specs = _hf_cli_specs(dataset_name, args.type, download_dir)
+
+    px.run(px.Graph.from_specs(specs), strategy="thread", verbose=True)
diff --git a/uv.lock b/uv.lock
index 5c85bea..b79fb13 100644
--- a/uv.lock
+++ b/uv.lock
@@ -2184,7 +2184,7 @@ wheels = [
 
 [[package]]
 name = "pyflowx"
-version = "0.1.7"
+version = "0.1.8"
 source = { editable = "." }
 dependencies = [
     { name = "graphlib-backport", marker = "python_full_version < '3.9'" },