+hfdownload

This commit is contained in:
2026-06-24 21:36:47 +08:00
parent 477d901281
commit f69ddc5133
3 changed files with 84 additions and 1 deletions
+1
View File
@@ -31,6 +31,7 @@ filelvl = "pyflowx.cli.filelevel:main"
foldback = "pyflowx.cli.folderback:main"
foldzip = "pyflowx.cli.folderzip:main"
gitt = "pyflowx.cli.gittool:main"
hfdown = "pyflowx.cli.hfdownload:main"
lscalc = "pyflowx.cli.lscalc:main"
packtool = "pyflowx.cli.packtool:main"
pdftool = "pyflowx.cli.pdftool:main"
+82
View File
@@ -0,0 +1,82 @@
import argparse
import os
from pathlib import Path
from typing import Literal, get_args
import pyflowx as px
HFDownloadType = Literal["model", "dataset", "space"]
def setenvs():
"""设置 HuggingFace mirror 环境变量."""
os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"
def main():
parser = argparse.ArgumentParser(description="Download a model from HuggingFace.")
parser.add_argument("dataset_name", type=str, help="HuggingFace dataset name.")
parser.add_argument(
"--type",
type=str,
nargs="?",
default="dataset",
choices=get_args(HFDownloadType),
help="HuggingFace dataset type.",
)
parser.add_argument("--use-hfd", action="store_true", help="Use HFD tool to download dataset.")
args = parser.parse_args()
if not args.dataset_name:
parser.error("dataset_name is required")
dataset_name = args.dataset_name
# 创建下载目录
download_dir = Path.cwd() / dataset_name
download_dir.mkdir(parents=True, exist_ok=True)
if args.use_hfd:
graph = px.Graph.from_specs([
px.TaskSpec(name="setenvs", fn=setenvs, verbose=True),
px.TaskSpec(
name="download_hfd",
cmd=["wget", "https://hf-mirror.com/hfd/hfd.sh"],
depends_on=["setenvs"],
verbose=True,
),
px.TaskSpec(
name="chmod_hfd",
cmd=["chmod", "a+x", "hfd.sh"],
depends_on=["download_hfd"],
verbose=True,
),
px.TaskSpec(
name="run_hfd",
cmd=["./hfd.sh", dataset_name, args.type],
depends_on=["chmod_hfd"],
verbose=True,
),
])
else:
graph = px.Graph.from_specs([
px.TaskSpec(name="setenvs", fn=setenvs, verbose=True),
px.TaskSpec(
name="download",
cmd=[
"uvx",
"hf",
"download",
"--repo-type",
args.type,
"--force-download",
dataset_name,
"--local-dir",
str(Path.cwd() / dataset_name),
],
depends_on=["setenvs"],
verbose=True,
),
])
px.run(graph, strategy="thread", verbose=True)
Generated
+1 -1
View File
@@ -2184,7 +2184,7 @@ wheels = [
[[package]]
name = "pyflowx"
version = "0.1.7"
version = "0.1.8"
source = { editable = "." }
dependencies = [
{ name = "graphlib-backport", marker = "python_full_version < '3.9'" },