| |
| """Upload this codebase to Hugging Face Hub while excluding the .back folder. |
| |
| Examples: |
| /g/data/rr81/aev/bin/python upload.py |
| /g/data/rr81/aev/bin/python upload.py --repo-id aryadomain/all_code_base |
| /g/data/rr81/aev/bin/python upload.py --method large |
| /g/data/rr81/aev/bin/python upload.py --repo-id my-user/all_code_base |
| """ |
|
|
| from __future__ import annotations |
|
|
| import argparse |
| import inspect |
| from pathlib import Path |
| from typing import List |
|
|
| from huggingface_hub import HfApi |
|
|
|
|
def build_ignore_patterns(extra_ignore: List[str]) -> List[str]:
    """Return the ignore patterns handed to the Hub upload calls.

    The always-on ``.git`` and ``.back`` patterns come first, followed by
    the entries of ``extra_ignore`` in the order given.  A fresh list is
    returned on every call; ``extra_ignore`` itself is not modified.
    """
    always_ignored = (
        ".git",
        ".git/**",
        ".back",
        ".back/**",
        "**/.back/**",
    )
    return [*always_ignored, *extra_ignore]
|
|
|
|
def parse_args(argv: List[str] | None = None) -> argparse.Namespace:
    """Parse the command-line options of the uploader.

    Args:
        argv: Argument list to parse.  ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is what a plain ``parse_args()``
            call has always done.

    Returns:
        The parsed namespace.  ``repo_name`` is always a string.  ``repo_id``
        is the explicit ``--repo-id`` value when one was given; it is the
        built-in default repo id when none of ``--repo-id``, ``--repo-name``
        and ``--namespace`` was given; otherwise it is ``None`` so that the
        repo id can be derived from the namespace/name options.
    """
    default_repo_name = "all_code_base"
    default_repo_id = "aryadomain/all_code_base"

    parser = argparse.ArgumentParser(
        description="Upload a folder to Hugging Face Hub, excluding .back."
    )
    parser.add_argument(
        "--source-dir",
        type=str,
        default=".",
        help="Local folder to upload (default: current directory).",
    )
    # --repo-name and --repo-id use None as "not given" sentinels.  With a
    # truthy --repo-id default, --repo-name/--namespace could never take
    # effect because the repo id always won.
    parser.add_argument(
        "--repo-name",
        type=str,
        default=None,
        help="Repo name used when --repo-id is not provided (default: all_code_base).",
    )
    parser.add_argument(
        "--repo-id",
        type=str,
        default=None,
        help=(
            "Full Hugging Face repo id like user_or_org/repo_name "
            "(default: aryadomain/all_code_base unless --repo-name or --namespace is given)."
        ),
    )
    parser.add_argument(
        "--namespace",
        type=str,
        default=None,
        help="Optional user/org namespace override when using --repo-name.",
    )
    parser.add_argument(
        "--repo-type",
        type=str,
        default="model",
        choices=["model", "dataset", "space"],
        help="Hub repo type.",
    )
    parser.add_argument(
        "--private",
        action="store_true",
        help="Create the repo as private.",
    )
    parser.add_argument(
        "--revision",
        type=str,
        default="main",
        help="Target branch/revision (default: main).",
    )
    parser.add_argument(
        "--commit-message",
        type=str,
        default="Upload codebase excluding .back",
        help="Commit message for upload.",
    )
    parser.add_argument(
        "--extra-ignore",
        nargs="*",
        default=[],
        help="Additional ignore patterns for upload_folder.",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Print resolved settings and exit without uploading.",
    )
    parser.add_argument(
        "--method",
        type=str,
        default="large",
        choices=["auto", "folder", "large"],
        help=(
            "Upload method: auto chooses large upload for big folders, "
            "folder forces upload_folder, large forces upload_large_folder compatibility path."
        ),
    )
    parser.add_argument(
        "--large-threshold-gb",
        type=float,
        default=10.0,
        help="In auto mode, switch to large upload when folder size exceeds this many GB.",
    )

    args = parser.parse_args(argv)

    # Keep the historical default target only when the user expressed no
    # preference at all; an explicit --repo-name/--namespace must win over it.
    name_options_given = args.repo_name is not None or args.namespace is not None
    if args.repo_id is None and not name_options_given:
        args.repo_id = default_repo_id
    if args.repo_name is None:
        args.repo_name = default_repo_name
    return args
|
|
|
|
def resolve_repo_id(api: HfApi, args: argparse.Namespace) -> str:
    """Work out the ``owner/name`` repo id to upload to.

    Precedence: a truthy ``args.repo_id`` is returned unchanged; otherwise
    the owner is ``args.namespace`` when set, else the ``"name"`` entry of
    ``api.whoami()``.  The owner is joined with ``args.repo_name``.

    Raises:
        RuntimeError: if neither a namespace nor a whoami name is available.
    """
    explicit_id = args.repo_id
    if explicit_id:
        return explicit_id

    owner = args.namespace
    if not owner:
        # Only query the Hub when the command line gave no owner.
        owner = api.whoami().get("name")
        if not owner:
            raise RuntimeError("Could not resolve username from Hugging Face login.")
    return f"{owner}/{args.repo_name}"
|
|
|
|
def folder_size_bytes(folder: Path, excluded_names: tuple[str, ...] = (".back",)) -> int:
    """Return the total size in bytes of the files below ``folder``.

    Any file or directory whose own name is in ``excluded_names`` is left
    out, together with everything beneath such a directory.  Names are
    matched only on path components *below* ``folder``, so a source folder
    that itself sits under a ``.back`` ancestor is still measured.

    Args:
        folder: Root directory to scan.
        excluded_names: Entry names to skip at any depth.

    Returns:
        Sum of the sizes of the remaining files; files that cannot be
        stat'ed are skipped rather than aborting the scan.
    """
    import os  # local import: this block does not rely on a module-level os import

    skipped = frozenset(excluded_names)
    total = 0
    for current_dir, subdirs, filenames in os.walk(folder):
        # Prune in place so os.walk never descends into excluded trees.
        subdirs[:] = [name for name in subdirs if name not in skipped]
        for filename in filenames:
            if filename in skipped:
                continue
            file_path = os.path.join(current_dir, filename)
            try:
                if os.path.isfile(file_path):
                    total += os.path.getsize(file_path)
            except OSError:
                # Best effort: the file vanished or is unreadable; keep scanning.
                continue
    return total
|
|
|
|
def upload_folder_compat(
    api: HfApi,
    repo_id: str,
    repo_type: str,
    source_dir: Path,
    revision: str,
    commit_message: str,
    ignore_patterns: List[str],
):
    """Call ``api.upload_folder`` with whatever multi-commit flags it accepts.

    The signature of ``api.upload_folder`` is inspected at call time;
    ``multi_commits`` and ``multi_commits_verbose`` are set to ``True`` only
    when the method declares parameters with those names.  The folder is
    uploaded with ``path_in_repo="."``.

    Returns:
        Whatever ``api.upload_folder`` returns.
    """
    call_kwargs = dict(
        repo_id=repo_id,
        repo_type=repo_type,
        folder_path=str(source_dir),
        path_in_repo=".",
        revision=revision,
        commit_message=commit_message,
        ignore_patterns=ignore_patterns,
    )
    accepted = inspect.signature(api.upload_folder).parameters
    for optional_flag in ("multi_commits", "multi_commits_verbose"):
        if optional_flag in accepted:
            call_kwargs[optional_flag] = True
    return api.upload_folder(**call_kwargs)
|
|
|
|
def upload_large_compat(
    api: HfApi,
    repo_id: str,
    repo_type: str,
    source_dir: Path,
    revision: str,
    ignore_patterns: List[str],
):
    """Call ``api.upload_large_folder`` with only the keywords it declares.

    ``repo_id``, ``repo_type`` and ``folder_path`` are always passed.
    ``revision``, ``path_in_repo`` (as ``"."``) and ``ignore_patterns`` are
    added only when the method's signature has parameters with those names.

    Returns:
        Whatever ``api.upload_large_folder`` returns.

    Raises:
        RuntimeError: if ``api`` has no ``upload_large_folder`` attribute.
    """
    if not hasattr(api, "upload_large_folder"):
        raise RuntimeError("Installed huggingface_hub does not provide upload_large_folder")

    accepted = inspect.signature(api.upload_large_folder).parameters
    optional_kwargs = {
        "revision": revision,
        "path_in_repo": ".",
        "ignore_patterns": ignore_patterns,
    }
    call_kwargs = {
        "repo_id": repo_id,
        "repo_type": repo_type,
        "folder_path": str(source_dir),
    }
    call_kwargs.update(
        {key: value for key, value in optional_kwargs.items() if key in accepted}
    )
    return api.upload_large_folder(**call_kwargs)
|
|
|
|
def main() -> None:
    """Run the uploader: parse options, report settings, create the repo, upload.

    Raises:
        FileNotFoundError: if the resolved source path is missing or is not
            a directory.
    """
    args = parse_args()
    source_dir = Path(args.source_dir).resolve()
    if not (source_dir.exists() and source_dir.is_dir()):
        raise FileNotFoundError(f"Source directory not found or not a directory: {source_dir}")

    api = HfApi()
    repo_id = resolve_repo_id(api, args)
    ignore_patterns = build_ignore_patterns(args.extra_ignore)

    # The folder is only scanned in auto mode; the other modes pick the
    # upload path directly from --method.
    if args.method == "auto":
        total_size_gb = folder_size_bytes(source_dir) / (1024 ** 3)
        use_large = total_size_gb >= args.large_threshold_gb
    else:
        total_size_gb = None
        use_large = args.method == "large"

    settings = (
        ("Source directory:", source_dir),
        ("Repo id:", repo_id),
        ("Repo type:", args.repo_type),
        ("Private:", args.private),
        ("Revision:", args.revision),
        ("Ignore patterns:", ignore_patterns),
    )
    for label, value in settings:
        print(label, value)
    if total_size_gb is not None:
        print(f"Folder size (excluding .back): {total_size_gb:.2f} GB")
    else:
        print("Folder size scan: skipped (set --method auto to enable size-based selection)")
    print("Upload method:", "large" if use_large else "folder")

    if args.dry_run:
        print("Dry run requested. Exiting before create/upload.")
        return

    api.create_repo(
        repo_id=repo_id,
        repo_type=args.repo_type,
        private=args.private,
        exist_ok=True,
    )

    def _upload_as_folder():
        # Shared by the direct "folder" path and the large-upload fallback.
        return upload_folder_compat(
            api=api,
            repo_id=repo_id,
            repo_type=args.repo_type,
            source_dir=source_dir,
            revision=args.revision,
            commit_message=args.commit_message,
            ignore_patterns=ignore_patterns,
        )

    if not use_large:
        commit_info = _upload_as_folder()
    else:
        try:
            commit_info = upload_large_compat(
                api=api,
                repo_id=repo_id,
                repo_type=args.repo_type,
                source_dir=source_dir,
                revision=args.revision,
                ignore_patterns=ignore_patterns,
            )
        except Exception as exc:
            # Deliberate best effort: any failure of the large path is
            # reported and retried through upload_folder.
            print(f"Large upload path failed ({exc}). Falling back to upload_folder with multi-commit mode.")
            commit_info = _upload_as_folder()

    print("Upload completed.")
    print("Commit:", commit_info)
    print("Repo URL: https://huggingface.co/" + repo_id)
|
|
|
|
# Run the uploader only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|