"""Copy files from a source directory into alphabetical bucket directories.

Each regular file directly inside the source directory is copied to
``<dest>/<bucket>/<normalized name>``, where the bucket is chosen from the
first character of the file name and the normalized name is the NFC-normalized,
lower-cased file name.
"""

import argparse
import os
import shutil
import unicodedata

# Default destination: the parent of the directory that contains this script.
REPO_PATH = os.path.join(os.path.dirname(__file__), "..")

# Maps a run of initial letters to the name of the bucket directory.
BUCKETS = {
    "abcdefghi": "a-i",
    "jklmnopqr": "j-r",
    "stuvwxyz": "s-z",
}

# Bucket used when the initial character matches none of the BUCKETS keys.
FALLBACK_BUCKET = "other"

# A warning is printed for any touched bucket holding more entries than this.
MAX_BUCKET_FILES = 900


def find_bucket(name: str) -> str:
    """Return the bucket directory name for *name*.

    The bucket is selected by the lower-cased first character of *name*.
    Names whose first character is not listed in ``BUCKETS`` (digits,
    punctuation, accented letters, ...) and empty names map to ``"other"``.

    Args:
        name: File name to classify.

    Returns:
        One of the ``BUCKETS`` values, or ``"other"``.
    """
    # An empty name has no initial; indexing it would raise IndexError.
    if not name:
        return FALLBACK_BUCKET

    initial = name[0].lower()
    for letters, test_bucket in BUCKETS.items():
        if initial in letters:
            return test_bucket
    return FALLBACK_BUCKET


def main():
    """Parse command-line arguments and copy the source files into buckets.

    Options:
        -s/--source   directory to read files from (required)
        -d/--dest     directory that holds the bucket directories
                      (defaults to ``REPO_PATH``)
        -n/--dry-run  only print what would be copied

    Non-file entries in the source directory are reported and skipped. After
    copying, every bucket touched in this run is checked and a warning is
    printed when it contains more than ``MAX_BUCKET_FILES`` entries.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("-s", "--source", help="Source path for files", required=True)
    ap.add_argument(
        "-d", "--dest", help="Destination path for files", default=REPO_PATH
    )
    ap.add_argument("-n", "--dry-run", help="Dry run", action="store_true")
    args = ap.parse_args()

    # Buckets that received (or, in a dry run, would receive) at least one file.
    buckets = set()
    with os.scandir(args.source) as it:
        for entry in it:
            name = entry.name
            if not entry.is_file():
                print(f"Skipping {name}")
                continue

            # NOTE(review): the bucket is chosen from the raw name while the
            # destination name is NFC-normalized, so a decomposed (NFD) name
            # such as "e" + combining accent lands in "a-i" whereas its
            # precomposed form lands in "other" -- confirm this is intended.
            bucket = find_bucket(name)
            buckets.add(bucket)

            name_norm = unicodedata.normalize("NFC", name).lower()
            bucket_dir = os.path.join(args.dest, bucket)
            dest = os.path.join(bucket_dir, name_norm)

            if args.dry_run:
                print(f"Would copy {entry.path} to {dest}")
                continue

            os.makedirs(bucket_dir, exist_ok=True)
            shutil.copyfile(entry.path, dest)
            print(f"Copied {name} to {dest}")

    # Sorted so that the warnings come out in a stable order between runs.
    for bucket in sorted(buckets):
        dest = os.path.join(args.dest, bucket)
        # In a dry run the bucket directory may not exist yet.
        if not os.path.isdir(dest):
            continue
        n_files = len(os.listdir(dest))
        if n_files > MAX_BUCKET_FILES:
            print(
                f"Bucket {bucket} has more than {MAX_BUCKET_FILES} files, "
                "please split it"
            )


if __name__ == "__main__":
    main()