# Mirror of https://github.com/koodiklinikka/emoji.git
# (synced 2026-01-26 11:44:04 +00:00; listing: 58 lines, 1.7 KiB, Python)
import argparse
|
|
import os
|
|
import shutil
|
|
import unicodedata
|
|
|
|
# Default destination root: the parent of the directory holding this script.
REPO_PATH = os.path.join(os.path.dirname(__file__), os.pardir)
|
|
|
|
# Maps a string of candidate initial letters to the name of the bucket
# directory that files starting with one of those letters are placed in.
BUCKETS = {
    "abcdefghi": "a-i",
    "jklmnopqr": "j-r",
    "stuvwxyz": "s-z",
}


def find_bucket(name: str) -> str:
    """Return the bucket directory name for *name*.

    The bucket is selected by the lower-cased first character of *name*.
    Names that are empty, or whose first character is not listed in any
    ``BUCKETS`` key (digits, punctuation, non-ASCII letters, ...), are
    assigned to ``"other"``.
    """
    # An empty name has no initial; indexing it would raise IndexError.
    if not name:
        return "other"
    initial = name[0].lower()
    for letters, bucket in BUCKETS.items():
        if initial in letters:
            return bucket
    return "other"
|
|
|
|
|
|
def main():
    """Copy files from a source directory into letter buckets.

    Command-line options:
      -s/--source   directory whose regular files are copied (required)
      -d/--dest     destination root; defaults to ``REPO_PATH``
      -n/--dry-run  only print what would be copied, copy nothing

    Each regular file is copied to ``<dest>/<bucket>/<name>``, where
    ``<name>`` is the NFC-normalized, lower-cased file name and ``<bucket>``
    is chosen by ``find_bucket``. Non-file entries are reported and skipped.
    Afterwards, every bucket touched by this run that exists on disk is
    checked, and a warning is printed if it holds more than 900 entries.
    """
    # Entry count above which a bucket directory should be split.
    max_bucket_entries = 900

    ap = argparse.ArgumentParser()
    ap.add_argument("-s", "--source", help="Source path for files", required=True)
    ap.add_argument(
        "-d", "--dest", help="Destination path for files", default=REPO_PATH
    )
    ap.add_argument("-n", "--dry-run", help="Dry run", action="store_true")
    args = ap.parse_args()

    buckets = set()
    with os.scandir(args.source) as it:
        # scandir yields entries in arbitrary order; sorting keeps the log
        # (and the winner when two names collide after lower-casing)
        # reproducible between runs.
        for entry in sorted(it, key=lambda e: e.name):
            name = entry.name
            if not entry.is_file():
                print(f"Skipping {name}")
                continue

            name_norm = unicodedata.normalize("NFC", name).lower()
            # Choose the bucket from the normalized name, i.e. the name the
            # file is actually stored under. Using the raw name would make
            # the bucket depend on the source's Unicode form: a decomposed
            # "a" + combining mark would land in "a-i", while the same name
            # in precomposed form would land in "other".
            bucket = find_bucket(name_norm)
            buckets.add(bucket)
            dest = os.path.join(args.dest, bucket, name_norm)

            if args.dry_run:
                print(f"Would copy {entry.path} to {dest}")
                continue

            os.makedirs(os.path.dirname(dest), exist_ok=True)
            shutil.copyfile(entry.path, dest)
            print(f"Copied {name} to {dest}")

    for bucket in sorted(buckets):
        bucket_dir = os.path.join(args.dest, bucket)
        # The directory may be missing, e.g. after a dry run into a new bucket.
        if not os.path.isdir(bucket_dir):
            continue
        if len(os.listdir(bucket_dir)) > max_bucket_entries:
            print(
                f"Bucket {bucket} has more than {max_bucket_entries} files, "
                "please split it"
            )
|
|
|
|
|
|
# Run the command-line tool only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|