# Mirror of https://github.com/ivuorinen/emoji.git
# Synced 2026-01-26 11:23:58 +00:00 (73 lines, 2.0 KiB, Python)
#!/usr/bin/env python3
|
|
"""Find and remove duplicate emoji files based on content hash."""
|
|
|
|
import hashlib
|
|
from collections import defaultdict
|
|
from pathlib import Path
|
|
|
|
# Directory scanned for emoji images; a relative path, so it is resolved
# against the current working directory at run time.
EMOJI_DIR = Path("emoji")
# File suffixes treated as emoji images. Candidates are matched against the
# lower-cased suffix, so every entry here must be lower-case.
EXTENSIONS = (".png", ".gif", ".jpg", ".jpeg")
|
|
|
|
|
|
def hash_file(path: Path) -> str:
    """Return the SHA-256 hex digest of the contents of the file at *path*.

    The file is streamed in fixed-size chunks so memory use stays constant
    no matter how large the file is.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    digest = hashlib.sha256()
    with path.open("rb") as stream:
        # 64 KiB per read: few system calls without holding the whole file.
        while chunk := stream.read(65536):
            digest.update(chunk)
    return digest.hexdigest()
|
|
|
|
|
|
def find_duplicates(files: list[Path]) -> dict[str, list[Path]]:
    """Group *files* by content hash and return only the groups with duplicates.

    Args:
        files: Paths of the files to compare.

    Returns:
        A plain dict mapping each content hash to the list of paths sharing
        it, restricted to hashes shared by more than one path. Paths within a
        group keep the order in which they appear in *files*.

    Raises:
        OSError: propagated from ``hash_file`` when a file cannot be read.
    """
    by_hash: defaultdict[str, list[Path]] = defaultdict(list)
    for file in files:
        by_hash[hash_file(file)].append(file)
    # Rebuild as a regular dict so callers never get the auto-inserting
    # behaviour of defaultdict, and drop hashes that occur only once.
    return {h: paths for h, paths in by_hash.items() if len(paths) > 1}
|
|
|
|
|
|
def deduplicate(duplicates: dict[str, list[Path]]) -> tuple[int, int]:
    """Delete all but one file in every duplicate group.

    Within a group the file whose name sorts first (case-insensitively) is
    kept and the others are deleted from disk. A line is printed for every
    kept and deleted file.

    Args:
        duplicates: Mapping of content hash to the paths sharing that hash.

    Returns:
        ``(groups, removed)``: the number of groups in *duplicates* and the
        number of files deleted.

    Raises:
        OSError: propagated from ``Path.unlink`` if a file cannot be deleted;
            files deleted before the failure stay deleted.
    """
    total_removed = 0

    # Only the path lists matter here; the hash keys are not used.
    for paths in duplicates.values():
        # Primary order is case-insensitive by name. The exact name and the
        # full path break ties, so the survivor never depends on the order in
        # which the paths happened to be discovered.
        sorted_paths = sorted(
            paths, key=lambda p: (p.name.lower(), p.name, str(p))
        )
        keep = sorted_paths[0]
        remove = sorted_paths[1:]

        print(f"\nDuplicate group ({len(paths)} files):")
        print(f"  KEEP:   {keep.name}")
        for path in remove:
            print(f"  DELETE: {path.name}")
            path.unlink()
            total_removed += 1

    return len(duplicates), total_removed
|
|
|
|
|
|
def main() -> None:
    """Scan ``EMOJI_DIR`` for image files, delete duplicates, print a summary.

    Only regular files directly inside ``EMOJI_DIR`` whose lower-cased suffix
    is in ``EXTENSIONS`` are considered. A missing directory is handled the
    same way as a directory without images.
    """
    if EMOJI_DIR.is_dir():
        files = [
            f for f in EMOJI_DIR.iterdir()
            # is_file() skips directories that merely carry an image suffix;
            # trying to hash those would raise.
            if f.is_file() and f.suffix.lower() in EXTENSIONS
        ]
    else:
        # iterdir() would raise FileNotFoundError on a missing directory.
        files = []

    if not files:
        print("No image files found in emoji/ folder.")
        return

    print(f"Scanning {len(files)} files...")

    duplicates = find_duplicates(files)

    if not duplicates:
        print("\nNo duplicates found.")
        return

    groups, removed = deduplicate(duplicates)

    print("\n--- Summary ---")
    print(f"Files scanned: {len(files)}")
    print(f"Duplicate groups: {groups}")
    print(f"Files removed: {removed}")
|
|
|
|
|
|
# Run the de-duplication only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
|