Compare commits

..

1 Commits

Author SHA1 Message Date
Ismo Vuorinen
2c7b0a2700 Test css grid based listing 2022-08-17 10:33:10 +03:00
3173 changed files with 1270 additions and 9401 deletions

View File

@@ -1,71 +0,0 @@
{
"hooks": {
"PreToolUse": [
{
"matcher": "Edit|Write",
"hooks": [
{
"type": "command",
"command": "file=$(jq -r '.tool_input.file_path // empty'); if [[ \"$file\" == *README.md || \"$file\" == *index.html ]]; then echo 'BLOCKED: README.md and index.html are generated artifacts. Edit create_listing.py instead, then run /regen-listings.' >&2; exit 2; fi"
}
]
},
{
"matcher": "Edit|Write",
"hooks": [
{
"type": "command",
"command": "file=$(jq -r '.tool_input.file_path // empty'); if [[ \"$file\" == */uv.lock || \"$file\" == uv.lock ]]; then echo 'BLOCKED: uv.lock is auto-generated. Modify pyproject.toml and run uv sync instead.' >&2; exit 2; fi"
}
]
},
{
"matcher": "Edit|Write",
"hooks": [
{
"type": "command",
"command": "file=$(jq -r '.tool_input.file_path // empty'); if [[ \"$file\" == *.github/workflows/*.yml || \"$file\" == .github/workflows/*.yml ]]; then echo 'BLOCKED: CI workflows use pinned action SHAs with # version comments for security. Edit workflow files carefully and maintain the SHA-pinning convention.' >&2; exit 2; fi"
}
]
},
{
"matcher": "Edit|Write",
"hooks": [
{
"type": "command",
"command": "file=$(jq -r '.tool_input.file_path // empty'); if [[ \"$file\" == */renovate.json || \"$file\" == renovate.json ]]; then echo 'BLOCKED: renovate.json is rarely edited. Make changes deliberately and confirm with the user first.' >&2; exit 2; fi"
}
]
},
{
"matcher": "Edit|Write",
"hooks": [
{
"type": "command",
"command": "file=$(jq -r '.tool_input.file_path // empty'); if [[ \"$file\" == emoji/* || \"$file\" == */emoji/* ]]; then echo 'BLOCKED: Emoji image files should not be written by Claude. Manage images manually or use /dedup-check.' >&2; exit 2; fi"
}
]
}
],
"PostToolUse": [
{
"matcher": "Edit|Write",
"hooks": [
{
"type": "command",
"command": "file=$(jq -r '.tool_input.file_path // empty'); if [[ \"$file\" == *.py ]]; then uv run ruff check --fix \"$file\" 2>&1 | tail -5 && uv run ruff format \"$file\" 2>&1 | tail -3; fi"
}
]
},
{
"matcher": "Edit|Write",
"hooks": [
{
"type": "command",
"command": "file=$(jq -r '.tool_input.file_path // empty'); if [[ \"$file\" == *.py ]]; then uv run pytest --tb=short -q 2>&1 | tail -5; fi"
}
]
}
]
}
}

View File

@@ -1,10 +0,0 @@
---
name: dedup-check
description: Run dedup in dry-run mode and report duplicate groups found
disable-model-invocation: true
---
## Steps
1. Run `uv run dedup --dry-run` from the project root
2. Summarize the output — report how many duplicate groups were found and which files are involved

View File

@@ -1,11 +0,0 @@
---
name: regen-listings
description: Regenerate README.md and index.html from emoji/ contents and verify output
disable-model-invocation: true
---
## Steps
1. Run `uv run python3 create_listing.py` from the project root
2. Confirm both `README.md` and `index.html` exist and are non-empty
3. Report the file sizes of both generated files

20
.gitattributes vendored Normal file
View File

@@ -0,0 +1,20 @@
* text=auto
*.md text diff=markdown
*.php text diff=php
.gitattributes export-ignore
.gitignore export-ignore
# Graphics
*.gif binary
*.gifv binary
*.jpg binary
*.jpeg binary
*.png binary
# SVG kept as text so diffs stay readable (unlike the binary graphics above).
*.svg text
*.svgz binary
*.tif binary
*.tiff binary
*.wbmp binary
*.webp binary

View File

@@ -11,7 +11,7 @@ jobs:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- name: Checkout Repo - name: Checkout Repo
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 uses: actions/checkout@v2
- name: Compress Images - name: Compress Images
id: calibre id: calibre
uses: calibreapp/image-actions@main uses: calibreapp/image-actions@main
@@ -20,7 +20,7 @@ jobs:
compressOnly: true compressOnly: true
- name: Create New Pull Request If Needed - name: Create New Pull Request If Needed
if: steps.calibre.outputs.markdown != '' if: steps.calibre.outputs.markdown != ''
uses: peter-evans/create-pull-request@c0f553fe549906ede9cf27b5156039d195d2ece0 # v8 uses: peter-evans/create-pull-request@v3
with: with:
title: Compressed Images Nightly title: Compressed Images Nightly
branch-suffix: timestamp branch-suffix: timestamp

View File

@@ -1,30 +0,0 @@
# Regenerates README.md and index.html whenever emoji/ or the generator
# script changes on master, then commits the result back to the branch.
name: Generate Listings
on:
  push:
    paths:
      - 'emoji/**'
      - 'create_listing.py'
    branches:
      - master

# The final step pushes a commit, so the GITHUB_TOKEN needs write access
# to repository contents (the default token may be read-only).
permissions:
  contents: write

jobs:
  generate:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Repo
        # Pinned to a commit SHA (repo convention) rather than a mutable tag.
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
      - name: Set up Python
        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6
        with:
          python-version: '3.14'
      - name: Generate listings
        # create_listing.py is stdlib-only, so no dependency install is needed.
        run: python3 create_listing.py
      - name: Commit changes
        # Only commits when the regenerated files actually differ.
        run: |
          git config --local user.email "github-actions[bot]@users.noreply.github.com"
          git config --local user.name "github-actions[bot]"
          git add README.md index.html
          git diff --staged --quiet || git commit -m "Update listings"
          git push

View File

@@ -16,7 +16,7 @@ jobs:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- name: Checkout Repo - name: Checkout Repo
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 uses: actions/checkout@v2
- name: Compress Images - name: Compress Images
uses: calibreapp/image-actions@main uses: calibreapp/image-actions@main

View File

@@ -1,25 +0,0 @@
# Runs the pytest suite whenever Python sources, project metadata, or the
# dependency lockfile change, on both push and pull request.
name: Tests
on:
  push:
    paths:
      - '**.py'
      - 'pyproject.toml'
      - 'uv.lock'
  pull_request:
    paths:
      - '**.py'
      - 'pyproject.toml'
      - 'uv.lock'
jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      # Actions are pinned to commit SHAs (repo convention) for supply-chain safety.
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
      - uses: astral-sh/setup-uv@5a095e7a2014a4212f075830d4f7277575a9d098 # v7
      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6
        with:
          python-version: '3.14'
      # Install dev dependencies from uv.lock, then run the suite.
      - run: uv sync --dev
      - run: uv run pytest -v

2
.gitignore vendored
View File

@@ -1,2 +0,0 @@
__pycache__/
.claude/settings.local.json

View File

@@ -1,52 +0,0 @@
# CLAUDE.md
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
## Project Overview
Personal emoji/emote collection for chat apps (Slack, Discord, etc.). Contains 3000+ custom emoji images in `emoji/` with Python tooling for maintenance: listing generation and perceptual deduplication.
## Commands
```bash
# Install dependencies (uses uv package manager)
uv sync
# Regenerate README.md and index.html from emoji/ contents
uv run python3 create_listing.py
# Find duplicate emojis (dry run)
uv run python3 dedup.py --dry-run
# Find duplicates with a custom threshold (default 0 = exact match)
uv run python3 dedup.py --threshold 5 --dry-run
# Actually remove duplicates
uv run python3 dedup.py --dir emoji/
# Or via uv entry point
uv run dedup --dry-run
# Run tests
uv run pytest
# Run tests with verbose output
uv run pytest -v
```
## Architecture
Two standalone Python scripts, no shared modules:
- **`create_listing.py`** — Generates `README.md` (HTML tables) and `index.html` (searchable dark-theme SPA) from all images in `emoji/`. No dependencies beyond stdlib. Both output files are auto-generated and committed by CI on push.
- **`dedup.py`** — Finds and removes duplicate images using multi-algorithm perceptual hashing (pHash, aHash, dHash, colorHash). Uses Union-Find clustering. Animated GIFs get extra frame-by-frame verification including timing. Keeps alphabetically-first filename per duplicate group.
## Key Conventions
- Python >=3.11 required; dependencies managed via `uv` with `uv.lock`
- Image formats: `.png`, `.gif`, `.jpg`, `.jpeg`
- `README.md` and `index.html` are generated artifacts — edit the scripts, not the outputs
- CI uses pinned action SHAs (not tags) for security
- Dependency updates managed by Renovate bot
- Always use `uv run` to execute Python commands (e.g. `uv run pytest`, `uv run ruff`, `uv run python3 script.py`) to ensure the correct virtualenv and dependencies are used

5063
README.md

File diff suppressed because it is too large Load Diff

68
_create-listing.php Normal file
View File

@@ -0,0 +1,68 @@
<?php
// Generates README.md: a grid listing of every emoji image in emoji/.
// Images are grouped by the first character of their filename; names that
// do not start with an ASCII letter or ':' share one escaped bucket.
$output  = 'README.md';
$per_row = 5;
$files   = glob( 'emoji/*.{png,gif,jpg,jpeg}', GLOB_BRACE );
$listing = [];
sort( $files );
if ( count( $files ) < 1 ) {
	die( 'No images to continue with.' );
}
/**
 * Return the final path component of a file path.
 */
function get_basename( string $file ): string {
	$parts = explode( DIRECTORY_SEPARATOR, $file );
	return end( $parts );
}
foreach ( $files as $file ) {
	$first = get_basename( $file );
	$first = str_replace( 'emoji/', '', $first );
	$first = trim( $first[0] );
	// Bucket all non-letter, non-colon starters under a single header.
	if ( preg_match( '/([^a-zA-Z:])/', $first ) ) {
		$first = '\[^a-zA-Z:\]';
	}
	if ( ! array_key_exists( $first, $listing ) ) {
		$listing[ $first ] = [];
	}
	$listing[ $first ][] = $file;
}
$contents  = "# Emotes\n\n";
$contents .= sprintf(
	"Listing of %d emojis last refreshed: %s",
	count( $files ),
	date( 'c' )
) . "\n\n";
$contents .= "<!-- markdownlint-disable-file MD033 -->\n";
foreach ( $listing as $header => $icons ) {
	$contents .= sprintf( "\n## %s\n\n", $header );
	// Derive the column count from $per_row instead of hard-coding 5, so
	// the CSS grid stays in sync if $per_row is changed.
	$contents .= '<div style="text-align: center;display:grid;grid-template-columns: repeat(' . $per_row . ', 1fr);grid-template-rows: minmax(70px, auto);">' . "\n";
	foreach ( $icons as $icon ) {
		// pathinfo() keeps the full stem for multi-dot names ("a.b.png" -> "a.b"),
		// where explode('.', ..., 2) would truncate it to "a".
		$name   = pathinfo( get_basename( $icon ), PATHINFO_FILENAME );
		$format = '<div style=\'border:1px solid #eee;padding:.5rem\'>'
			. '<img width=\'30\' src="%1$s" alt="%1$s"><br>'
			. '<kbd style=\'display:inline-block;max-width: 15vw;white-space: nowrap;overflow:auto\'>%2$s</kbd></div>';
		$contents .= sprintf( $format, $icon, $name ) . "\n";
	}
	$contents .= "</div>\n";
}
file_put_contents( $output, $contents );

View File

@@ -1,228 +0,0 @@
#!/usr/bin/env python3
"""Generate README.md and index.html with emoji listings."""
import html
import re
from collections import defaultdict
from datetime import datetime, timezone
from pathlib import Path
from urllib.parse import quote
# Number of emoji cells per table row in README.md.
PER_ROW = 10
# Directory scanned for emoji image files.
EMOJI_DIR = Path("emoji")
# File extensions treated as emoji images (matched case-insensitively).
EXTENSIONS = (".png", ".gif", ".jpg", ".jpeg")
def generate_readme(files: list[Path]) -> None:
    """Write README.md containing HTML tables of all emoji images.

    Images are bucketed by the lowercased first character of their
    filename; names not starting with a-z share one escaped bucket that
    sorts ahead of the alphabetical headers.
    """
    groups = defaultdict(list)
    for path in files:
        key = path.name[0].lower()
        if not re.match(r"[a-z]", key):
            key = r"\[^a-zA-Z:\]"
        groups[key].append(path)

    cell_width = f"{100 // PER_ROW}%"
    parts = ["# Emotes\n\n"]
    # The escaped bucket starts with a backslash, so `not k.startswith("\\")`
    # is False for it and it sorts before every letter header.
    for header in sorted(groups, key=lambda k: (not k.startswith("\\"), k)):
        parts.append(f"## {header}\n\n")
        parts.append('<table style="text-align: center;width: 100%">\n')
        icons = groups[header]
        for start in range(0, len(icons), PER_ROW):
            parts.append("<tr>\n")
            for icon in icons[start : start + PER_ROW]:
                # src is percent-encoded for URLs; alt keeps the raw path.
                src = f"emoji/{quote(icon.name)}"
                parts.append(
                    f"<td style='width: {cell_width}'>"
                    f"<img width='30' src=\"{src}\" "
                    f'alt="emoji/{icon.name}" title=":{icon.stem}:"></td>\n'
                )
            parts.append("</tr>\n")
        parts.append("</table>\n\n")
    parts.append(f"\n\n Generated: {datetime.now(timezone.utc).isoformat()}")
    Path("README.md").write_text("".join(parts), encoding="utf-8")
    print(f"Generated README.md with {len(files)} emojis")
def generate_html(files: list[Path]) -> None:
    """Generate index.html with searchable emoji grid grouped alphabetically.

    The page is a self-contained dark-theme SPA: inline CSS, inline JS with a
    debounced search box that filters tiles by filename stem.
    """
    # Group files by first character; anything outside a-z shares the "#" bucket.
    listing = defaultdict(list)
    for file in files:
        first_char = file.name[0].lower()
        if not re.match(r"[a-z]", first_char):
            first_char = "#"
        listing[first_char].append(file)
    # Build grouped HTML; the "#" bucket sorts first via the key tuple.
    sections = []
    for header in sorted(listing.keys(), key=lambda x: (x != "#", x)):
        display_header = "0-9 / Special" if header == "#" else header.upper()
        emoji_items = []
        for file in listing[header]:
            name = file.stem
            # Percent-encode the path for the src URL; escape the stem for HTML.
            encoded_path = f"emoji/{quote(file.name)}"
            escaped_name = html.escape(name)
            emoji_items.append(
                f' <div class="emoji" data-keyword="{escaped_name}">'
                f'<img src="{encoded_path}" alt="{escaped_name}" title=":{escaped_name}:"></div>'
            )
        sections.append(
            f' <section data-group="{html.escape(header)}">\n'
            f" <h2>{display_header}</h2>\n"
            f' <div class="grid">\n{chr(10).join(emoji_items)}\n </div>\n'
            f" </section>"
        )
    # Full page template; doubled braces escape literal { } inside the f-string.
    contents = f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Emotes</title>
<style>
* {{ box-sizing: border-box; }}
body {{
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
margin: 0;
padding: 20px;
background: #1a1a1a;
color: #fff;
}}
#search {{
width: 100%;
max-width: 400px;
padding: 12px 16px;
font-size: 16px;
border: 2px solid #333;
border-radius: 8px;
background: #2a2a2a;
color: #fff;
margin-bottom: 20px;
}}
#search:focus {{
outline: none;
border-color: #666;
}}
#search::placeholder {{
color: #888;
}}
section {{
margin-bottom: 24px;
}}
section.hidden {{
display: none;
}}
h2 {{
font-size: 18px;
font-weight: 600;
margin: 0 0 12px 0;
color: #ccc;
}}
.grid {{
display: grid;
grid-template-columns: repeat(auto-fill, minmax(50px, 1fr));
gap: 8px;
}}
.emoji {{
display: flex;
align-items: center;
justify-content: center;
padding: 8px;
background: #2a2a2a;
border-radius: 6px;
transition: background 0.15s;
}}
.emoji:hover {{
background: #3a3a3a;
}}
.emoji img {{
width: 32px;
height: 32px;
object-fit: contain;
}}
.emoji.hidden {{
display: none;
}}
#count {{
color: #888;
font-size: 14px;
margin-bottom: 12px;
}}
h1 {{
margin: 0 0 20px 0;
font-size: 24px;
}}
h1 a {{
color: #fff;
text-decoration: none;
}}
h1 a:hover {{
text-decoration: underline;
}}
</style>
</head>
<body>
<h1><a href="https://github.com/ivuorinen/emoji">ivuorinen/emoji</a></h1>
<input type="text" id="search" placeholder="Search emojis..." autofocus>
<div id="count">{len(files)} emojis</div>
<div id="content">
{chr(10).join(sections)}
</div>
<script>
let timeout;
const search = document.getElementById('search');
const emojis = document.querySelectorAll('.emoji');
const sections = document.querySelectorAll('section');
const count = document.getElementById('count');
const total = emojis.length;
search.addEventListener('input', function(e) {{
clearTimeout(timeout);
timeout = setTimeout(() => {{
const query = e.target.value.toLowerCase();
let visible = 0;
emojis.forEach(el => {{
const match = el.dataset.keyword.toLowerCase().includes(query);
el.classList.toggle('hidden', !match);
if (match) visible++;
}});
sections.forEach(sec => {{
const hasVisible = sec.querySelector('.emoji:not(.hidden)');
sec.classList.toggle('hidden', !hasVisible);
}});
count.textContent = query ? visible + ' of ' + total + ' emojis' : total + ' emojis';
}}, 150);
}});
</script>
</body>
</html>
"""
    Path("index.html").write_text(contents, encoding="utf-8")
    print(f"Generated index.html with {len(files)} emojis")
def main():
    """Collect emoji image paths and regenerate both listing files."""
    images = sorted(
        path for path in EMOJI_DIR.iterdir() if path.suffix.lower() in EXTENSIONS
    )
    if not images:
        raise SystemExit("No images to continue with.")
    generate_readme(images)
    generate_html(images)


if __name__ == "__main__":
    main()

374
dedup.py
View File

@@ -1,374 +0,0 @@
#!/usr/bin/env python3
"""Find and remove duplicate emoji files using perceptual hashing."""
import argparse
import hashlib
from pathlib import Path
from dataclasses import dataclass
import imagehash
from PIL import Image
EXTENSIONS = (".png", ".gif", ".jpg", ".jpeg")
# Number of hash algorithms that must agree for images to be considered similar
MIN_HASH_AGREEMENT = 4
# Maximum file size difference ratio for duplicates (e.g., 0.05 = 5% difference allowed)
MAX_SIZE_DIFF_RATIO = 0.02
@dataclass
class ImageInfo:
    """Container for image metadata and hashes."""

    phash: imagehash.ImageHash
    ahash: imagehash.ImageHash
    dhash: imagehash.ImageHash
    colorhash: imagehash.ImageHash
    width: int
    height: int
    n_frames: int  # 1 for static images
    md5: str  # File content hash for exact duplicate detection

    def _has_degenerate_hash(self) -> bool:
        """Check if this image has degenerate (all-zero) hashes, indicating mostly transparent content."""
        zero_hash = "0000000000000000"
        # If 3+ hashes are all zeros, the image is likely mostly transparent
        # NOTE(review): only pHash/aHash/dHash are inspected here, so ">= 3"
        # in practice means all three of them are zero (colorHash is ignored).
        zero_count = sum(1 for h in [str(self.phash), str(self.ahash), str(self.dhash)] if h == zero_hash)
        return zero_count >= 3

    def is_candidate(self, other: "ImageInfo", threshold: int) -> tuple[bool, int, int]:
        """
        Check if two images are candidate duplicates based on metadata and hashes.
        Returns (is_candidate, agreements, total_distance).
        This is a fast pre-filter. GIFs require additional frame verification.
        """
        # Dimensions must match exactly
        if self.width != other.width or self.height != other.height:
            return False, 0, 999
        # Frame count must match for animated images
        if self.n_frames != other.n_frames:
            return False, 0, 999
        # Calculate perceptual hash distances (one Hamming distance per algorithm)
        distances = [
            self.phash - other.phash,
            self.ahash - other.ahash,
            self.dhash - other.dhash,
            self.colorhash - other.colorhash,
        ]
        total_distance = sum(distances)
        agreements = sum(1 for d in distances if d <= threshold)
        # For static images: detect re-compressed/re-exported duplicates
        # Require identical structure AND color, with small perceptual variance:
        # - aHash=0 AND dHash=0 AND colorHash=0 AND pHash <= 10
        # - OR all 4 hashes match exactly (total_distance = 0)
        if self.n_frames == 1:
            phash_dist = self.phash - other.phash
            ahash_dist = self.ahash - other.ahash
            dhash_dist = self.dhash - other.dhash
            chash_dist = self.colorhash - other.colorhash
            # Identical structure + color, small perceptual variance = re-compressed image
            if ahash_dist == 0 and dhash_dist == 0 and chash_dist == 0 and phash_dist <= 10:
                return True, agreements, total_distance
            # All hashes match exactly
            if total_distance == 0:
                return True, agreements, total_distance
            return False, agreements, total_distance
        # For animated images: require all 4 hashes to agree (will be verified by frame check)
        return agreements >= MIN_HASH_AGREEMENT, agreements, total_distance

    def is_animated(self) -> bool:
        """Check if this is an animated image (multiple frames)."""
        return self.n_frames > 1
class UnionFind:
    """Disjoint-set structure used to cluster similar images."""

    def __init__(self):
        # Maps each element to its parent; roots map to themselves.
        self.parent = {}

    def find(self, x):
        """Return the root of x's set, creating a singleton on first sight.

        Applies path compression: the node is re-pointed at its root.
        """
        root = self.parent.setdefault(x, x)
        if root != x:
            root = self.find(root)
            self.parent[x] = root
        return root

    def union(self, x, y):
        """Merge the sets containing x and y (no-op if already joined)."""
        root_x = self.find(x)
        root_y = self.find(y)
        if root_x != root_y:
            self.parent[root_x] = root_y
def _compute_hashes(img: Image.Image) -> tuple[imagehash.ImageHash, ...]:
    """Compute all hash types for a single image/frame."""
    # Normalise to RGBA so transparency hashes consistently across formats.
    rgba = img if img.mode == "RGBA" else img.convert("RGBA")
    hashers = (imagehash.phash, imagehash.average_hash, imagehash.dhash, imagehash.colorhash)
    return tuple(hash_fn(rgba) for hash_fn in hashers)
def _compute_md5(path: Path) -> str:
"""Compute MD5 hash of file contents."""
md5 = hashlib.md5()
with open(path, "rb") as f:
for chunk in iter(lambda: f.read(8192), b""):
md5.update(chunk)
return md5.hexdigest()
def _get_gif_frame_info(path: Path) -> list[tuple[str, int]] | None:
    """Return (phash_string, duration_ms) for every frame of a GIF.

    Returns None for single-frame images or anything PIL cannot open.
    """
    try:
        with Image.open(path) as img:
            frame_count = getattr(img, "n_frames", 1)
            if frame_count <= 1:
                return None
            frames = []
            for index in range(frame_count):
                img.seek(index)
                frame = img.copy()
                if frame.mode != "RGBA":
                    frame = frame.convert("RGBA")
                # After seek(), img.info carries this frame's duration.
                frames.append((str(imagehash.phash(frame)), img.info.get("duration", 0)))
            return frames
    except Exception:
        # Deliberate best-effort: unreadable files are treated as non-GIFs.
        return None
def _gifs_are_identical(path1: Path, path2: Path) -> bool:
    """Frame-by-frame comparison of two GIFs, including per-frame timing.

    True only when frame counts, frame hashes, and durations all match.
    """
    frames_a = _get_gif_frame_info(path1)
    frames_b = _get_gif_frame_info(path2)
    if frames_a is None or frames_b is None:
        # Not multi-frame GIFs: fall back to exact content comparison via MD5.
        return _compute_md5(path1) == _compute_md5(path2)
    if len(frames_a) != len(frames_b):
        return False
    return frames_a == frames_b
def compute_image_info(path: Path) -> ImageInfo | None:
    """Build an ImageInfo (metadata plus all hashes) for one file.

    Animated images are hashed on their middle frame to dodge blank or
    fade-in first frames. Returns None when the file cannot be processed.
    """
    try:
        md5 = _compute_md5(path)
        with Image.open(path) as img:
            width, height = img.size
            n_frames = getattr(img, "n_frames", 1)
            if getattr(img, "is_animated", False):
                try:
                    img.seek(n_frames // 2)
                    hashes = _compute_hashes(img.copy())
                except EOFError:
                    # Seek overshot: fall back to the first frame.
                    img.seek(0)
                    hashes = _compute_hashes(img)
            else:
                hashes = _compute_hashes(img)
            phash, ahash, dhash, colorhash = hashes
            return ImageInfo(
                phash=phash,
                ahash=ahash,
                dhash=dhash,
                colorhash=colorhash,
                width=width,
                height=height,
                n_frames=n_frames,
                md5=md5,
            )
    except Exception as e:
        print(f" Warning: Could not process {path.name}: {e}")
        return None
def _files_size_similar(path1: Path, path2: Path) -> bool:
    """Check if two files have similar sizes (within MAX_SIZE_DIFF_RATIO)."""
    size_a = path1.stat().st_size
    size_b = path2.stat().st_size
    # Guard against division by zero when either file is empty.
    if 0 in (size_a, size_b):
        return size_a == size_b
    return abs(size_a - size_b) / max(size_a, size_b) <= MAX_SIZE_DIFF_RATIO
def _verify_duplicate_pair(path_i: Path, info_i: ImageInfo, path_j: Path, info_j: ImageInfo, threshold: int) -> bool:
    """
    Verify if two candidate images are true duplicates.

    Animated pairs get a strict frame-and-timing comparison; static pairs
    are accepted outright because the perceptual pre-filter already
    vetted them (handles re-compressed/re-exported duplicates).
    """
    if not (info_i.is_animated() and info_j.is_animated()):
        return True
    return _gifs_are_identical(path_i, path_j)
def find_similar_groups(files: list[Path], threshold: int) -> list[list[tuple[Path, ImageInfo]]]:
    """Find groups of similar images using multi-hash consensus and union-find."""
    # Hash every readable file, dropping degenerate (mostly transparent)
    # images whose all-zero hashes cannot be compared meaningfully.
    images: list[tuple[Path, ImageInfo]] = []
    for candidate in files:
        info = compute_image_info(candidate)
        if info is not None and not info._has_degenerate_hash():
            images.append((candidate, info))
    if not images:
        return []
    # Pairwise candidate scan; union-find merges transitive matches into clusters.
    uf = UnionFind()
    for i, (path_i, info_i) in enumerate(images):
        uf.find(i)  # Ensure a singleton cluster exists even with no matches.
        for j in range(i + 1, len(images)):
            path_j, info_j = images[j]
            is_candidate, _, _ = info_i.is_candidate(info_j, threshold)
            if not is_candidate:
                continue
            # Animated candidates must also be close in file size; static
            # images may legitimately differ (re-compression), so no size check.
            if info_i.is_animated() and not _files_size_similar(path_i, path_j):
                continue
            # Final verification: GIFs compared frame-by-frame, static pass through.
            if _verify_duplicate_pair(path_i, info_i, path_j, info_j, threshold):
                uf.union(i, j)
    # Bucket by cluster root and keep only groups with actual duplicates.
    clusters: dict[int, list[tuple[Path, ImageInfo]]] = {}
    for index, entry in enumerate(images):
        clusters.setdefault(uf.find(index), []).append(entry)
    return [group for group in clusters.values() if len(group) > 1]
def deduplicate(groups: list[list[tuple[Path, ImageInfo]]], dry_run: bool, threshold: int) -> tuple[int, int]:
"""Remove duplicates, keeping first alphabetically. Returns (groups, removed)."""
total_removed = 0
for group in groups:
# Sort by filename alphabetically
sorted_group = sorted(group, key=lambda x: x[0].name.lower())
keep_path, keep_info = sorted_group[0]
remove = sorted_group[1:]
# Calculate agreement info for display
agreements_info = [keep_info.is_candidate(info, threshold) for _, info in remove]
frames_str = f", {keep_info.n_frames} frames" if keep_info.is_animated() else ""
print(f"\nSimilar group ({len(group)} files, {keep_info.width}x{keep_info.height}{frames_str}):")
print(f" KEEP: {keep_path.name}")
for (path, info), (_, agreements, total_dist) in zip(remove, agreements_info):
action = "WOULD DELETE" if dry_run else "DELETE"
print(f" {action}: {path.name} (agreements: {agreements}/4, dist: {total_dist})")
if not dry_run:
path.unlink()
total_removed += 1
if dry_run:
return len(groups), sum(len(g) - 1 for g in groups)
return len(groups), total_removed
def main():
    """CLI entry point: scan a directory and report/remove duplicate images."""
    parser = argparse.ArgumentParser(description="Find and remove duplicate emoji files using perceptual hashing.")
    parser.add_argument(
        "--threshold",
        type=int,
        default=0,
        help="Similarity threshold (0=exact, default=0)",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Show duplicates without deleting",
    )
    parser.add_argument(
        "--dir",
        type=Path,
        default=Path("emoji"),
        help="Directory to scan (default: emoji/)",
    )
    args = parser.parse_args()

    target = args.dir
    if not target.exists():
        print(f"Error: Directory '{target}' does not exist.")
        return
    candidates = [entry for entry in target.iterdir() if entry.suffix.lower() in EXTENSIONS]
    if not candidates:
        print(f"No image files found in {target}/ folder.")
        return

    print(f"Scanning {len(candidates)} files (threshold: {args.threshold})...")
    if args.dry_run:
        print("(dry-run mode - no files will be deleted)")
    groups = find_similar_groups(candidates, args.threshold)
    if not groups:
        print("\nNo similar images found.")
        return

    group_count, removed = deduplicate(groups, args.dry_run, args.threshold)
    print("\n--- Summary ---")
    print(f"Files scanned: {len(candidates)}")
    print(f"Similar groups: {group_count}")
    # Label differs between preview and destructive runs.
    if args.dry_run:
        print(f"Files to remove: {removed}")
    else:
        print(f"Files removed: {removed}")


if __name__ == "__main__":
    main()

Binary file not shown.

Before

Width:  |  Height:  |  Size: 945 B

After

Width:  |  Height:  |  Size: 1.6 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.6 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.8 KiB

After

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 7.2 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.0 KiB

After

Width:  |  Height:  |  Size: 5.7 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.0 KiB

After

Width:  |  Height:  |  Size: 21 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.1 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.9 KiB

After

Width:  |  Height:  |  Size: 2.1 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 7.2 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.2 KiB

After

Width:  |  Height:  |  Size: 9.6 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.4 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 23 KiB

After

Width:  |  Height:  |  Size: 25 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 4.5 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 4.1 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.0 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.8 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 466 B

After

Width:  |  Height:  |  Size: 1.2 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 51 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 26 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 62 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.4 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.7 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.1 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 29 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 18 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 4.2 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.3 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 53 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 19 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 48 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.6 KiB

After

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 228 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.3 KiB

After

Width:  |  Height:  |  Size: 3.9 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 903 B

After

Width:  |  Height:  |  Size: 2.7 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.1 KiB

After

Width:  |  Height:  |  Size: 6.1 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.0 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.8 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 31 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.6 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.6 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.8 KiB

After

Width:  |  Height:  |  Size: 38 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.1 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.3 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 16 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 63 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.6 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.1 KiB

After

Width:  |  Height:  |  Size: 5.2 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.2 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.9 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 7.7 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 9.2 KiB

After

Width:  |  Height:  |  Size: 32 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.4 KiB

After

Width:  |  Height:  |  Size: 7.4 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.8 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.7 KiB

After

Width:  |  Height:  |  Size: 3.7 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.0 KiB

After

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 9.0 KiB

After

Width:  |  Height:  |  Size: 28 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.3 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 7.1 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 462 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 7.4 KiB

After

Width:  |  Height:  |  Size: 8.7 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 18 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 4.1 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.6 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.6 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.2 KiB

After

Width:  |  Height:  |  Size: 3.9 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 8.5 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.0 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 4.7 KiB

After

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 317 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 646 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 616 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 4.5 KiB

After

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 7.3 KiB

After

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 4.7 KiB

After

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.8 KiB

After

Width:  |  Height:  |  Size: 13 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 4.5 KiB

After

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.2 KiB

After

Width:  |  Height:  |  Size: 8.2 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.4 KiB

After

Width:  |  Height:  |  Size: 9.1 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.2 KiB

After

Width:  |  Height:  |  Size: 8.4 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.8 KiB

After

Width:  |  Height:  |  Size: 13 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.8 KiB

After

Width:  |  Height:  |  Size: 10 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.1 KiB

After

Width:  |  Height:  |  Size: 11 KiB

Some files were not shown because too many files have changed in this diff Show More