From 2001b9ac2a3df5302c8071af951ff7a8cd847ac7 Mon Sep 17 00:00:00 2001 From: Ismo Vuorinen Date: Mon, 2 Mar 2026 02:57:45 +0200 Subject: [PATCH] chore: add ruff formatter, .gitignore, and uv convention (#40) * chore: add ruff formatter, .gitignore, and uv convention - Add ruff as dev dependency with basic config - Add PostToolUse hook to auto-run ruff on .py edits - Create .gitignore to exclude __pycache__ and settings.local.json - Document uv run convention in CLAUDE.md - Add CLAUDE.md, .claude/settings.json, and skills - Apply ruff formatting to existing Python files * fix: address CR feedback on hooks and uv run convention - Fix hooks to read file_path from stdin JSON via jq instead of nonexistent $CLAUDE_FILE env var - Update CLAUDE.md commands to use uv run python3 consistently - Update skills to use uv run for python/dedup commands --- .claude/settings.json | 71 ++++++++++++++++++++++++++ .claude/skills/dedup-check/SKILL.md | 10 ++++ .claude/skills/regen-listings/SKILL.md | 11 ++++ .gitignore | 2 + CLAUDE.md | 52 +++++++++++++++++++ create_listing.py | 17 +++--- dedup.py | 19 ++----- pyproject.toml | 6 ++- tests/test_create_listing.py | 2 +- uv.lock | 31 ++++++++++- 10 files changed, 194 insertions(+), 27 deletions(-) create mode 100644 .claude/settings.json create mode 100644 .claude/skills/dedup-check/SKILL.md create mode 100644 .claude/skills/regen-listings/SKILL.md create mode 100644 .gitignore create mode 100644 CLAUDE.md diff --git a/.claude/settings.json b/.claude/settings.json new file mode 100644 index 0000000..0c12947 --- /dev/null +++ b/.claude/settings.json @@ -0,0 +1,71 @@ +{ + "hooks": { + "PreToolUse": [ + { + "matcher": "Edit|Write", + "hooks": [ + { + "type": "command", + "command": "file=$(jq -r '.tool_input.file_path // empty'); if [[ \"$file\" == *README.md || \"$file\" == *index.html ]]; then echo 'BLOCKED: README.md and index.html are generated artifacts. Edit create_listing.py instead, then run /regen-listings.' >&2; exit 2; fi" + } + ] + }, + { + "matcher": "Edit|Write", + "hooks": [ + { + "type": "command", + "command": "file=$(jq -r '.tool_input.file_path // empty'); if [[ \"$file\" == */uv.lock || \"$file\" == uv.lock ]]; then echo 'BLOCKED: uv.lock is auto-generated. Modify pyproject.toml and run uv sync instead.' >&2; exit 2; fi" + } + ] + }, + { + "matcher": "Edit|Write", + "hooks": [ + { + "type": "command", + "command": "file=$(jq -r '.tool_input.file_path // empty'); if [[ \"$file\" == *.github/workflows/*.yml || \"$file\" == .github/workflows/*.yml ]]; then echo 'BLOCKED: CI workflows use pinned action SHAs with # version comments for security. Edit workflow files carefully and maintain the SHA-pinning convention.' >&2; exit 2; fi" + } + ] + }, + { + "matcher": "Edit|Write", + "hooks": [ + { + "type": "command", + "command": "file=$(jq -r '.tool_input.file_path // empty'); if [[ \"$file\" == */renovate.json || \"$file\" == renovate.json ]]; then echo 'BLOCKED: renovate.json is rarely edited. Make changes deliberately and confirm with the user first.' >&2; exit 2; fi" + } + ] + }, + { + "matcher": "Edit|Write", + "hooks": [ + { + "type": "command", + "command": "file=$(jq -r '.tool_input.file_path // empty'); if [[ \"$file\" == emoji/* || \"$file\" == */emoji/* ]]; then echo 'BLOCKED: Emoji image files should not be written by Claude. Manage images manually or use /dedup-check.' >&2; exit 2; fi" + } + ] + } + ], + "PostToolUse": [ + { + "matcher": "Edit|Write", + "hooks": [ + { + "type": "command", + "command": "file=$(jq -r '.tool_input.file_path // empty'); if [[ \"$file\" == *.py ]]; then uv run ruff check --fix \"$file\" 2>&1 | tail -5 && uv run ruff format \"$file\" 2>&1 | tail -3; fi" + } + ] + }, + { + "matcher": "Edit|Write", + "hooks": [ + { + "type": "command", + "command": "file=$(jq -r '.tool_input.file_path // empty'); if [[ \"$file\" == *.py ]]; then uv run pytest --tb=short -q 2>&1 | tail -5; fi" + } + ] + } + ] + } +} diff --git a/.claude/skills/dedup-check/SKILL.md b/.claude/skills/dedup-check/SKILL.md new file mode 100644 index 0000000..f751520 --- /dev/null +++ b/.claude/skills/dedup-check/SKILL.md @@ -0,0 +1,10 @@ +--- +name: dedup-check +description: Run dedup in dry-run mode and report duplicate groups found +disable-model-invocation: true +--- + +## Steps + +1. Run `uv run dedup --dry-run` from the project root +2. Summarize the output — report how many duplicate groups were found and which files are involved diff --git a/.claude/skills/regen-listings/SKILL.md b/.claude/skills/regen-listings/SKILL.md new file mode 100644 index 0000000..ca37c7d --- /dev/null +++ b/.claude/skills/regen-listings/SKILL.md @@ -0,0 +1,11 @@ +--- +name: regen-listings +description: Regenerate README.md and index.html from emoji/ contents and verify output +disable-model-invocation: true +--- + +## Steps + +1. Run `uv run python3 create_listing.py` from the project root +2. Confirm both `README.md` and `index.html` exist and are non-empty +3. Report the file sizes of both generated files diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6658aed --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +__pycache__/ +.claude/settings.local.json diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..7ffad0d --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,52 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +Personal emoji/emote collection for chat apps (Slack, Discord, etc.). Contains 3000+ custom emoji images in `emoji/` with Python tooling for maintenance: listing generation and perceptual deduplication. + +## Commands + +```bash +# Install dependencies (uses uv package manager) +uv sync + +# Regenerate README.md and index.html from emoji/ contents +uv run python3 create_listing.py + +# Find duplicate emojis (dry run) +uv run python3 dedup.py --dry-run + +# Find duplicates with custom threshold (0=exact match, default) +uv run python3 dedup.py --threshold 5 --dry-run + +# Actually remove duplicates +uv run python3 dedup.py --dir emoji/ + +# Or via uv entry point +uv run dedup --dry-run + +# Run tests +uv run pytest + +# Run tests with verbose output +uv run pytest -v +``` + +## Architecture + +Two standalone Python scripts, no shared modules: + +- **`create_listing.py`** — Generates `README.md` (HTML tables) and `index.html` (searchable dark-theme SPA) from all images in `emoji/`. No dependencies beyond stdlib. Both output files are auto-generated and committed by CI on push. + +- **`dedup.py`** — Finds and removes duplicate images using multi-algorithm perceptual hashing (pHash, aHash, dHash, colorHash). Uses Union-Find clustering. Animated GIFs get extra frame-by-frame verification including timing. Keeps alphabetically-first filename per duplicate group. + +## Key Conventions + +- Python >=3.11 required; dependencies managed via `uv` with `uv.lock` +- Image formats: `.png`, `.gif`, `.jpg`, `.jpeg` +- `README.md` and `index.html` are generated artifacts — edit the scripts, not the outputs +- CI uses pinned action SHAs (not tags) for security +- Dependency updates managed by Renovate bot +- Always use `uv run` to execute Python commands (e.g. `uv run pytest`, `uv run ruff`, `uv run python3 script.py`) to ensure the correct virtualenv and dependencies are used diff --git a/create_listing.py b/create_listing.py index 720c414..ccdf2c5 100644 --- a/create_listing.py +++ b/create_listing.py @@ -31,7 +31,7 @@ def generate_readme(files: list[Path]) -> None: contents += '\n' for i in range(0, len(icons), PER_ROW): - chunk = icons[i:i + PER_ROW] + chunk = icons[i : i + PER_ROW] contents += "\n" for icon in chunk: @@ -42,7 +42,7 @@ def generate_readme(files: list[Path]) -> None: contents += ( f"\n" + f'alt="{display_path}" title=":{name}:">\n' ) contents += "\n" @@ -80,12 +80,12 @@ def generate_html(files: list[Path]) -> None: ) sections.append( f'
\n' - f'

{display_header}

\n' + f"

{display_header}

\n" f'
\n{chr(10).join(emoji_items)}\n
\n' - f'
' + f" " ) - contents = f''' + contents = f""" @@ -208,17 +208,14 @@ def generate_html(files: list[Path]) -> None: -''' +""" Path("index.html").write_text(contents, encoding="utf-8") print(f"Generated index.html with {len(files)} emojis") def main(): - files = sorted( - f for f in EMOJI_DIR.iterdir() - if f.suffix.lower() in EXTENSIONS - ) + files = sorted(f for f in EMOJI_DIR.iterdir() if f.suffix.lower() in EXTENSIONS) if not files: raise SystemExit("No images to continue with.") diff --git a/dedup.py b/dedup.py index e0b33a9..7a15e6f 100644 --- a/dedup.py +++ b/dedup.py @@ -225,9 +225,7 @@ def _files_size_similar(path1: Path, path2: Path) -> bool: return ratio <= MAX_SIZE_DIFF_RATIO -def _verify_duplicate_pair( - path_i: Path, info_i: ImageInfo, path_j: Path, info_j: ImageInfo, threshold: int -) -> bool: +def _verify_duplicate_pair(path_i: Path, info_i: ImageInfo, path_j: Path, info_j: ImageInfo, threshold: int) -> bool: """ Verify if two candidate images are true duplicates. For animated GIFs, compares frames and timing. For static images, perceptual match is sufficient. @@ -241,9 +239,7 @@ def _verify_duplicate_pair( return True -def find_similar_groups( - files: list[Path], threshold: int -) -> list[list[tuple[Path, ImageInfo]]]: +def find_similar_groups(files: list[Path], threshold: int) -> list[list[tuple[Path, ImageInfo]]]: """Find groups of similar images using multi-hash consensus and union-find.""" # Compute image info for all files images: list[tuple[Path, ImageInfo]] = [] @@ -292,9 +288,7 @@ def find_similar_groups( return [group for group in clusters.values() if len(group) > 1] -def deduplicate( - groups: list[list[tuple[Path, ImageInfo]]], dry_run: bool, threshold: int -) -> tuple[int, int]: +def deduplicate(groups: list[list[tuple[Path, ImageInfo]]], dry_run: bool, threshold: int) -> tuple[int, int]: """Remove duplicates, keeping first alphabetically. Returns (groups, removed).""" total_removed = 0 @@ -306,7 +300,6 @@ def deduplicate( # Calculate agreement info for display agreements_info = [keep_info.is_candidate(info, threshold) for _, info in remove] - min_agreements = min(a for _, a, _ in agreements_info) frames_str = f", {keep_info.n_frames} frames" if keep_info.is_animated() else "" print(f"\nSimilar group ({len(group)} files, {keep_info.width}x{keep_info.height}{frames_str}):") @@ -325,9 +318,7 @@ def deduplicate( def main(): - parser = argparse.ArgumentParser( - description="Find and remove duplicate emoji files using perceptual hashing." - ) + parser = argparse.ArgumentParser(description="Find and remove duplicate emoji files using perceptual hashing.") parser.add_argument( "--threshold", type=int, @@ -370,7 +361,7 @@ def main(): group_count, removed = deduplicate(groups, args.dry_run, args.threshold) - print(f"\n--- Summary ---") + print("\n--- Summary ---") print(f"Files scanned: {len(files)}") print(f"Similar groups: {group_count}") if args.dry_run: diff --git a/pyproject.toml b/pyproject.toml index d88a31c..f0f4401 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,11 @@ dependencies = [ ] [dependency-groups] -dev = ["pytest>=8.0"] +dev = ["pytest>=8.0", "ruff>=0.11"] + +[tool.ruff] +target-version = "py311" +line-length = 120 [project.scripts] dedup = "dedup:main" diff --git a/tests/test_create_listing.py b/tests/test_create_listing.py index ea41e81..3a41508 100644 --- a/tests/test_create_listing.py +++ b/tests/test_create_listing.py @@ -133,7 +133,7 @@ class TestGenerateReadme: f = make_png(tmp_path, "smile.png") create_listing.generate_readme([f]) content = (tmp_path / "README.md").read_text() - assert ':smile:' in content + assert ":smile:" in content class TestGenerateHtml: diff --git a/uv.lock b/uv.lock index 3e8543a..fea46f5 100644 --- a/uv.lock +++ b/uv.lock @@ -23,6 +23,7 @@ dependencies = [ [package.dev-dependencies] dev = [ { name = "pytest" }, + { name = "ruff" }, ] [package.metadata] @@ -32,7 +33,10 @@ requires-dist = [ ] [package.metadata.requires-dev] -dev = [{ name = "pytest", specifier = ">=8.0" }] +dev = [ + { name = "pytest", specifier = ">=8.0" }, + { name = "ruff", specifier = ">=0.11" }, +] [[package]] name = "imagehash" @@ -326,6 +330,31 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/68/d2/a8065103f5e2e613b916489e6c85af6402a1ec64f346d1429e2d32cb8d03/pywavelets-1.9.0-cp314-cp314t-win_amd64.whl", hash = "sha256:3b6ff6ba4f625d8c955f68c2c39b0a913776d406ab31ee4057f34ad4019fb33b", size = 4306793, upload-time = "2025-08-04T16:20:02.934Z" }, ] +[[package]] +name = "ruff" +version = "0.15.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/da/31/d6e536cdebb6568ae75a7f00e4b4819ae0ad2640c3604c305a0428680b0c/ruff-0.15.4.tar.gz", hash = "sha256:3412195319e42d634470cc97aa9803d07e9d5c9223b99bcb1518f0c725f26ae1", size = 4569550, upload-time = "2026-02-26T20:04:14.959Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f2/82/c11a03cfec3a4d26a0ea1e571f0f44be5993b923f905eeddfc397c13d360/ruff-0.15.4-py3-none-linux_armv6l.whl", hash = "sha256:a1810931c41606c686bae8b5b9a8072adac2f611bb433c0ba476acba17a332e0", size = 10453333, upload-time = "2026-02-26T20:04:20.093Z" }, + { url = "https://files.pythonhosted.org/packages/ce/5d/6a1f271f6e31dffb31855996493641edc3eef8077b883eaf007a2f1c2976/ruff-0.15.4-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:5a1632c66672b8b4d3e1d1782859e98d6e0b4e70829530666644286600a33992", size = 10853356, upload-time = "2026-02-26T20:04:05.808Z" }, + { url = "https://files.pythonhosted.org/packages/b1/d8/0fab9f8842b83b1a9c2bf81b85063f65e93fb512e60effa95b0be49bfc54/ruff-0.15.4-py3-none-macosx_11_0_arm64.whl", hash = "sha256:a4386ba2cd6c0f4ff75252845906acc7c7c8e1ac567b7bc3d373686ac8c222ba", size = 10187434, upload-time = "2026-02-26T20:03:54.656Z" }, + { url = "https://files.pythonhosted.org/packages/85/cc/cc220fd9394eff5db8d94dec199eec56dd6c9f3651d8869d024867a91030/ruff-0.15.4-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2496488bdfd3732747558b6f95ae427ff066d1fcd054daf75f5a50674411e75", size = 10535456, upload-time = "2026-02-26T20:03:52.738Z" }, + { url = "https://files.pythonhosted.org/packages/fa/0f/bced38fa5cf24373ec767713c8e4cadc90247f3863605fb030e597878661/ruff-0.15.4-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3f1c4893841ff2d54cbda1b2860fa3260173df5ddd7b95d370186f8a5e66a4ac", size = 10287772, upload-time = "2026-02-26T20:04:08.138Z" }, + { url = "https://files.pythonhosted.org/packages/2b/90/58a1802d84fed15f8f281925b21ab3cecd813bde52a8ca033a4de8ab0e7a/ruff-0.15.4-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:820b8766bd65503b6c30aaa6331e8ef3a6e564f7999c844e9a547c40179e440a", size = 11049051, upload-time = "2026-02-26T20:04:03.53Z" }, + { url = "https://files.pythonhosted.org/packages/d2/ac/b7ad36703c35f3866584564dc15f12f91cb1a26a897dc2fd13d7cb3ae1af/ruff-0.15.4-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c9fb74bab47139c1751f900f857fa503987253c3ef89129b24ed375e72873e85", size = 11890494, upload-time = "2026-02-26T20:04:10.497Z" }, + { url = "https://files.pythonhosted.org/packages/93/3d/3eb2f47a39a8b0da99faf9c54d3eb24720add1e886a5309d4d1be73a6380/ruff-0.15.4-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f80c98765949c518142b3a50a5db89343aa90f2c2bf7799de9986498ae6176db", size = 11326221, upload-time = "2026-02-26T20:04:12.84Z" }, + { url = "https://files.pythonhosted.org/packages/ff/90/bf134f4c1e5243e62690e09d63c55df948a74084c8ac3e48a88468314da6/ruff-0.15.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:451a2e224151729b3b6c9ffb36aed9091b2996fe4bdbd11f47e27d8f2e8888ec", size = 11168459, upload-time = "2026-02-26T20:04:00.969Z" }, + { url = "https://files.pythonhosted.org/packages/b5/e5/a64d27688789b06b5d55162aafc32059bb8c989c61a5139a36e1368285eb/ruff-0.15.4-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:a8f157f2e583c513c4f5f896163a93198297371f34c04220daf40d133fdd4f7f", size = 11104366, upload-time = "2026-02-26T20:03:48.099Z" }, + { url = "https://files.pythonhosted.org/packages/f1/f6/32d1dcb66a2559763fc3027bdd65836cad9eb09d90f2ed6a63d8e9252b02/ruff-0.15.4-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:917cc68503357021f541e69b35361c99387cdbbf99bd0ea4aa6f28ca99ff5338", size = 10510887, upload-time = "2026-02-26T20:03:45.771Z" }, + { url = "https://files.pythonhosted.org/packages/ff/92/22d1ced50971c5b6433aed166fcef8c9343f567a94cf2b9d9089f6aa80fe/ruff-0.15.4-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:e9737c8161da79fd7cfec19f1e35620375bd8b2a50c3e77fa3d2c16f574105cc", size = 10285939, upload-time = "2026-02-26T20:04:22.42Z" }, + { url = "https://files.pythonhosted.org/packages/e6/f4/7c20aec3143837641a02509a4668fb146a642fd1211846634edc17eb5563/ruff-0.15.4-py3-none-musllinux_1_2_i686.whl", hash = "sha256:291258c917539e18f6ba40482fe31d6f5ac023994ee11d7bdafd716f2aab8a68", size = 10765471, upload-time = "2026-02-26T20:03:58.924Z" }, + { url = "https://files.pythonhosted.org/packages/d0/09/6d2f7586f09a16120aebdff8f64d962d7c4348313c77ebb29c566cefc357/ruff-0.15.4-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:3f83c45911da6f2cd5936c436cf86b9f09f09165f033a99dcf7477e34041cbc3", size = 11263382, upload-time = "2026-02-26T20:04:24.424Z" }, + { url = "https://files.pythonhosted.org/packages/1b/fa/2ef715a1cd329ef47c1a050e10dee91a9054b7ce2fcfdd6a06d139afb7ec/ruff-0.15.4-py3-none-win32.whl", hash = "sha256:65594a2d557d4ee9f02834fcdf0a28daa8b3b9f6cb2cb93846025a36db47ef22", size = 10506664, upload-time = "2026-02-26T20:03:50.56Z" }, + { url = "https://files.pythonhosted.org/packages/d0/a8/c688ef7e29983976820d18710f955751d9f4d4eb69df658af3d006e2ba3e/ruff-0.15.4-py3-none-win_amd64.whl", hash = "sha256:04196ad44f0df220c2ece5b0e959c2f37c777375ec744397d21d15b50a75264f", size = 11651048, upload-time = "2026-02-26T20:04:17.191Z" }, + { url = "https://files.pythonhosted.org/packages/3e/0a/9e1be9035b37448ce2e68c978f0591da94389ade5a5abafa4cf99985d1b2/ruff-0.15.4-py3-none-win_arm64.whl", hash = "sha256:60d5177e8cfc70e51b9c5fad936c634872a74209f934c1e79107d11787ad5453", size = 10966776, upload-time = "2026-02-26T20:03:56.908Z" }, +] + [[package]] name = "scipy" version = "1.17.1"
" f"