From a5222ec8fe0d36f53518f9d399ef695467dc7341 Mon Sep 17 00:00:00 2001 From: Ismo Vuorinen Date: Mon, 2 Mar 2026 01:03:35 +0200 Subject: [PATCH] feat: add pytest unit tests and CI workflow (#38) * feat: add pytest unit tests and CI workflow Add 67 tests covering both create_listing.py and dedup.py with shared Pillow-based image fixtures. Add GitHub Actions workflow to run tests on Python file changes. * fix: address PR review feedback - Use monkeypatch.chdir(tmp_path) so tests write to temp dirs instead of polluting the repo's README.md and index.html - Strengthen unicode filename test to assert URL-encoded form (%C3%A9) - Move hashlib import to module level in test_dedup.py - Remove unused _zero_hash helper and Path import - Prefix unused tuple unpacking variables with underscore * fix: add docstrings and strengthen degenerate hash test - Add docstrings to all test classes, methods, and helper functions to achieve 100% docstring coverage - Strengthen test_skips_degenerate_hashes to assert groups == [] instead of only checking for no-crash * fix: use hardcoded MD5 digests and add fixture validation - Replace hashlib.md5() calls with known digest constants to remove hashlib import from test module - Add input validation to _make_gif fixture for clear error messages on empty colors or mismatched durations length --- .github/workflows/test.yml | 25 ++ pyproject.toml | 3 + tests/__init__.py | 0 tests/conftest.py | 89 +++++++ tests/test_create_listing.py | 256 ++++++++++++++++++ tests/test_dedup.py | 489 +++++++++++++++++++++++++++++++++++ uv.lock | 69 +++++ 7 files changed, 931 insertions(+) create mode 100644 .github/workflows/test.yml create mode 100644 tests/__init__.py create mode 100644 tests/conftest.py create mode 100644 tests/test_create_listing.py create mode 100644 tests/test_dedup.py diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..1ba2c48 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,25 @@ +name: Tests + +on: + push: + paths: + - '**.py' + - 'pyproject.toml' + - 'uv.lock' + pull_request: + paths: + - '**.py' + - 'pyproject.toml' + - 'uv.lock' + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + - uses: astral-sh/setup-uv@5a095e7a2014a4212f075830d4f7277575a9d098 # v7 + - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6 + with: + python-version: '3.14' + - run: uv sync --dev + - run: uv run pytest -v diff --git a/pyproject.toml b/pyproject.toml index d13c565..d88a31c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,5 +8,8 @@ dependencies = [ "Pillow>=10.0", ] +[dependency-groups] +dev = ["pytest>=8.0"] + [project.scripts] dedup = "dedup:main" diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..5d4bf46 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,89 @@ +"""Shared test fixtures for emoji project tests.""" + +from pathlib import Path + +import pytest +from PIL import Image + + +@pytest.fixture +def make_png(): + """Factory fixture: creates a small PNG image and returns its Path.""" + + def _make_png( + directory: Path, + name: str, + color: tuple = (255, 0, 0, 255), + size: tuple[int, int] = (4, 4), + ) -> Path: + img = Image.new("RGBA", size, color) + path = directory / name + img.save(path, "PNG") + return path + + return _make_png + + +@pytest.fixture +def make_gif(): + """Factory fixture: creates an animated GIF with multiple frames and returns its Path.""" + + def _make_gif( + directory: Path, + name: str, + colors: list[tuple], + durations: list[int], + size: tuple[int, int] = (4, 4), + ) -> Path: + if not colors: + raise ValueError("colors must not be empty") + if len(durations) != len(colors): + raise ValueError(f"durations length ({len(durations)}) must match colors length ({len(colors)})") + frames = [Image.new("RGBA", size, c) for c in colors] + path = directory / name + frames[0].save( + path, + save_all=True, + append_images=frames[1:], + duration=durations, + loop=0, + ) + return path + + return _make_gif + + +@pytest.fixture +def make_jpg(): + """Factory fixture: creates a small JPEG image and returns its Path.""" + + def _make_jpg( + directory: Path, + name: str, + color: tuple = (255, 0, 0), + size: tuple[int, int] = (4, 4), + ) -> Path: + img = Image.new("RGB", size, color) + path = directory / name + img.save(path, "JPEG") + return path + + return _make_jpg + + +@pytest.fixture +def emoji_dir(tmp_path, make_png, make_gif, make_jpg): + """Creates a temp directory with several named test images.""" + d = tmp_path / "emoji" + d.mkdir() + make_png(d, "alpha.png", color=(255, 0, 0, 255)) + make_png(d, "beta.png", color=(0, 255, 0, 255)) + make_png(d, "gamma.png", color=(0, 0, 255, 255)) + make_jpg(d, "delta.jpg", color=(128, 128, 0)) + make_gif( + d, + "animated.gif", + colors=[(255, 0, 0, 255), (0, 255, 0, 255)], + durations=[100, 100], + ) + return d diff --git a/tests/test_create_listing.py b/tests/test_create_listing.py new file mode 100644 index 0000000..ea41e81 --- /dev/null +++ b/tests/test_create_listing.py @@ -0,0 +1,256 @@ +"""Tests for create_listing.py.""" + +from unittest.mock import patch + +import pytest + +import create_listing + + +class TestCharacterGrouping: + """Test file grouping by first character.""" + + def test_alpha_files_grouped_by_first_char(self, tmp_path, make_png, monkeypatch): + """Verify files starting with letters are grouped under their first char.""" + monkeypatch.chdir(tmp_path) + a1 = make_png(tmp_path, "apple.png") + a2 = make_png(tmp_path, "avocado.png") + b1 = make_png(tmp_path, "banana.png") + + create_listing.generate_readme([a1, a2, b1]) + content = (tmp_path / "README.md").read_text() + assert "## a" in content + assert "## b" in content + + def test_non_alpha_grouped_as_special(self, tmp_path, make_png, monkeypatch): + """Verify non-alphabetic filenames are grouped into the special category.""" + monkeypatch.chdir(tmp_path) + f1 = make_png(tmp_path, "1emoji.png") + f2 = make_png(tmp_path, "_underscore.png") + + create_listing.generate_readme([f1, f2]) + content = (tmp_path / "README.md").read_text() + assert r"\[^a-zA-Z:\]" in content + + def test_numeric_file_grouped_as_special(self, tmp_path, make_png, monkeypatch): + """Verify files starting with digits go into the special group.""" + monkeypatch.chdir(tmp_path) + f1 = make_png(tmp_path, "9lives.png") + create_listing.generate_readme([f1]) + content = (tmp_path / "README.md").read_text() + assert r"\[^a-zA-Z:\]" in content + + +class TestSorting: + """Test sort order of groups.""" + + def test_special_group_sorts_before_alpha(self, tmp_path, make_png, monkeypatch): + """Verify the special characters group appears before alphabetical groups.""" + monkeypatch.chdir(tmp_path) + f_special = make_png(tmp_path, "1first.png") + f_alpha = make_png(tmp_path, "zebra.png") + + create_listing.generate_readme([f_special, f_alpha]) + content = (tmp_path / "README.md").read_text() + special_pos = content.index(r"\[^a-zA-Z:\]") + z_pos = content.index("## z") + assert special_pos < z_pos + + +class TestRowChunking: + """Test that files are split into rows of PER_ROW.""" + + def test_files_chunked_into_rows(self, tmp_path, make_png, monkeypatch): + """Verify 25 files produce 3 table rows (10 per row).""" + monkeypatch.chdir(tmp_path) + files = [make_png(tmp_path, f"a{i:02d}.png", color=(i, 0, 0, 255)) for i in range(25)] + create_listing.generate_readme(files) + content = (tmp_path / "README.md").read_text() + assert content.count("") == 3 + + def test_single_file_one_row(self, tmp_path, make_png, monkeypatch): + """Verify a single file produces exactly one table row.""" + monkeypatch.chdir(tmp_path) + f = make_png(tmp_path, "alone.png") + create_listing.generate_readme([f]) + content = (tmp_path / "README.md").read_text() + assert content.count("") == 1 + + +class TestUrlEncoding: + """Test URL encoding of filenames.""" + + def test_space_in_filename(self, tmp_path, make_png, monkeypatch): + """Verify spaces are percent-encoded as %20.""" + monkeypatch.chdir(tmp_path) + f = make_png(tmp_path, "my emoji.png") + create_listing.generate_readme([f]) + content = (tmp_path / "README.md").read_text() + assert "my%20emoji.png" in content + + def test_plus_in_filename(self, tmp_path, make_png, monkeypatch): + """Verify plus signs are percent-encoded as %2B.""" + monkeypatch.chdir(tmp_path) + f = make_png(tmp_path, "a+b.png") + create_listing.generate_readme([f]) + content = (tmp_path / "README.md").read_text() + assert "a%2Bb.png" in content + + def test_hash_in_filename(self, tmp_path, make_png, monkeypatch): + """Verify hash symbols are percent-encoded as %23.""" + monkeypatch.chdir(tmp_path) + f = make_png(tmp_path, "c#sharp.png") + create_listing.generate_readme([f]) + content = (tmp_path / "README.md").read_text() + assert "c%23sharp.png" in content + + +class TestGenerateReadme: + """Test generate_readme output structure.""" + + def test_produces_markdown_table(self, tmp_path, make_png, monkeypatch): + """Verify output contains markdown heading, table, and image tags.""" + monkeypatch.chdir(tmp_path) + f = make_png(tmp_path, "test.png") + create_listing.generate_readme([f]) + content = (tmp_path / "README.md").read_text() + assert content.startswith("# Emotes") + assert "" in content + assert 'id="search"' in content + assert "test.png" in content + + def test_data_keyword_attribute(self, tmp_path, make_png, monkeypatch): + """Verify emoji divs have data-keyword attributes for search.""" + monkeypatch.chdir(tmp_path) + f = make_png(tmp_path, "happy.png") + create_listing.generate_html([f]) + content = (tmp_path / "index.html").read_text() + assert 'data-keyword="happy"' in content + + def test_special_group_header(self, tmp_path, make_png, monkeypatch): + """Verify the # group displays as '0-9 / Special'.""" + monkeypatch.chdir(tmp_path) + f = make_png(tmp_path, "1number.png") + create_listing.generate_html([f]) + content = (tmp_path / "index.html").read_text() + assert "0-9 / Special" in content + + def test_hash_group_sorts_first(self, tmp_path, make_png, monkeypatch): + """Verify the special group appears before alphabetical groups in HTML.""" + monkeypatch.chdir(tmp_path) + f_special = make_png(tmp_path, "1first.png") + f_alpha = make_png(tmp_path, "zebra.png") + create_listing.generate_html([f_special, f_alpha]) + content = (tmp_path / "index.html").read_text() + special_pos = content.index("0-9 / Special") + z_pos = content.index(">Z<") + assert special_pos < z_pos + + def test_dark_theme(self, tmp_path, make_png, monkeypatch): + """Verify the HTML uses dark theme background color.""" + monkeypatch.chdir(tmp_path) + f = make_png(tmp_path, "test.png") + create_listing.generate_html([f]) + content = (tmp_path / "index.html").read_text() + assert "#1a1a1a" in content + + def test_emoji_count_shown(self, tmp_path, make_png, monkeypatch): + """Verify the total emoji count is displayed.""" + monkeypatch.chdir(tmp_path) + files = [make_png(tmp_path, f"e{i}.png", color=(i, 0, 0, 255)) for i in range(3)] + create_listing.generate_html(files) + content = (tmp_path / "index.html").read_text() + assert "3 emojis" in content + + def test_html_escapes_name(self, tmp_path, make_png, monkeypatch): + """Verify HTML special characters in filenames are escaped.""" + monkeypatch.chdir(tmp_path) + f = make_png(tmp_path, "a find(1)==find(3).""" + uf = UnionFind() + uf.union(1, 2) + uf.union(2, 3) + assert uf.find(1) == uf.find(3) + + +# --------------------------------------------------------------------------- +# Helpers to build ImageInfo with known hash values +# --------------------------------------------------------------------------- +def _make_hash(val: int) -> imagehash.ImageHash: + """Create an ImageHash from a single integer (fills 8x8 bit array).""" + bits = np.zeros((8, 8), dtype=bool) + if val != 0: + flat = bits.flatten() + for i in range(min(val, 64)): + flat[i] = True + bits = flat.reshape(8, 8) + return imagehash.ImageHash(bits) + + +def _make_info( + phash=0, + ahash=0, + dhash=0, + colorhash=0, + width=4, + height=4, + n_frames=1, + md5="abc", +) -> ImageInfo: + """Build an ImageInfo with controllable hash values for testing.""" + return ImageInfo( + phash=_make_hash(phash), + ahash=_make_hash(ahash), + dhash=_make_hash(dhash), + colorhash=_make_hash(colorhash), + width=width, + height=height, + n_frames=n_frames, + md5=md5, + ) + + +# --------------------------------------------------------------------------- +# ImageInfo +# --------------------------------------------------------------------------- +class TestImageInfoDegenerateHash: + """Test _has_degenerate_hash() detection of all-zero hashes.""" + + def test_all_zero_is_degenerate(self): + """Verify three zero hashes are detected as degenerate.""" + info = _make_info(phash=0, ahash=0, dhash=0) + assert info._has_degenerate_hash() is True + + def test_not_degenerate_when_hashes_nonzero(self): + """Verify nonzero hashes are not flagged as degenerate.""" + info = _make_info(phash=5, ahash=10, dhash=20) + assert info._has_degenerate_hash() is False + + def test_two_zeros_not_degenerate(self): + """Verify only two zero hashes (below threshold) are not degenerate.""" + info = _make_info(phash=0, ahash=0, dhash=5) + assert info._has_degenerate_hash() is False + + +class TestImageInfoIsAnimated: + """Test is_animated() based on frame count.""" + + def test_static_image(self): + """Verify n_frames=1 is not animated.""" + info = _make_info(n_frames=1) + assert info.is_animated() is False + + def test_animated_image(self): + """Verify n_frames>1 is animated.""" + info = _make_info(n_frames=5) + assert info.is_animated() is True + + +class TestImageInfoIsCandidate: + """Test is_candidate() duplicate detection logic.""" + + def test_rejects_dimension_mismatch(self): + """Verify images with different dimensions are rejected.""" + a = _make_info(width=4, height=4) + b = _make_info(width=8, height=8) + is_match, _, _ = a.is_candidate(b, threshold=0) + assert is_match is False + + def test_rejects_frame_count_mismatch(self): + """Verify images with different frame counts are rejected.""" + a = _make_info(n_frames=1) + b = _make_info(n_frames=3) + is_match, _, _ = a.is_candidate(b, threshold=0) + assert is_match is False + + def test_exact_match_static(self): + """Verify identical static images match with zero distance.""" + a = _make_info() + b = _make_info() + is_match, _agreements, total_dist = a.is_candidate(b, threshold=0) + assert is_match is True + assert total_dist == 0 + + def test_recompressed_static_detected(self): + """Verify re-compressed images match: ahash=0, dhash=0, colorhash=0, phash<=10.""" + a = _make_info(phash=0, ahash=0, dhash=0, colorhash=0) + b = _make_info(phash=3, ahash=0, dhash=0, colorhash=0) + is_match, _, _ = a.is_candidate(b, threshold=0) + assert is_match is True + + def test_animated_needs_all_four_agreements(self): + """Verify animated images require all 4 hash algorithms to agree.""" + a = _make_info(n_frames=5) + b = _make_info(n_frames=5) + is_match, _agreements, _ = a.is_candidate(b, threshold=0) + assert is_match is True + assert _agreements == 4 + + def test_animated_rejects_partial_agreement(self): + """Verify animated images with <4 hash agreements are rejected.""" + a = _make_info(phash=0, ahash=0, dhash=0, colorhash=0, n_frames=5) + b = _make_info(phash=30, ahash=30, dhash=30, colorhash=0, n_frames=5) + is_match, _agreements, _ = a.is_candidate(b, threshold=0) + assert is_match is False + + +# --------------------------------------------------------------------------- +# Helper functions +# --------------------------------------------------------------------------- +class TestComputeHashes: + """Test _compute_hashes() hash generation.""" + + def test_returns_four_hashes(self): + """Verify four ImageHash objects are returned for an RGBA image.""" + img = Image.new("RGBA", (4, 4), (255, 0, 0, 255)) + result = _compute_hashes(img) + assert len(result) == 4 + assert all(isinstance(h, imagehash.ImageHash) for h in result) + + def test_converts_rgb_to_rgba(self): + """Verify RGB images are handled (converted to RGBA internally).""" + img = Image.new("RGB", (4, 4), (255, 0, 0)) + result = _compute_hashes(img) + assert len(result) == 4 + + +class TestComputeMd5: + """Test _compute_md5() file hashing.""" + + def test_correct_digest(self, tmp_path): + """Verify MD5 digest matches expected value for known content.""" + f = tmp_path / "test.bin" + f.write_bytes(b"hello world") + assert _compute_md5(f) == "5eb63bbbe01eeed093cb22bb8f5acdc3" + + def test_empty_file(self, tmp_path): + """Verify MD5 digest is correct for an empty file.""" + f = tmp_path / "empty.bin" + f.write_bytes(b"") + assert _compute_md5(f) == "d41d8cd98f00b204e9800998ecf8427e" + + +class TestFilesSizeSimilar: + """Test _files_size_similar() file size comparison.""" + + def test_identical_sizes(self, tmp_path): + """Verify identical file sizes are considered similar.""" + a = tmp_path / "a.bin" + b = tmp_path / "b.bin" + a.write_bytes(b"x" * 1000) + b.write_bytes(b"x" * 1000) + assert _files_size_similar(a, b) is True + + def test_within_threshold(self, tmp_path): + """Verify files within 2% size difference are considered similar.""" + a = tmp_path / "a.bin" + b = tmp_path / "b.bin" + a.write_bytes(b"x" * 1000) + b.write_bytes(b"x" * 990) + assert _files_size_similar(a, b) is True + + def test_beyond_threshold(self, tmp_path): + """Verify files with >2% size difference are not considered similar.""" + a = tmp_path / "a.bin" + b = tmp_path / "b.bin" + a.write_bytes(b"x" * 1000) + b.write_bytes(b"x" * 500) + assert _files_size_similar(a, b) is False + + def test_zero_size_equal(self, tmp_path): + """Verify two empty files are considered similar.""" + a = tmp_path / "a.bin" + b = tmp_path / "b.bin" + a.write_bytes(b"") + b.write_bytes(b"") + assert _files_size_similar(a, b) is True + + def test_zero_size_vs_nonzero(self, tmp_path): + """Verify an empty file and a non-empty file are not similar.""" + a = tmp_path / "a.bin" + b = tmp_path / "b.bin" + a.write_bytes(b"") + b.write_bytes(b"x") + assert _files_size_similar(a, b) is False + + +class TestGetGifFrameInfo: + """Test _get_gif_frame_info() frame extraction.""" + + def test_static_png_returns_none(self, tmp_path, make_png): + """Verify a static PNG returns None (not a multi-frame image).""" + f = make_png(tmp_path, "static.png") + assert _get_gif_frame_info(f) is None + + def test_animated_gif_returns_frames(self, tmp_path, make_gif): + """Verify an animated GIF returns per-frame hash and duration tuples.""" + f = make_gif( + tmp_path, + "anim.gif", + colors=[(255, 0, 0, 255), (0, 255, 0, 255), (0, 0, 255, 255)], + durations=[100, 200, 150], + ) + result = _get_gif_frame_info(f) + assert result is not None + assert len(result) == 3 + for phash_str, duration in result: + assert isinstance(phash_str, str) + assert isinstance(duration, int) + + +class TestGifsAreIdentical: + """Test _gifs_are_identical() frame-by-frame comparison.""" + + def test_identical_gifs(self, tmp_path, make_gif): + """Verify two GIFs with identical frames and timing are identical.""" + colors = [(255, 0, 0, 255), (0, 255, 0, 255)] + durations = [100, 100] + a = make_gif(tmp_path, "a.gif", colors=colors, durations=durations) + b = make_gif(tmp_path, "b.gif", colors=colors, durations=durations) + assert _gifs_are_identical(a, b) is True + + def test_different_frames(self, tmp_path): + """Verify GIFs with different spatial patterns are not identical.""" + size = (64, 64) + + # GIF A: frame 1 = left half white, frame 2 = top half white + f1a = Image.new("RGBA", size, (0, 0, 0, 255)) + for x in range(32): + for y in range(64): + f1a.putpixel((x, y), (255, 255, 255, 255)) + f2a = Image.new("RGBA", size, (0, 0, 0, 255)) + for x in range(64): + for y in range(32): + f2a.putpixel((x, y), (255, 255, 255, 255)) + + path_a = tmp_path / "a.gif" + f1a.save(path_a, save_all=True, append_images=[f2a], duration=[100, 100], loop=0) + + # GIF B: frame 1 = right half white, frame 2 = bottom half white + f1b = Image.new("RGBA", size, (0, 0, 0, 255)) + for x in range(32, 64): + for y in range(64): + f1b.putpixel((x, y), (255, 255, 255, 255)) + f2b = Image.new("RGBA", size, (0, 0, 0, 255)) + for x in range(64): + for y in range(32, 64): + f2b.putpixel((x, y), (255, 255, 255, 255)) + + path_b = tmp_path / "b.gif" + f1b.save(path_b, save_all=True, append_images=[f2b], duration=[100, 100], loop=0) + + assert _gifs_are_identical(path_a, path_b) is False + + def test_different_timing(self, tmp_path, make_gif): + """Verify GIFs with same frames but different durations are not identical.""" + colors = [(255, 0, 0, 255), (0, 255, 0, 255)] + a = make_gif(tmp_path, "a.gif", colors=colors, durations=[100, 100]) + b = make_gif(tmp_path, "b.gif", colors=colors, durations=[100, 500]) + assert _gifs_are_identical(a, b) is False + + +# --------------------------------------------------------------------------- +# Integration-level +# --------------------------------------------------------------------------- +class TestComputeImageInfo: + """Test compute_image_info() end-to-end metadata extraction.""" + + def test_static_png(self, tmp_path, make_png): + """Verify correct metadata for a static PNG image.""" + f = make_png(tmp_path, "test.png", size=(8, 8)) + info = compute_image_info(f) + assert info is not None + assert info.width == 8 + assert info.height == 8 + assert info.n_frames == 1 + assert info.is_animated() is False + assert isinstance(info.md5, str) + + def test_animated_gif(self, tmp_path, make_gif): + """Verify correct metadata for an animated GIF.""" + f = make_gif( + tmp_path, + "test.gif", + colors=[(255, 0, 0, 255), (0, 255, 0, 255)], + durations=[100, 100], + size=(8, 8), + ) + info = compute_image_info(f) + assert info is not None + assert info.n_frames == 2 + assert info.is_animated() is True + + def test_corrupt_file_returns_none(self, tmp_path): + """Verify corrupt/invalid files return None gracefully.""" + f = tmp_path / "corrupt.png" + f.write_bytes(b"not an image") + info = compute_image_info(f) + assert info is None + + +class TestFindSimilarGroups: + """Test find_similar_groups() clustering behavior.""" + + def test_groups_identical_images(self, tmp_path, make_png): + """Verify identical images are grouped together.""" + a = make_png(tmp_path, "a.png", color=(255, 0, 0, 255), size=(8, 8)) + b = make_png(tmp_path, "b.png", color=(255, 0, 0, 255), size=(8, 8)) + groups = find_similar_groups([a, b], threshold=0) + assert len(groups) == 1 + assert len(groups[0]) == 2 + + def test_separates_unique_images(self, tmp_path, make_png): + """Verify images with different dimensions are not grouped.""" + a = make_png(tmp_path, "a.png", color=(255, 0, 0, 255), size=(32, 32)) + b = make_png(tmp_path, "b.png", color=(0, 0, 255, 255), size=(16, 16)) + groups = find_similar_groups([a, b], threshold=0) + assert len(groups) == 0 + + def test_skips_degenerate_hashes(self, tmp_path, make_png): + """Verify fully transparent images with degenerate hashes produce no groups.""" + a = make_png(tmp_path, "a.png", color=(0, 0, 0, 0), size=(4, 4)) + b = make_png(tmp_path, "b.png", color=(0, 0, 0, 0), size=(4, 4)) + groups = find_similar_groups([a, b], threshold=0) + assert groups == [] + + +class TestDeduplicate: + """Test deduplicate() file removal logic.""" + + def test_dry_run_keeps_all_files(self, tmp_path, make_png): + """Verify dry_run=True reports duplicates but keeps all files.""" + a = make_png(tmp_path, "a.png", color=(255, 0, 0, 255), size=(8, 8)) + b = make_png(tmp_path, "b.png", color=(255, 0, 0, 255), size=(8, 8)) + info_a = compute_image_info(a) + info_b = compute_image_info(b) + groups = [[(a, info_a), (b, info_b)]] + + group_count, removed = deduplicate(groups, dry_run=True, threshold=0) + assert group_count == 1 + assert removed == 1 + assert a.exists() + assert b.exists() + + def test_deletes_duplicates(self, tmp_path, make_png): + """Verify dry_run=False actually removes duplicate files.""" + a = make_png(tmp_path, "a.png", color=(255, 0, 0, 255), size=(8, 8)) + b = make_png(tmp_path, "b.png", color=(255, 0, 0, 255), size=(8, 8)) + info_a = compute_image_info(a) + info_b = compute_image_info(b) + groups = [[(a, info_a), (b, info_b)]] + + group_count, removed = deduplicate(groups, dry_run=False, threshold=0) + assert group_count == 1 + assert removed == 1 + assert a.exists() + assert not b.exists() + + def test_keeps_alphabetically_first(self, tmp_path, make_png): + """Verify the alphabetically-first filename is kept in each group.""" + z = make_png(tmp_path, "z_last.png", color=(255, 0, 0, 255), size=(8, 8)) + a = make_png(tmp_path, "a_first.png", color=(255, 0, 0, 255), size=(8, 8)) + info_z = compute_image_info(z) + info_a = compute_image_info(a) + groups = [[(z, info_z), (a, info_a)]] + + deduplicate(groups, dry_run=False, threshold=0) + assert a.exists() + assert not z.exists() + + +class TestMainCLI: + """Test main() CLI argument parsing and behavior.""" + + def test_missing_directory(self, tmp_path, capsys): + """Verify error message when directory does not exist.""" + with patch("sys.argv", ["dedup", "--dir", str(tmp_path / "nonexistent")]): + dedup.main() + captured = capsys.readouterr() + assert "does not exist" in captured.out + + def test_empty_directory(self, tmp_path, capsys): + """Verify message when directory contains no image files.""" + d = tmp_path / "empty" + d.mkdir() + with patch("sys.argv", ["dedup", "--dir", str(d), "--dry-run"]): + dedup.main() + captured = capsys.readouterr() + assert "No image files" in captured.out + + def test_dry_run_flag(self, tmp_path, make_png, capsys): + """Verify --dry-run flag is acknowledged in output.""" + d = tmp_path / "imgs" + d.mkdir() + make_png(d, "a.png", color=(255, 0, 0, 255), size=(8, 8)) + make_png(d, "b.png", color=(255, 0, 0, 255), size=(8, 8)) + with patch("sys.argv", ["dedup", "--dir", str(d), "--dry-run"]): + dedup.main() + captured = capsys.readouterr() + assert "dry-run" in captured.out + + def test_threshold_argument(self, tmp_path, make_png, capsys): + """Verify --threshold value is used and shown in output.""" + d = tmp_path / "imgs" + d.mkdir() + make_png(d, "only.png", size=(8, 8)) + with patch("sys.argv", ["dedup", "--dir", str(d), "--threshold", "5"]): + dedup.main() + captured = capsys.readouterr() + assert "threshold: 5" in captured.out diff --git a/uv.lock b/uv.lock index e14aca9..3e8543a 100644 --- a/uv.lock +++ b/uv.lock @@ -2,6 +2,15 @@ version = 1 revision = 3 requires-python = ">=3.11" +[[package]] +name = "colorama" +version = "0.4.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, +] + [[package]] name = "emoji-dedup" version = "0.1.0" @@ -11,12 +20,20 @@ dependencies = [ { name = "pillow" }, ] +[package.dev-dependencies] +dev = [ + { name = "pytest" }, +] + [package.metadata] requires-dist = [ { name = "imagehash", specifier = ">=4.3" }, { name = "pillow", specifier = ">=10.0" }, ] +[package.metadata.requires-dev] +dev = [{ name = "pytest", specifier = ">=8.0" }] + [[package]] name = "imagehash" version = "4.3.2" @@ -32,6 +49,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/31/2c/5f0903a53a62029875aaa3884c38070cc388248a2c1b9aa935632669e5a7/ImageHash-4.3.2-py2.py3-none-any.whl", hash = "sha256:02b0f965f8c77cd813f61d7d39031ea27d4780e7ebcad56c6cd6a709acc06e5f", size = 296657, upload-time = "2025-02-01T08:45:36.102Z" }, ] +[[package]] +name = "iniconfig" +version = "2.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503, upload-time = "2025-10-18T21:55:43.219Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" }, +] + [[package]] name = "numpy" version = "2.4.2" @@ -111,6 +137,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/de/e5/b7d20451657664b07986c2f6e3be564433f5dcaf3482d68eaecd79afaf03/numpy-2.4.2-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:be71bf1edb48ebbbf7f6337b5bfd2f895d1902f6335a5830b20141fc126ffba0", size = 12502577, upload-time = "2026-01-31T23:13:07.08Z" }, ] +[[package]] +name = "packaging" +version = "26.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/65/ee/299d360cdc32edc7d2cf530f3accf79c4fca01e96ffc950d8a52213bd8e4/packaging-26.0.tar.gz", hash = "sha256:00243ae351a257117b6a241061796684b084ed1c516a08c48a3f7e147a9d80b4", size = 143416, upload-time = "2026-01-21T20:50:39.064Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/b9/c538f279a4e237a006a2c98387d081e9eb060d203d8ed34467cc0f0b9b53/packaging-26.0-py3-none-any.whl", hash = "sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529", size = 74366, upload-time = "2026-01-21T20:50:37.788Z" }, +] + [[package]] name = "pillow" version = "12.1.1" @@ -198,6 +233,40 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f2/26/c56ce33ca856e358d27fda9676c055395abddb82c35ac0f593877ed4562e/pillow-12.1.1-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:cb9bb857b2d057c6dfc72ac5f3b44836924ba15721882ef103cecb40d002d80e", size = 7029880, upload-time = "2026-02-11T04:23:04.783Z" }, ] +[[package]] +name = "pluggy" +version = "1.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, +] + +[[package]] +name = "pygments" +version = "2.19.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631, upload-time = "2025-06-21T13:39:12.283Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" }, +] + +[[package]] +name = "pytest" +version = "9.0.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "iniconfig" }, + { name = "packaging" }, + { name = "pluggy" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d1/db/7ef3487e0fb0049ddb5ce41d3a49c235bf9ad299b6a25d5780a89f19230f/pytest-9.0.2.tar.gz", hash = "sha256:75186651a92bd89611d1d9fc20f0b4345fd827c41ccd5c299a868a05d70edf11", size = 1568901, upload-time = "2025-12-06T21:30:51.014Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" }, +] + [[package]] name = "pywavelets" version = "1.9.0"