#!/usr/bin/env perl
#
# Reconstructed from the whitespace-collapsed deletion diff of
# local/bin/x-dupes.pl (blob 8b480a9, mode 100755).
#

=head1 NAME

dupes - Report on files with duplicate contents, via SHA1 hash.

=cut

=head1 SYNOPSIS

  dupes [options] directory

  General Options:

   --help        Show the help information for this script.
   --verbose     Show useful debugging information.

=cut


=head1 ABOUT

dupes is a simple script to report upon files that are identical,
recursively.

The process involves calculating the SHA1 hash of the file contents
and reporting on any collisions we see.

Symbolic links are skipped, but a collision might still be caused by a
hardlink, so blindly deleting duplicates without investigation is almost
certainly a mistake.

=cut

=head1 AUTHOR

 Steve
 --
 http://www.steve.org.uk/

=cut


=head1 LICENSE

Copyright (c) 2013 by Steve Kemp.  All rights reserved.

This script is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

The LICENSE file contains the full text of the license.

=cut



use strict;
use warnings;

use File::Find;
use Getopt::Long;
use Pod::Usage;


#
# Parse the arguments
#
my %config = parsedOptions();


#
# The path to examine.
#
# Test for presence rather than truth, so that a directory which is
# literally named "0" is not silently replaced by the current directory.
#
my $path = ( defined( $ARGV[0] ) && length( $ARGV[0] ) ) ? $ARGV[0] : '.';


#
# Get the hashing object, dynamically.
#
my $ctx = getHashObject();

#
# Map of hex-digest => { 'file'  => first path seen with that content,
#                        'dupes' => [ every later path with that content ] }
#
my %digest;


#
# Find files and store the hash of their contents.
#
find( {
       'wanted' => sub {
           my $entry = $_;

           # lstat, rather than stat, so that a symbolic link is seen as
           # a link instead of as whatever it happens to point at.
           if ( !lstat($entry) )
           {
               warn "Cannot stat $entry: $!\n";
               return;
           }

           # Symbolic links are never hashed, nor reported.
           return if ( -l _ );

           if ( -d _ )
           {
               $config{ 'verbose' } && print "Entering $entry\n";
               return;
           }

           # Only plain, readable files are of interest.
           return unless ( ( -f _ ) && ( -r _ ) );

           my $sha1 = hashFile( $ctx, $entry );
           return unless ( defined($sha1) );

           if ( exists $digest{ $sha1 } )
           {
               push @{ $digest{ $sha1 }->{ 'dupes' } }, $entry;
           }
           else
           {
               $digest{ $sha1 } = { 'file'  => $entry,
                                    'dupes' => [] };
           }
       },
       'no_chdir' => 1
     },
     $path
    );


#
# Report upon collisions.
#
# The output is sorted by the first-seen filename so that repeated runs
# over an unchanged tree produce the same report.
#
foreach my $hash (
      sort { $digest{ $a }->{ 'file' } cmp $digest{ $b }->{ 'file' } }
      keys %digest )
{
    my $dupes = $digest{ $hash }->{ 'dupes' };
    my $src   = $digest{ $hash }->{ 'file' };

    if (@$dupes)
    {
        print $src . "\n";
        foreach my $dupe (@$dupes)
        {
            print "\t$dupe\n";
        }
    }
}


#
# All done.
#
exit(0);


=begin doc

Calculate the hex-digest of the contents of the given file, using the
supplied digest object.

The file is opened here and handed to the digest object as a binary-mode
handle, because passing a handle works with both Digest::SHA and
Digest::SHA1, whereas passing a filename only works with the former.

Returns the digest on success.  On failure a warning is issued and an
empty return is made, so a single bad file does not abort the scan.

=end doc

=cut

sub hashFile
{
    my ( $hasher, $file ) = (@_);

    my $handle;
    if ( !open( $handle, '<', $file ) )
    {
        warn "Skipping $file: $!\n";
        return;
    }
    binmode($handle);

    my $hex = eval {
        $hasher->reset;
        $hasher->addfile($handle);
        $hasher->hexdigest;
    };

    # Save the error before close() has any chance to disturb it.
    my $error = $@;
    close($handle);

    if ( !defined($hex) )
    {
        warn "Skipping $file: $error";
        return;
    }

    return ($hex);
}


=begin doc

Load one of Digest::SHA and Digest::SHA1, depending on what is available,
and return a new digest object from the first which loads.

If neither module can be loaded an error is shown and we exit with a
status of 1.

=end doc

=cut

sub getHashObject
{
    foreach my $module (qw! Digest::SHA Digest::SHA1 !)
    {

        # Convert Foo::Bar into Foo/Bar.pm so that the module can be
        # loaded via a block-eval, avoiding a string-eval.
        ( my $file = "$module.pm" ) =~ s{::}{/}g;

        my $loaded = eval {require $file; 1;};

        return ( $module->new ) if ($loaded);
    }

    print STDERR "Failed to load either Digest::SHA or Digest::SHA1\n";
    exit(1);
}


=begin doc

Parse the options and return suitable values.

The return value is a hash with the keys 'help' and 'verbose'.  Invalid
options result in the usage text being shown and a non-zero exit.

=end doc

=cut

sub parsedOptions
{
    my %vars;

    # GetOptions has already described the problem on STDERR; show the
    # usage text too and exit with a failure status.
    pod2usage(2)
      if (
           !GetOptions( "help"    => \$vars{ 'help' },
                        "verbose" => \$vars{ 'verbose' } ) );

    pod2usage(1) if ( $vars{ 'help' } );

    return (%vars);

}