#!/usr/bin/perl
# Copyright 2003-2007 Vlado Keselj http://www.cs.dal.ca/~vlado

# Print the usage text.  The here-document body is written as '#' lines so it
# also reads as the header comment of this file.  The sub is defined before
# "use strict" and interpolates the package variable $VERSION at call time.
sub help { print <<"#EOT" }
# Find equal files in a directory tree, version $VERSION
#
# Relies on diff.
#
# Usage: find-equal-files [switches] [directories]
#  -n  find equal files even if they have different names
#  -i  report equal files as found, beside the final report
#  -h  Print help and exit.
#  -r pathp  Remove redundant files that have path prefix pathp
#            Removes only if there is at least one file not ~pathp
#  -t  Test mode, used with -r, but only report what would be
#      removed instead of removing it.
#  -v  Print version of the program and exit.
#EOT

use strict;
use warnings;

use Cwd qw(abs_path);
use Getopt::Std;

# %Tab maps a bucket key ("size", or "basename size" without -n) to a list of
# groups; each group is { file0 => first file seen, otherfiles => [equal files] }.
our ($VERSION, %Tab);
$VERSION = sprintf "%d.%d", q$Revision: 1.7 $ =~ /(\d+)/g;

# Canonical paths of the files already examined, so that a file reached twice
# (overlapping or repeated arguments) is never treated as a copy of itself.
my %Seen_path;

our ($opt_v, $opt_h, $opt_n, $opt_i, $opt_r, $opt_t);
getopts("vhnitr:");

if ($opt_v) {
    print "$VERSION\n";
    exit;
}
elsif ($opt_h || !@ARGV) {
    help();
    exit;
}

# Unbuffered STDOUT: pending output must be flushed before STDOUT is
# temporarily pointed at /dev/null around each diff call.
$| = 1;

find_equal_files(@ARGV);

print "FINAL REPORT:\n";
foreach my $k (keys %Tab) {
    foreach my $e (@{ $Tab{$k} }) {
        next unless @{ $e->{otherfiles} };
        print "equal files: $e->{file0}\n";
        foreach my $f (@{ $e->{otherfiles} }) {
            print " and: $f\n";
        }
    }
}

# -r may be absent, in which case $opt_r is undefined.
remove_files() if defined $opt_r && length $opt_r;

# Remove (or, with -t, only list) the files of every group of equal files
# whose path starts with the -r prefix, provided that the group also contains
# at least one file without that prefix.  Reads %Tab, $opt_r and $opt_t.
sub remove_files {
    print "REMOVING FILES with prefix \"$opt_r\"";
    print " (TEST MODE)" if $opt_t;
    print ":\n";

    foreach my $k (keys %Tab) {
        foreach my $e (@{ $Tab{$k} }) {
            my @files = ($e->{file0}, @{ $e->{otherfiles} });
            next unless @files > 1;

            # Split the group by plain string prefix.
            my (@fileswithp, @filesnop);
            foreach my $f (@files) {
                if (index($f, $opt_r) == 0) { push @fileswithp, $f }
                else                        { push @filesnop,   $f }
            }

            # Nothing to remove, or removing would leave no copy behind.
            next if @fileswithp == 0 or @filesnop == 0;

            print "Keep: @filesnop\n";
            foreach my $f (@fileswithp) {
                if ($opt_t) {
                    print " TO RM $f\n";
                }
                else {
                    print " Removing $f\n";
                    unlink($f) or warn "Can't remove $f: $!\n";
                }
            }
        }
    }
    return;
}

# Return true when diff exits with status 0 for the two files, i.e. it found
# no difference.  The output of diff is discarded.  Dies when diff cannot be
# executed at all, because every comparison would otherwise silently count as
# "different".
sub _same_content {
    my ($file_a, $file_b) = @_;

    # Temporarily point STDOUT and STDERR at /dev/null; the child inherits them.
    open(my $saved_out, '>&', \*STDOUT) || die "Can't save stdout: $!";
    open(my $saved_err, '>&', \*STDERR) || die "Can't save stderr: $!";
    open(STDOUT, '>', '/dev/null') || die "Can't redirect stdout to /dev/null";
    open(STDERR, '>', '/dev/null') || die "Can't redirect stderr to /dev/null";

    my $status = system('diff', $file_a, $file_b);
    my $error  = $!;    # keep the reason before the opens below overwrite it

    # Restore STDERR first so that later failures can be reported.
    close(STDERR);
    open(STDERR, '>&', $saved_err) || exit 1;    # nowhere left to report to
    close(STDOUT);
    open(STDOUT, '>&', $saved_out) || die "Can't restore stdout: $!";

    die "Can't run diff: $error\n" if $status == -1;
    return $status == 0;
}

# Walk the given files and directories depth-first and record every regular
# entry in %Tab.  Files are bucketed by size (with -n) or by base name and
# size; within a bucket a file is compared with the first file of each group
# and either joins the first group it equals or starts a new group.
# Symbolic links and nonexistent paths are ignored.  Dies when a directory
# cannot be opened.
sub find_equal_files {
    my @pending = @_;

    PATH:
    while (@pending) {
        my $path = shift @pending;

        # symbolic link or does not exist: ignore it
        next PATH if -l $path || !-e $path;

        if (-d $path) {
            opendir(my $dh, $path) || die "can't opendir $path: $!";
            my @children = grep { !/^\.\.?$/ } readdir($dh);
            closedir($dh);

            # Putting the children at the front of the queue keeps the
            # traversal depth-first without recursing.
            unshift @pending, map { "$path/$_" } @children;
            next PATH;
        }

        # The same file can be reached under two spellings (for example
        # "old/f" and "./old/f"); examine it only once.
        my $canonical = abs_path($path);
        next PATH if defined $canonical && $Seen_path{$canonical}++;

        my $size = (stat $path)[7];
        next PATH unless defined $size;    # vanished since the -e test

        my $basename = $path =~ /\/([^\/]+)$/ ? $1 : $path;
        my $key      = $opt_n ? $size : "$basename $size";

        # Could be equal to a file already filed under the same key.
        my $match;
        foreach my $group (@{ $Tab{$key} || [] }) {
            if (_same_content($group->{file0}, $path)) {
                $match = $group;
                last;
            }
        }

        if ($match) {
            push @{ $match->{otherfiles} }, $path;
            print "equal files:$match->{file0}\n and:$path\n" if $opt_i;
        }
        else {
            push @{ $Tab{$key} }, { file0 => $path, otherfiles => [] };
        }
    }
    return;
}