para

#!/usr/bin/env perl6
use v6;
unit module File::Find::Duplicates:auth<labster>;

use Digest;
use File::Compare;

# Finds groups of files with identical content under the given directories.
#
# Named parameters:
#   :@dirs         (required) directories to scan; each entry is coerced via .IO
#   :$ignore_empty when true, zero-byte files are left out of the result
#   :$recursive    when true, File::Find's find() walks each directory tree;
#                  otherwise only the direct entries of each directory are used
#   :$method       'compare' groups files via compare_multiple_files (presumably
#                  provided by File::Compare -- confirm); any other value uses
#                  the MD5 grouping done by computeMD5
#
# Returns an array with one element per group of duplicate files.
sub find_duplicates (:@dirs!, :$ignore_empty = False, :$recursive = False, :$method = 'md5' ) is export {
    my (@files, @duplicates);
    if $recursive {
        use File::Find;
        @files = map -> $d {find( dir => $d ).Slip}, @dirs.flat;
    }
    else { @files = @dirs».IO».dir».Slip.flat }

    # Bucket regular files by size.  Hash.push keeps a lone value as a plain
    # item and only builds an Array once a second value arrives for the same
    # key, so "value ~~ Array" below means "two or more files of this size".
    my %filesizes;
    for @files.unique -> $f { $f.f and push %filesizes, $f.s=>$f }

    # Empty files are obviously equivalent to each other, so pull them out
    # before any content comparison is attempted.
    my $emptyfiles = %filesizes{'0'} :delete // Nil;

    if ($method eq 'compare') {
        %filesizes.pairs
            ==> grep( { .value ~~ Array } )
            ==> map( { .value } )
            ==> map( { compare_multiple_files($_.Array) } )
            ==> @duplicates ;
    }
    else {
        # computeMD5 re-buckets each size group by digest; the second grep
        # keeps only the digests shared by more than one file.
        %filesizes.pairs
            ==> grep( { .value ~~ Array } )
            ==> map(  {  computeMD5($_) } )
            ==> grep( { .value ~~ Array } )
            ==> map(  { .value } )
            ==> @duplicates ;
    }

    # A single empty file is not a duplicate of anything: only report the
    # empty-file group when it actually holds two or more files.
    @duplicates.push($emptyfiles) if !$ignore_empty and $emptyfiles ~~ Array;
    return @duplicates;
}

use MONKEY-TYPING;
# Adds a .duplicates method to IO::Path so a directory object can be searched
# directly.  The invocant's path is used as the only entry of :dirs; every
# other option (:ignore_empty, :recursive, :method) is forwarded unchanged to
# find_duplicates, whose result is returned.
augment class IO::Path {
    method duplicates ( :$ignore_empty = False, :$recursive = False, :$method = 'md5' ) is export {
        find_duplicates( :$ignore_empty, :$recursive, :$method, dirs=>[self.path] )
    }

}

# Re-groups one size bucket by content digest.
# Takes a Pair whose value holds the files of one size, slurps each file in
# binary mode and keys it by the lowercase hex rendering of its md5() digest.
# Returns the digest => file(s) pairs as a Slip; a digest seen only once maps
# to a single file, a digest seen several times maps to an Array of files.
sub computeMD5 (Pair $size_files) {
    my %by_digest;

    for $size_files.value.flat -> $file {
        my $hex = md5( $file.IO.slurp(:bin) ).list».fmt("%02x").join;
        %by_digest.push: $hex => $file;
    }

    return %by_digest.pairs.Slip;
}


# Command-line entry point: prints each group of duplicate files found in the
# given directories.
#
#   -r --recursive   search directories recursively
#   -l --sameline    print each group on a single line
#   -S --size        print the file size before each group
#   -n --noempty     leave empty files out of the results
#   -c --compare     use the 'compare' method instead of 'md5'
#
# Dies if any positional argument is not an existing directory.
sub MAIN (:r(:$recursive), :l(:$sameline), :S(:$size), :n(:$noempty), :c(:$compare), *@directories) {
    # Command-line arguments are strings, so go through .IO to test them.
    for @directories -> $d { $d.IO.d or die "Given path is not a directory" };

    my @dupes = find_duplicates( dirs => @directories,
                                 ignore_empty => $noempty,
                                 method => ($compare ?? 'compare' !! 'md5'),
                                 :$recursive);
    #say @dupes.perl;
    for @dupes -> $f {
            # All files in a group have the same size, so the first one is representative.
            say $f[0].s, " bytes each:" if $size;
            if $sameline { $f».path.say }
            else { $f».path».say; print "\n"; }
    }
}




head

File::Find::Duplicates - get a list of duplicate files in directories


my @dupes = find_duplicates( dirs => ["~/Pictures", "/camera/import"],
                             recursive => True, ignore_empty => True );
say "First set: {@dupes[0]».path.join(', ')}";
    # Produces (as an example)
    # "First set: ~/Pictures/IMG0001.jpg, /camera/import/IMG0001.JPG"
my @moredupes = "/copiedfiles".IO.duplicates;


File::Find::Duplicates finds files which are duplicates of each other, by comparing size
and MD5 checksums.  While it is certainly possible that files of the same size will have
a hash collision, it's unlikely enough that most applications won't notice the difference.
Symbolic links can still get you into trouble, though.


The C<find_duplicates> function is the main entry point of this module, though a
C<duplicates> method for IO::Path objects is also provided.  Both take the same arguments,
with the exception of C<dirs>.  Both return an array of arrays, listing each set
of duplicate files as IO::Path objects.


A required option, C<dirs> specifies which directories to look in.  Requires an array of
paths (as ordinary strings), though it's okay if it only contains one item.  In the method
form, the invocant IO::Path object serves as the directory to search through, and this
option is not required.


Specifies whether to descend through directories encountered; default is False.  If set to
a value like True, this module uses File::Find to descend the directory tree.


Specifies whether or not we should bother to report empty files back as duplicates.
Defaults to False, but any value that evaluates to true will omit results with
file size = 0 bytes.


Takes "md5" (default) or "compare".  MD5 mode uses the C<md5> routine from Digest to compare
the content of files, which may cause some rare false positives.  The other method, "compare",
uses File::Compare to look at the individual bytes of files.


This module can be directly called from the command line, where it emulates some of the
functionality of fdupes.  Due to a bug, some perl6 implementations might not call C<MAIN>
in a module, and you might have to comment out the C<module> line to get it to work.


$ perl6 Duplicates.pm6 [options] directories


-r	--recursive	Go through directories recursively
-S	--size		Print size of duplicate files
-n	--noempty	Don't include empty files in the results
-l	--sameline	Print results on a single line
			(careful: fdupes uses -1 instead of -l)
-c	--compare	Compare byte-by-byte rather than via MD5 hash


Probably optimize the code.  Add options for ordering and file deletion.


Released under the same terms as Perl 6; see the LICENSE file for details.


root

README

React

=begin React :component<HeaderCol> :id<menu> 
=para L<Documentation|/doc/introduction>
=para L<Modules| /mods>
=para L<Examples| file:../examples/index.podlite>
=para L<Download |file:../download/index.podlite>
=para L<About|file:../about/index.podlite>
=para 🔍 K<⌘K>/K<ctrl-K>
=end React


=begin React :component<HeaderCol> :id<footer>  
=begin nested :!nested
=item1 B<Language>
    =item2 L<Get started|file:../getting-started/getting-started.podlite>
    =item2 L<Why Raku?|/doc/language/faq#Why-should-I-learn-Raku-What's-so-great-about-it>
    =item2 L<Try Raku|https://glot.io/new/raku>
    =item2 L<Raku cheat sheet|https://github.com/Raku/mu/blob/master/docs/Perl6/Cheatsheet/cheatsheet.txt>
    =item2 L<empty|#>
    =item2 L<empty|#>
    =item2 L<empty|#>
    =item2 L<Raku on Exercism|https://exercism.org/tracks/raku>
    =item2 L<Wikipedia|https://en.wikipedia.org/wiki/Raku_(programming_language)>
    =item2 L<empty|#>
    =item2 L<empty|#>
    =item2 L<empty|#>

=end nested
=begin nested :!nested

=item1 B<L<Documentation|/doc>>
    =item2 L<Getting started, Migration guides from other languages, & Tutorials | file:../doc/introduction.podlite>
    =item2 L<Language References|file:../doc/reference.podlite>
    =item2 L<Type Reference| file:../doc/types.podlite>
    =item2 L<Miscellaneous| file:../doc/miscellaneous.podlite>
    =item2 L<FAQs (Frequently Asked Questions)|/doc/language/faq>
    =item2 L<Community|/doc/language/community>
    =item2 L<The list of all documents|file:../doc/index.podlite>

=item1 B<Resources>
    =item2 L<The Raku Guide|https://raku.guide/>
    =item2 L<Books |https://perl6book.com/>
    =item2 L<Rosetta Code | https://www.raku.org/community/rosettacode>

=end nested
=begin nested :!nested

=item1 B<Resources>
    =item2 L<Download | file:../download/index.podlite>
    =item2 L<empty|#>
    =item2 L<empty|#>
    =item2 L<empty|#>
    =item2 L<empty|#>
    =item2 L<empty|#>
    =item2 L<empty|#>
    =item2 L<empty|#>

=item1 B<Learning>
    =item2 L<The Raku Guide|https://raku.guide/>
    =item2 L<Wikibook |https://en.wikibooks.org/wiki/Raku_Programming>
    =item2 L<Books |https://perl6book.com/>
    =item2 L<Rosetta Code | https://www.raku.org/community/rosettacode>
    =item2 L<Learn Raku in Y minutes|https://learnxinyminutes.com/docs/raku/>
    =item2 L<Golfing |https://github.com/AlexDaniel/raku-golf-cheatsheet>
=end nested
=begin nested :!nested

=item1 B<Explore>
    =item2 L<Raku Blog Aggregator|https://planet.raku.org/>
    =item2 L<Rakudo Weekly|https://rakudoweekly.blog/>
    =item2 L<The Weekly Challenge |https://perlweeklychallenge.org/>
    =item2 L<Raku Advent Calendar|https://raku-advent.blog/>
    =item2 L<empty|#>
    =item2 L<empty|#>
    =item2 L<empty|#>
    =item2 L<empty|#>
    =item2 L<empty|#>

=end nested
=end React


=begin React :component<CookieConsent> :id<CookieConsent> :buttonCaption("Got it!")
=para
This website uses cookies for analytics.
=end React


Duplicates

NAME

SYNOPSIS

DESCRIPTION

dirs

recursive

ignore_empty

method

CLI Usage

CLI Options

TODO

SEE ALSO

AUTHOR