package FastGlob; =head1 NAME FastGlob - A faster glob() implementation =head1 SYNOPSIS use FastGlob qw(glob); @list = &glob('*.c'); =head1 DESCRIPTION This module implements globbing in perl, rather than forking a csh. This is faster than the built-in glob() call, and more robust (on many platforms, csh chokes on C if too many files are in the directory.) There are several module-local variables that can be set for alternate environments, they are listed below with their (UNIX-ish) defaults. $FastGlob::dirsep = '/'; # directory path separator $FastGlob::rootpat = '\A\Z'; # root directory prefix pattern $FastGlob::curdir = '.'; # name of current directory in dir $FastGlob::parentdir = '..'; # name of parent directory in dir $FastGlob::hidedotfiles = 1; # hide filenames starting with . $FastGlob::vmsfilespecs = 0; # deal with VMS filespecs Currently the implementation attempts to set them correctly for MacOS, VMS, and OS2/Windows/MSDOS, and defaults to UNIX-ish behavior. =head1 INSTALLATION Copy this module to the Perl 5 Library directory. =head1 COPYRIGHT Copyright (c) 1997 Marc Mengel. All rights reserved. This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =head1 AUTHOR Marc Mengel EFE =cut use Exporter (); @ISA = qw(Exporter); @EXPORT = qw(&glob); @EXPORT_OK = qw(&glob2regex &expandtildes); use strict; # be good no strict 'vars'; # ... but not *that* good # # recursively wildcard expand a list of strings # # platform specifics # default is UNIX/AMIGA-DOS/plan9-ish $dirsep = '/'; $rootpat= '\A\Z'; $curdir = '.'; $parentdir = '..'; $hidedotfiles = 1; $vmsfilespecs = 0; $debugflag = 0; # some platforms however are different... if ( $^O =~ /VMS/ ) { $rootpat='\A.+:\Z'; $hidedotfiles = 0; $vmsfilespecs = 1; } if ( $^O =~ /^os2|^win/ ) { $rootpat = '\A[A-Z]:\Z'; $hidedotfiles = 0; $dirsep = '\\'; } if ( $^O =~ /MacOS/ ) { $dirsep = ':'; $hidedotfiles = 0; } # get home directory by name from ~name/... # if name is null, return HOME sub gethome { my(@list); if ( $_[0] eq '' ) { return $ENV{'HOME'}; } else { @list = getpwnam($_[0]); return $list[7]; } } sub expandtildes { my($string) = $_[0]; my($users, @users, $rest, $user); my(@res) = (); # check for and do tilde expansion # if ( $string =~ /\A\~([^${dirsep}]*)(.*)/ ) { $users = $1; $rest = $2; if ( $users =~ /\{.*\}/) { @users = ($users =~ m/(\w+)/g); print "here users is @users\n" if ($debugflag); foreach $user (@users) { unshift(@res, (&gethome($user) . $rest) ); } } else { $string =~ s/\A\~([^${dirsep}]*)/&gethome($1)/e; unshift(@res, $string); } } else { unshift(@res, $string); } return @res; } # Make the glob into a regexp # sub glob2regex { my($string) = $_[0]; my($users, $re); $re = $string; # un-escape \x style letters # $re =~ s/\\(\w)/$1/go; # escape wildcards *except* {,}[]*? and $dirsep # $re =~ s/[^\w\[\]\{\},*?${dirsep}]/\\$&/go; # handle * and ? # $re =~ s/(\A|[^\\])\*/$1.*/go; $re =~ s/(\A|[^\\])\?/$1./go; # don't handle [...], it's spelled the same in regexs and globs # deal with {xxx,yyy,zzz} -> (xxx|yyy|zzz) # its a while for nested ones # while ( $re =~ m/(\A|[^\\])\{(([^\{\}]|\\.)*)\}/o ) { @altlist = split(',',$2); $re =~ s/(\A|[^\\])\{(([^{}\\]|\\.)*)\}/$1."(".join("|", @altlist).")"/e; } # deal with dot files if ( $hidedotfiles ) { # look for . or .* at the begining of string, or after # directory separators, possibly hidden in nested parens $re =~ s%(\A\(*|${dirsep}\(*)\.\*%${1}([^.].*)?%go; $re =~ s%(\A\(*|${dirsep}\(*)\.%${1}[^.]%go; } return $re; } sub glob { my($string, @globlist); my(@comps,@res,@list,$re,@middle,$rootdir); @globlist = (); foreach $string (@_) { unshift(@globlist, &expandtildes($string)); } @res = (); GLOBLIST: foreach $string (@globlist) { # if there's no other wildcards, just return it # if ( ! ($string =~ m/(^|[^\\])[*?\[\]{}]/o) ) { print "no wildcards\n" if ($debugflag); unshift( @res, $string ); next GLOBLIST; } if ($vmsfilespecs) { $string = unVMSify($string); } $re = glob2regex($string); # debugging print "regexp is $re\n" if ($debugflag); # now split it into directory components @comps = split( ${dirsep}, ${re} ); if ( $comps[0] =~ /${rootpat}/ ) { $rootdir = shift(@comps); unshift( @res, &recurseglob( "$rootdir", "$rootdir$dirsep" , @comps )); } else { unshift(@res, &recurseglob( $curdir, '' , @comps )); } } return sort(@res); } sub recurseglob { my($dir, $dirname, @comps) = @_; my(@res) = (); my($re, $anymatches, @names, $string); if ( $#comps == -1 ) { # boottom of recursion, just return the path chop($dirname); # always has gratiutous trailing ${dirsep} if ($vmsfilespecs) { @res = (&VMSify($dirname)); } else { @res = ($dirname); } } else { # need to match the *whole* component $re = '\A' . shift(@comps) . '\Z'; # slurp in the directory if ($vmsfilespecs) { opendir(HANDLE, &VMSify($dir)); } else { # check for ugly / case if ($dir eq '' ) { opendir(HANDLE, $dirname); } else { opendir(HANDLE, $dir); } } @names = readdir(HANDLE); closedir(HANDLE); print "\$dir $dir \@names is @names\n" if ($debugflag); # look for matches, and if you find one, glob the rest of the # components. We eval the loop so the regexp gets compiled in, # making searches on large directories faster. $anymatches = 0; # the nested if tests in here are a little convoluted looking. # basically we don't want to recurse into subdirectories unless # we got a match explicitly for the subdirectory $string = <) { chomp; @t0 = times(); @list2 = glob($_); @t1 = times(); $udiffg = ($t1[0] + $t1[2]) - ($t0[0] + $t0[2]); $sdiffg = ($t1[1] + $t1[3]) - ($t0[1] + $t0[3]); print "@list2\n" if ($debugflag); @t0 = times(); @list1 = &glob($_); @t1 = times(); $udiffm = ($t1[0] + $t1[2]) - ($t0[0] + $t0[2]); $sdiffm = ($t1[1] + $t1[3]) - ($t0[1] + $t0[3]); print "@list1\n" if ($debugflag); print "mine: [${udiffm}u\t${sdiffm}s]\t"; print "glob: [${udiffg}u\t${sdiffg}s]\t"; @list2 = sort(@list2); if (join(';',@list1) eq join(';',@list2)) { print "Results matched\n"; } else { print "Results did not match!\n"; } print "pattern: " if ($debugflag); } } # map a vms filespec foo$bar:[d1.d2]file.ext # to a slash-ish foo$bar:/d1.dir/d2.dir/file.ext sub unVMSify { my($string) = $_[0]; my($start,$middle,$end,@middle); if ( $string =~ m/\A(.*?):?\[(.*)\](.*)\Z/ ) { $start= $1; $middle= $2; $end = $3; @middle = split(m/\./,$middle); if ( $start ne '' ) { $string = $start . ':' . ${dirsep} . join(".dir${dirsep}",@middle) . ".dir${dirsep}" . $end; } elsif ( $middle =~ /\A\./ ) { shift(@middle); $string = join(".dir${dirsep}",@middle) . ".dir${dirsep}" . $end; } else { #??? } } return $string; } # map a foo$bar:/d1.dir/d2.dir/file.ext to a vms filespec # foo$bar:[d1.d2]file.ext sub VMSify { my($string) = $_[0]; my(@components); $string =~ s/\.dir${dirsep}/${dirsep}/g; @components = split(${dirsep}, $string); if ( $#components > 0 ) { if ( $components[0] =~ m/$rootpat/ ) { $start = shift(@components) . '['; } else { $start = '[.'; } $string = $start . join('.', @components[0..($#components-1)]) . ']' . $components[$#components]; } return $string; } 1; __END__