#!/usr/bin/perl -w # # unarc # # Unpack archives (ie possibly compressed tarfiles) into their own # directory. If they already have a top-level directory, then that # is used instead. # # Usage: unarc files... # # For example: unarc tarfile.tar.gz # # The -l flag will list the archive's contents to stdout, rather than # unpacking it. -v will list contents while unpacking. # # This program is in the public domain. Use at your own risk. # . # # Version 0.2.8 # # -- Ed Avis, ed@membled.com, 2002-11-15 # use strict; use File::Basename; use POSIX 'getcwd'; use Getopt::Std; require 'archive_types.pl'; use vars '%TYPES'; sub absolute_path($); sub list_arc($$); sub unpack_arc($$$$); sub rename_unique($$); sub archive_info($); # Running on NT? my $NT = 0; # Parse command line options use vars qw[$opt_l $opt_v]; getopts('lv'); if (not @ARGV) { print STDERR "usage: $0 [-l] [-v] files...\n"; exit(1); } # For NT, we must glob the arguments ourselves. if ($NT) { @ARGV = map { glob($_) } @ARGV; foreach (@ARGV) { tr!\\!/!; } } # Quote characters in filenames really mess things up. Let's try # and avoid them. NB - other strange characters will mess things up # also. Yuck. # my @tmp = (); foreach (@ARGV) { # Chars in tr/// doubled to avoid confusing Emacs. if (tr/''""`` //) { warn "$_ contains bad chars, skipping"; next; } if (-d) { warn "$_ is a directory, skipping"; next; } push @tmp, $_; } @ARGV = @tmp; # Because we chdir() during the loop and could find an error at any # time, we just store the original directory here and chdir() back to # it at the start of the loop. # my $orig_wd = getcwd(); my $archive; ARCHIVE: foreach $archive (@ARGV) { (getcwd() eq $orig_wd) or chdir $orig_wd or die "cannot chdir to $orig_wd: $!"; if (not (-e $archive)) { warn "cannot find $archive, skipping"; next ARCHIVE; } # Find out basename and command to run my ($basename, $cmds) = archive_info($archive); if (not defined $basename) { warn "don't recognize extension of $archive, skipping"; next ARCHIVE; } if ($opt_l) { list_arc($archive, $cmds); } else { unpack_arc($archive, $basename, $cmds, $opt_v); } } # absolute_path() # # Does a filename have an absolute path? # sub absolute_path($) { die 'usage: absolute_path(filename)' if @_ != 1; local $_ = $_[0]; if ($NT) { return (m!^[a-zA-Z]:[\/]?! or m!^[\/]!); } return m!^/!; } # list_arc() # # List the contents of an archive to stdout (trivial really). # # Parameters: # filename of archive # reference to hash of commands, hopefully with a 'list' entry # sub list_arc($$) { die 'usage: list_arc(filename, command hashref)' if @_ != 2; my ($filename, $cmds) = @_; my $cmd = $cmds->{list}; if (defined $cmd) { system "{ $cmd ; } <$filename"; } else { warn "don't know how to list archive $filename"; } } # unpack_arc() # # Unpack an archive, into its own directory if necessary. # # Parameters: # filename of archive # name of directory to create, if needed # reference to hash of commands, hopefully with 'unpack' entry # verbose flag (print filenames to stdout) # sub unpack_arc($$$$) { die 'usage: unpack_arc(filename, dir to create, command hash, verbose)' if @_ != 4; my ($archive, $dir_to_create, $cmds, $v) = @_; my $cmd = $cmds->{unpack}; if (not defined $cmd) { warn "don't know how to unpack archive $archive"; return; } # Make a directory to unpack the stuff into my $dir = "$$.tmp.$archive"; $dir =~ tr/A-Za-z0-9.-/_/c; mkdir $dir, 0777 or die "cannot mkdir $dir: $!"; chdir $dir or die "cannot chdir to $dir: $!"; my $tidy = sub { chdir '..' or die "cannot chdir to ..: $!"; rmdir $dir or warn "cannot rmdir $dir: $!"; }; my $error = sub { chdir '..' or die "cannot chdir to ..: $!"; system('rm', '-rf', $dir) && warn "cannot remove dir $dir\n"; exit(1); }; local ($SIG{HUP}, $SIG{INT}, $SIG{TERM}) = ($error, $error, $error); # How can we reach the archive, now we've chdir()ed? my $archive_fromhere; if (absolute_path($archive)) { $archive_fromhere = $archive } else { $archive_fromhere = "../$archive" } # Run the unpacking command with its stdout in FILELIST if (not open(FILELIST, "{ $cmd ; } <$archive_fromhere |")) { warn "cannot run $cmd: $!, skipping"; $tidy->(); next ARCHIVE; } # Process the list of filenames in FILELIST, finding all the # top-level directory names or files not in a directory. # my %b; # Top-level file or directory names found while () { print if $v; chomp; # Remove ./ and / from the front of filenames s!^\./+!!; s!^/+!!; next if $_ eq ''; # An archive which tries to write into the parent directory # or root directory is a bad idea. But we don't abort on # finding such things (after all, they have already been # unpacked), we just ignore them when deciding whether to # create a top-level directory. # if (m!^\.\./!) { warn "archive $archive contains file '$_' in parent dir"; next; } if (absolute_path($_)) { warn "archive $archive contains absolute path '$_'"; next; } if (m!^(.+?)/!) { # Inside top-level directory $b{$1} = 1; } else { # Not inside a directory $b{$_} = 1; } } # FIXME: At this point we ought to check the return status from # the command. However many of the commands are pipelines where # the return status doesn't really indicate success or failure. # So for the time being, just rely on the list of filenames # printed. # my $num_bases = scalar (keys %b); if ($num_bases == 0) { warn "archive $archive is empty or broken"; $tidy->(); return; } my $first_base = (keys %b)[0]; my $renamed; if ($num_bases == 1 && -d $first_base) { # Everything inside a single directory, $first_base $renamed = rename_unique($first_base => "../$first_base"); # to be printed later $renamed =~ s!^../!!; $tidy->(); } else { # Either several bases, or $first_base is not a directory chdir '..' or die "cannot chdir to ..: $!"; $renamed = rename_unique($dir => $dir_to_create); } print STDERR "$archive -> $renamed\n"; } # rename_unique() # # Rename a file, sticking a numeric suffix on the end if necessary to # make sure renaming is successful (ie if the target file already # exists). # # If after trying lots of suffixes, renaming is still unsuccessful, # die with an error. # # Parameters: # source filename # destination filename # # Returns: # actual destination filename used # sub rename_unique($$) { die 'usage: rename_unique(source, dest)' if @_ != 2; my ($from, $to) = @_; my $TRIES = 1000; # Max number of tries for (my $num = 0; $num < $TRIES; $num++) { my $suffix = $num ? ".$num" : ''; my $dest = "$to$suffix"; # Try another filename if the destination already exists. # This has some race conditions, but it's better than just # overwriting all the time. # next if -e $dest; if (rename $from => $dest) { # Renamed successfully. return $dest; } elsif ($! =~ /^Directory not empty/) { # Couldn't rename because the destination is a directory # and isn't empty. # next; } elsif ($! =~ /^Not a directory/) { # The destination already exists and is not a directory. next; } elsif ($! =~ /^Device or resource busy/) { # Just keep trying until it is un-busy redo; } elsif ($! =~ /^No such file or directory/) { # This is really gross. A bug in Linux's NFS means that # you can sometimes create a file and then fail to rename # it, because some cache somewhere hasn't caught up with # reality. In this case we should try again. # # But what of the case where $from really does not exist? # We don't want to hang retrying for ever. # # The 'solution' is to try again, but increment $num so # that there is some limit to the number of retries. Thus # a numeric suffix may appear when it isn't needed. I # don't think you will see this on non-NFS filesystems. # warn "'No such file or directory' for $from, retrying"; next; } else { die "cannot rename $from to $dest: unrecognized error: $!"; } } die "cannot rename $from to $to.x even with $TRIES suffixes"; } # archive_info() # # From an archive filename, find the basename of the archive (the # leafname without the extension) and the hash of commands to use on # it. Return undef if these can't be found. # # Parameters: # filename of archive # # Returns: # (basename, hash) where hash is from archive_types.pl # # Returns undef if we don't know about this kind of archive. # sub archive_info($) { die 'usage: archive_info(filename)' if @_ != 1; local $_ = $_[0]; foreach my $type (keys %TYPES) { # We'll see if the file's extension matches against the type, # for example if the type is 'tar.gz', we want to test for a # filename ending in '.tar.gz' or similar. # # Make a regexp to match both dots and underscores, since # some systems change one to the other. # (my $re = ".$type") =~ s/\./[._]/g; if (m!([^/]+)$re$!) { # It matched, assume this is the right one. return ($1, $TYPES{$type}); } } return undef; }