#!/usr/bin/perl -w
#
# unarc
#
# Unpack archives (ie possibly compressed tarfiles) into their own
# directory. If they already have a top-level directory, then that
# is used instead.
#
# Usage: unarc files...
#
# For example: unarc tarfile.tar.gz
#
# The -l flag will list the archive's contents to stdout, rather than
# unpacking it. -v will list contents while unpacking.
#
# This program is in the public domain. Use at your own risk.
# .
#
# Version 0.2.8
#
# -- Ed Avis, ed@membled.com, 2002-11-15
#
use strict;
use File::Basename;
use POSIX 'getcwd';
use Getopt::Std;
require 'archive_types.pl'; use vars '%TYPES';
sub absolute_path($);
sub list_arc($$);
sub unpack_arc($$$$);
sub rename_unique($$);
sub archive_info($);
# Running on NT?
my $NT = 0;
# Parse command line options
use vars qw[$opt_l $opt_v]; getopts('lv');
if (not @ARGV) {
print STDERR "usage: $0 [-l] [-v] files...\n";
exit(1);
}
# For NT, we must glob the arguments ourselves.
if ($NT) {
@ARGV = map { glob($_) } @ARGV;
foreach (@ARGV) {
tr!\\!/!;
}
}
# Quote characters in filenames really mess things up. Let's try
# and avoid them. NB - other strange characters will mess things up
# also. Yuck.
#
my @tmp = ();
foreach (@ARGV) {
# Chars in tr/// doubled to avoid confusing Emacs.
if (tr/''""`` //) {
warn "$_ contains bad chars, skipping";
next;
}
if (-d) {
warn "$_ is a directory, skipping";
next;
}
push @tmp, $_;
}
@ARGV = @tmp;
# Because we chdir() during the loop and could find an error at any
# time, we just store the original directory here and chdir() back to
# it at the start of the loop.
#
my $orig_wd = getcwd();
my $archive;
ARCHIVE: foreach $archive (@ARGV) {
(getcwd() eq $orig_wd)
or chdir $orig_wd
or die "cannot chdir to $orig_wd: $!";
if (not (-e $archive)) {
warn "cannot find $archive, skipping";
next ARCHIVE;
}
# Find out basename and command to run
my ($basename, $cmds) = archive_info($archive);
if (not defined $basename) {
warn "don't recognize extension of $archive, skipping";
next ARCHIVE;
}
if ($opt_l) {
list_arc($archive, $cmds);
}
else {
unpack_arc($archive, $basename, $cmds, $opt_v);
}
}
# absolute_path()
#
# Does a filename have an absolute path?
#
sub absolute_path($) {
die 'usage: absolute_path(filename)' if @_ != 1;
local $_ = $_[0];
if ($NT) {
return (m!^[a-zA-Z]:[\/]?! or m!^[\/]!);
}
return m!^/!;
}
# list_arc()
#
# List the contents of an archive to stdout (trivial really).
#
# Parameters:
# filename of archive
# reference to hash of commands, hopefully with a 'list' entry
#
sub list_arc($$) {
die 'usage: list_arc(filename, command hashref)' if @_ != 2;
my ($filename, $cmds) = @_;
my $cmd = $cmds->{list};
if (defined $cmd) {
system "{ $cmd ; } <$filename";
}
else {
warn "don't know how to list archive $filename";
}
}
# unpack_arc()
#
# Unpack an archive, into its own directory if necessary.
#
# Parameters:
# filename of archive
# name of directory to create, if needed
# reference to hash of commands, hopefully with 'unpack' entry
# verbose flag (print filenames to stdout)
#
sub unpack_arc($$$$) {
die 'usage: unpack_arc(filename, dir to create, command hash, verbose)'
if @_ != 4;
my ($archive, $dir_to_create, $cmds, $v) = @_;
my $cmd = $cmds->{unpack};
if (not defined $cmd) {
warn "don't know how to unpack archive $archive";
return;
}
# Make a directory to unpack the stuff into
my $dir = "$$.tmp.$archive";
$dir =~ tr/A-Za-z0-9.-/_/c;
mkdir $dir, 0777 or die "cannot mkdir $dir: $!";
chdir $dir or die "cannot chdir to $dir: $!";
my $tidy = sub {
chdir '..' or die "cannot chdir to ..: $!";
rmdir $dir or warn "cannot rmdir $dir: $!";
};
my $error = sub {
chdir '..' or die "cannot chdir to ..: $!";
system('rm', '-rf', $dir) && warn "cannot remove dir $dir\n";
exit(1);
};
local ($SIG{HUP}, $SIG{INT}, $SIG{TERM})
= ($error, $error, $error);
# How can we reach the archive, now we've chdir()ed?
my $archive_fromhere;
if (absolute_path($archive)) { $archive_fromhere = $archive }
else { $archive_fromhere = "../$archive" }
# Run the unpacking command with its stdout in FILELIST
if (not open(FILELIST, "{ $cmd ; } <$archive_fromhere |")) {
warn "cannot run $cmd: $!, skipping";
$tidy->();
next ARCHIVE;
}
# Process the list of filenames in FILELIST, finding all the
# top-level directory names or files not in a directory.
#
my %b; # Top-level file or directory names found
while () {
print if $v;
chomp;
# Remove ./ and / from the front of filenames
s!^\./+!!;
s!^/+!!;
next if $_ eq '';
# An archive which tries to write into the parent directory
# or root directory is a bad idea. But we don't abort on
# finding such things (after all, they have already been
# unpacked), we just ignore them when deciding whether to
# create a top-level directory.
#
if (m!^\.\./!) {
warn "archive $archive contains file '$_' in parent dir";
next;
}
if (absolute_path($_)) {
warn "archive $archive contains absolute path '$_'";
next;
}
if (m!^(.+?)/!) {
# Inside top-level directory
$b{$1} = 1;
}
else {
# Not inside a directory
$b{$_} = 1;
}
}
# FIXME: At this point we ought to check the return status from
# the command. However many of the commands are pipelines where
# the return status doesn't really indicate success or failure.
# So for the time being, just rely on the list of filenames
# printed.
#
my $num_bases = scalar (keys %b);
if ($num_bases == 0) {
warn "archive $archive is empty or broken";
$tidy->();
return;
}
my $first_base = (keys %b)[0];
my $renamed;
if ($num_bases == 1 && -d $first_base) {
# Everything inside a single directory, $first_base
$renamed = rename_unique($first_base => "../$first_base");
# to be printed later
$renamed =~ s!^../!!;
$tidy->();
}
else {
# Either several bases, or $first_base is not a directory
chdir '..' or die "cannot chdir to ..: $!";
$renamed = rename_unique($dir => $dir_to_create);
}
print STDERR "$archive -> $renamed\n";
}
# rename_unique()
#
# Rename a file, sticking a numeric suffix on the end if necessary to
# make sure renaming is successful (ie if the target file already
# exists).
#
# If after trying lots of suffixes, renaming is still unsuccessful,
# die with an error.
#
# Parameters:
# source filename
# destination filename
#
# Returns:
# actual destination filename used
#
sub rename_unique($$) {
die 'usage: rename_unique(source, dest)' if @_ != 2;
my ($from, $to) = @_;
my $TRIES = 1000; # Max number of tries
for (my $num = 0; $num < $TRIES; $num++) {
my $suffix = $num ? ".$num" : '';
my $dest = "$to$suffix";
# Try another filename if the destination already exists.
# This has some race conditions, but it's better than just
# overwriting all the time.
#
next if -e $dest;
if (rename $from => $dest) {
# Renamed successfully.
return $dest;
}
elsif ($! =~ /^Directory not empty/) {
# Couldn't rename because the destination is a directory
# and isn't empty.
#
next;
}
elsif ($! =~ /^Not a directory/) {
# The destination already exists and is not a directory.
next;
}
elsif ($! =~ /^Device or resource busy/) {
# Just keep trying until it is un-busy
redo;
}
elsif ($! =~ /^No such file or directory/) {
# This is really gross. A bug in Linux's NFS means that
# you can sometimes create a file and then fail to rename
# it, because some cache somewhere hasn't caught up with
# reality. In this case we should try again.
#
# But what of the case where $from really does not exist?
# We don't want to hang retrying for ever.
#
# The 'solution' is to try again, but increment $num so
# that there is some limit to the number of retries. Thus
# a numeric suffix may appear when it isn't needed. I
# don't think you will see this on non-NFS filesystems.
#
warn "'No such file or directory' for $from, retrying";
next;
}
else {
die "cannot rename $from to $dest: unrecognized error: $!";
}
}
die "cannot rename $from to $to.x even with $TRIES suffixes";
}
# archive_info()
#
# From an archive filename, find the basename of the archive (the
# leafname without the extension) and the hash of commands to use on
# it. Return undef if these can't be found.
#
# Parameters:
# filename of archive
#
# Returns:
# (basename, hash) where hash is from archive_types.pl
#
# Returns undef if we don't know about this kind of archive.
#
sub archive_info($) {
die 'usage: archive_info(filename)' if @_ != 1;
local $_ = $_[0];
foreach my $type (keys %TYPES) {
# We'll see if the file's extension matches against the type,
# for example if the type is 'tar.gz', we want to test for a
# filename ending in '.tar.gz' or similar.
#
# Make a regexp to match both dots and underscores, since
# some systems change one to the other.
#
(my $re = ".$type") =~ s/\./[._]/g;
if (m!([^/]+)$re$!) {
# It matched, assume this is the right one.
return ($1, $TYPES{$type});
}
}
return undef;
}