#!/usr/bin/perl -w
#
# parr
#
# Try to parallelize a command using xargs(1).  This means to split
# up the argument list, and run two or more processes each taking
# some of the arguments.  This works only for programs that treat
# files given on the command line separately.  So gzip(1) works well
# but tar(1) will not work.
#
# Tries to separate flags (which must be the same for each
# invocation) from filenames and other arguments.  You can also give
# the first argument as a string containing whitespace, in which case
# it will be parsed as a command with flags.
#
# Examples (from sh):
#
#   % parr gzip -dv *.gz
#
# but 'parr gzip -S .z *' won't work because the '.z' is not
# considered part of the options to gzip.  Instead, you should do:
#
#   % parr 'gzip -S .z' *
#
# NOTES:
#   - No shell is involved when starting xargs, and the arguments are
#     handed to it NUL-terminated (xargs -0), so file names containing
#     spaces, quotes or newlines are passed through untouched.
#   - A command string given as the first argument is split into
#     words with shell-like quoting rules (Text::ParseWords), but
#     shell expansions and redirections inside it are NOT interpreted.
#   - Both -P and -0 are extensions to POSIX xargs; the GNU and BSD
#     implementations provide them.
#
# -- Ed Avis, epa98@doc.ic.ac.uk, 1999-11-10
#

use strict;
use warnings;
use Text::ParseWords qw(shellwords);

########
# Configuration
#
# Maximum number of processes to run concurrently.  Should normally
# be the number of CPUs in your machine (but more could be useful if
# you have fast CPU relative to your disk or network).
#
# On NT, we could use the environment variable NUM_CPUS or
# NUMBER_OF_PROCESSORS to get this, but Unix doesn't have an
# equivalent.  Hey!  NT is actually better for something!
#
my $MAX_PROCS = $ENV{'NUM_CPUS'} || $ENV{'NUMBER_OF_PROCESSORS'} || 2;

########
# End of configuration
#

# The process count comes from the environment and ends up on the
# xargs command line, so insist on a plain positive integer.
unless ($MAX_PROCS =~ /\A[1-9][0-9]*\z/) {
    warn "$0: ignoring bad process count '$MAX_PROCS', using 2\n";
    $MAX_PROCS = 2;
}

die "usage: $0 progname args..." if @ARGV == 0;

# Get the program name.
my $progname = shift @ARGV;

# @command holds the program followed by the flags that every
# invocation must receive.
my @command;
if ($progname =~ /\s/) {
    # Program name contains spaces, so it's probably a string like
    # 'grep -i pattern' or something.  We assume that the other
    # arguments must be filenames for this command (or whatever), so
    # there are no other flags.  shellwords() honours quotes and
    # backslashes, and returns an empty list on unbalanced quotes.
    @command = shellwords($progname);
}
else {
    # Try to pick out the flags; they must all be at the front, so
    # stop at the first argument that does not start with '-'.
    @command = ($progname);
    while (@ARGV && $ARGV[0] =~ /^-/) {
        push @command, shift @ARGV;
    }
}
die "$0: cannot parse command '$progname'\n"
    unless @command && length $command[0];

# We need to tell xargs the number of arguments to dole out to each
# process.  We split the argument list fairly evenly, rounding up so
# that at most $MAX_PROCS invocations are needed (rounding down would
# leave a short straggler batch that runs after the others).
#
# FIXME: if some files take much longer than others, this might mean
# that one process finishes long before the other.  Maybe a fudge
# factor here, running several smaller processes, would avoid this
# problem.  But then, it would mean more overhead.
#
my $args_per_ps = int((scalar(@ARGV) + $MAX_PROCS - 1) / $MAX_PROCS);
$args_per_ps = 1 if $args_per_ps < 1;

# Build up the command to run, as a list so that no shell ever
# re-parses the program name or its flags.
my @xargs = ('xargs', '-0', '-P', $MAX_PROCS, '-n', $args_per_ps, @command);

# Run xargs and feed it the non-flag arguments, NUL-terminated.
open(my $pipe, '|-', @xargs)
    or die "cannot pipe to @xargs: $!";
for my $arg (@ARGV) {
    print {$pipe} $arg, "\0"
        or die "cannot write to xargs: $!";
}

# close() on a pipe waits for xargs.  It returns false both for real
# I/O errors ($! set) and when xargs merely exited non-zero ($! is 0
# and $? holds the wait status); pass the latter on to our caller.
unless (close $pipe) {
    die "error closing pipe to xargs: $!" if $!;
    exit(($? >> 8) || 1);
}
exit 0;