head 1.4; access; symbols epa1:1.1.1.1 epa:1.1.1; locks; strict; comment @# @; 1.4 date 2002.09.10.08.21.27; author edward; state Exp; branches; next 1.3; 1.3 date 2002.09.10.08.03.39; author edward; state Exp; branches; next 1.2; 1.2 date 2002.09.10.05.33.06; author edward; state Exp; branches; next 1.1; 1.1 date 2002.09.10.03.15.02; author edward; state Exp; branches 1.1.1.1; next ; 1.1.1.1 date 2002.09.10.03.15.02; author edward; state Exp; branches; next ; desc @@ 1.4 log @Documentation changes. @ text @package Sort::Merge; use strict; require Exporter; use vars '@@ISA'; @@ISA = qw(Exporter); use vars '@@EXPORT'; @@EXPORT = qw(mergesort); use vars '$VERSION'; $VERSION = '0.1'; =head1 NAME Sort::Merge - Perl extension for merge sort =head1 SYNOPSIS use Sort::Merge; my @@a = sort qw(gamma beta alpha); my @@b = sort qw(banana apple clementine); my @@merged = mergesort(\@@a, \@@b); =head1 DESCRIPTION This module exports one function, C, which merges some already-sorted lists into one sorted list. The arguments are references to the lists to be merged, except that if the first argument is a subroutine reference it is used as the comparison function. Unlike PerlE<39>s built-in C, the comparison function should take two arguments in @@_, I the magic variables C<$a> and C<$b>. If the first argument is not a subroutine reference, then the default comparison function C<{ $_[0] cmp $_[1] }> is used. Remember that the input lists must already be sorted with respect to the comparison used. If they are not sorted then the output is undefined. =head1 PERFORMANCE It is always possible to get the same output by concatenating the input lists and then using C. Using merge sort gives fewer calls to the comparison function, but there is greater overhead because the sort routine itself is written in Perl not C. Whether merge sort or ordinary C is a performance gain depends on how big your lists are and how long the comparison function takes. With simple comparisons like C=E<62>> or C it appears that ordinary sorting is quicker: or at least that the large input sizes which would give C an advantage are so big I didnE<39>t have time to benchmark them. On the other hand, for a fairly complex comparison function I halved the run time of a sorting program by switching. I just guessed at a suitable algorithm for n-way merge sort (as opposed to just merging two lists); I donE<39>t know that it is optimal. I believe the time complexity is B((I + I)(log I))>, where I is the number of input lists and I is the maximum length of an input list - but I could have calculated that wrongly. A reimplementation of this module as a C extension would run faster (although the time complexity would be of the same order, of course) and hopefully compare well with the builtin C. =head1 AUTHOR Ed Avis, Eed@@membled.comE =head1 SEE ALSO L. =cut # n-way merge sort by keeping a pool of the lowest element from each # input list, outputting the smallest, and inserting a replacement # element (from the same input list) with insertion sort. # # This has time complexity O((m + n)(log n)), where n is the number of # input lists and m is the maximum length of an input list. That's a # lot better than Omega(mn(log mn)) you'd get by concatenating and # sorting, but I don't know if it's the best possible. # sub mergesort( @@ ) { my @@r; my $f = shift; if (not defined $f or not ref($f) or ref($f) ne 'CODE') { unshift @@_, $f; $f = sub { $_[0] cmp $_[1] }; } my @@l = @@_; # We don't want to modify the input lists by shift()ing them, so # we keep track of 'current position' instead. @@pos contains the # index of the new 'first element'. # my @@pos = map { 0 } @@l; my $nonempty = sub ( $ ) { $pos[$_[0]] < @@{$l[$_[0]]} }; my $shift = sub( $ ) { $l[$_[0]]->[$pos[$_[0]]++] }; # The pool contains tuples of [ element, list it came from ]. We # extend $f to these tuples. # my $f_fst = sub( $$ ) { $f->($_[0]->[0], $_[1]->[0]) }; my @@pool = sort $f_fst map { [ $shift->($_), $_ ] } grep { $nonempty->($_) } (0 .. $#l); foreach (@@pool) { die if not defined $_->[1] } foreach (@@pool) { die if not @@$_ }; while (@@pool) { my ($e, $from) = @@{shift @@pool}; die if not defined $from; push @@r, $e; if ($nonempty->($from)) { foreach (@@pool) { die if not defined $_->[1] } foreach (@@pool) { die if not @@$_ }; insert($f_fst, \@@pool, [ $shift->($from), $from ]); foreach (@@pool) { die if not defined $_->[1] } foreach (@@pool) { die if not @@$_ }; } } return @@r; } sub insert( $$$ ) { my ($f, $list, $new) = @@_; my ($l, $h) = (0, scalar @@$list); while ($l != $h) { my $mid = int(($l + $h) / 2); die if $mid >= @@$list; my $cmp = $f->($list->[$mid], $new); if ($cmp < 0) { # List element at $mid lt new element. $l = $mid + 1; } elsif ($cmp == 0) { # List element at $mid eq new element. $l = $h = $mid; } elsif ($cmp > 0) { # List element at $mid gt new element. $h = $mid; } else { die } } splice @@$list, $l, 0, ($new); } 1; @ 1.3 log @Removed trace statements; added documentation. @ text @d25 4 a28 3 already-sorted lists into one sorted list. The first argument to C is the comparison function to use, with the remaining arguments being references to the lists to be merged. d35 4 d46 1 a46 1 big your data structures are and how long the comparison function takes. d48 6 a53 5 With simple numeric comparisons it appears that ordinary sorting is quicker: or at least that the large input sizes which would give C an advantage are so big I didnE<39>t have time to benchmark them. On the other hand, for a fairly complex comparison function I halved the run time of a sorting program by switching. d61 3 a63 2 A reimplementation of merge sort as a C extension would get better performance and compare with the builtin C. @ 1.2 log @First version. It appears to work but is not thoroughly tested. This n-way merge sort works by keeping a 'pool' of the lowest element in each input list, and after the lowest element of the pool is removed and output, the pool is topped up with insertion sort. This should have time complexity O((n + m)(log n)), where m is the length of each input list - but I suspect I calculated this wrongly. Anyway, some benchmarks should show whether it runs faster than concatenate-then-sort on some nice big inputs. To implement the merge sort I needed to write an insertion sort - which was the most difficult part of the exercise :-(. @ text @a2 1 use Log::TraceMessages qw(t d); d34 24 a77 1 local $Log::TraceMessages::On = 0; d93 1 a93 4 foreach (0 .. $#l) { t "input $_: " . d $l[$_]; t 'nonempty? ' . d $nonempty->($_); } a104 1 t '\@@pool=' . d \@@pool; a109 3 t 'popped first element from pool: ' . d $e; t "came from input $from"; t 'pool now: ' . d \@@pool; a123 1 local $Log::TraceMessages::On = 0; a124 2 t "list to insert into: " . d $list; t "element to insert: " . d $new; a125 1 t "to start, $l -| $h"; a126 1 t "$l -| $h"; a127 1 t "mid: $mid"; a128 1 t "comparison: $cmp"; a142 3 t "found position $l"; t 'old list: ' . d $list; t 'splicing new element: ' . d $new; a143 1 t 'list now: ' . d $list; @ 1.1 log @Initial revision @ text @a1 2 use 5.006; d3 1 a3 1 use warnings; d6 2 a7 16 use AutoLoader qw(AUTOLOAD); our @@ISA = qw(Exporter); # Items to export into callers namespace by default. Note: do not export # names by default without a very good reason. Use EXPORT_OK instead. # Do not simply export all your public functions/methods/constants. # This allows declaration use Sort::Merge ':all'; # If you do not need this, moving things directly into @@EXPORT or @@EXPORT_OK # will save memory. our %EXPORT_TAGS = ( 'all' => [ qw( ) ] ); our @@EXPORT_OK = ( @@{ $EXPORT_TAGS{'all'} } ); d9 2 a10 13 our @@EXPORT = qw( ); our $VERSION = '0.01'; # Preloaded methods go here. # Autoload methods go after =cut, and are processed by the autosplit program. 1; __END__ # Below is stub documentation for your module. You better edit it! d14 1 a14 1 Sort::Merge - Perl extension for blah blah blah d19 3 a21 1 blah blah blah d25 9 a33 3 Stub documentation for Sort::Merge, created by h2xs. It looks like the author of the extension was negligent enough to leave the stub unedited. d35 1 a35 1 Blah blah blah. d37 1 a37 1 =head2 EXPORT d39 1 a39 1 None by default. d41 1 d43 1 a43 1 =head1 AUTHOR d45 96 a140 1 A. U. Thor, Ea.u.thor@@a.galaxy.far.far.awayE d142 1 a142 1 =head1 SEE ALSO a143 1 L. a144 1 =cut @ 1.1.1.1 log @New Sort::Merge Perl module. Importing skeleton directory created by h2xs. @ text @@