#!/usr/bin/perl
use warnings;
use strict;
package Magic_diamond_fh;

# Set this if you want URIs given on the command line to be fetched.
# I think it could reasonably be turned on by default in a future version.
#
our $Fetch_URIs = 1;

# Internal function returning the type of magic to use for a filename.
# Returns how (plain, 2arg, or uri) and name (usually the original filename,
# but may have trimmed whitespace or other normalization).
#
sub decide_magic {
    local $_ = shift; # perl-5.6 compatibility

    # Warn about magic characters that do something in perl's 2-arg
    # open, but cause writing files or pipes instead of reading.
    #
    if (/\A\s*[|>]/) {
	warn "filename '$_' not treated as magic "
	    . "because we are reading, not writing\n";
	return ('plain', $_);
    }

    # A filename of - always means standard input (even if a file
    # called - exists) following the normal Unix conventions.
    #
    # Otherwise, if the file exists then read it as a plain file.
    # Also if it exists after stripping whitespace.  This is extra
    # DWYM suggested by Tom Christiansen.  It doesn't make things
    # completely safe (think race conditions) but it will avoid some
    # surprises.
    #
  TEST:
    return ('2arg', $_) if $_ eq '-';
    return ('plain', $_) if -e;
    s/\A\s+// && goto TEST;
    s/\s+\Z// && goto TEST;

    # OK, we've trimmed the filename if necessary and it definitely
    # does not exist.  Could it be magic now?
    #
    return ('2arg', $_) if /[|]\Z/; # pipe from a command
    return ('2arg', $_) if /\A</;   # explicit leading <, e.g. <&0

    # Optional URI fetching.
    if ($Fetch_URIs and /\A[a-z]{2,}[:]/) {
	my $ok = eval {
	    require LWP::Simple;
	    require IO::String;
	    1;
	};
	die "cannot load modules needed to fetch URI $_: $@\n" if not $ok;
	return ('uri', $_);
    }

    # Well, it will probably not be found, but try to open it later.
    return ('plain', $_);
}

sub TIEHANDLE {
    my $class = shift;
    my @todo = map { [ decide_magic($_) ] } @_;
    return bless {
	current_fh => undef,
	todo => \@todo,
    }, $class;
}

sub die_read_only { die "the magic diamond filehandle is for reading only\n" }
sub WRITE  { die_read_only }
sub PRINT  { die_read_only }
sub PRINTF { die_read_only }

# Internal function to open a 'file' according to the magic method given.
# Returns a filehandle.  On error, returns undef and tries to set $!.
# (It is not really possible to set $! for an error fetching a web page.)
#
sub do_open {
    my ($how, $name) = @_;
    my ($fh, $ok);
    if ($how eq 'plain') {
	$ok = open $fh, '<', $name;
    }
    elsif ($how eq '2arg') {
	$ok = open $fh, $name;
    }
    elsif ($how eq 'uri') {
	# Slurp the whole page into a string and then use IO::String
	# to make a filehandle interface.  An alternative would be to
	# fetch data lazily as it is read from the filehandle, but
	# that risks the server timing out.
	#
	my $got = LWP::Simple::get $name;
	return undef if not defined $got;
	$fh = IO::String->new($got);
	$ok = 1;
    }
    return undef if not $ok;
    die if not $fh;
    return $fh;
}

# Internal function to return the filehandle to read from.  Either the
# currently open handle, or else open the next file in the list.  If no
# more files to read from, returns undef.
#
# If opening a next file fails, the function warns and moves on to the
# next, the same as perl's builtin -n.
#
sub get_fh {
    my $self = shift;
    for ($self->{current_fh}) {
	return $_ if $_;
	while (@{$self->{todo}}) {
	    my ($how, $name) = @{shift @{$self->{todo}}};
	    $_ = do_open $how, $name;
	    last if $_;
	    warn "Can't open $name with $how: $!\n";
	}
	return $_;
    }
}

# Internal function to close the current filehandle.  Then the next
# call to get_fh will open a new one.  Returns success or failure and
# tries to set $! if needed.
#
# If no current filehandle is open, does nothing.
#
sub close_current_fh {
    my $self = shift;
    for ($self->{current_fh}) {
	return if not $_;
	my $ok = close $_;
	undef $_;
	return $ok;
    }
}

sub READ {
    my $self = shift;
    my $bufref = \$_[0];
    my (undef, $len, $offset) = @_;

    my $accum = '';
    while (length $accum < $len) {
	my $fh = $self->get_fh;
	last if not $fh;
	my $l = length $accum;
	my $n = read $fh, $accum, $len - $l, $l;
	if (not defined $n) {
	    # Some I/O error.  $! is set.  Return what we have so far.
	    last;
	}
	if ($n == 0) {
	    # EOF.
	    my $ok = $self->close_current_fh;
	    last if not $ok; # Mysterious error, $| is set, return what we have
	}
	# Otherwise test whether we have enough characters now.
	# This includes the case where we read fewer than wanted;
	# we will just try to read again and perhaps get 0 for EOF
	# the second time round.
	#
    }

    # Now we must add $accum to the buffer, matching the behaviour of
    # the read builtin.
    #
    my $l = length $$bufref;
    if ($offset < 0) {
	die "Offset outside string\n" if $offset < -$l;
	my $before = substr $$bufref, 0, length($$bufref) + $offset;
	$$bufref = $before . $accum;
    }
    elsif ($offset == 0) {
	$$bufref = $accum;
    }
    elsif ($offset > 0) {
	if ($offset > $l) {
	    $$bufref .= chr(0) . ($offset - $l);
	    $l = length $$bufref;
	    die if $l != $offset;
	}
	my $before = substr $$bufref, 0, $offset;
	$$bufref = $before . $accum;
    }
    else { die "bad offset $offset\n" }
}

sub READLINE {
    my $self = shift;
    while (1) {
	my $fh = $self->get_fh;
	return undef if not $fh; # No files left to read
	my $line = <$fh>;
	return $line if defined $line;
	# <$fh> returned undef.  Assume this means EOF.  TODO.
	$self->close_current_fh or return undef;
    }
}

sub GETC {
    my $self = shift;
    while (1) {
	my $fh = $self->get_fh;
	return undef if not $fh; # No files left to read
	my $char = getc $fh;
	return $char if defined $char;
	# getc returned undef.  Assume this means EOF.  TODO.
	$self->close_current_fh or return undef;
    }
}

sub do_close {
    my $self = shift;
    $self->close_current_fh;
    $self->{todo} = [];
    # Any further calls will return EOF, which is maybe not ideal.
}

sub CLOSE { $_[0]->do_close }
sub UNTIE { $_[0]->do_close }
sub DESTROY { $_[0]->do_close }

package diamond;
tie *::MAGIC_ARGV, 'Magic_diamond_fh', @ARGV;

1;

