#!/usr/bin/perl -w # # get_picon # # Use the picon search service at Indiana University to locate the # best picon for an SMTP address. Output the URL of a GIF file to # standard output. # # There is a cache to avoid excessive load on Indiana's server; # however this is just a flat text file which grows and grows. You # might like to occasionally delete or trim the file by hand, both to # stop it growing too large, and to force a refresh of cached data. # # Usage: # get_picon user@host # # Note that the address given must be in simple user@host form; the # full definition of an address in RFC822 is not supported. # # If you don't know what all this is about, see # . # # -- Ed Avis, epa98@doc.ic.ac.uk, 1999-11-05 # ######## # Configuration # The URL of the picon search service. Appending ?user@host to # this gets a page of possible matches. Try it in your web browser! # my $SEARCH_PREFIX = 'http://www.cs.indiana.edu:800/piconsearch'; # Your 'picon cache'. This will be a file which caches past requests # to avoid overloading the Indiana server. This file will be created # if it doesn't already exist - but make sure you change the filename # to something sensible. On Unix, something in your home directory # would be a good choice; on NT, perhaps a file called 'cache' in the # same directory as this script. # # I'm sure there is a proper way to do this, with a local picon # database or something, but I don't know what it is. # my $CACHE = "$ENV{HOME}/piconcache"; ######## # End of configuration use LWP::Simple; use HTML::TokeParser; use Fcntl qw(:flock); use diagnostics; use strict; my $addr; if (@ARGV != 1) { die "usage: $0 " . 'USER@HOST'; } else { $addr = $ARGV[0]; die 'bad address, should be in form USER@HOST: ' . $addr if $addr !~ /^.+@.+/; } if (not -e $CACHE) { # Create a new, empty, cache file. There is no race condition # here, because this open() call will create the file if # necessary, but won't truncate an existing file. # open(CACHE, ">> $CACHE") or die "can't append to $CACHE: $!"; close CACHE; } # Read the cache file into %cache. my %cache; open(CACHE, "< $CACHE") or die "can't open $CACHE: $!"; flock(CACHE, LOCK_SH()); while () { chomp; m!((?: .+) @ (?: .+)) \s* : \s* ((?: http | ftp) :// .+)!x or die "$CACHE: $.: bad cache entry: $_"; # We don't worry about duplicates, because the cache grows # 'organically', by appending. So it's possible that two people # both appended the same entry to the file. # #die "$CACHE: $.: duplicate cache entry for $1: $_\n" # if defined($cache{$1}); $cache{$1} = $2; } close(CACHE); if (defined $cache{$addr}) { # Already in cache. print "$cache{$addr}\n"; } else { # Go and ask the server. my $url = "$SEARCH_PREFIX?$addr"; my $page = get($url) or die "can't get page $url: $!"; my $p = HTML::TokeParser->new(\$page) or die "cannot parse HTML $page" . "\n" . "error is: $!"; # Look for the first link with title 'gif'. This will be the # closest match and the picture we want. # my $found = 0; while (my $token = $p->get_tag("a")) { if ($p->get_trimmed_text("/a") eq 'gif') { my $link = $token->[1]->{'href'}; print "$link\n"; $found = 1; # Add this entry to the cache. Open the file, lock it, # and then append this entry to the end. # open(CACHE, ">> $CACHE") or die "can't append to $CACHE: $!"; flock(CACHE, LOCK_EX()); print CACHE "$addr: $link\n"; close CACHE; last; } } if (not $found) { # Fallback - this should never happen. warn "Failed to find any 'gif' link in $url"; print "http://www.cs.indiana.edu" . "/picons/db/unknown/MISC/unknown/face.gif\n"; } }