#!/usr/bin/perl -w
#
# get_picon
#
# Use the picon search service at Indiana University to locate the
# best picon for an SMTP address. Output the URL of a GIF file to
# standard output.
#
# There is a cache to avoid excessive load on Indiana's server;
# however this is just a flat text file which grows and grows. You
# might like to occasionally delete or trim the file by hand, both to
# stop it growing too large, and to force a refresh of cached data.
#
# Usage:
# get_picon user@host
#
# Note that the address given must be in simple user@host form; the
# full definition of an address in RFC822 is not supported.
#
# If you don't know what all this is about, see
# .
#
# -- Ed Avis, epa98@doc.ic.ac.uk, 1999-11-05
#
########
# Configuration
# The URL of the picon search service. Appending ?user@host to
# this gets a page of possible matches. Try it in your web browser!
#
my $SEARCH_PREFIX = 'http://www.cs.indiana.edu:800/piconsearch';
# Your 'picon cache'. This will be a file which caches past requests
# to avoid overloading the Indiana server. This file will be created
# if it doesn't already exist - but make sure you change the filename
# to something sensible. On Unix, something in your home directory
# would be a good choice; on NT, perhaps a file called 'cache' in the
# same directory as this script.
#
# I'm sure there is a proper way to do this, with a local picon
# database or something, but I don't know what it is.
#
my $CACHE = "$ENV{HOME}/piconcache";
########
# End of configuration
use LWP::Simple;
use HTML::TokeParser;
use Fcntl qw(:flock);
use diagnostics;
use strict;
my $addr;
if (@ARGV != 1) {
die "usage: $0 " . 'USER@HOST';
}
else {
$addr = $ARGV[0];
die 'bad address, should be in form USER@HOST: ' . $addr
if $addr !~ /^.+@.+/;
}
if (not -e $CACHE) {
# Create a new, empty, cache file. There is no race condition
# here, because this open() call will create the file if
# necessary, but won't truncate an existing file.
#
open(CACHE, ">> $CACHE") or die "can't append to $CACHE: $!";
close CACHE;
}
# Read the cache file into %cache.
my %cache;
open(CACHE, "< $CACHE") or die "can't open $CACHE: $!";
flock(CACHE, LOCK_SH());
while () {
chomp;
m!((?: .+) @ (?: .+)) \s* : \s* ((?: http | ftp) :// .+)!x
or die "$CACHE: $.: bad cache entry: $_";
# We don't worry about duplicates, because the cache grows
# 'organically', by appending. So it's possible that two people
# both appended the same entry to the file.
#
#die "$CACHE: $.: duplicate cache entry for $1: $_\n"
# if defined($cache{$1});
$cache{$1} = $2;
}
close(CACHE);
if (defined $cache{$addr}) {
# Already in cache.
print "$cache{$addr}\n";
}
else {
# Go and ask the server.
my $url = "$SEARCH_PREFIX?$addr";
my $page = get($url) or die "can't get page $url: $!";
my $p = HTML::TokeParser->new(\$page)
or die "cannot parse HTML $page" . "\n" . "error is: $!";
# Look for the first link with title 'gif'. This will be the
# closest match and the picture we want.
#
my $found = 0;
while (my $token = $p->get_tag("a")) {
if ($p->get_trimmed_text("/a") eq 'gif') {
my $link = $token->[1]->{'href'};
print "$link\n";
$found = 1;
# Add this entry to the cache. Open the file, lock it,
# and then append this entry to the end.
#
open(CACHE, ">> $CACHE")
or die "can't append to $CACHE: $!";
flock(CACHE, LOCK_EX());
print CACHE "$addr: $link\n";
close CACHE;
last;
}
}
if (not $found) {
# Fallback - this should never happen.
warn "Failed to find any 'gif' link in $url";
print "http://www.cs.indiana.edu" .
"/picons/db/unknown/MISC/unknown/face.gif\n";
}
}