#!/usr/bin/perl -w # # outlook_csv_to_mbox # # Convert 'save as CSV' from Outlook to Unix mbox format. This # program currently handles files saved from the English- and # German-language versions of Outlook. # # Part of outlook_text_to_mbox release 1.2. # # This program is in the public domain. Use at your own risk. It has # a web page at # . # # $Id: outlook_csv_to_mbox,v 1.7 2003/02/20 22:12:34 ed Exp $ # # -- Ed Avis, ed@membled.com, 2003-02-20 # use strict; use Text::CSV_XS; use IO::File; use Log::TraceMessages qw(t d); $Log::TraceMessages::On = 1; sub read_contents( $ ); sub fix_addresses( $$@ ); sub write_msg( $$ ); sub german( $ ); @ARGV = ('-') if not @ARGV; foreach (@ARGV) { my $fh = new IO::File($_); die "cannot open $_: $!" if not $fh; my $contents = read_contents $fh; close $fh or warn "cannot close $_: $!"; if (not defined $contents) { warn "unable to read $_ as CSV, skipping\n"; next; } # We need to work out what language the column headers are in. We # look for the presence of one particular header and assume that # if it is there, all the others will be of the same language. # # This code isn't intended to be biased towards English, rather, # the column headers should be treated as machine-readable # strings. It just happens that the English-language version of # Outlook uses column names that correspond fairly closely with # Internet mail header names. (Thank goodness.) The German # version is not so sensible. # # The subroutine should take a list of hashrefs and change the # keys if necessary. # my %translate = (Subject => { people => { To => 'To', From => 'From', CC => 'cc', BCC => 'bcc' }, subheadings => [ qw(Address Name Type) ], subject => 'Subject', unknown => [ 'Billing Information', 'Categories', 'Importance', 'Mileage', 'Sensitivity' ], body => 'Body' }, Betreff => { people => { An => 'To', Von => 'From', CC => 'cc', BCC => 'bcc' }, subheadings => [ qw(Adresse Name Typ) ], subject => 'Betreff', unknown => [ qw(Abrechnungsinformationen Kategorien Wichtigkeit Reisekilometer Vertraulichkeit) ], body => 'Text' }, ); my $seen; foreach my $m (@$contents) { foreach (keys %translate) { next if not exists $m->{$_}; die "seen both $seen and $_ columns, can't guess language\n" if defined $seen and $seen ne $_; $seen = $_; } } if (not defined $seen) { my $sought = join(', ', sort keys %translate); die "did not see any column in ($sought), cannot understand file\n"; } use vars '%strs'; local *strs = $translate{$seen}; fix_addresses($strs{people}, $strs{subheadings}, @$contents); foreach (@$contents) { # Subject. my $subj = delete $_->{$strs{subject}}; if (not defined $subj) { warn "message without $strs{subject}, skipping\n"; } if (exists $_->{Subject}) { die "messages have both $strs{subject} and Subject, giving up\n"; } $_->{Subject} = $subj; # Headers we don't know how to write. foreach my $h (@{$strs{unknown}}) { delete $_->{$h}; } # Body. my $body = delete $_->{$strs{body}}; if (not defined $body) { warn "message with no body ($strs{body})"; $body = ''; } write_msg($_, $body); } } # Read a CSV file and return as a listref of hashrefs. sub read_contents( $ ) { my $fh = shift; my $csv = new Text::CSV_XS({ binary => 1 }); # First line gives column headings. my $cols = $csv->getline($fh); if (not @$cols) { warn 'no column headings found in file'; return undef; } my @headings = @$cols; my @r; while (not eof $fh and my $got = $csv->getline($fh)) { if (not @$got) { warn 'got line with no columns, skipping'; next; } if (@$got > @headings) { warn 'got line with more columns than num of headings, skipping'; next; } my %l; # Treat empty columns as undef. @l{@headings[0 .. $#$got]} = map { length() ? $_ : undef } @$got; push @r, \%l; } return \@r; } # Convert the separate 'X (Address)', 'X (Name)' and 'X (Type)' keys # into SMTP-style 'X'. The particular X that are interesting, you # have to specify, also the strings to look for in parentheses. # # Parameters: # hashref mapping X to their mbox equivalents, eg { CC => 'cc' } # listref of three strings [ 'Address', 'Name', 'Type' ] # zero or more hashrefs, to be modified # sub fix_addresses( $$@ ) { use vars '%people'; local *people = shift; my ($a_str, $n_str, $t_str) = @{shift()}; foreach (@_) { foreach my $p (keys %people) { my $a = delete $_->{"$p: ($a_str)"}; my $n = delete $_->{"$p: ($n_str)"}; my $t = delete $_->{"$p: ($t_str)"}; if (not defined $a and not defined $n and not defined $t) { next; } elsif (defined $a and defined $n and defined $t) { # Okay, will process in just a moment. } else { warn "have some of $a_str, $n_str and $t_str for $p, but not all"; next; } die if not length $a; # expected only nonempty fields my @a = split /;/, $a; my @n = split /;/, $n; my @t = split /;/, $t; if (@a == @n and @a == @t) { # Okay... } else { warn "different numbers of $a_str, $n_str and $t_str for $p"; next; } my @r; foreach (0 .. $#a) { # Keep Name and Address but discard Type. Even if # it's an icky X.400 address we still include it. # push @r, "$n[$_] <$a[$_]>"; } my $new = $people{$p}; if (exists $_->{$new}) { warn "message already has $new field"; next; } $_->{$new} = join(', ', @r); } } } # Write in Unix mbox format. Unfortunately the date doesn't seem to # be available. # # Parameters: # hashref of headers # message body # # This routine expects the English Outlook headers, the ones which are # similar to Internet mail headers. Any error messages will mention # these column names and not the local ones. # my %warned_unknown; sub write_msg( $$ ) { use vars '%h'; local *h = $_[0]; my $body = $_[1]; # Delimiting From line. Fake the date. warn 'message with no From, not writing' if not defined $h{From}; (my $sender_id = $h{From}) =~ s/\s*<[^>]*>//g; print "From $sender_id Thu Jan 1 00:00:00 1970\n"; # Required headers. foreach my $k (qw(From To)) { my $v = delete $h{$k}; if (not defined $v) { warn "message with no $k"; next; } print "$k: $v\n"; } # Optional headers. foreach my $k (qw(Subject cc bcc)) { my $v = delete $h{$k}; next if not defined $v; print "$k: $v\n"; } # Other headers. foreach (keys %h) { warn "unknown header $_, not writing" unless $warned_unknown{$_}++; } # Body. $body =~ s/\n+//; # FIXME why? # FIXME what about lines beginning 'From'? print "\n$body\n"; }