#!/usr/bin/perl -w # # outlook_text_to_mbox # # There are utilities to convert Outlook and Outlook Express's # proprietary .mbx format to a Unix mailbox. But I made the mistake # of exporting messages in 'text format'. This is something similar # to mbox, but not as useful. # # This script reads a 'text format' export from Exchange or Outlook # and converts it to a Unix mbox. # # This program is in the public domain. Use at your own risk. It has # a web page at # . # # The release of the outlook_text_to_mbox package (which also includes # outlook_csv_to_mbox for reading CSV files) is version 1.2. # # $Id: outlook_text_to_mbox,v 1.6 2003/02/22 18:57:04 ed Exp $ # # -- Ed Avis, ed@membled.com, 2002-05-14 # use strict; use Date::Manip; #use Log::TraceMessages qw(t d); Log::TraceMessages::check_argv(); sub write_msgs( @ ); sub name_and_addr( $$ ); my $head; my $body; my @msgs; while (<>) { s/^\s+//; s/\s+$//; if (/^From:\s/) { # Beginning of a new message. push @msgs, [ $head, $body ] if $head; $head = {}; undef $body; } if (defined $head and not defined $body) { # In the header. if (/^From:\s+(.+)/) { if (defined $head->{sender_name}) { warn "$ARGV:$.:From already seen\n"; next; } $head->{sender_name} = $1; if ($head->{sender_name} =~ s/\s+\[(.+?)\]$//) { my $sender_addr = $1; if ($sender_addr !~ tr/@//) { warn "$ARGV:$.:sender address $sender_addr has no @\n"; next; } $head->{sender_addr} = $sender_addr; } } elsif (/^Sent:\s+(.+)/) { if (defined $head->{date}) { warn "$ARGV:$.:Sent already seen\n"; next; } $head->{date} = ParseDate($1); if (not defined $head->{date}) { warn "$ARGV:$.:cannot parse date $1\n"; next; } } elsif (/^To:\s+(.+)/) { if (defined $head->{to_addr} or defined $head->{to_name}) { warn "$ARGV:$.:To already seen\n"; next; } my $to = $1; if ($to =~ tr/@//) { $head->{to_addr} = $to; } else { $head->{to_name} = $to; } } elsif (/^Cc:\s+(.+)/) { if (defined $head->{cc_addr} or defined $head->{cc_name}) { warn "$ARGV:$.:Cc already seen\n"; next; } my @cc_list = split /;\s*/, $1; foreach (@cc_list) { s/^\'//; s/\'$//; # Assume no need to preserve ordering of ccs if (tr/@//) { push @{$head->{cc_addr}}, $_; } else { push @{$head->{cc_name}}, $_; } } } elsif (/^Subject:\s+(.+)/) { if (defined $head->{subject}) { warn "$ARGV:$.:Subject already seen\n"; next; } $head->{subject} = $1; } elsif (not length) { # A blank line means end of headers. $body = ''; } else { warn "$ARGV:$.:bad header line $_\n"; } } elsif (defined $head and defined $body) { # In the body. $body .= "$_\n"; } elsif (not defined $head and not defined $body) { warn "$ARGV:$.:line outside any message\n"; } elsif (not defined $head and defined $body) { die 'got body and no header - huh?'; } else { die } } # Last one. if (not $head and not defined $body) { # No messages at all, seemingly. } elsif (not $head and defined $body) { die 'got body and no header - huh?'; } elsif ($head and not defined $body) { warn "$ARGV:last message has no body\n"; push @msgs, [ $head, '' ]; } elsif ($head and defined $body) { push @msgs, [ $head, $body ]; } else { die } # Output messages in date order. write_msgs(sort { Date_Cmp($a->[0]->{date}, $b->[0]->{date}) } @msgs); sub write_msgs( @ ) { foreach (@_) { my ($h, $body) = @$_; my %head = %$h; # make a copy $body =~ s/\n+$//; my $out = ''; # Delimiting 'From ' line. my $sender_addr = delete $head{sender_addr}; my $sender_name = delete $head{sender_name}; my $sender_id = $sender_addr; $sender_id = $sender_name if not defined $sender_id; if (not defined $sender_id) { warn "message has no sender, skipping\n"; return; } my $date = delete $head{date}; if (not defined $date) { warn "message has no date, skipping\n"; } my $date_id = UnixDate($date, '%c %z'); $out .= "From $sender_id $date_id\n"; my $date_str = UnixDate($date, '%a, ' . (UnixDate($date, '%d') + 0) . ' %b %Y %T %z (%Z)'); $out .= "Date: $date_str\n"; my $from = name_and_addr($sender_name, $sender_addr); $out .= "From: $from\n"; my $to_name = delete $head{to_name}; my $to_addr = delete $head{to_addr}; if (not defined $to_name and not defined $to_addr) { warn "message has no recipient, skipping\n"; return; } my $to = name_and_addr($to_name, $to_addr); $out .= "To: $to\n"; my $subject = delete $head{subject}; if (defined $subject) { $out .= "Subject: $subject\n"; # not mandatory } my @cc; my $cc_name = delete $head{cc_name}; if ($cc_name) { foreach (@$cc_name) { push @cc, $_; } } my $cc_addr = delete $head{cc_addr}; if ($cc_addr) { foreach (@$cc_addr) { push @cc, "<$_>"; } } if (@cc) { $out .= 'Cc: ' . join(', ', @cc) . "\n"; } foreach (keys %head) { warn "unused key $_"; } $out .= "\n$body\n\n"; print $out; } } sub name_and_addr( $$ ) { my ($name, $addr) = @_; if (not defined $name and not defined $addr) { die 'no name or addr given'; } elsif (not defined $addr and defined $name) { return $name; } elsif (defined $addr and not defined $name) { return $addr; } elsif (defined $addr and defined $name) { return "$name <$addr>"; } else { die } }