#!/usr/bin/perl -w
#
# Try compressing two files with zlib, both separately and together,
# and report on how similar they are on a scale of 0 to 1.
#
# Part of similarity-utils version 0.2.2.
#
# Usage:  <this script> file0 file1
# Output: a single line "file0<TAB>file1<TAB>score", where score is
#         printed with three decimal places.
#

use strict;
use warnings;
use Compress::Zlib;

# Return the whole contents of the file named by the first argument as a
# single string of bytes.
#
# Dies with a message naming the file if it cannot be opened, read, or
# closed.
sub read_file {
    my $f = shift;

    # Three-argument open with a lexical handle: the filename is taken
    # literally (no mode characters or whitespace trimming), and ':raw'
    # guarantees we hand plain bytes to compress().
    open(my $fh, '<:raw', $f) or die "cannot open $f: $!";

    # Slurp mode: with $/ undefined, one readline returns the entire file
    # (an empty string for an empty file, undef only on a read error).
    local $/ = undef;
    my $c = <$fh>;
    defined $c or die "cannot read $f: $!";

    close $fh or die "cannot close $f: $!";
    return $c;
}

die "usage: $0 file0 file1\n" if @ARGV != 2;
my ($f0, $f1) = @ARGV;

my ($c0, $c1) = (read_file($f0), read_file($f1));

# Compressed size of each file on its own.
my ($c0l, $c1l) = (length(compress($c0)), length(compress($c1)));
my $separate_l = $c0l + $c1l;

# Compressed size of the two files joined together, averaged over both
# orderings so that the result does not depend on argument order.
my $concat_l
    = (length(compress($c0 . $c1)) + length(compress($c1 . $c0))) / 2;

# How many bytes were saved by compressing the files together rather than
# separately.
my $saving = $separate_l - $concat_l;
$saving = 0 if $saving < 0;    # 'in theory', shouldn't happen

# The saving is normalised against the larger of the two individual
# compressed lengths, which is treated as the maximum possible saving
# 'in theory'.
my $max_saving = $c0l;
$max_saving = $c1l if $max_saving < $c1l;

my $proportion = $saving / $max_saving;
$proportion = 1 if $proportion > 1;    # 'in theory', shouldn't happen

# Filenames are passed as %s arguments rather than interpolated into the
# format string, so a '%' in a filename cannot be read as a conversion.
printf "%s\t%s\t%.3f\n", $f0, $f1, $proportion;