#!/usr/bin/env perl

use strict;
use warnings;
use Getopt::Long;
use Data::Dump qw(dump);
use File::Find;
use File::Spec;
use Cwd qw(realpath);
use File::Basename qw(basename dirname);
use File::Path qw(mkpath);

# Builds a short name from the last (up to) three components of a
# '/'-separated path by joining them with '-'.
#
# Arguments:
#   $dir - the directory path to derive the name from.
#
# Returns the joined name, e.g. '/x/occams/english/1' gives 'occams-english-1'.
# Empty components (from a leading, trailing or doubled '/') are ignored, and
# a path with fewer than three components yields a correspondingly shorter
# name instead of undefined-value warnings.
sub getName
{
	my $dir = shift;
	$dir = '' unless defined $dir;

	# drop the empty strings that split produces for leading/doubled slashes.
	my @directories = grep { length } split('/', $dir);

	# keep only the last three components when there are more than three.
	splice(@directories, 0, @directories - 3) if @directories > 3;

	return join('-', @directories);
}

# in DUC terminology the "model" is the human (reference) summary and
# the "peer" is the summary to score.

# get the total number of arguments.
my $totalArguments = @ARGV;

# usage text; the ids-to-skip option is advertised as -b because that is the
# name GetOptions accepts below.
my $usage = "-t testDirectory -r referenceDirectory (human) -o outputFile -b idsToSkipFile -j \n";

# dump the help message if no arguments, and exit.
if ($totalArguments == 0)
{
	print $usage;
	exit 0;
}

# get the test and reference directories; the values below are the defaults
# used when the corresponding option is not given.
my $testDirectory      = '/home/jmkubin/wiki/corpora/multiling2015/mms-submits/trunc/occams/english/1';
my $referenceDirectory = '/u04/jmkubin/wiki/corpora/multiling2015/mms-submits/trunc/human/english/1';
my $configFile;
my $jackknifing = 0;
my $idsToSkipFile;

# -s is accepted as an alias of -b since earlier usage text advertised it.
# GetOptions returns false (after warning) on an unknown or malformed option;
# stop instead of running with partially parsed settings.
GetOptions(
	"t=s"   => \$testDirectory,
	"r=s"   => \$referenceDirectory,
	"o=s"   => \$configFile,
	"j"     => \$jackknifing,
	"b|s=s" => \$idsToSkipFile
) or die $usage;

# when no output file is given, derive one from the test directory name and
# report the chosen path.
unless (defined $configFile)
{
	$configFile = '/home/jmkubin/wiki/corpora/multiling2015/mms-submits/configs-rouge/' . getName($testDirectory) . '.cfg';
	print $configFile . "\n";
}

# short names for the reference and test directories; the test name is used
# as the peer id in the generated config.
my $refIdName  = getName($referenceDirectory);
my $testIdName = getName($testDirectory);

# the reference directory of summaries must exist before it is scanned.
die "reference directory '$referenceDirectory' does not exist.\n" unless -d $referenceDirectory;

# resolve the directory to its real path and collect the reference files.
$referenceDirectory = realpath($referenceDirectory);

my $listOfReferenceFiles = getListOfReferenceFiles($referenceDirectory);

# group the reference files in a hash keyed by the id of the original document.
my %listOfReferenceSummaries;
for my $referenceFile (@$listOfReferenceFiles)
{
	# only the file name part of the path is needed to find the document id.
	my (undef, undef, $basename) = File::Spec->splitpath($referenceFile);

	# the id is the upper-cased capture of the first pattern that matches;
	# when none matches, the basename itself is used unchanged.
	my $docId = $basename;
	if (   $basename =~ /^(D\d+\_[^\.]+)/i
		|| $basename =~ /^([a-f\d]+)\_(body|summary)\.txt$/i
		|| $basename =~ /^([^\.]+)/i)
	{
		$docId = uc $1;
	}

	# push creates the per-document list on first use.
	push @{ $listOfReferenceSummaries{$docId} }, $referenceFile;
}

# read in the ids to skip and delete them from %listOfReferenceSummaries.
# the file holds one id per line; a line containing a 32 character hex id is
# reduced to that id. a path that is not a regular file only produces a
# warning, while a file that cannot be opened stops the script.
if (defined $idsToSkipFile)
{
	if (-f $idsToSkipFile)
	{
		# this runs at file scope, so a failure must die rather than return.
		open(my $fh, '<:utf8', $idsToSkipFile)
		  or die "could not open file '$idsToSkipFile' for reading: $!\n";

		while (defined(my $docId = <$fh>))
		{
			chomp $docId;

			# tolerate surrounding whitespace (e.g. a trailing carriage return)
			# and ignore blank lines.
			$docId =~ s/^\s+|\s+$//g;
			next if $docId eq '';

			# ids in %listOfReferenceSummaries are upper case, so the hex id
			# has to be matched in upper case as well.
			$docId = uc $docId;
			$docId = $1 if ($docId =~ /([A-F0-9]{32})/);

			# deleting a key that is not present is harmless.
			delete $listOfReferenceSummaries{$docId};
		}
		close $fh;
	}
	else
	{
		warn "$idsToSkipFile is not a file.\n";
	}
}

# make sure the directory of summaries to test exists, as is done for the
# reference directory; realpath and the directory scan below need it.
if (!-d $testDirectory)
{
	die "test directory '$testDirectory' does not exist.\n";
}

# get the list of test files.
$testDirectory = realpath($testDirectory);
my (undef, undef, $testName) = File::Spec->splitpath($testDirectory);
my $listOfTestFiles = getListOfTestFiles($testDirectory);

# foreach test file get the corresponding reference files.
my @listOfEvalInfo;
foreach my $testFile (@$listOfTestFiles)
{
	# split the file path to get the basename.
	my (undef, undef, $basename) = File::Spec->splitpath($testFile);

	# from the basename get the document id, using the same patterns that
	# were applied to the reference files.
	my $docId;
	if ($basename =~ /^(D\d+\_[^\.]+)/i)
	{
		$docId = uc $1;
	}
	elsif ($basename =~ /^([a-f\d]+)\_(body|summary)\.txt$/i)
	{
		$docId = uc $1;
	}
	elsif ($basename =~ /^([^\.]+)/i)
	{
		$docId = uc $1;
	}

	# skip test files with no id or with no reference summaries.
	next unless defined $docId;
	next unless exists($listOfReferenceSummaries{$docId});

	# one evaluation per test file: the test file is the single peer and the
	# references of the same document are the models, keyed by model id.
	# references that map to the same model id overwrite each other.
	my %evaluation;
	$evaluation{evalid}         = $docId;
	$evaluation{testfiles}      = { $testIdName => $testFile };
	$evaluation{referenceFiles} = { map { (getModelId($_), $_) } @{ $listOfReferenceSummaries{$docId} } };
	push @listOfEvalInfo, \%evaluation;
}

# make sure the directory that will hold the config file exists; the check
# is on the directory, not on the config file itself.
my $configDirectory = dirname $configFile;
mkpath($configDirectory, 0, 0755) unless (-d $configDirectory);

# write the config file, with or without jackknifing of the references.
if ($jackknifing)
{
	writeRougeJackknifeConfigFile(\@listOfEvalInfo, $configFile);
}
else
{
	writeRougeConfigFile(\@listOfEvalInfo, $configFile);
}

# stop here; only subroutine definitions follow.
exit;

# Writes a ROUGE (version 1.5.5) XML configuration file.
#
# Arguments:
#   $listOfEvalInfo - array ref of evaluations; each evaluation is a hash ref
#                     with the keys 'evalid' (id printed on the EVAL element),
#                     'testfiles' (hash ref, peer id => file path) and
#                     'referenceFiles' (hash ref, model id => file path).
#   $outputFile     - path of the config file; it is created or overwritten.
#
# Peers and models are written sorted by id so the output is reproducible.
# A reference file whose path is also one of the evaluation's test files is
# left out of the models.
#
# Dies if the output file cannot be opened or closed.
sub writeRougeConfigFile
{
	my ($listOfEvalInfo, $outputFile) = @_;
	my $el = "\n";

	# open the output file for writing.
	my $fhOut;
	if (!open($fhOut, '>', $outputFile))
	{
		die "could not open file '$outputFile' for writing.\n";
	}

	print {$fhOut} '<ROUGE_EVAL version="1.5.5">' . $el;

	foreach my $evaluation (@$listOfEvalInfo)
	{
		my $testFiles      = $evaluation->{testfiles}      || {};
		my $referenceFiles = $evaluation->{referenceFiles} || {};

		# set of all peer paths. a lookup replaces a nested each() over the
		# peers, whose iterator was left half way by 'last' and so made later
		# references be compared against only part of the peers.
		my %isTestFile = map { ($_ => 1) } values %$testFiles;

		# print the eval id.
		print {$fhOut} '<EVAL ID="' . $evaluation->{evalid} . '">' . $el;

		# print the model (reference) root directory.
		print {$fhOut} '<MODEL-ROOT>/' . $el;
		print {$fhOut} '</MODEL-ROOT>' . $el;

		# print the peer (test) root directory.
		print {$fhOut} '<PEER-ROOT>/' . $el;
		print {$fhOut} '</PEER-ROOT>' . $el;

		# print the text input format.
		print {$fhOut} '<INPUT-FORMAT TYPE="SPL">' . $el . '</INPUT-FORMAT>' . $el;

		# write the test files.
		print {$fhOut} '<PEERS>' . $el;
		foreach my $id (sort keys %$testFiles)
		{
			print {$fhOut} '<P ID="' . $id . '">' . $testFiles->{$id} . '</P>' . $el;
		}
		print {$fhOut} '</PEERS>' . $el;

		# write the reference files, except those that are also test files.
		print {$fhOut} '<MODELS>' . $el;
		foreach my $id (sort keys %$referenceFiles)
		{
			my $referenceFile = $referenceFiles->{$id};
			next if $isTestFile{$referenceFile};
			print {$fhOut} '<M ID="' . $id . '">' . $referenceFile . '</M>' . $el;
		}
		print {$fhOut} '</MODELS>' . $el;

		print {$fhOut} '</EVAL>' . $el;
	}

	print {$fhOut} '</ROUGE_EVAL>' . $el;

	# buffered write errors only show up when the handle is closed.
	close($fhOut) or die "could not close file '$outputFile' after writing.\n";
}

# Expands each evaluation into its jackknife evaluations and writes them with
# writeRougeConfigFile.
#
# Arguments:
#   $listOfEvalInfo - array ref of evaluation hash refs with the keys
#                     'testfiles' and 'referenceFiles' (both hash refs of
#                     id => file path).
#   $outputFile     - path of the config file to write.
#
# An evaluation with more than one reference yields one new evaluation per
# reference, in which that reference is moved from the reference files to the
# test files. An evaluation with at most one reference is copied as is. The
# new evaluations are numbered from 0 in the order they are created, and that
# number replaces the original eval id.
sub writeRougeJackknifeConfigFile
{
	my ($listOfEvalInfo, $outputFile) = @_;

	my @jackknifeEvals;
	my $nextEvalId = 0;

	for my $evaluation (@$listOfEvalInfo)
	{
		my $models = $evaluation->{referenceFiles};

		# nothing to hold out: keep a plain copy of the evaluation.
		if (scalar(keys %$models) <= 1)
		{
			push @jackknifeEvals,
			  {
				evalid         => $nextEvalId++,
				testfiles      => { %{ $evaluation->{testfiles} } },
				referenceFiles => {%$models},
			  };
			next;
		}

		# hold out each reference in turn.
		for my $heldOutId (keys %$models)
		{
			# the held out reference is scored next to the test files...
			my %peers = %{ $evaluation->{testfiles} };
			$peers{$heldOutId} = $models->{$heldOutId};

			# ...and is removed from the references it is scored against.
			my %remainingModels = %$models;
			delete $remainingModels{$heldOutId};

			push @jackknifeEvals,
			  {
				evalid         => $nextEvalId++,
				testfiles      => \%peers,
				referenceFiles => \%remainingModels,
			  };
		}
	}

	writeRougeConfigFile(\@jackknifeEvals, $outputFile);
}

# collects the full paths of all reference summaries (regular files whose
# name ends in .txt, in any letter case) below the given directories.
{
	# filled by addReferenceFiles while File::Find walks the directories.
	my @listOfReferenceFiles;

	# takes the directories to search and returns an array ref holding a
	# copy of the paths that were found.
	sub getListOfReferenceFiles
	{
		my @searchRoots = @_;

		@listOfReferenceFiles = ();
		find(\&addReferenceFiles, @searchRoots);

		my @found = @listOfReferenceFiles;
		return \@found;
	}

	# File::Find callback: records the current entry when it is a regular
	# .txt file. returns 1 when the entry was recorded and 0 otherwise.
	sub addReferenceFiles
	{
		if (-f $_ && $_ =~ /\.txt$/i)
		{
			push @listOfReferenceFiles, $File::Find::name;
			return 1;
		}
		return 0;
	}
}

# collects the full paths of all summaries to test below the given
# directories.
{
	# filled by addTestFiles while File::Find walks the directories.
	my @listOfTestFiles;

	# takes the directories to search and returns an array ref holding a
	# copy of the paths that were found.
	sub getListOfTestFiles
	{
		my @searchRoots = @_;

		@listOfTestFiles = ();
		find(\&addTestFiles, @searchRoots);

		my @found = @listOfTestFiles;
		return \@found;
	}

	# File::Find callback: records the current entry when it is a regular
	# file whose name starts with D<digits>_, ends with CLASSY<digits>, or
	# ends with .txt (all in any letter case). returns 1 when the entry was
	# recorded and 0 otherwise.
	sub addTestFiles
	{
		return 0 unless -f $_;

		my $isTestSummary = $_ =~ /^D\d+\_/i || $_ =~ /CLASSY\d*$/i || $_ =~ /\.txt$/i;
		return 0 unless $isTestSummary;

		push @listOfTestFiles, $File::Find::name;
		return 1;
	}
}

# Derives a peer id from the path of a test (peer) file.
#
# Arguments:
#   $_[0] - the file path; it is lower-cased before any matching.
#
# Returns the id built by the first path pattern that matches. When no
# pattern matches, the single character after a final '.' is returned if the
# path ends that way, and 'no-peer-id' otherwise.
sub getPeerId
{
	my $filePath = lc $_[0];

	# ordered rules: each pairs a path pattern with the code that builds the
	# id from the pattern's captures. the table is built inside the sub
	# because file-level code after the script's 'exit' is never run.
	my @rules = (
		[ qr/\/classy\/stemmed\/(\d\d\d)\/(..)/,                  sub { 'classy-stemmed-' . $_[0] . '-' . $_[1] } ],
		[ qr/\/classy\/unstemmed\/(\d\d\d)\/(..)/,                sub { 'classy-unstemmed-' . $_[0] . '-' . $_[1] } ],
		[ qr/\/classy\/split\/(\d\d\d)\/(..)/,                    sub { 'classy-unstemmed-' . $_[0] . '-' . $_[1] } ],
		[ qr/wikipedia_fa_txt\/(..)\/summary/,                    sub { 'human-summary-unstemmed-' . $_[0] } ],
		[ qr/wikipedia-fa\/splitsummstxt\/(..)/,                  sub { 'human-summary-stemmed-' . $_[0] } ],
		[ qr/oracles\/([^\/]+)\/([^\/]+)\/(..)/,                  sub { 'oracle-' . $_[1] . '-' . $_[2] . '-' . $_[0] } ],
		[ qr|/stemmed/wikipedia-fa/splitbodiestype1/(..)|,        sub { 'lead-stemmed-' . $_[0] } ],
		[ qr|/unstemmed/wikipedia-fa/splitbodiestype1/(..)|,      sub { 'lead-unstemmed-' . $_[0] } ],
		[ qr|wikipedia_fa_type1_txt/(..)/body|,                   sub { 'lead-unstemmed-' . $_[0] } ],
		[ qr|unstemmed_stemmed\/(\d\d\d)\/|,                      sub { 'unstemmed-stemmed-' . $_[0] } ],
	);

	foreach my $rule (@rules)
	{
		my ($pattern, $buildId) = @$rule;

		# every pattern has at least one capture group, so an empty list
		# means the pattern did not match.
		my @captures = $filePath =~ $pattern;
		next unless @captures;

		return $buildId->(@captures);
	}

	# fall back to a one character extension.
	if ($filePath =~ /\.(.)$/)
	{
		return $1;
	}

	return 'no-peer-id';
}

# Derives a model id from the path of a reference (model) file.
#
# Arguments:
#   $_[0] - the file path; it is lower-cased for the path patterns, while the
#           fallback on the file name uses the path as given.
#
# Returns the id built by the first path pattern that matches. When no
# pattern matches, 'human-' followed by the second dot-separated part of the
# file name is returned if the name has one, and 'no-model-id' otherwise.
sub getModelId
{
	my $filePath = lc $_[0];

	# ordered rules: each pairs a path pattern with the code that builds the
	# id from the pattern's captures. the table is built inside the sub
	# because file-level code after the script's 'exit' is never run.
	my @rules = (
		[ qr/wikipedia_fa_txt\/(..)\/summary/,                  sub { 'human-summary-unstemmed-' . $_[0] } ],
		[ qr/wikipedia_fa_txt\/(..)\/body/,                     sub { 'human-body-unstemmed-' . $_[0] } ],
		[ qr/wikipedia-fa\/splitsummstxt\/(..)/,                sub { 'human-summary-stemmed-' . $_[0] } ],
		[ qr/wikipedia-fa\/splitbodiestxt\/(..)/,               sub { 'human-body-stemmed-' . $_[0] } ],
		[ qr/oracles\/([^\/]+)\/([^\/]+)\/(..)/,                sub { 'oracle-' . $_[1] . '-' . $_[2] . '-' . $_[0] } ],
		[ qr|/stemmed/wikipedia-fa/splitsummstype1/(..)|,       sub { 'human-stemmed-' . $_[0] } ],
		[ qr|/unstemmed/wikipedia-fa/splitsummstype1/(..)|,     sub { 'human-unstemmed-' . $_[0] } ],
		[ qr|wikipedia_fa_type1_txt/(..)/summary|,              sub { 'human-summary-unstemmed-' . $_[0] } ],
	);

	foreach my $rule (@rules)
	{
		my ($pattern, $buildId) = @$rule;

		# every pattern has at least one capture group, so an empty list
		# means the pattern did not match.
		my @captures = $filePath =~ $pattern;
		next unless @captures;

		return $buildId->(@captures);
	}

	# fall back to the part of the file name after its first dot; the
	# original letter case of the name is kept.
	my $name = basename($_[0]);
	if ($name =~ m/^[^\.]+\.([^\.]+)/)
	{
		return 'human-' . $1;
	}

	return 'no-model-id';
}

