#!/usr/bin/env perl

use strict;
use warnings;
use Getopt::Long;
use Data::Dump qw(dump);
use File::Find;
use File::Spec;
use Cwd qw(realpath);

# In DUC terminology, a model is the human (reference) summary.
# A peer is the summary to score.

# get the total number of arguments.
my $totalArguments = @ARGV;

# usage line shared by the help output and the bad-option error below.
my $usage = "-t testDirectory -r referenceDirectory -o outputFile -j \n";

# dump the help message if no arguments, and exit.
if ($totalArguments == 0)
{
  print $usage;
  exit 0;
}

# defaults for the test and reference directories, the output (config) file
# and the jackknifing switch; each can be overridden on the command line.
my $testDirectory = '/g01/conundrum/corpora/duc02/testingSingleSum/peers/peer-trsb';
my $referenceDirectory = '/g01/conundrum/corpora/duc02/testingSingleSum/models';
my $configFile = '/home/jmkubin/tmp/config.txt';
my $jackknifing = 0;

# GetOptions returns false when it meets an unknown or malformed option; stop
# here instead of silently continuing with the default paths above.
if (! GetOptions ("t=s" => \$testDirectory, "r=s" => \$referenceDirectory, "o=s" => \$configFile, "j" => \$jackknifing))
{
  die "invalid command line options.\nusage: $usage";
}

# the reference directory must exist before it can be scanned.
die "reference directory '$referenceDirectory' does not exist.\n"
  unless -d $referenceDirectory;

# resolve the directory to its real path and collect the reference files.
$referenceDirectory = realpath ($referenceDirectory);
my $listOfReferenceFiles = getListOfReferenceFiles ($referenceDirectory);

# group the reference files by document id: the upper-cased part of the
# file name before its first dot, or the whole file name when that part
# is empty.
my %listOfReferenceSummaries;
for my $referenceFile (@$listOfReferenceFiles)
{
  # only the last path component is needed.
  my $filename = (File::Spec->splitpath ($referenceFile))[2];

  my $docId = ($filename =~ /^([^\.]+)/) ? uc $1 : $filename;

  # push creates the per-document list on first use.
  push @{$listOfReferenceSummaries{$docId}}, $referenceFile;
}

# make sure the test directory of summaries exists, mirroring the check done
# for the reference directory, so a mistyped -t path is reported up front
# instead of surfacing later inside realpath/find.
if (! -d $testDirectory)
{
  die "test directory '$testDirectory' does not exist.\n";
}

# get the list of test files; the last component of the resolved directory
# path is used as the name (hash key) of the test system.
$testDirectory = realpath ($testDirectory);
my (undef, undef, $testName) = File::Spec->splitpath ($testDirectory);
my $listOfTestFiles = getListOfTestFiles ($testDirectory);

# foreach test file get the corresponding reference files.
my @listOfEvalInfo;
foreach my $testFile (@$listOfTestFiles)
{
  # only the basename is needed to derive the document id.
  my (undef, undef, $basename) = File::Spec->splitpath ($testFile);

  # the document id is the leading "D<digits>_<name>" part of the basename,
  # up to (not including) the first dot, compared in upper case.
  next unless $basename =~ /^(D\d+\_[^\.]+)/i;
  my $docId = uc $1;

  # test files without any reference summary cannot be evaluated.
  next unless exists $listOfReferenceSummaries{$docId};

  # NOTE(review): reference files that share a model id overwrite each other
  # in this map; confirm model ids are unique per document.
  my %evaluation;
  $evaluation{evalid} = $docId;
  $evaluation{testfiles} = {$testName => $testFile};
  $evaluation{referenceFiles} = {map {(getModelId ($_), $_)} @{$listOfReferenceSummaries{$docId}}};
  push @listOfEvalInfo, \%evaluation;
}

# pick the writer matching the -j switch and produce the config file.
my $writeConfig = $jackknifing
  ? \&writeRougeJackknifeConfigFile
  : \&writeRougeConfigFile;
$writeConfig->(\@listOfEvalInfo, $configFile);

exit;


# Writes a ROUGE (version 1.5.5) XML configuration file.
#
# Arguments:
#   $listOfEvalInfo - array reference of evaluations; each evaluation is a
#                     hash reference with the keys
#                       evalid         => value of the EVAL ID attribute,
#                       testfiles      => hash reference, id => peer file path,
#                       referenceFiles => hash reference, id => model file path.
#   $outputFile     - path of the file to create (an existing file is
#                     overwritten).
#
# Peers and models are written sorted by their hash key so that the same
# input always yields the same file. A reference file that is also listed
# as a test file of the same evaluation is not written as a model.
#
# Dies if the output file cannot be opened or closed.
sub writeRougeConfigFile
{
  my ($listOfEvalInfo, $outputFile) = @_;
  my $el = "\n";

  # open the output file for writing.
  open (my $fhOut, '>', $outputFile)
    or die "could not open file '$outputFile' for writing: $!\n";

  print {$fhOut} '<ROUGE_EVAL version="1.5.5">' . $el;

  foreach my $evaluation (@$listOfEvalInfo)
  {
    my $testFiles = $evaluation->{testfiles} || {};
    my $referenceFiles = $evaluation->{referenceFiles} || {};

    # print the eval id.
    print {$fhOut} '<EVAL ID="' . $evaluation->{evalid} . '">' . $el;

    # print the model (reference) root directory; all paths written below
    # are used as given, relative to the root "/".
    print {$fhOut} '<MODEL-ROOT>/' . $el;
    print {$fhOut} '</MODEL-ROOT>' . $el;

    # print the peer (test) root directory.
    print {$fhOut} '<PEER-ROOT>/' . $el;
    print {$fhOut} '</PEER-ROOT>' . $el;

    # print the text input format.
    print {$fhOut} '<INPUT-FORMAT TYPE="SPL">' . $el . '</INPUT-FORMAT>' . $el;

    # write the test files.
    print {$fhOut} '<PEERS>' . $el;
    foreach my $id (sort keys %$testFiles)
    {
      my $testFile = $testFiles->{$id};
      my $ident = getPeerId ($testFile);
      print {$fhOut} '<P ID="' . $ident . '">' . $testFile . '</P>' . $el;
    }
    print {$fhOut} '</PEERS>' . $el;

    # look-up of the test file paths. A hash is used instead of scanning
    # the test files with each() for every reference file: leaving an
    # each() loop early does not reset the hash iterator, so the next scan
    # would resume in the middle of the hash and could miss a match.
    my %isTestFile = map {($_ => 1)} values %$testFiles;

    # write the reference files, skipping those that are also test files.
    print {$fhOut} '<MODELS>' . $el;
    foreach my $id (sort keys %$referenceFiles)
    {
      my $referenceFile = $referenceFiles->{$id};
      next if $isTestFile{$referenceFile};
      my $ident = getModelId ($referenceFile);
      print {$fhOut} '<M ID="' . $ident . '">' . $referenceFile . '</M>' . $el;
    }
    print {$fhOut} '</MODELS>' . $el;

    print {$fhOut} '</EVAL>' . $el;
  }

  print {$fhOut} '</ROUGE_EVAL>' . $el;

  # buffered write errors (e.g. a full disk) only show up at close.
  close ($fhOut)
    or die "could not close file '$outputFile': $!\n";
}


# Expands each evaluation into its jackknife evaluations and writes them
# with writeRougeConfigFile.
#
# Arguments:
#   $listOfEvalInfo - array reference of evaluations (hash references with
#                     the keys 'testfiles' and 'referenceFiles', both hash
#                     references mapping an id to a file path).
#   $outputFile     - path of the config file, passed on unchanged.
#
# For an evaluation with more than one reference file, one new evaluation is
# produced per reference: that reference is added to the test files (under
# its reference id) and removed from the reference files. An evaluation with
# at most one reference file is copied as is. The new evaluations get a
# running number, starting at 0, as their 'evalid'. References are visited
# sorted by id so the numbering is the same on every run. The input
# evaluations are not modified.
sub writeRougeJackknifeConfigFile
{
  my ($listOfEvalInfo, $outputFile) = @_;

  my $evalCount = 0;
  my @listOfJackknifeEvalInfo;

  foreach my $evaluation (@$listOfEvalInfo)
  {
    my $referenceFiles = $evaluation->{referenceFiles};
    my @referenceIds = sort keys %$referenceFiles;

    if (@referenceIds > 1)
    {
      # add the jackknifes for each reference.
      foreach my $id (@referenceIds)
      {
        # the held-out reference ($id, file) joins the test files ...
        my %testFiles = %{$evaluation->{testfiles}};
        $testFiles{$id} = $referenceFiles->{$id};

        # ... and is removed from a copy of the reference files.
        my %remainingReferenceFiles = %$referenceFiles;
        delete $remainingReferenceFiles{$id};

        my %newEval;
        $newEval{evalid} = $evalCount++;
        $newEval{testfiles} = \%testFiles;
        $newEval{referenceFiles} = \%remainingReferenceFiles;
        push @listOfJackknifeEvalInfo, \%newEval;
      }
    }
    else
    {
      # nothing to hold out; keep a copy of the evaluation under a new id.
      my %newEval;
      $newEval{evalid} = $evalCount++;
      $newEval{testfiles} = {%{$evaluation->{testfiles}}};
      $newEval{referenceFiles} = {%$referenceFiles};
      push @listOfJackknifeEvalInfo, \%newEval;
    }
  }

  writeRougeConfigFile (\@listOfJackknifeEvalInfo, $outputFile);
}


# collects the full paths of all reference summaries below the given
# directories.
{
  # accumulator shared by the File::Find callback and its driver.
  my @foundReferenceFiles;

  # walks the directories passed as arguments and returns an array
  # reference holding a copy of the paths accepted by addReferenceFiles.
  sub getListOfReferenceFiles
  {
    my @startDirectories = @_;
    @foundReferenceFiles = ();
    find ({wanted => \&addReferenceFiles}, @startDirectories);
    my @copyOfFoundFiles = @foundReferenceFiles;
    return \@copyOfFoundFiles;
  }

  # File::Find callback: records the full path of the current entry when it
  # is a plain file whose name ends in ".txt" (any case). returns 1 when
  # the entry was recorded, 0 otherwise.
  sub addReferenceFiles
  {
    if ((-f $_) && ($_ =~ /\.txt$/i))
    {
      push @foundReferenceFiles, $File::Find::name;
      return 1;
    }
    return 0;
  }
}


# collects the full paths of all summaries to test below the given
# directories.
{
  # accumulator shared by the File::Find callback and its driver.
  my @foundTestFiles;

  # walks the directories passed as arguments and returns an array
  # reference holding a copy of the paths accepted by addTestFiles.
  sub getListOfTestFiles
  {
    my @startDirectories = @_;
    @foundTestFiles = ();
    find ({wanted => \&addTestFiles}, @startDirectories);
    my @copyOfFoundFiles = @foundTestFiles;
    return \@copyOfFoundFiles;
  }

  # File::Find callback: records the full path of the current entry when it
  # is a plain file whose name starts with "D", digits and an underscore
  # (any case). returns 1 when the entry was recorded, 0 otherwise.
  sub addTestFiles
  {
    if ((-f $_) && ($_ =~ /^D\d+\_/i))
    {
      push @foundTestFiles, $File::Find::name;
      return 1;
    }
    return 0;
  }
}

# derives the model id from a reference file path: the single character
# between the last two dots of a name ending in ".<c>.txt", otherwise the
# single character after a dot at the very end of the path. the path is
# upper-cased first, so the id is upper case. returns 'model-unknown' when
# neither form matches.
sub getModelId
{
  my ($path) = @_;
  my $upperPath = uc $path;

  my ($idBeforeTxt) = $upperPath =~ /\.(.)\.txt$/i;
  return $idBeforeTxt if defined $idBeforeTxt;

  my ($idAtEnd) = $upperPath =~ /\.(.)$/;
  return $idAtEnd if defined $idAtEnd;

  return 'model-unknown';
}

# derives the peer id from a test file path. the path is lower-cased and
# then checked, in this order, for:
#   - "peers/classy/<digits>/<name>/": <name> is returned (lower case);
#   - a name ending in ".<c>.txt": <c> is returned in upper case;
#   - a path ending in ".<c>": <c> is returned in upper case.
# returns 'peer-unknown' when nothing matches.
sub getPeerId
{
  my ($path) = @_;
  my $lowerPath = lc $path;

  my ($classyName) = $lowerPath =~ /peers\/classy\/\d+\/([^\/]+)\//;
  return $classyName if defined $classyName;

  my ($idBeforeTxt) = $lowerPath =~ /\.(.)\.txt$/i;
  return uc $idBeforeTxt if defined $idBeforeTxt;

  my ($idAtEnd) = $lowerPath =~ /\.(.)$/;
  return uc $idAtEnd if defined $idAtEnd;

  return 'peer-unknown';
}



