#!/usr/bin/env perl

use strict;
use warnings;
use Carp qw(cluck croak);
use File::Spec;
use XML::Simple;
use Data::Dump qw(dump);
use Encode;
use Getopt::Long;
use Encode;
use File::Slurp;

# Show the usage summary and stop when the script is run without arguments.
my $totalOptions = @ARGV;
if ($totalOptions == 0)
{
	print "options: -i inputFile -o outputFile -l character-length.\n";
	exit 0;
}

# Process the command line parameters.
#   -i  path of the file to read (required).
#   -o  path of the file to write (required).
#   -l  optional number of leading characters of the input to keep.
my $inputFile;
my $outputFile;
my $length;

# '-l' is declared as 'l=i' so that it consumes the integer that follows it;
# a bare 'l' would be a boolean flag that can only ever set $length to 1.
my $result = GetOptions("i=s" => \$inputFile, "o=s" => \$outputFile, "l=i" => \$length);

# GetOptions warns about each unknown or malformed option and returns false;
# stop here rather than continuing with partially parsed options.
if (!$result)
{
	die "invalid command line options; expected: -i inputFile -o outputFile -l character-length.\n";
}

# Make sure the input file was defined.
if (!defined $inputFile)
{
	die "input file parameter '-i' not defined.\n";
}

# Make sure the input file exists and is a plain file.
if (!-f $inputFile)
{
	die "file '$inputFile' does not exist.\n";
}

# Make sure the output file was defined.
if (!defined $outputFile)
{
	die "output file parameter '-o' not defined.\n";
}

# Read the whole input through the ':utf8' layer and, when a positive
# length was requested, keep only that many leading characters.
my $text = read_file($inputFile, binmode => ':utf8');
if (defined($length) && ($length > 0))
{
	$text = substr($text, 0, $length);
}

# Collapse every run of characters that are neither letters nor digits
# into a single space, then strip the leading and trailing space.
$text =~ s/[^\p{L}\p{N}]+/ /g;
$text =~ s/^ +//;
$text =~ s/ $//;

# Map each remaining character (including the separating spaces) through
# codepoint_hex and write the results out separated by single spaces.
# scalar() keeps the call in scalar context, one value per character.
$text = join(' ', map { scalar(codepoint_hex($_)) } split(//, $text));
write_file($outputFile, { binmode => ':utf8' }, $text);

# Convert a character to the hexadecimal form of its code point.
#
# Parameters:
#   $char - string whose first character is converted.
#
# Returns:
#   The code point in lowercase hexadecimal, zero-padded to at least two
#   digits (for example 'A' gives '41'), or the empty string when the
#   argument is undefined or empty.
sub codepoint_hex
{
	my ($char) = @_;

	# Test definedness and length rather than truth, so that the character
	# '0' is converted instead of being treated as missing input.
	return '' if (!defined($char) || (length($char) == 0));

	# ord() gives the code point of the first character and cannot die, so
	# no eval wrapper is needed. A 'return' placed inside an eval block only
	# leaves the eval, not the subroutine, which is why results must be
	# returned from the subroutine body itself.
	return sprintf('%2.2x', ord($char));
}
