#!/usr/bin/perl -w

use strict;
use warnings;
use Data::Dumper;
# Characters accepted as the quote around an AIML attribute value. The string
# is only ever interpolated inside a [...] character class in the patterns
# below, where it matches either a single or a double quote.
our $qq = "\\\'\\\"";

# _trim: returns a copy of its argument with leading and trailing whitespace
# (spaces, tabs, CR, LF) removed.
sub _trim {
	my ($text) = @_;
	$text =~ s/^[\x0d\x0a\s\t]+//s;
	$text =~ s/[\x0d\x0a\s\t]+$//s;
	return $text;
}

# Convert every *.aiml file found in ./Alice into ./RiveScript/<name>.rs.
# Dies if a directory or file cannot be opened; progress goes to STDOUT.
opendir (my $dir, "./Alice") or die "Can't open directory ./Alice: $!";
my @aiml_files = sort(grep(/\.aiml$/i, readdir($dir)));
closedir ($dir);

foreach my $file (@aiml_files) {
	# Read the file under the exact name found on disk (the extension may
	# not be lower-case); only the output name uses the stripped base.
	(my $base = $file) =~ s~\.aiml$~~i;
	my $aiml = parse("./Alice/$file");

	# Debug dump of the parsed structure. It is rewritten for every input
	# file, so afterwards it holds the last file processed.
	open (my $dump, '>', "dump.txt") or die "Can't write dump.txt: $!";
	print {$dump} Dumper($aiml);
	close ($dump) or die "Can't close dump.txt: $!";

	# Process the AIML some more.
	my $rs_path = "./RiveScript/$base.rs";
	open (my $out, '>', $rs_path) or die "Can't write $rs_path: $!";
	foreach my $category (@{$aiml}) {
		my $pattern = $category->{pattern};
		my $that = exists $category->{that} ? $category->{that} : '';
		my $template = $category->{template};

		$pattern = lc($pattern);
		$pattern =~ s~_~*~g; # Convert AIML's _ wildcard to *

		print {$out} "+ $pattern\n";

		if (length $that) {
			$that = lc($that);
			$that =~ s~_~*~g;
			print {$out} "% $that\n";
		}

		# Process the tags in the template.

		# <star>-like tags.
			# <star/>                ==>  <star>
			# <star index="1"/>      ==>  <star1>
			# <thatstar/>            ==>  <botstar>
			# <thatstar index="1"/>  ==>  <botstar1>
			# <that/>                ==>  <reply1>
			# <that index="1"/>      ==>  <reply1>
			# <input/>               ==>  <input1>
			# <input index="1"/>     ==>  <input1>
		# These run before the <think> bodies are stashed away, so that
		# tags inside a <think> are converted too.
		$template =~ s~<star\s*\/*>~<star>~sig;
		$template =~ s~<thatstar\s*\/*>~<botstar>~sig;
		$template =~ s~<star index=[$qq](\d+)[$qq]\s*\/*>~<star$1>~sig;
		$template =~ s~<thatstar index=[$qq](\d+)[$qq]\s*\/*>~<botstar$1>~sig;
		$template =~ s~<that\s*\/*>~<reply1>~sig;
		$template =~ s~<that index=[$qq](\d+)[$qq]\s*\/*>~<reply$1>~sig;
		$template =~ s~<input\s*\/*>~<input1>~sig;
		$template =~ s~<input index=[$qq](\d+)[$qq]\s*\/*>~<input$1>~sig;

		# <get name="x"/>  ==>  <get x>
		# Also done up front so it applies inside <think> and <li> bodies.
		$template =~ s/<get name=[$qq](.+?)[$qq]\s*\/*>/<get $1>/sig;

		# <think> tags.
		my @thought = ();
		print "Processing <think> tags\n";
		while ($template =~ /<think>(.+?)<\/think>/si) {
			# <think> tags are usually used to set variables. In AIML, a
			# <set> tag returns the value it just set; this isn't the case
			# with RiveScript. So let's just strip the think tags and put
			# them into @thought for substitution later. Then, all other
			# <set> tags need to be joined with a <get> to get the same
			# effect within RiveScript.
			my $think = _trim($1);
			$think =~ s/<set name=[$qq](.+?)[$qq]>(.+?)<\/set>/<set $1=$2>/g;
			$think =~ s/[\x0d\x0a]//sig;
			print "<think>$think</think>\n";
			my $i = scalar(@thought);
			push (@thought,$think);
			# Leave a numbered placeholder; it is swapped back further down.
			$template =~ s/<think>(.+?)<\/think>/<think.$i>/si;
		}

		# <condition> tags.
		# Each <li> becomes a RiveScript "* <get var> eq value => text" line
		# written ahead of the reply. Stripping the <li> in the loop
		# condition itself guarantees that the loop makes progress.
		while ($template =~ /<condition>(.+?)<\/condition>/si) {
			my $condition = $1;
			my @cond = ();
			print "Processing <condition>/<li> tags\n";
			while ($condition =~ s/<li name=[$qq](.+?)[$qq] value=[$qq](.+?)[$qq]>(.+?)<\/li>//si) {
				my $var   = $1;
				my $value = $2;
				my $text  = _trim($3);
				push (@cond,"* <get $var> eq $value => $text");
			}

			my $code = join ("\n",@cond);
			print {$out} "$code\n";

			$template =~ s/<condition>(.+?)<\/condition>//si;
		}
		while ($template =~ /<condition name=[$qq](.+?)[$qq]>(.+?)<\/condition>/si) {
			my $var = $1;
			my $condition = $2;
			my @cond = ();

			print "Processing <condition name=\"$var\">/<li> tags\n";
			while ($condition =~ s/<li value=[$qq](.+?)[$qq]>(.+?)<\/li>//si) {
				my $value = $1;
				my $text  = _trim($2);
				push (@cond,"* <get $var> eq $value => $text");
			}

			my $code = join ("\n",@cond);
			print {$out} "$code\n";

			$template =~ s/<condition name=[$qq](.+?)[$qq]>(.+?)<\/condition>//si;
		}

		# <random> tags.
		print "Processing <random> tags\n";
		while ($template =~ /<random>(.+?)<\/random>/si) {
			my $random = $1;
			my @rand = ();
			print "Processing <random>/<li> tags\n";
			while ($random =~ s/<li>(.+?)<\/li>//si) {
				push (@rand,_trim($1));
			}

			# The leading * is a marker; it is turned into a "^"
			# continuation line just before the reply is written.
			my $code = '*{random}'
				. join ("|\n^ ",@rand) . "{/random}";

			$template =~ s/<random>(.+?)<\/random>/$code/si;
		}

		# <sr> and <srai> tags.
			# <sr/>           =>  <@>
			# <srai>*</srai>  =>  {@*}
		$template =~ s/<sr\/*>/<\@>/sig;
		while ($template =~ /<srai>(.+?)<\/srai>/si) {
			my $srai = lc(_trim($1));
			# Replace only the <srai> just matched; a global replace would
			# overwrite every other <srai> with this one's text.
			$template =~ s~<srai>(.+?)</srai>~{\@$srai}~si;
		}

		# <set> tags.
			# <set name="x">y</set>  ==> <set x=y><get x>
		# The <get> reproduces AIML's behaviour of a visible <set> echoing
		# the value it stored. (<get> tags were already converted above.)
		while ($template =~ /<set name=[$qq](.+?)[$qq]>(.+?)<\/set>/si) {
			my $var = $1;
			my $value = _trim($2);
			$template =~ s/<set name=[$qq](.+?)[$qq]>(.+?)<\/set>/<set $var=$value><get $var>/si;
		}

		# Miscellaneous tags.
			# <person/>           ==>  <person>
			# <person2/>          ==>  <person>
			# <person>*</person>  ==>  {person}*{/person}
			# <id/>               ==>  <id>
		$template =~ s~<person\s*\/*>~<person>~sig;
		$template =~ s~<person2\s*\/*>~<person>~sig;
		$template =~ s~<person>(.+?)</person>~\{person\}$1\{\/person\}~sig;
		$template =~ s~<id\s*/*>~<id>~sig;
		$template =~ s~<uppercase>(.+?)</uppercase>~\{uppercase\}$1\{\/uppercase}~sig;
		$template =~ s~<lowercase>(.+?)</lowercase>~\{lowercase\}$1\{\/lowercase}~sig;
		$template =~ s~<formal>(.+?)</formal>~\{formal\}$1\{\/formal\}~sig;
		$template =~ s~<sentence>(.+?)</sentence>~\{sentence\}$1\{\/sentence\}~sig;

		# Substitute the <thinks> back in.
		$template =~ s/<think.(\d+)>/$thought[$1]/sig;

		# Fix any extra newlines left over: a line that does not start with
		# a RiveScript command symbol becomes a "^" continuation line.
		# This step is interactive: it shows each such line before and
		# after trimming and waits for Enter on STDIN both times.
		while ($template =~ /[\x0d\x0a]+([^\+\%\-\*\^]+)(.+?)/si) {
			my $sym = $1;
			my $txt = $2;
			print "Odd line: $sym$txt\n";
			<STDIN>;
			$sym = _trim($sym);
			$txt = _trim($txt);
			print "Now: $sym$txt\n";
			<STDIN>;
			$template =~ s/[\x0d\x0a]+([^\+\%\-\*\^]+)(.+?)/\n\^ $sym$txt/si;
		}
		$template =~ s~\*\{random\}~\n\^ {random\}~sig;

		# Write.
		print {$out} "- $template\n\n";
	}
	close ($out) or die "Can't close $rs_path: $!";
}

# parse: takes an AIML file, returns a data structure.
#
# Arguments:
#   $file - path of the AIML file to read.
#
# Returns an array reference with one hash reference per <category>, in file
# order. Every hash has 'pattern' and 'template' keys; a 'that' key is present
# only when the category contained a non-empty <that>.
#
# Dies if the file cannot be opened. Progress messages are printed to STDOUT.
sub parse {
	my $file = shift;

	open (my $fh, '<', $file) or die "Can't read AIML file $file: $!";
	my @lines = <$fh>;
	close ($fh);
	chomp @lines;

	my $code = join("\n",@lines);

	# Replace every < and > with a sentinel so that one split yields an
	# alternating list of "tag" sections (prefixed ::MLTAG::START%) and
	# "text after a tag" sections (prefixed ::MLTAG::END%).
	$code =~ s~<~%AIML::PARSE::MLTAG::START%~ig;
	$code =~ s~>~%AIML::PARSE::MLTAG::END%~ig;

	my @parts = split(/%AIML::PARSE/, $code);

	my $inPattern  = 0;
	my $inThat     = 0;
	my $inTemplate = 0;
	my $bufPattern = '';
	my $bufThat    = '';
	my $bufTemplate = '';

	my $results = [];
	# Resulting format:
	# $results = [
	#   {
	#      pattern  => 'MY NAME IS *',
	#      template => '<think><set name="name"><star/></set></think>'
	#                  . 'Nice to meet you, <get name="name"/>.',
	#   },
	#   {
	#      pattern  => 'WHAT AM I',
	#      that     => 'ASK ME SOMETHING',   # only if <that> was non-empty
	#      template => 'You are a <condition name="gender">'
	#                  . '<li value="male">boy</li>'
	#                  . '<li value="female">girl</li>'
	#                  . '</condition>.',
	#   },
	# ];

	foreach my $section (@parts) {
		if ($section =~ /^::MLTAG::START%/) {
			# Start of a tag.
			$section =~ s/^::MLTAG::START%//i; # Strip it

			# If we're inside a container tag, ignore other tags: anything
			# but the container's own closing tag is kept verbatim.
			if ($inPattern == 1 && $section !~ /^\/pattern/i) {
				$bufPattern .= "<$section>";
				next;
			}
			if ($inTemplate == 1 && $section !~ /^\/template/i) {
				$bufTemplate .= "<$section>";
				next;
			}

			# Get the name. Either piece may be missing (a tag without
			# attributes, or an empty tag), so default both to ''.
			my ($name,$attr) = split(/\s+/, $section, 2);
			$name = defined $name ? uc($name) : '';
			$attr = '' unless defined $attr;

			# Process the tags.
			if ($name eq 'AIML') {
				print "> AIML $attr\n";
			}
			elsif ($name eq 'CATEGORY') {
				print ">> Found a category\n";
				$bufPattern = '';
				$bufThat    = '';
				$bufTemplate = '';
			}
			elsif ($name eq '/CATEGORY') {
				# End of a category: trim the buffers and store the record.
				my @gather = ();
				if (length $bufThat > 0) {
					$bufThat =~ s/^[\t\s]+//g;
					$bufThat =~ s/[\t\s]+$//g;
					push (@gather, 'that', $bufThat);
				}
				$bufPattern =~ s/^[\t\s]+//g;
				$bufPattern =~ s/[\t\s]+$//g;
				$bufTemplate =~ s/^[\t\s]+//g;
				$bufTemplate =~ s/[\t\s]+$//g;
				push (@gather,'pattern',$bufPattern);
				push (@gather,'template',$bufTemplate);
				push (@{$results}, { @gather });
				print "<< Ended a category\n"
					. "\tPattern: $bufPattern\n"
					. "\t   That: $bufThat\n"
					. "\tTemplte: $bufTemplate\n";
			}
			elsif ($name eq 'PATTERN') {
				$inPattern = 1;
				print ">>> Found a pattern\n";
			}
			elsif ($name eq '/PATTERN') {
				$inPattern = 0;
				print "<<< Ended a pattern\n";
			}
			elsif ($name eq 'THAT') {
				$inThat = 1;
				print ">>> Found a <that>\n";
			}
			elsif ($name eq '/THAT') {
				$inThat = 0;
				print "<<< Ended a <that>\n";
			}
			elsif ($name eq 'TEMPLATE') {
				$inTemplate = 1;
				print ">>> Found a template\n";
			}
			elsif ($name eq '/TEMPLATE') {
				$inTemplate = 0;
				print "<<< Closed a template\n";
			}
			else {
				# Keep this tag.
				if ($inPattern) {
					$bufPattern .= "<$section>";
				}
				elsif ($inThat) {
					$bufThat .= "<$section>";
				}
				elsif ($inTemplate) {
					$bufTemplate .= "<$section>";
				}
			}

			next;
		}
		else {
			# Text following a tag (or the text before the very first tag).
			$section =~ s/^::MLTAG::END%//i; # Strip it

			if ($inPattern) {
				$bufPattern .= "$section";
			}
			elsif ($inThat) {
				$bufThat .= "$section";
			}
			elsif ($inTemplate) {
				$bufTemplate .= "$section";
			}
		}
	}

	return $results;
}
