#!/usr/bin/perl -w

use strict;
use warnings;
use XML::Simple;
use Data::Dumper;

# Create begin.rs
open (BEGIN, ">./rs/begin.rs");
print BEGIN <<EOF;
// aiml2rs begin script

> begin
	+ request
	- {ok}
< begin

! var name     = Alice
! var gender   = female
! var master   = Dr. Wallace
! var birthday = a very long time ago
! var birthplace = New York
! var boyfriend  = I am single
! var favoritebook = Artificial Intelligence: A Modern Approach
! var favoritecolor = blue
! var favoriteband  = They Might Be Giants
! var favoritefood  = pizza
! var favoritesong  = Mr. Roboto
! var favoritemovie = A.I.
! var forfun        = I chat with people online
! var friends       = I chat with people online
! var girlfriend    = I am single
! var kindmusic     = electronic
! var location      = New York
! var looklike      = kind of how you think I look
! var question      = ask me a question
! var sign          = saggitarious
! var talkabout     = anything that comes up
! var wear          = kind of what you think I'm wearing
! var website       = alicebot.org
! var email         = alice\@alicebot.org
! var language      = English

// Substitutions
! sub &quot;    = "
! sub &apos;    = '
! sub &amp;     = &
! sub &lt;      = <
! sub &gt;      = >
! sub +         = plus
! sub -         = minus
! sub /         = divided
! sub *         = times
! sub i'm       = i am
! sub i'd       = i would
! sub i've      = i have
! sub i'll      = i will
! sub don't     = do not
! sub isn't     = is not
! sub you'd     = you would
! sub you're    = you are
! sub you've    = you have
! sub you'll    = you will
! sub he'd      = he would
! sub he's      = he is
! sub he'll     = he will
! sub she'd     = she would
! sub she's     = she is
! sub she'll    = she will
! sub they'd    = they would
! sub they're   = they are
! sub they've   = they have
! sub they'll   = they will
! sub we'd      = we would
! sub we're     = we are
! sub we've     = we have
! sub we'll     = we will
! sub whats     = what is
! sub what's    = what is
! sub what're   = what are
! sub what've   = what have
! sub what'll   = what will
! sub can't     = can not
! sub whos      = who is
! sub who's     = who is
! sub who'd     = who would
! sub who'll    = who will
! sub don't     = do not
! sub didn't    = did not
! sub it's      = it is
! sub could've  = could have
! sub couldn't  = could not
! sub should've = should have
! sub shouldn't = should not
! sub would've  = would have
! sub wouldn't  = would not
! sub when's    = when is
! sub when're   = when are
! sub when'd    = when did
! sub y         = why
! sub u         = you
! sub ur        = your
! sub r         = are
! sub n         = and
! sub im        = i am
! sub wat       = what
! sub wats      = what is
! sub ohh       = oh
! sub becuse    = because
! sub becasue   = because
! sub becuase   = because
! sub practise  = practice
! sub its a     = it is a
! sub fav       = favorite
! sub fave      = favorite
! sub yesi      = yes i
! sub yetit     = yet it
! sub iam       = i am
! sub welli     = well i
! sub wellit    = well it
! sub amfine    = am fine
! sub aman      = am an
! sub amon      = am on
! sub amnot     = am not
! sub realy     = really
! sub iamusing  = i am using
! sub amleaving = am leaving
! sub yuo       = you
! sub youre     = you are
! sub didnt     = did not
! sub ain't     = is not
! sub aint      = is not
! sub wanna     = want to
! sub brb       = be right back
! sub bbl       = be back later
! sub gtg       = got to go
! sub g2g       = got to go
! sub lyl       = love you lots
! sub gf        = girlfriend
! sub g/f       = girlfriend
! sub bf        = boyfriend
! sub b/f       = boyfriend
! sub b/f/f     = best friend forever
! sub :-)       = smile
! sub :)        = smile
! sub :d        = grin
! sub :-d       = grin
! sub :-p       = tongue
! sub :p        = tongue
! sub ;-)       = wink
! sub ;)        = wink
! sub :-(       = sad
! sub :(        = sad
! sub :'(       = cry
! sub :-[       = shy
! sub :-\\       = uncertain
! sub :-/       = uncertain
! sub :-s       = uncertain
! sub 8-)       = cool
! sub 8)        = cool
! sub :-*       = kissyface
! sub :-!       = foot
! sub o:-)      = angel
! sub >:o       = angry
! sub :\@        = angry
! sub 8o|       = angry
! sub :\$        = blush
! sub :-\$       = blush
! sub :-[       = blush
! sub :[        = bat
! sub (a)       = angel
! sub (h)       = cool
! sub 8-|       = nerdy
! sub |-)       = tired
! sub +o(       = ill
! sub *-)       = uncertain
! sub ^o)       = raised eyebrow
! sub (6)       = devil
! sub (l)       = love
! sub (u)       = broken heart
! sub (k)       = kissyface
! sub (f)       = rose
! sub (w)       = wilted rose

// Person substitutions
! person i am    = you are
! person you are = I am
! person i'm     = you're
! person you're  = I'm
! person my      = your
! person your    = my
! person you     = I
! person i       = you
EOF

opendir (DIR, "./aiml");
foreach my $file (sort(grep(/\.aiml$/i, readdir(DIR)))) {
	&processAIML("./aiml/$file");
}
closedir (DIR);

sub processAIML {
	my $file = shift;

	open (FILE, $file);
	my @data = <FILE>;
	close (FILE);
	chomp @data;

	# Convert everything inside <pattern></pattern> and
	# <template></template> into CDATA.
	my $aiml = join("\n",@data);
	$aiml =~ s/<(pattern|template|that)>/<$1><![CDATA[/sig;
	$aiml =~ s/<\/(pattern|template|that)>/]]><\/$1>/sig;

	# Load it.
	print "Loading XML data from $file\n";
	my $xml = XMLin($aiml);
	if (!exists $xml->{category} || ref($xml->{category}) ne "ARRAY") {
		warn "ERROR: The file $file didn't parse correctly!\n";
		return;
	}

	my $rs = $file;
	$rs =~ s/\/([A-Za-z0-9]+?)\.aiml$/$1\.rs/ig;
	open (RIVE, ">./rs/$rs") or die "Can't write ./rs/$rs: $!";
	print RIVE "// aiml2rs -- Generated on " . localtime(time()) . "\n";

	# Process the categories.
	print "Processing AIML file $file...\n";
	foreach my $category (@{$xml->{category}}) {
		my $pattern  = $category->{pattern};
		my $template = $category->{template};
		my $that     = exists $category->{that} ? $category->{that} : undef;

		# Remove newlines from these.
		$pattern =~ s/[\x0d\x0a]+//sig;
		$template =~ s/[\x0d\x0a]+//sig;
		if (defined $that) { $that =~ s/[\x0d\x0a]+//sig; }

		# Process pattern tags.
		$pattern = &doTags($pattern,"pattern");
		if (defined $that) {
			$that = &doTags($that,"pattern");
		}

		# Process tags in the template.
		$template = &doTags($template,"template","pattern");

		print RIVE "\n"
			. "+ " . lc($pattern) . "\n";
		if (defined $that) {
			print RIVE "% " . lc($that) . "\n";
		}
		print RIVE $template . "\n";
	}

	close (RIVE);
}

sub doTags {
	my $string  = shift;
	my %context = map { $_ => 1 } @_;

	# Common regexp bits
	my $qq = q{(?:"|')}; # Both kinds of quotes

	if (exists $context{pattern}) {
		# Tags that can exist in <pattern>
		$string =~ s{<bot\s+name=$qq(.+?)$qq\s*/>}{<bot $1>}ig;
	}

	if (exists $context{template}) {
		$string =~ s{<think>}{rs2aiml_aiml_think=on::}ig;
		$string =~ s{</think>}{rs2aiml_aiml_think=off::}ig;
		my @parts = split(/rs2aiml_/, $string);
		my @condition = ();
		my @new = ();
		foreach my $part (@parts) {
			my $think = 0;
			if ($part =~ /^aiml_think=(on|off)::/i) {
				$think = $1 eq 'on' ? 1 : 0;
				$part =~ s/^aiml_think=(on|off):://ig;
			}

			next if length $part == 0;
			next if $part =~ /^\s+$/;

			# <set name="topic" is ghey
			my $t = "topic";
			$part =~ s{<set\s+name=$qq$t$qq>(.+?)</set>}{$1}ig;

			# Inside <think>
			if ($think) {
				# <set> becomes just <set>
				$part =~ s{<set\s+name=$qq(.+?)$qq>\s*(.+?)\s*</set>}{<set $1=$2>}ig;
			}
			else {
				# <set> becomes <set><get>
				$part =~ s{<set\s+name=$qq(.+?)$qq>\s*(.+?)\s*</set>}{<set $1=$2><get $1>}ig;
			}

			# Template-only tags
			$part =~ s{<star\s*/>}{<star>}ig;
			$part =~ s{<star\s+index=$qq(\d+)$qq\s*/>}{<star$1>}ig;

			$part =~ s{<that\s*/>}{<reply1>}ig;
			$part =~ s{<that\s+index=$qq(\d+),\d+$qq\s*/>}{<reply$1>}ig;
			$part =~ s{<that\s+index=$qq(\d+)$qq\s*/>}{<reply$1>}ig;

			$part =~ s{<input\s+index=$qq(\d+)$qq\s*/>}{<input$1>}ig;

			$part =~ s{<thatstar\s*/>}{<botstar>}ig;
			$part =~ s{<thatstar\s+index=$qq(\d+)$qq\s*/>}{<botstar$1>}ig;

			$part =~ s{<topicstar\s*/>}{}ig;
			$part =~ s{<topicstar\s+index=$qq(\d+)$qq\s*/>}{}ig;

			$part =~ s{<get\s+name=$qq(.+?)$qq\s*/>}{<get $1>}ig;

			$part =~ s{<date\s*/>}{<call>alice date</call>}ig;

			$part =~ s{<id\s*/>}{<id>}ig;

			$part =~ s{<size\s*/>}{<call>alice size</call>}ig;

			$part =~ s{<version\s*/>}{<call>alice version</call>}ig;

			$part =~ s{<gossip>(.+?)</gossip>}{<call>alice gossip</call>}ig;

			$part =~ s{<(uppercase|/uppercase)>}{\{$1\}}ig;
			$part =~ s{<(lowercase|/lowercase)>}{\{$1\}}ig;
			$part =~ s{<(formal|/formal)>}{\{$1\}}ig;
			$part =~ s{<(sentence|/sentence)>}{\{$1\}}ig;
			$part =~ s{<(person|/person)>}{\{$1\}}ig;

			$part =~ s{<(person|uppercase|lowercase|formal|sentence)\s*/>}{<$1>}ig;

			$part =~ s{<person2>(.+?)</person2>}{<call>alice person2 $1</call>}ig;
			$part =~ s{<gender>(.+?)</gender>}{<call>alice gender $1</call>}ig;
			$part =~ s{<learn>(.+?)</learn>}{<call>alice learn $1</call>}ig;
			$part =~ s{<system>(.+?)</system>}{<call>alice system $1</call>}ig;

			$part =~ s{<srai>\s*(.+?)\s*</srai>}{\{\@\L$1\E\}}ig;
			$part =~ s{<sr\s*/>}{<\@>}ig;
			my $i = 0;
			# Look for conditionals.
			if ($part =~ /<condition/i) {
				$i++;
				if ($i > 100) {
					die "Couldn't resolve conditionals: $part\n";
				}
				while ($part =~ /<condition\s+name=$qq(.+?)$qq\s+value=$qq(.+?)$qq>(.+?)<\/condition>/i) {
					$i++;
					if ($i > 100) {
						die "Got stuck in conditional 1: $part";
					}
					my $var = $1; my $value = $2; my $text = $3;
					push (@condition, "$var eq $value => $text");
					$part =~ s/<condition\s+name=$qq(.+?)$qq\s+value=$qq(.+?)$qq>(.+?)<\/condition>//i;
				}
				while ($part =~ /<condition\s+name=$qq(.+?)$qq>(.+?)<\/condition>/i) {
					$i++;
					if ($i > 100) {
						die "Got stuck in conditional 2: $part";
					}
					my $var = $1;
					my $body = $2;
					while ($body =~ /<li\s+value=$qq(.+?)$qq>(.+?)<\/li>/i) {
						$i++;
						if ($i > 100) {
							die "Got stuck in conditional 2.5: $body";
						}
						my $value = $1; my $text = $2;
						push (@condition, "$var eq $value => $text");
						$body =~ s/<li\s+value=$qq(.+?)$qq>(.+?)<\/li>//i;
					}
					$part =~ s/<condition\s+name=$qq(.+?)$qq>(.+?)<\/condition>//i;
				}
				while ($part =~ /<condition>(.+?)<\/condition>/i) {
					$i++;
					if ($i > 100) {
						die "Got stuck in conditional 3: $part";
					}
					my $body = $1;
					while ($body =~ /<li\s+name=$qq(.+?)$qq\s+value=$qq(.+?)$qq>(.+?)<\/li>/i) {
						$i++;
						if ($i > 100) {
							die "Got stuck in conditional 3.33: $body";
						}
						my $var = $1; my $value = $2; my $text = $3;
						push (@condition, "$var eq $value => $text");
						$body =~ s/<li\s+name=$qq(.+?)$qq\s+value=$qq(.+?)$qq>(.+?)<\/li>//i;
					}
					while ($body =~ /<li>(.+?)<\/li>/i) {
						$i++;
						if ($i > 100) {
							die "Got stuck in conditional 3.66: $body";
						}
						push (@new, $1);
						$body =~ s/<li>(.+?)<\/li>//i;
					}
					$part =~ s/<condition>(.+?)<\/condition>//i;
				}
				$part =~ s/<condition.+?>//ig;
				$part =~ s/<\/condition.+?>//ig;
			}
			# Look for randomness.
			if ($part =~ /<random>/i) {
				while ($part =~ /<random>(.+?)<\/random>/i) {
					my $body = $1;
					my @rand = ();
					while ($body =~ /<li>(.+?)<\/li>/i) {
						push (@rand, $1);
						$body =~ s/<li>(.+?)<\/li>//i;
					}
					my $rnd = join("|",@rand);
					$part =~ s/<random>(.+?)<\/random>/{random}$rnd\{\/random}/i;
				}
				$part =~ s/<random>//ig;
				$part =~ s/<\/random>//ig;
			}

			# Fix weird set issues
			$part =~ s{<set\s*(\w+)=<set\s*name=$qq\w+$qq>\s*(.+?)\s*>+\s*</set>}{<set $1=$2>}ig;
			$part =~ s{<set\s*name=$qq(.+?)$qq>\s*(.+?)\s*</set>}{$2}ig;

			$part =~ s/^\s+//g;
			$part =~ s/\s+$//g;
			$part =~ s/\s+/ /g;

			push (@new,$part);
		}

		$string = '';
		if (scalar(@condition)) {
			$string = '* ' . join("\n* ", @condition) . "\n";
		}
		$string .= '- ' . join("\n^ ",@new);
	}

	return $string;
}
