#!/usr/bin/perl

use Data::Dumper;

print qq#
package xtc.parser;
header {
    import xtc.util.Utilities;
    import xtc.tree.Attribute;
    import java.util.regex.*;
}
class PParser;
body {
    public static String makeString (Object value) {
	String retVal="";
	if( value != null )
	{
	    if( value instanceof Pair )
	    {
		for(int i = 0; i<((Pair)value).size(); i++) 
		{
		    Object pairElement;

		    pairElement = ((Pair)value).list().get(i);
		    retVal = retVal + " " + pairElement + " "; 
		}
	    } else {
		retVal = " " + value.toString() + " ";
	    }
	} else {
	    retVal = ""; 
	}

	// Returns a version of the input where all contiguous
	// whitespace characters are replaced with a single
	// space. Line terminators are treated like whitespace.
	String patternStr = "\\\\s+";
	String replaceStr = " ";
	Pattern pattern = Pattern.compile(patternStr);
	Matcher matcher = pattern.matcher(retVal);
	return matcher.replaceAll(replaceStr);
    }
}
option location,debug,mainMethod;
top text;
#;


my $nomorerats=0;

while(<STDIN>)
{
    my $norats=0;

    if( /NORATS/ )
    {
	s/^NORATS //;
	$norats=1;
    }

    if( /^; --- NORATS ---$/ )
    {
	$nomorerats=1;
    }

    if( /^;/ ) # If comment
    {
	s!^;!//!;
	print;
	next;
    }

    if( /^\s*$/ )
    {
	print;
	next;
    }

# Add type and change <- to =
    s/^(\S+) <- /String \1 = /;

    # Fix - and _
    s/[-_](.)/\u$1/g;

    # Add trailing ;
    s/(\S+.*)$/\1;/g;

    ######
    # This part is where we add in the Rats! specific code to
    #  generate a parse tree (and maybe other things down the line).
    #
    # This is very complicated!
    ######
    
    if( ! $norats && ! $nomorerats )
    {
	$pre = $_;
	$pre =~ s/(.*)=.*/$1/;
	$post = $_;
	$post =~ s/.*=(.*)/$1/;
	$post =~ s/;\s*$//s;
	chomp $pre;

	$post = realWork($post, 1);
	$post =~ s/NULL/( { yyValue = "" ; } )/;

	$_ = $pre . " = " . $post . ";\n";
    }

    print;
}

sub realWork
{
    my (@names, $i, @stuff, $j, $parens, $semi, $top);
    $i=0; $j=0; $parens=0, $semi=0;
    $_ = $_[0];
    $top = $_[1];  # Marks if this is the top-most recursion.


    # Match balanced parens
    $reparen = qr{
	\(
	    (?:
	     (?> [^()]+ )    # Non-parens without backtracking
	     |
	     (??{ $reparen })     # Group with matching parens
	    )*
	\)
    }x;

    ## print "realwork_pre: $_.\n";
    # Recursion back-out.
    if( ! /[a-zA-Z]/ )
    {
	return;
    }

    # Catch bare () productions
    if( /^\s*\(\s*\)\s*$/ || /^\s*\( \{ yyValue = \"\"; \} \)\s*$/ )
    {
	return "NULL";
    }

    # Strip outermost parens if this production consists of only a paren statement
    if( /^\(.*\)$/ )
    {
	s/\((.*)\)/$1/g;
	$parens=1;
    }
    ## print "realwork: $_.\n";

    # This section deals with parenthetical expressions within the current production
    # by seperating them out and calling realWork on them recursively.  It also
    # replaces the text of the parenthetical expression with "PARSERstuff<num>"
    # so that things don't get acted on too many times.
    s/([^(]*[^(-:]?)($reparen)(\S*)/
	## print "rw1: $1--rw2: $2--$3.\n";
	my $first=$1;
	my $second=$2;
	my $third=$3;
	$j++;
	my $result=$first . "PARSERstuff$j" . $third;

	# If the paren production is empty.
	if( $second =~ m{^\s*\(\s*\)\s*$} ) {
	    $j--;
	    $result=$first . NULL . $third;
	# Else If the paren production is preceded by ! or &, do nothing.
	} elsif( $first =~ m{[!&]\s*$} ) {
	    @stuff[$j] = $second;
	# Else If the paren prod is a repeater, special handling due to Rats! bug.
	} elsif( $third =~ m{^\s*[*+]} ) {
	    @stuff[$j] = realWork($second, 0, 1);
	# Default case.
	} else {
	    ## print "Before work: $result.\n";
	    @stuff[$j] = realWork($second);
	    ## print "PARSERstuff$j: @stuff[$j]\n";
	    ## print "After work: $result.\n";
	}
	$result;
    /exg;
    ## print "After reparen1: $_.\n";

    # Here we test for disjunctions (/) and pass each setion back to realWork.
    my $slashed=0;
    if( 
	# Make sure there's something next to the slash, otherwise we're at the wrong level.
	( m/^.*[a-zA-Z].*\/[^()]*[a-zA-Z][^()]*$/ ) ||
	( m/^[^()]*[a-zA-Z][^()]*\/.*[a-zA-Z].*$/ ) ||
	( m/^.*[a-zA-Z].*\/[^()]*$reparen[^()]*$/x ) ||
	( m/^[^()]*$reparen[^()]*\/.*[a-zA-Z].*$/x )
    )
    {
	$slashed=1;

	## print "Doing slash on $_.\n";
	# Handle all but the last section.
	s{([^/]+)/}{
	    ##print "slash: $1.\n";
	    realWork($1) . "/ $2";
	}eg;

	## print "After slash: $_.\n";
	# Handle the last section.
	s{^(.*)/([^/]+)$}{
	    ##print "slash2: $2.\n";
	    "$1/ " . realWork($2);
	}eg;
	## print "After slash2: $_.\n";
    }

    ###
    # Naming (i.e. turning A <- B C into A <- b:B c:C)
    ###

#     # We don't want to name a sole element; that should just recurse normally.
#     my (@counter, $count);
#     $count = 0;
#     if( m/\b(?<![!&:])([a-zA-Z][^:\s]*)(?!:)\b/ )
#     {
# 	# However, A !B !C doesn't count as a sole element for these purposes,
# 	# so the regex below is not what you'd normally expect.
# 	@counter = m/([a-zA-Z][^:\s]*)(?!:)\b/g;
# 	$count = $#counter + 1;
#     }
# 
#     ## print "count: $count \n";

    # We don't want to name elements seperated by /
    if( ! $slashed )
    {
	s/\b(?<![!&:])([a-zA-Z][^:\s]*)(?!:)\b/
	    ## print "Naming $1.\n";
	    if( $1 =~ m{(^PARSERstuff.*)} )
	    {
		## print "paren $1.\n";
		$i++; @names[$i] = "PARSERparen$i"; "PARSERparen$i:$1";
	    } elsif( $1 =~ m{^\s*NULL\s*$} ) {
		"NULL"; 
	    } else {
		## print "no paren $1.\n";
		$i++; @names[$i] = "$1SEP$i"; "$1SEP$i:$1";
	    }
	/eg;
	## print "Names: " . Dumper(\@names) . ".\n"; 
    }
    ## print "After names: $_.\n";

    # Turn PARSERstuff back into its actual value
    if( $j > 0 ) { 
	## print "Stuff: " . Dumper(\@stuff) . ".\n"; 
	s{\bPARSERstuff(\d+)}{$stuff[$1];}exg;
    }
    ## print "After stuff: $_.\n";

    # Build the yyValue construction at the end.
    if( @names )
    {
	$_ .= " { yyValue = ";
	for my $elem (@names)
	{
	    if( $elem )
	    {
		my $elem2;
		$elem2 = $elem;
		$elem2 =~ s/SEP\d+//;
		$_ .= " \" $elem2=(\" + makeString($elem) + \") \" + ";
	    }
	}
	s/\s*\+\s*$//;
	$_ .= " ; } ";
    }

    # Put parens back on.
    if( $parens )
    {    
	$_ = "($_)";
    }

    ## print "Returning $_\n";
    return $_;
}
