#!/usr/bin/perl use Data::Dumper; print qq# package xtc.parser; header { import xtc.util.Utilities; import xtc.tree.Attribute; import java.util.regex.*; } class PParser; body { public static String makeString (Object value) { String retVal=""; if( value != null ) { if( value instanceof Pair ) { for(int i = 0; i<((Pair)value).size(); i++) { Object pairElement; pairElement = ((Pair)value).list().get(i); retVal = retVal + " " + pairElement + " "; } } else { retVal = " " + value.toString() + " "; } } else { retVal = ""; } // Returns a version of the input where all contiguous // whitespace characters are replaced with a single // space. Line terminators are treated like whitespace. String patternStr = "\\\\s+"; String replaceStr = " "; Pattern pattern = Pattern.compile(patternStr); Matcher matcher = pattern.matcher(retVal); return matcher.replaceAll(replaceStr); } } option location,debug,mainMethod; top text; #; my $nomorerats=0; while() { my $norats=0; if( /NORATS/ ) { s/^NORATS //; $norats=1; } if( /^; --- NORATS ---$/ ) { $nomorerats=1; } if( /^;/ ) # If comment { s!^;!//!; print; next; } if( /^\s*$/ ) { print; next; } # Add type and change <- to = s/^(\S+) <- /String \1 = /; # Fix - and _ s/[-_](.)/\u$1/g; # Add trailing ; s/(\S+.*)$/\1;/g; ###### # This part is where we add in the Rats! specific code to # generate a parse tree (and maybe other things down the line). # # This is very complicated! ###### if( ! $norats && ! $nomorerats ) { $pre = $_; $pre =~ s/(.*)=.*/$1/; $post = $_; $post =~ s/.*=(.*)/$1/; $post =~ s/;\s*$//s; chomp $pre; $post = realWork($post, 1); $post =~ s/NULL/( { yyValue = "" ; } )/; $_ = $pre . " = " . $post . ";\n"; } print; } sub realWork { my (@names, $i, @stuff, $j, $parens, $semi, $top); $i=0; $j=0; $parens=0, $semi=0; $_ = $_[0]; $top = $_[1]; # Marks if this is the top-most recursion. # Match balanced parens $reparen = qr{ $ (?: (?> [^()]+ ) # Non-parens without backtracking | (??{ $reparen }) # Group with matching parens )* $ }x; ## print "realwork_pre: $_.\n"; # Recursion back-out. if( ! /[a-zA-Z]/ ) { return; } # Catch bare () productions if( /^\s*$\s*$\s*$/ || /^\s*$ \{ yyValue = \"\"; \} $\s*$/ ) { return "NULL"; } # Strip outermost parens if this production consists of only a paren statement if( /^$.*$$/ ) { s/$(.*)$/$1/g; $parens=1; } ## print "realwork: $_.\n"; # This section deals with parenthetical expressions within the current production # by seperating them out and calling realWork on them recursively. It also # replaces the text of the parenthetical expression with "PARSERstuff" # so that things don't get acted on too many times. s/([^(]*[^(-:]?)($reparen)(\S*)/ ## print "rw1: $1--rw2: $2--$3.\n"; my $first=$1; my $second=$2; my $third=$3; $j++; my $result=$first . "PARSERstuff$j" . $third; # If the paren production is empty. if( $second =~ m{^\s*$\s*$\s*$} ) { $j--; $result=$first . NULL . $third; # Else If the paren production is preceded by ! or &, do nothing. } elsif( $first =~ m{[!&]\s*$} ) { @stuff[$j] = $second; # Else If the paren prod is a repeater, special handling due to Rats! bug. } elsif( $third =~ m{^\s*[*+]} ) { @stuff[$j] = realWork($second, 0, 1); # Default case. } else { ## print "Before work: $result.\n"; @stuff[$j] = realWork($second); ## print "PARSERstuff$j: @stuff[$j]\n"; ## print "After work: $result.\n"; } $result; /exg; ## print "After reparen1: $_.\n"; # Here we test for disjunctions (/) and pass each setion back to realWork. my $slashed=0; if( # Make sure there's something next to the slash, otherwise we're at the wrong level. ( m/^.*[a-zA-Z].*\/[^()]*[a-zA-Z][^()]*$/ ) || ( m/^[^()]*[a-zA-Z][^()]*\/.*[a-zA-Z].*$/ ) || ( m/^.*[a-zA-Z].*\/[^()]*$reparen[^()]*$/x ) || ( m/^[^()]*$reparen[^()]*\/.*[a-zA-Z].*$/x ) ) { $slashed=1; ## print "Doing slash on $_.\n"; # Handle all but the last section. s{([^/]+)/}{ ##print "slash: $1.\n"; realWork($1) . "/ $2"; }eg; ## print "After slash: $_.\n"; # Handle the last section. s{^(.*)/([^/]+)$}{ ##print "slash2: $2.\n"; "$1/ " . realWork($2); }eg; ## print "After slash2: $_.\n"; } ### # Naming (i.e. turning A <- B C into A <- b:B c:C) ### # # We don't want to name a sole element; that should just recurse normally. # my (@counter, $count); # $count = 0; # if( m/\b(? 0 ) { ## print "Stuff: " . Dumper(\@stuff) . ".\n"; s{\bPARSERstuff(\d+)}{$stuff[$1];}exg; } ## print "After stuff: $_.\n"; # Build the yyValue construction at the end. if( @names ) { $_ .= " { yyValue = "; for my $elem (@names) { if( $elem ) { my $elem2; $elem2 = $elem; $elem2 =~ s/SEP\d+//; $_ .= " \" $elem2=(\" + makeString($elem) + \") \" + "; } } s/\s*\+\s*$//; $_ .= " ; } "; } # Put parens back on. if( $parens ) { $_ = "($_)"; } ## print "Returning $_\n"; return $_; }