/usr/share/perl5/LaTeX/TOM/Parser.pm

###############################################################################
#
# LaTeX::TOM::Parser
#
# The parsing class
#
###############################################################################

package LaTeX::TOM::Parser;

use strict;
use base qw(
    LaTeX::TOM::Node
    LaTeX::TOM::Tree
);

our $VERSION = '0.01';

# Constructor
#
sub new {
    my $class = shift;

    my $self = {};

    bless $self, ref($class) || $class;

    $self->_init(@_);

    return $self;
}

# Set/reset "globals"
#
sub _init {
    my $parser = shift;
    my ($parse_errors_fatal, $readinputs, $applymappings) = @_;

    my $retrieve_opt_with_default = sub
    {
        my ($opt, $default) = @_;
        return $opt if defined $opt;
        return $default;
    };

    # set user options
    #
    $parser->{readinputs}         = $retrieve_opt_with_default->($readinputs, 0);
    $parser->{applymappings}      = $retrieve_opt_with_default->($applymappings, 0);
    $parser->{PARSE_ERRORS_FATAL} = $retrieve_opt_with_default->($parse_errors_fatal, $parser->{config}{PARSE_ERRORS_FATAL});

    # init internal stuff
    #
    $parser->{MATHBRACKETS} = $parser->{config}{MATHBRACKETS};

    # this will hold a running list/hash of commands that have been remapped
    $parser->{MAPPEDCMDS} = {};

    # this will hold a running list/hash of commands that have been used. We dont
    # bother apply mappings except to commands that have been used.
    $parser->{USED_COMMANDS} = {};

    # no file yet
    $parser->{file} = undef;
}

# Parse a LaTeX file, return a tree. You probably want this method.
#
sub parseFile {
    my $parser = shift;
    my $filename = shift;

    # init variables
    #
    $parser->{file} = $filename;        # file name member data
    my $tree = {};                      # init output tree

    # read in text from file
    #
    my $text = _readFile($filename);

    # do the parse or bomb out
    #
    if ($text) {
        $tree = $parser->parse($text);
    } else {
        die "Could not read file $filename !";
    }

    return $tree;
}

# main parsing entrypoint
#
sub parse {
    my $parser = shift;

    my $text = shift;

    # first half of parsing (goes up to finding commands, reading inputs)
    #
    my ($tree, $bracehash) = $parser->_parseA($text);

    #print "done with _parseA\n";
    #$tree->print();

    # handle mappings
    #
    $parser->_applyMappings($tree) if $parser->{applymappings};

    #print "done with _applyMappings\n";
    #$tree->print();

    # second half of parsing (environments)
    #
    $parser->_parseB($tree);

    #print "done with _parseB\n";
    #$tree->print();

    # once all the above is done we can propegate math/plaintext modes down
    #
    $parser->_propegateModes($tree, 0, 0);   # math = 0, plaintext = 0

    #print "done with _propegateModes\n";
    #$tree->print();

    # handle kooky \[ \] math mode
    #
    if (not exists $parser->{MAPPEDCMDS}->{'\\['}) {
        # math mode (\[ \], \( \))
        $parser->_stage5($tree, {'\\[' => '\\]', '\\(' => '\\)'}, 1);
        $parser->_propegateModes($tree, 0, 0);     # have to do this again of course
        $parser->{MATHBRACKETS}->{'\\['} = '\\]';  # put back in brackets list for
        $parser->{MATHBRACKETS}->{'\\('} = '\\)';  # printing purposes.
    }

    #$tree->print();

    $tree->listify;     # add linked-list stuff

    return $tree;
}

# Parsing with no mappings and no externally accessible parser object.
#
sub _basicparse {
    my $parser = shift; # @_ would break code
    my $text   = shift;

    my $parse_errors_fatal = (defined $_[0] ? $_[0] : $parser->{config}{PARSE_ERRORS_FATAL});
    my $readinputs = (defined $_[1] ? $_[1] : 1);

    $parser = LaTeX::TOM::Parser->new($parse_errors_fatal, $readinputs);
    my ($tree, $bracehash) = $parser->_parseA($text); 

    $parser->_parseB($tree);

    $tree->listify; # add linked-list stuff

    return ($tree, $bracehash);
}

# start the tree. separate out comment and text nodes.
#
sub _stage1 {
    my $parser = shift;
    my $text = shift;

    my @nodes = _getTextAndCommentNodes($text, 0, length($text));

    return LaTeX::TOM::Tree->new([@nodes], $parser);
}

# this stage parses the braces ({}) and adds the corresponding structure to
# the tree.
#
sub _stage2 {
    my $parser = shift;

    my $tree = shift;
    my $bracehash = shift || undef;
    my $startidx = shift || 0;      # last two params for starting at some specific
    my $startpos = shift || 0;      # node and offset.

    my %blankhash;

    if (not defined $bracehash) {
        $bracehash = {%blankhash};
    }

    my $leftidx = -1;
    my $leftpos = -1;
    my $leftcount = 0;

    # loop through the nodes
    for (my $i = $startidx; $i < @{$tree->{nodes}}; $i++) {
        my $node = $tree->{nodes}[$i];
        my $spos = $node->{start};	# get text start position 

        # set position placeholder within the text block
        my $pos = ($i == $startidx) ? $startpos : 0;

        if ($node->{type} eq 'TEXT') {

         #warn "parseStage2: looking at text node: [$node->{content}]";
         my ($nextpos, $brace) = _findbrace($node->{content}, $pos);
         while ($nextpos != -1) {

            $pos = $nextpos + 1; # update position pointer

            # handle left brace
            if ($brace eq '{') {
                #warn "found '{' at position $nextpos, leftcount is $leftcount";
                if ($leftcount == 0) {
                    $leftpos = $nextpos;
                    $leftidx = $i
                }
                $leftcount++;
            }

            # handle right brance
            elsif ($brace eq '}') {

                #warn "found '}' at position $nextpos , leftcount is $leftcount";
                my $rightpos = $nextpos;
                $leftcount--;

                # found the corresponding right brace to our starting left brace
                if ($leftcount == 0) {

                    # see if we have to split the text node into 3 parts
                    #
                    if ($leftidx == $i) {

                        my ($leftside, $textnode3) = $node->split($rightpos, $rightpos);
                        my ($textnode1, $textnode2) = $leftside->split($leftpos, $leftpos);

                        # make the new GROUP node
                        my $groupnode = LaTeX::TOM::Node->new(
                            {type => 'GROUP',
                             start => $textnode2->{start} - 1,
                             end => $textnode2->{end} + 1,
                             children => LaTeX::TOM::Tree->new([$textnode2], $parser),
                            });

                        # splice the new subtree into the old location
                        splice @{$tree->{nodes}}, $i, 1, $textnode1, $groupnode, $textnode3;

                        # add to the brace-pair lookup table
                        $bracehash->{$groupnode->{start}} = $groupnode->{end};
                        $bracehash->{$groupnode->{end}} = $groupnode->{start};

                        # recur into new child node
                        $parser->_stage2($groupnode->{children}, $bracehash);

                        $i++; # skip to textnode3 for further processing
                    }

                    # split across nodes
                    #
                    else {

                        my ($textnode1, $textnode2) = $tree->{nodes}[$leftidx]->split($leftpos, $leftpos);
                        my ($textnode3, $textnode4) = $node->split($rightpos, $rightpos);

                        # remove nodes in between the node we found '{' in and the node
                        # we found '}' in
                        #
                        my @removed = splice @{$tree->{nodes}}, $leftidx+1, $i-$leftidx-1;

                        # create a group node that contains the text after the left brace,
                        # then all the nodes up until the next text node, then the text
                        # before the right brace.
                        #
                        my $groupnode = LaTeX::TOM::Node->new(
                            {type => 'GROUP',
                             start => $textnode2->{start} - 1,
                             end => $textnode3->{end} + 1,
                             children => LaTeX::TOM::Tree->new(
                                [$textnode2,
                                 @removed,
                                 $textnode3],
                                 $parser),
                            });

                        # replace the two original text nodes with the leftover left and
                        # right portions, as well as the group node with everything in
                        # the middle.
                        #
                        splice @{$tree->{nodes}}, $leftidx, 2, $textnode1, $groupnode, $textnode4;

                        # add to the brace-pair lookup table
                        $bracehash->{$groupnode->{start}} = $groupnode->{end};  
                        $bracehash->{$groupnode->{end}} = $groupnode->{start};

                        # recur into new child nodes
                        $parser->_stage2($groupnode->{children}, $bracehash);

                        # step back to textnode4 on this level for further processing
                        $i -= scalar @removed;
                    }

                    $leftpos = -1; # reset left data
                    $leftidx = -1;
                    last;
                } # $leftcount == 0

                # check for '}'-based error
                #
                if ($leftcount < 0) {
                    if ($parser->{PARSE_ERRORS_FATAL} == 1) {
                        die "parse error: '}' before '{' at ".($spos+$rightpos).".";
                    } elsif ($parser->{PARSE_ERRORS_FATAL} == 0) {
                        warn "parse error: '}' before '{' at ".($spos+$rightpos).".";
                    }
                    # reset and continue
                    $leftcount = 0;
                }
            } # right brace

            ($nextpos, $brace) = _findbrace($node->{content}, $pos);

         } # while (braces left)

         } # if TEXT

    } # loop over all nodes

    # check for extra '{' parse error
    #
    if ($leftcount > 0) {
        my $spos = $tree->{nodes}[$leftidx]->{start};	# get text start position 
        if ($parser->{PARSE_ERRORS_FATAL} == 1) {
            die "parse error: unmatched '{' at ".($spos+$leftpos).".";
        } elsif ($parser->{PARSE_ERRORS_FATAL} == 0) {
            warn "parse error: unmatched '{' at ".($spos+$leftpos).".";
        }
        # try to continue on, after the offending brace
        $parser->_stage2($tree, $bracehash, $leftidx, $leftpos + 1);
    }

    return $bracehash;
}

# this stage finds LaTeX commands and accordingly turns GROUP nodes into
# command nodes, labeled with the command
#
sub _stage3 {
    my $parser = shift;

    my $tree = shift;
    my $parent = shift;

    for (my $i = 0; $i< @{$tree->{nodes}}; $i++) {

        my $node = $tree->{nodes}[$i];

        # check text node for command tag
        if ($node->{type} eq 'TEXT') {
            my $text = $node->{content};

            # inner command (such as {\command text text}). our regexp checks to see
            # if this text chunk begins with \command, since that would be the case
            # due to the previous parsing stages. if found, the parent node is
            # promoted to a command.
            #
            if ($text =~ /^\s*\\(\w+\*?)/ && defined $parent && $parser->{config}{INNERCMDS}->{$1}) {
                my $command = $1;

                # if the parent is already a command node, we have to make a new
                # nested command node
                #
                if ($parent->{type} eq 'COMMAND') {

                    # make a new command node
                    my $newnode = LaTeX::TOM::Node->new(
                        {type => 'COMMAND',
                         command => $command,
                         start => $parent->{start},
                         end => $parent->{end},
                         position => 'inner',
                         children => $parent->{children} });

                    # point parent to it
                    $parent->{children} = LaTeX::TOM::Tree->new([$newnode], $parser);

                    # start over at this level (get additional inner commands)
                    $parent = $newnode;
                    $i = -1;

                    $parser->{USED_COMMANDS}->{$newnode->{command}} = 1;
                }

                # parent is a naked group, we can make it into a command node
                #
                elsif ($parent->{type} eq 'GROUP') {
                    $parent->{type} = 'COMMAND';
                    $parent->{command} = $command;
                    $parent->{position} = 'inner';

                    # start over at this level
                    $i = -1;

                    $parser->{USED_COMMANDS}->{$parent->{command}} = 1;
                }

                $node->{content} =~ s/^\s*\\(?:\w+\*?)//o;
            }

            # outer command (such as \command{parameters}). our regexp checks to
            # see if this text chunk ends in \command, since that would be the case
            # due to the previous parsing stages.
            #
            if ($text =~ /(?:^|[^\\])(\\\w+\*?(\s*\[.*?\])?)\s*$/os && 
                    defined $tree->{nodes}[$i+1] &&
                    $tree->{nodes}[$i+1]->{type} eq 'GROUP') {

                my $tag = $1;

                #print "found text node [$text] with command tag [$tag]\n";

                # remove the text
                $node->{content} =~ s/\\\w+\*?\s*(?:\[.*?\])?\s*$//os;

                # parse it for command and ops
                $tag =~ /^\\(\w+\*?)\s*(?:\[(.*?)\])?$/os;

                my $command = $1;
                my $opts = $2;

                # make the next node a command node with the above data
                my $next = $tree->{nodes}[$i+1];

                $next->{type} = 'COMMAND';
                $next->{command} = $command;
                $next->{opts} = $opts;
                $next->{position} = 'outer';

                $parser->{USED_COMMANDS}->{$next->{command}} = 1;
            }

            # recognize braceless commands
            #
            if ($text =~ /(\\(\w+\*?)[ \t]+(\S+))/gso || $text =~ /(\\(\w+)(\d+))/gso) {
                my $all = $1;
                my $command = $2;
                my $param = $3;

                if ($parser->{config}{BRACELESS}->{$command}) {
                 # warn "found braceless command $command with param $param";

                    # get location to split from node text
                    my $a = index $node->{content}, $all, 0;
                    my $b = $a + length($all) - 1;

                    # make all the new nodes

                    # new left and right text nodes
                    my ($leftnode, $rightnode) = $node->split($a, $b);

                    # param contents node
                    my $pstart = index $node->{content}, $param, $a;
                    my $newchild = LaTeX::TOM::Node->new(
                        {type => 'TEXT',
                         start => $node->{start} + $pstart,
                         end => $node->{start} + $pstart + length($param) - 1,
                         content => $param });

                    # new command node
                    my $commandnode = LaTeX::TOM::Node->new(
                        {type => 'COMMAND',
                         braces => 0,
                         command => $command,
                         start => $node->{start} + $a,
                         end => $node->{start} + $b,
                         children => LaTeX::TOM::Tree->new([$newchild], $parser),
                        });

                    $parser->{USED_COMMANDS}->{$commandnode->{command}} = 1;

                    # splice these all into the original array  
                    splice @{$tree->{nodes}}, $i, 1, $leftnode, $commandnode, $rightnode;

                    # make the rightnode the node we're currently analyzing
                    $node = $rightnode;

                    # make sure outer loop will continue parsing *after* rightnode
                    $i += 2;
                }
            }
        }

        # recur
        if ($node->{type} eq 'GROUP' ||
            $node->{type} eq 'COMMAND') {

            $parser->_stage3($node->{children}, $node);
        }
    }
}

# this stage finds \begin{x} \end{x} environments and shoves their contents 
#	down into a new child node, with a parent node of ENVIRONMENT type.
# 
# this has the effect of making the tree deeper, since much of the structure
#	is in environment tags and will now be picked up.
# 
# for ENVIRONMENTs, "start" means the ending } on the \begin tag, 
# "end" means the starting \ on the \end tag,
# "ostart" is the starting \ on the "begin" tag,
# "oend" is the ending } on the "end" tag, and
# and "class" is the "x" from above.
#
sub _stage4 {
    my $parser = shift;
    my $tree = shift;

    my $bcount = 0; # \begin "stack count"
    my $class = ""; # environment class
    my $bidx = 0;   # \begin array index.

    for (my $i = 0; $i < @{$tree->{nodes}}; $i++) {
        my $node = $tree->{nodes}->[$i];

        # see if this is a "\begin" command node
        if ($node->{type} eq 'COMMAND' && $node->{command} eq 'begin') {

            #warn "parseStage4: found a begin COMMAND node, ".$node->{children}->{nodes}[0]->{content}." @ $node->{start}";

            # start a new "stack"
            if ($bcount == 0) {
                $bidx = $i;
                $bcount++;
                $class = $node->{children}->{nodes}->[0]->{content}; 
                #warn "parseStage4: opening environment tag found, class = $class";
            }

            # add to the "stack"
            elsif ($node->{children}->{nodes}->[0]->{content} eq $class) {
                $bcount++;
                #warn "parseStage4: incrementing tag count for $class";
            }
        }

        # handle "\end" command nodes
        elsif ($node->{type} eq 'COMMAND' &&
               $node->{command} eq 'end' &&
               $node->{children}->{nodes}->[0]->{content} eq $class) {

            $bcount--;
            #warn "parseStage4: decrementing tag count for $class";

            # we found our closing "\end" tag. replace everything with the proper
            # ENVIRONMENT tag and subtree.
            #
            if ($bcount == 0) {

                #warn "parseStage4: closing environment $class";

                # first we must take everything between the "\begin" and "\end" 
                # nodes and put them in a new array, removing them from the old one
                my @newarray = splice @{$tree->{nodes}}, $bidx+1, $i - ($bidx + 1);

                # make the ENVIRONMENT node
                my $start = $tree->{nodes}[$bidx]->{end};
                my $end = $node->{start};
                my $envnode = LaTeX::TOM::Node->new(
                    {type => 'ENVIRONMENT',
                     class => $class,
                     start => $start, # "inner" start and end
                     end => $end,
                     ostart => $start - length('begin') - length($class) - 2,
                     oend => $end + length('end') + length($class) + 2,
                     children => LaTeX::TOM::Tree->new([@newarray], $parser),
                    });

                if ($parser->{config}{MATHENVS}->{$envnode->{class}}) {
                    $envnode->{math} = 1;
                }

                # replace the \begin and \end COMMAND nodes with the single 
                # environment node
                splice @{$tree->{nodes}}, $bidx, 2, $envnode;

                $class = ""; # reset class.

                # i is going to change by however many nodes we removed
                $i -= scalar @newarray;

                # recur into the children
                $parser->_stage4($envnode->{children});	
            }
        }

        # recur in general
        elsif ($node->{children}) {
            $parser->_stage4($node->{children});
        }
    }

    # parse error if we're missing an "\end" tag.
    if ($bcount > 0) {
        if ($parser->{PARSE_ERRORS_FATAL} == 1) {
            die "parse error: missing \\end{$class} for \\begin{$class} at position ".$tree->{nodes}[$bidx]->{end}." !";
        } elsif ($parser->{PARSE_ERRORS_FATAL} == 0) {
            warn "parse error: missing \\end{$class} for \\begin{$class} at position ".$tree->{nodes}[$bidx]->{end}." !";
        }
    }
}

# This is the "math" stage: here we grab simple-delimeter math modes from
# the text they are embedded in, and turn those into new groupings, with the
# "math" flag set.
#
# having this top level to go over all the bracket types prevents some pretty
# bad combinatorial explosion
#
sub _stage5 {
    my $parser = shift;

    my $tree = shift;
    my $caremath = shift || 0;

    my $brackets = $parser->{MATHBRACKETS};

    # loop through all the different math mode bracket types
    foreach my $left (sort {length($b) <=> length($a)} keys %$brackets) {
        my $right = $brackets->{$left};

        $parser->_stage5_r($tree, $left, $right, $caremath);
    }
}

# recursive meat of above
#
sub _stage5_r {
    my $parser = shift;

    my $tree = shift;
    my $left = shift;
    my $right = shift;
    my $caremath = shift || 0; # do we care if we're already in math mode?
                               # this matters for \( \), \[ \]

    my $leftpos = -1; # no text pos for found left brace yet.
    my $leftidx = -1; # no array index for found left brace yet.

        # loop through the nodes
        for (my $i = 0; $i < scalar @{$tree->{nodes}}; $i++) {
            my $node = $tree->{nodes}[$i];
            my $pos = 0; # position placeholder within the text block
            my $spos = $node->{start}; # get text start position

            if ($node->{type} eq 'TEXT' && 
               (!$caremath || (!$node->{math} && $caremath))) {

                # search for left brace if we haven't started a pair yet
                if ($leftidx == -1) {
                    $leftpos = _findsymbol($node->{content}, $left, $pos);

                    if ($leftpos != -1) {
                        #print "found (left) $left in [$node->{content}]\n";
                        $leftidx = $i;
                        $pos = $leftpos + 1; # next pos to search from
                    }
                }

                # search for a right brace
                if ($leftpos != -1) {
                    my $rightpos = _findsymbol($node->{content}, $right, $pos);

                    # found
                    if ($rightpos != -1) {

                        # we have to split the text node into 3 parts
                        if ($leftidx == $i) {
                            #print "splitwithin: found (right) $right in [$node->{content}]\n";

                            my ($leftnode, $textnode3) = $node->split($rightpos, $rightpos + length($right) - 1);
                            my ($textnode1, $textnode2) = $leftnode->split($leftpos, $leftpos + length($left) - 1);

                            my $startpos = $spos; # get text start position 

                            # make the math ENVIRONMENT node
                            my $mathnode = LaTeX::TOM::Node->new(
                                {type => 'ENVIRONMENT',
                                class => $left,	# use left delim as class
                                math => 1,
                                start => $startpos + $leftpos,
                                ostart => $startpos + $leftpos - length($left) + 1,
                                end => $startpos + $rightpos,
                                oend => $startpos + $rightpos + length($right) - 1,
                                children => LaTeX::TOM::Tree->new([$textnode2], $parser),
                                });

                            splice @{$tree->{nodes}}, $i, 1, $textnode1, $mathnode, $textnode3;

                            $i++; # skip ahead two nodes, so we'll be parsing textnode3
                        }

                        # split across nodes
                        else {

                            #print "splitacross: found (right) $right in [$node->{content}]\n";

                            # create new set of 4 smaller text nodes from the original two
                            # that contain the left and right delimeters
                            #
                            my ($textnode1, $textnode2) = $tree->{nodes}[$leftidx]->split($leftpos, $leftpos + length($left) - 1);
                            my ($textnode3, $textnode4) = $tree->{nodes}[$i]->split($rightpos, $rightpos + length($right) - 1);

                            # nodes to remove "from the middle" (between the left and right
                            # text nodes which contain the delimeters)
                            #
                            my @remnodes = splice @{$tree->{nodes}}, $leftidx+1, $i - $leftidx - 1;

                            # create a math node that contains the text after the left brace,
                            # then all the nodes up until the next text node, then the text
                            # before the right brace.
                            #
                            my $mathnode = LaTeX::TOM::Node->new(
                                {type => 'ENVIRONMENT',
                                class => $left,
                                math => 1,
                                start => $textnode2->{start} - 1,
                                end => $textnode3->{end} + 1,
                                ostart => $textnode2->{start} - 1 - length($left) + 1,
                                oend => $textnode3->{end} + 1 + length($right) - 1,
                                children => LaTeX::TOM::Tree->new(
                                [$textnode2,
                                 @remnodes,
                                 $textnode3], $parser),
                                });

                            # replace (TEXT_A, ... , TEXT_B) with the mathnode created above
                            splice @{$tree->{nodes}}, $leftidx, 2, $textnode1, $mathnode, $textnode4;

                            # do all nodes again but the very leftmost
                            #
                            $i = $leftidx;
                        }

                        $leftpos = -1; # reset left data    
                        $leftidx = -1;
                    } # right brace
                } # left brace 
                else {

                    my $rightpos = _findsymbol($node->{content}, $right, $pos);

                    if ($rightpos != -1) {
                        my $startpos = $node->{start}; # get text start position 
                        if ($parser->{PARSE_ERRORS_FATAL} == 1) {
                            die "parse error: unmatched '$right' at ".($startpos+$rightpos).".";
                        } elsif ($parser->{PARSE_ERRORS_FATAL} == 0) {
                            warn "parse error: unmatched '$right' at ".($startpos+$rightpos).".";
                        }
                    }
                }
            } # if TEXT

            # recur, but not into verbatim environments!
            #
            elsif ($node->{children} && 
                         !(
                             ($node->{type} eq 'COMMAND' && $node->{command} =~ /^verb/) ||
                             ($node->{type} eq 'ENVIRONMENT' && $node->{class} =~ /^verbatim/))) {

                #print "Recurring into $node->{type} node ";
                #print "$node->{command}" if ($node->{type} eq 'COMMAND');
                #print "$node->{class}" if ($node->{type} eq 'ENVIRONMENT');
                #print "\n";

                $parser->_stage5_r($node->{children}, $left, $right, $caremath);
            }

        } # loop over text blocks

        if ($leftpos != -1) {
            my $startpos = $tree->{nodes}[$leftidx]->{start};   # get text start position
            if ($parser->{PARSE_ERRORS_FATAL} == 1) {
                die "parse error: unmatched '$left' at ".($startpos+$leftpos).".";
            } elsif ($parser->{PARSE_ERRORS_FATAL} == 0) {
                warn "parse error: unmatched '$left' at ".($startpos+$leftpos).".";
            }
        }
}

# This stage propegates the math mode flag and plaintext flags downward.
#
# After this is done, we can make the claim that only text nodes marked with
# the plaintext flag should be printed.	math nodes will have the "math" flag,
# and also plantext = 0.
#
sub _propegateModes {
    my $parser = shift;

    my $tree = shift;
    my $math = shift;       # most likely want to call this with 0
    my $plaintext = shift;  # ditto this-- default to nothing visible.

    foreach my $node (@{$tree->{nodes}}) {

        # handle text nodes on this level. set flags.
        #
        if ($node->{type} eq 'TEXT') {
            $node->{math} = $math;
            $node->{plaintext} = $plaintext;
        }

        # propegate flags downward, possibly modified
        #
        elsif (defined $node->{children}) {

            my $mathflag = $math;    # math propegates down by default
            my $plaintextflag = 0;   # plaintext flag does NOT propegate by default

            # handle math or plain text forcing envs
            #
            if ($node->{type} eq 'ENVIRONMENT' || $node->{type} eq 'COMMAND') {
                if (defined $node->{class} && (
                    $parser->{config}{MATHENVS}->{$node->{class}} ||
                    $parser->{config}{MATHENVS}->{"$node->{class}*"})
                   )
                {
                    $mathflag = 1;
                    $plaintextflag = 0;
                }
                elsif (($node->{type} eq 'COMMAND' && 
                                ($parser->{config}{TEXTENVS}->{$node->{command}} ||
                                 $parser->{config}{TEXTENVS}->{"$node->{command}*"})) ||
                             ($node->{type} eq 'ENVIRONMENT' && 
                                ($parser->{config}{TEXTENVS}->{$node->{class}} ||
                                 $parser->{config}{TEXTENVS}{"$node->{command}*"}))
                            ) {

                    $mathflag = 0;
                    $plaintextflag = 1;
                }
            }

            # groupings change nothing
            #
            elsif ($node->{type} eq 'GROUP') {
                $mathflag = $math;
                $plaintextflag = $plaintext;
            }

            # recur
            $parser->_propegateModes($node->{children}, $mathflag, $plaintextflag);
        }
    }
}

# apply a mapping to text nodes in a tree
#
# for newcommands and defs: mapping is a hash:
#
# {name, nparams, template, type}
#
# name is a string
# nparams is an integer
# template is a tree fragement containing text nodes with #x flags, where
# parameters will be replaced.
# type is "command"
#
# for newenvironments:
#
# {name, nparams, btemplate, etemplate, type}
#
# same as above, except type is "environment" and there are two templates,
# btemplate and etemplate.
#
sub _applyMapping {
    my $parser = shift;

    my $tree = shift;
    my $mapping = shift;
    my $i = shift || 0;  # index to start with, in tree.

    my $applications = 0; # keep track of # of applications

    for (; $i < @{$tree->{nodes}}; $i++) {

        my $node = $tree->{nodes}[$i];

        # begin environment nodes
        #
        if ($node->{type} eq 'COMMAND' &&
                $node->{command} eq 'begin' &&
                $node->{children}->{nodes}[0]->{content} eq $mapping->{name}) {

            # grab the nparams next group nodes as parameters
            #
            my @params = ();

            my $remain = $mapping->{nparams};
            my $j = 1;
            while ($remain > 0 && ($i + $j) < scalar @{$tree->{nodes}}) {

                my $node = $tree->{nodes}[$i + $j];

                # grab group node
                if ($node->{type} eq 'GROUP') {
                    push @params, $node->{children};
                    $remain--;
                }

                $j++;
            }

            # if we didn't get enough group nodes, bomb out
            next if $remain;

            # otherwise make new subtree
            my $applied = _applyParamsToTemplate($mapping->{btemplate}, @params);

            # splice in the result
            splice @{$tree->{nodes}}, $i, $j, @$applied;

            # skip past all the new stuff
            $i += scalar @$applied - 1;
        }

        # end environment nodes
        #
        elsif ($node->{type} eq 'COMMAND' &&
                     $node->{command} eq 'end' &&
                     $node->{children}->{nodes}[0]->{content} eq $mapping->{name}) {

            # make new subtree (no params)
            my $applied = $mapping->{etemplate}->copy();

            # splice in the result
            splice @{$tree->{nodes}}, $i, 1, @$applied;

            # skip past all the new stuff
            $i += scalar @$applied - 1;

            $applications++; # only count end environment nodes
        }

        # newcommand nodes
        #
        elsif ($node->{type} eq 'COMMAND' &&
                $node->{command} eq $mapping->{name} &&
                $mapping->{nparams}) {

            my @params = ();

            # children of COMMAND node will be first parameter
            push @params, $node->{children};

            # find next nparams GROUP nodes and push their children onto @params
            my $remain = $mapping->{nparams} - 1;
            my $j = 1;
            while ($remain > 0 && ($i + $j) < scalar @{$tree->{nodes}}) {

                my $node = $tree->{nodes}[$i + $j];

                # grab group node
                if ($node->{type} eq 'GROUP') {
                    push @params, $node->{children};
                    $remain--;
                }

                $j++;
            }

            # if we didn't get enough group nodes, bomb out
            next if ($remain > 0);

            # apply the params to the template
            my $applied = _applyParamsToTemplate($mapping->{template}, @params);

            # splice in the result
            splice @{$tree->{nodes}}, $i, $j, @$applied;

            # skip past all the new stuff
            $i += scalar @$applied - 1;

            $applications++;
        }

        # find 0-param mappings
        elsif ($node->{type} eq 'TEXT' && !$mapping->{nparams}) {

             my $text = $node->{content};
             my $command = $mapping->{name};

             # find occurrences of the mapping command
             #
             my $wordend = ($command =~ /\w$/ ? 1 : 0); 
             while (($wordend && $text =~ /\\\Q$command\E(\W|$)/g) ||
                            (!$wordend && $text =~ /\\\Q$command\E/g)) {

                 #warn "found occurrence of mapping $command";

                 my $idx = index $node->{content}, '\\' . $command, 0;

                 # split the text node at that command
                 my ($leftnode, $rightnode) = $node->split($idx, $idx + length($command));

                 # copy the mapping template
                 my $applied = $mapping->{template}->copy();

                 # splice the new nodes in
                 splice @{$tree->{nodes}}, $i, 1, $leftnode, @$applied, $rightnode;

                 # adjust i so we end up on rightnode when we're done
                 $i += scalar @$applied + 1;

                 # get the next node
                 $node = $tree->{$node}[$i];

                 # count application
                 $applications++;
             }
        }

        # recur
        elsif ($node->{children}) {

            $applications += $parser->_applyMapping($node->{children}, $mapping);
        }
    }

    return $applications;
}

# find and apply all mappings in the tree, progressively and recursively.
# a mapping applies to the entire tree and subtree consisting of nodes AFTER
# itself in the level array.
#
sub _applyMappings {
    my $parser = shift;

    my $tree = shift;

    for (my $i = 0; $i < @{$tree->{nodes}}; $i++) {

        my $prev = $tree->{nodes}[$i-1];
        my $node = $tree->{nodes}[$i];

        # find newcommands
        if ($node->{type} eq 'COMMAND' &&
                $node->{command} =~ /^(re)?newcommand$/) {

            my $mapping = _makeMapping($tree, $i); 
            next if (!$mapping->{name}); # skip fragged commands

            if ($parser->{USED_COMMANDS}->{$mapping->{name}}) {
                #print "applying (nc) mapping $mapping->{name}\n";
            } else {
                #print "NOT applying (nc) mapping $mapping->{name}\n";
                next;
            }

            # add to mappings list
            #
            $parser->{MAPPEDCMDS}->{"\\$mapping->{name}"} = 1;

            #print "found a mapping with name $mapping->{name}, $mapping->{nparams} params\n";

            # remove the mapping declaration
            #
            splice @{$tree->{nodes}}, $i, $mapping->{skip} + 1;

            # apply the mapping
            my $count = $parser->_applyMapping($tree, $mapping, $i);

            #if ($count > 0) {
            #   print "printing altered subtree\n";
            #   $tree->print();
            #}

            $i--; # since we removed the cmd node, check this index again
        }

        # handle "\newenvironment" mappings
        elsif ($node->{type} eq 'COMMAND' &&
                 $node->{command} =~ /^(re)?newenvironment$/) {

            # make a mapping hash
            #
            my $mapping = $parser->_makeEnvMapping($tree, $i);
            next if (!$mapping->{name}); # skip fragged commands.

            #print "applying (ne) mapping $mapping->{name}\n";

            # remove the mapping declaration
            #
            splice @{$tree->{nodes}}, $i, $mapping->{skip} + 1;

            # apply the mapping
            #
            my $count = $parser->_applyMapping($tree, $mapping, $i);
        }

        # handle "\def" stype commands.
        elsif ($node->{type} eq 'COMMAND' &&
                 defined $prev &&
                 $prev->{type} eq 'TEXT' &&
                 $prev->{content} =~ /\\def\s*$/o) {

             #print "found def style mapping $node->{command}\n";

             # remove the \def
             $prev->{content} =~ s/\\def\s*$//o;

             # make the mapping
             my $mapping = {name => $node->{command},
                nparams => 0,
                template => $node->{children}->copy(),
                type => 'command'};

             next if (!$mapping->{name}); # skip fragged commands

             if ($parser->{USED_COMMANDS}->{$mapping->{name}}) {
                 #print "applying (def) mapping $mapping->{name}\n";
             } else {
                 #print "NOT applying (def) mapping $mapping->{name}\n";
                 next;
             }

             # add to mappings list
             #
             $parser->{MAPPEDCMDS}->{"\\$mapping->{name}"} = 1;

             #print "template is \n";
             #$mapping->{template}->print();

             # remove the command node
             splice @{$tree->{nodes}}, $i, 1;

             # apply the mapping
             my $count = $parser->_applyMapping($tree, $mapping, $i);

             $i--; # check this index again
        }

        # recur
        elsif ($node->{children}) {

            $parser->_applyMappings($node->{children});
        }
    }
}

# read files from \input commands and place into the tree, parsed
#
# also include bibliographies
#
sub _addInputs {
    my $parser = shift;

    my $tree = shift;

    for (my $i = 0; $i < @{$tree->{nodes}}; $i++) {

        my $node = $tree->{nodes}[$i];

        if ($node->{type} eq 'COMMAND' &&
            $node->{command} eq 'input') {

            my $file = $node->{children}->{nodes}[0]->{content};
            next if ($file =~ /pstex/);	# ignore pstex images

            #print "reading input file $file\n";

            # read in contents of file
            my $contents = _readFile($file);
            if ($contents eq '' && not $file =~ /\.tex$/) {
                $file = "$file.tex";
                $contents = _readFile($file); 
            }

            # dump Psfig/TeX files, they aren't useful to us and have
            # nonconforming syntax. Use declaration line as our heuristic.
            #
            if ($contents =~ /^\%\s*Psfig\/TeX\s*$/m) {
                $contents = '';
                warn "ignoring Psfig input '$file'";
            }

            # actually do the parse of the sub-content
            #
            if ($contents) {
                # parse into a tree
                my ($subtree,) = $parser->_basicparse($contents, $parser->{PARSE_ERRORS_FATAL});

                # replace \input command node with subtree
                splice @{$tree->{nodes}}, $i, 1, @{$subtree->{nodes}};

                # step back
                $i--;
            }
        }
        elsif ($node->{type} eq 'COMMAND' &&
             $node->{command} eq 'bibliography') {

             # try to find a .bbl file
             #
             my @FILES = <*>;
             foreach my $file (@FILES) {
                 if ($file =~ /\.bbl$/) {

                     my $contents = _readFile($file);

                     if ($contents) {

                         my ($subtree,) = $parser->_basicparse($contents, $parser->{PARSE_ERRORS_FATAL});
                         splice @{$tree->{nodes}}, $i, 1, @$subtree;
                         $i--;
                     }
                 }
             }
        }

        # recur
        if ($node->{children}) {
            $parser->_addInputs($node->{children});
        }
    }
}

# do pre-mapping parsing
#
sub _parseA {
    my $parser = shift;
    my $text = shift;

    my $tree = $parser->_stage1($text);
    my $bracehash = $parser->_stage2($tree);

    $parser->_stage3($tree);

    $parser->_addInputs($tree) if $parser->{readinputs};

    return ($tree, $bracehash);
}

# do post-mapping parsing (make environments)
#
sub _parseB {
    my $parser = shift;
    my $tree = shift;

    $parser->_stage4($tree);

    #print "done with parseStage4\n";

    $parser->_stage5($tree, 0);

    #print "done with parseStage5\n";
}

###############################################################################
#
# Parser "Static" Subroutines
#
###############################################################################

# find next unescaped char in some text
#
sub _uindex {
    my $text = shift;
    my $char = shift;
    my $pos = shift;

    my $realbrace = 0;
    my $idx = -1;

    # get next opening brace
    do {
        $realbrace = 1;
        $idx = index $text, $char, $pos; 

        if ($idx != -1) {
            $pos = $idx + 1;
            my $prevchar = substr $text, $idx - 1, 1;
            if ($prevchar eq '\\') {
                $realbrace = 0;
                $idx = -1;
            }
        }
    } while (!$realbrace);

    return $idx;
}

# support function: find the next occurrence of some symbol which is 
# not escaped.
#
sub _findsymbol {
    my $text = shift;
    my $symbol = shift;
    my $pos = shift;

    my $realhit = 0; 
    my $index = -1;

    # get next occurrence of the symbol
    do {
        $realhit = 1;
        $index = index $text, $symbol, $pos; 

        if ($index != -1) {
            $pos = $index + 1;

            # make sure this occurrence isn't escaped. this is imperfect.
            #
            my $prevchar = ($index - 1 >= 0) ? 
                                             (substr $text, $index - 1, 1) : '';
            my $pprevchar = ($index - 2 >= 0) ?
                                             (substr $text, $index - 2, 1) : '';
            if ($prevchar eq '\\' && $pprevchar ne '\\') {
                $realhit = 0;
                $index = -1;
            }
        }
    } while (!$realhit);

    return $index;
}

# support function: find the earliest next brace in some (flat) text
#
sub _findbrace {
    my $text = shift;
    my $pos = shift;

    my $realbrace = 0;
    my $index_o = -1;
    my $index_c = -1;

    my $pos_o = $pos;
    my $pos_c = $pos;

    # get next opening brace
    do {
        $realbrace = 1;
        $index_o = index $text, '{', $pos_o;

        if ($index_o != -1) {
            $pos_o = $index_o + 1;

            # make sure this brace isn't escaped. this is imperfect.
            #
            my $prevchar = ($index_o - 1 >= 0) ? 
                (substr $text, $index_o - 1, 1) : '';
            my $pprevchar = ($index_o - 2 >= 0) ?
                (substr $text, $index_o - 2, 1) : '';

            if ($prevchar eq '\\' && $pprevchar ne '\\') {
                $realbrace = 0;
                $index_o = -1;
            }
        }
    } while (!$realbrace);

    # get next closing brace
    do {
        $realbrace = 1;
        $index_c = index $text, '}', $pos_c;

        if (($index_c - 1) >= 0 && substr($text, $index_c - 1, 1) eq ' ') {
            $pos_c = $index_c + 1;
            $index_c = -1;
        }

        if ($index_c != -1) {
            $pos_c = $index_c + 1;

            # make sure this brace isn't escaped. this is imperfect.
            #
            my $prevchar = ($index_c - 1 >= 0) ? 
                (substr $text, $index_c - 1, 1) : '';
            my $pprevchar = ($index_c - 2 >= 0) ?
                (substr $text, $index_c - 2, 1) : '';

            if ($prevchar eq '\\' && $pprevchar ne '\\') {
                $realbrace = 0;
                $index_c = -1;
            }
        }
    } while (!$realbrace);

    # handle all find cases
    return (-1, '') if ($index_o == -1 && $index_c == -1);
    return ($index_o, '{') if ($index_c == -1 || 
        ($index_o != -1 && $index_o < $index_c));

    return ($index_c, '}') if ($index_o == -1 || $index_c < $index_o);
}


# skip "blank nodes" in a tree, starting at some position. will finish 
# at the first non-blank node. (ie, not a comment or whitespace TEXT node.
#
sub _skipBlankNodes {
    my $tree = shift;
    my $i = shift;

    while ($tree->{nodes}[$i]->{type} eq 'COMMENT' ||
        ($tree->{nodes}[$i]->{type} eq 'TEXT' &&
        $tree->{nodes}[$i]->{content} =~ /^\s*$/s)) { 

        $i++;
    }

    return $i;
}

# is the passed-in node a valid parameter node? for this to be true, it must
# either be a GROUP or a position = inner command.
#
sub _validParamNode {
    my $node = shift;

    return 1 if ($node->{type} eq 'GROUP' || 
        ($node->{type} eq 'COMMAND' && $node->{position} eq 'inner'));

    return 0;
}

# duplicate a valid param node.	This means for a group, copy the child tree.
# for a command, make a new tree with just the command node and its child tree.
#
sub _duplicateParam {
    my $parser = shift;
    my $node = shift;

    if ($node->{type} eq 'GROUP') {
        return $node->{children}->copy();
    }
    elsif ($node->{type} eq 'COMMAND') {

        my $subtree = $node->{children}->copy(); # copy child subtree
        my $nodecopy = $node->copy(); # make a new node with old data
        $nodecopy->{children} = $subtree; # set the child pointer to new subtree

        # return a new tree with the new node (subtree) as its only element
        return LaTeX::TOM::Tree->new([$nodecopy], $parser);
    }

    return undef;
}

# make a mapping from a newenvironment fragment
#
# newenvironments have the following syntax:
#
# \newenvironment{name}[nparams]?{beginTeX}{endTeX}
#
sub _makeEnvMapping {
    my $parser = shift;
    my $tree   = shift;
    my $i      = shift;

    return undef if ($tree->{nodes}[$i]->{type} ne 'COMMAND' ||
        ($tree->{nodes}[$i]->{command} ne 'newenvironment' &&
        $tree->{nodes}[$i]->{command} ne 'renewenvironment'));

    # figure out command (first child, text node)
    my $command = $tree->{nodes}[$i]->{children}->{nodes}[0]->{content};
    if ($command =~ /^\s*\\(\S+)\s*$/) {
        $command = $1;
    }

    my $next = $i+1;

    # figure out number of params
    my $nparams = 0;
    if ($tree->{nodes}[$next]->{type} eq 'TEXT') {
        my $text = $tree->{nodes}[$next]->{content};

        if ($text =~ /^\s*\[\s*([0-9])+\s*\]\s*$/) {
            $nparams = $1;
        }

        $next++;
    }

    # default templates-- just repeat the declarations
    #
    my ($btemplate) = $parser->_basicparse("\\begin{$command}", 2, 0);
    my ($etemplate) = $parser->_basicparse("\\end{$command}", 2, 0);

    my $endpos = $next;

    # get two group subtrees... one for the begin and one for the end 
    # templates. we only ignore whitespace TEXT nodes and comments
    #
    $next = _skipBlankNodes($tree, $next);
    if (_validParamNode($tree->{nodes}[$next])) {
        $btemplate = $parser->_duplicateParam($tree->{nodes}[$next]);
        $next++;

        $next = _skipBlankNodes($tree, $next);

        if (_validParamNode($tree->{nodes}[$next])) {
            $etemplate = $parser->_duplicateParam($tree->{nodes}[$next]);
            $endpos = $next;
        }
    }

    # build and return the mapping hash
    #
    return {name => $command,
        nparams => $nparams,
        btemplate => $btemplate,    # begin template
        etemplate => $etemplate,    # end template
        skip => $endpos - $i,
        type => 'environment'};
}

# make a mapping from a newcommand fragment 
# takes tree pointer and index of command node
#
# newcommands have the following syntax:
#
# \newcommand{\name}[nparams]?{anyTeX}
#
sub _makeMapping {
    my $tree = shift;
    my $i = shift;

    return undef if ($tree->{nodes}[$i]->{type} ne 'COMMAND' ||
        ($tree->{nodes}[$i]->{command} ne 'newcommand' &&
        $tree->{nodes}[$i]->{command} ne 'renewcommand'));

    # figure out command (first child, text node)
    my $command = $tree->{nodes}[$i]->{children}->{nodes}[0]->{content}; 
    if ($command =~ /^\s*\\(\S+)\s*$/) {
        $command = $1;
    }

    my $next = $i+1;

    # figure out number of params
    my $nparams = 0;
    if ($tree->{nodes}[$next]->{type} eq 'TEXT') {
        my $text = $tree->{nodes}[$next]->{content};

        if ($text =~ /^\s*\[\s*([0-9])+\s*\]\s*$/) {
            $nparams = $1;
        }

        $next++;
    }

    # grab subtree template (array ref)
    #
    my $template;
    if ($tree->{nodes}[$next]->{type} eq 'GROUP') {
        $template = $tree->{nodes}[$next]->{children}->copy();
    } else {
        return undef;
    }

    # build and return the mapping hash
    #
    return {name => $command,
        nparams => $nparams,
        template => $template,
        skip => $next - $i,
        type => 'command'};
}

# this sub is the main entry point for the sub that actually takes a set of
# parameter trees and inserts them into a template tree. the return result,
# newly allocated, should be plopped back into the original tree where the
# parameters (along with the initial command invocation)
#
sub _applyParamsToTemplate {
    my $template = shift;
    my @params = @_;

    # have to copy the template to a freshly allocated tree
    #
    my $applied = $template->copy();

    # now recursively apply the params.
    #
    _applyParamsToTemplate_r($applied, @params);

    return $applied;
}

# recursive helper for above
#
sub _applyParamsToTemplate_r {
    my $template = shift;
    my @params = @_;

    for (my $i = 0; $i < @$template; $i++) {

        my $node = $template->[$i];

        if ($node->{type} eq 'TEXT') {

            my $text = $node->{content};

            # find occurrences of the parameter flags
            #
            if ($text =~ /(#([0-9]+))/) {

                my $all = $1;
                my $num = $2;

                # get the index of the flag we just found
                #
                my $idx = index $text, $all, 0;

                # split the node on the location of the flag
                #
                my ($leftnode, $rightnode) = $node->split($idx, $idx + length($all) - 1);

                # make a copy of the param we want
                #
                my $param = $params[$num - 1]->copy();

                # splice the new text nodes, along with the parameter subtree, into
                # the old location
                #
                splice @$template, $i, 1, $leftnode, @$param, $rightnode;

                # skip forward to where $rightnode is in $template on next iteration
                #
                $i += scalar @$param;
            }
        }

        # recur
        elsif (defined $node->{children}) {

            _applyParamsToTemplate_r($node->{children}, @params);
        }
    }
}


# This sub takes a chunk of the document text between two points and makes 
# it into a list of TEXT nodes and COMMENT nodes, as we would expect from 
# '%' prefixed LaTeX comment lines
#
sub _getTextAndCommentNodes {
    my $text = shift;
    my $begins = shift;
    my $ends = shift;

    my $nodetext = substr $text, $begins, $ends - $begins;

    #warn "getTextAndCommentNodes: looking at [$nodetext]";

    my @lines = split (/((?: *(?<!\\)\%[^\n]*\n)+)/m, $nodetext);

    my @nodes = ();

    my $startpos = 0;
    my @out = ();
    my $comment = 0; # comment mode
    my $first = 1;

    foreach my $line (@lines) {

        my $mode;

        # set current mode based on whether this is a comment
        if ($line =~ /^\s*\%/ && $nodetext !~ /\\begin\{verbatim\}.*\Q$line\E.*\\end\{verbatim\}/s) {
            $mode = 1;  # comment
        } else {
            $mode = 0;  # not-comment (regular text)
        }

        # if mode stays the same, add to output and do nothing
        if ($mode == $comment || $first) { 

            push @out, $line;

            # handle turning off initialization stuff
            if ($first) {
                $first = 0;
                $comment = $mode;
            }
        }

        # if mode changes, make new node from current chunk, change mode
        # flag, and start a new chunk
        else {
        my $output = join ('', @out);

            my $newnode = LaTeX::TOM::Node->new( 
                {type => $comment ? 'COMMENT' : 'TEXT',
                 start => $begins + $startpos,
                 end => $begins + $startpos + length($output) - 1,
                 content => $output});

            push @nodes, $newnode;

            $startpos += length($output); # update start position 
            @out = ($line);

            $comment = $mode;
        }
    }

    # make node from last chunk 
    my $output = join('', @out);

    my $newnode = LaTeX::TOM::Node->new(
        {type => $comment ? 'COMMENT' : 'TEXT',
         start => $begins + $startpos,
         end => $begins + $startpos + length($output) - 1,
         content => $output});

    push @nodes, $newnode;

    return @nodes;
}

# Read in the contents of a text file on disk. Return in string scalar.
#
sub _readFile {
    my $file = shift || '/dev/stdin';

    my $contents = "";

    open INFILE, $file or return "";

    while (my $line = <INFILE>) {
        $contents .= $line;
    }

    close INFILE;

    return $contents;
}

1;
liblatex-tom-perl 1.00-1 / usr / share / perl5 / LaTeX / TOM / Parser.pm