/usr/share/pyshared/patsy/infix_parser.py is in python-patsy 0.2.1-3.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

# This file is part of Patsy
# Copyright (C) 2011 Nathaniel Smith <njs@pobox.com>
# See file COPYING for license information.

# This file implements a simple "shunting yard algorithm" parser for infix
# languages with parentheses. It is used as the core of our parser for
# formulas, but is generic enough to be used for other purposes as well
# (e.g. parsing linear constraints). It just builds a parse tree; semantics
# are somebody else's problem.
# 
# Plus it spends energy on tracking where each item in the parse tree comes
# from, to allow high-quality error reporting.
#
# You are expected to provide a collection of Operators, a collection of
# atomic types, and an iterator that provides Tokens. Each Operator should
# have a unique token_type (which is an arbitrary Python object), and each
# Token should have a matching token_type, or one of the special types
# Token.LPAREN, Token.RPAREN. Each Token is required to have a valid Origin
# attached, for error reporting.
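#
# For a toy arithmetic grammar the setup might look like this (a sketch
# with made-up token types; the real formula grammar lives in
# parse_formula.py):
#
#   ops = [Operator("+", 2, 10), Operator("*", 2, 20)]
#   atomic = ["NUMBER"]
#   # tokens: a stream like NUMBER(1) "+" NUMBER(2) "*" NUMBER(3),
#   # each Token carrying a valid Origin
#   tree = infix_parse(tokens, ops, atomic)
#   # -> a ParseNode tree rooted at "+", with the "*" subtree as its
#   #    second argument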

# XX: still seriously consider putting the magic intercept handling into the
# tokenizer. we'd still need separate term-sets that get pasted together by ~
# to create the modeldesc, though... heck maybe we should just have a
# modeldesc be 1-or-more termsets, with the convention that if it's 1, then
# it's a rhs, and if it's 2, it's (lhs, rhs), and otherwise you're on your
# own. Test: would this be useful for multiple-group log-linear models,
# maybe? Answer: Perhaps. outcome ~ x1 + x2 ~ group. But lots of other
# plausible, maybe better ways to write this -- (outcome | group) ~ x1 + x2?
# "outcome ~ x1 + x2", group="group"? etc.

__all__ = ["Token", "ParseNode", "Operator", "parse"]

from patsy import PatsyError
from patsy.origin import Origin
from patsy.util import repr_pretty_delegate, repr_pretty_impl

class _UniqueValue(object):
    def __init__(self, print_as):
        self._print_as = print_as

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self._print_as)


class Token(object):
    """A token with possible payload.

    .. attribute:: type

       An arbitrary object indicating the type of this token. Should be
       :term:`hashable`, but otherwise it can be whatever you like.
    """
    LPAREN = _UniqueValue("LPAREN")
    RPAREN = _UniqueValue("RPAREN")

    def __init__(self, type, origin, extra=None):
        self.type = type
        self.origin = origin
        self.extra = extra

    __repr__ = repr_pretty_delegate
    def _repr_pretty_(self, p, cycle):
        assert not cycle
        kwargs = []
        if self.extra is not None:
            kwargs = [("extra", self.extra)]
        return repr_pretty_impl(p, self, [self.type, self.origin], kwargs)

class ParseNode(object):
    def __init__(self, type, token, args, origin):
        self.type = type
        self.token = token
        self.args = args
        self.origin = origin

    __repr__ = repr_pretty_delegate
    def _repr_pretty_(self, p, cycle):
        return repr_pretty_impl(p, self, [self.type, self.token, self.args])

class Operator(object):
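    """Describes one operator in the grammar.

    token_type identifies which tokens stand for this operator, arity is
    the number of operands (infix_parse accepts only 1, for prefix/unary,
    or 2, for infix/binary), and precedence is an integer where larger
    values bind more tightly.
    """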
    def __init__(self, token_type, arity, precedence):
        self.token_type = token_type
        self.arity = arity
        self.precedence = precedence

    def __repr__(self):
        return "%s(%r, %r, %r)" % (self.__class__.__name__,
                                   self.token_type, self.arity, self.precedence)

class _StackOperator(object):
    def __init__(self, op, token):
        self.op = op
        self.token = token

_open_paren = Operator(Token.LPAREN, -1, -9999999)
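# The sentinel precedence is lower than that of any real operator
# (infix_parse asserts as much below), so an open paren is never popped by
# ordinary precedence comparisons -- only by its matching close paren.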

class _ParseContext(object):
    def __init__(self, unary_ops, binary_ops, atomic_types, trace):
        self.op_stack = []
        self.noun_stack = []
        self.unary_ops = unary_ops
        self.binary_ops = binary_ops
        self.atomic_types = atomic_types
        self.trace = trace

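# The two _read_*_context functions below form a small two-state machine:
# the parser alternates between wanting a "noun" (an atom, a unary
# operator, or an open paren) and wanting an operator (a binary operator
# or a close paren). Each returns want_noun for the next token.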
def _read_noun_context(token, c):
    if token.type == Token.LPAREN:
        if c.trace:
            print "Pushing open-paren"
        c.op_stack.append(_StackOperator(_open_paren, token))
        return True
    elif token.type in c.unary_ops:
        if c.trace:
            print "Pushing unary op %r" % (token.type,)
        c.op_stack.append(_StackOperator(c.unary_ops[token.type], token))
        return True
    elif token.type in c.atomic_types:
        if c.trace:
            print "Pushing noun %r (%r)" % (token.type, token.extra)
        c.noun_stack.append(ParseNode(token.type, token, [],
                                      token.origin))
        return False
    else:
        raise PatsyError("expected a noun, not '%s'"
                            % (token.origin.relevant_code(),),
                            token)

def _run_op(c):
    assert c.op_stack
    stackop = c.op_stack.pop()
    args = []
    for i in xrange(stackop.op.arity):
        args.append(c.noun_stack.pop())
    args.reverse()
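    # (the pops above collected the operands right-to-left, so reversing
    # restores their original left-to-right order)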
    if c.trace:
        print "Reducing %r (%r)" % (stackop.op.token_type, args)
    node = ParseNode(stackop.op.token_type, stackop.token, args,
                     Origin.combine([stackop.token] + args))
    c.noun_stack.append(node)

def _read_op_context(token, c):
    if token.type == Token.RPAREN:
        if c.trace:
            print "Found close-paren"
        while c.op_stack and c.op_stack[-1].op.token_type != Token.LPAREN:
            _run_op(c)
        if not c.op_stack:
            raise PatsyError("missing '(' or extra ')'", token)
        assert c.op_stack[-1].op.token_type == Token.LPAREN
        # Expand the origin of the item on top of the noun stack to include
        # the open and close parens:
        combined = Origin.combine([c.op_stack[-1].token,
                                   c.noun_stack[-1].token,
                                   token])
        c.noun_stack[-1].origin = combined
        # Pop the open-paren
        c.op_stack.pop()
        return False
    elif token.type in c.binary_ops:
        if c.trace:
            print "Found binary operator %r" % (token.type)
        stackop = _StackOperator(c.binary_ops[token.type], token)
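        # First reduce any stacked operators that bind at least as tightly.
        # Using <= rather than < here makes equal-precedence operators
        # group left-associatively: "a + b + c" parses as "(a + b) + c".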
        while (c.op_stack
               and stackop.op.precedence <= c.op_stack[-1].op.precedence):
            _run_op(c)
        if c.trace:
            print "Pushing binary operator %r" % (token.type)
        c.op_stack.append(stackop)
        return True
    else:
        raise PatsyError("expected an operator, not '%s'"
                            % (token.origin.relevant_code(),),
                            token)

def infix_parse(tokens, operators, atomic_types, trace=False):
    token_source = iter(tokens)

    unary_ops = {}
    binary_ops = {}
    for op in operators:
        assert op.precedence > _open_paren.precedence
        if op.arity == 1:
            unary_ops[op.token_type] = op
        elif op.arity == 2:
            binary_ops[op.token_type] = op
        else:
            raise ValueError("operators must be unary or binary")

    c = _ParseContext(unary_ops, binary_ops, atomic_types, trace)

    # This is an implementation of Dijkstra's shunting yard algorithm:
    #   http://en.wikipedia.org/wiki/Shunting_yard_algorithm
    #   http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm
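    #
    # Invariants: noun_stack holds completed subtrees, op_stack holds
    # operators (and open parens) still waiting for operands, and
    # want_noun records whether the next token must begin an operand.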

    want_noun = True
    for token in token_source:
        if c.trace:
            print "Reading next token (want_noun=%r)" % (want_noun,)
        if want_noun:
            want_noun = _read_noun_context(token, c)
        else:
            want_noun = _read_op_context(token, c)
    if c.trace:
        print "End of token stream"
        
    if want_noun:
        raise PatsyError("expected a noun, but instead the expression ended",
                            c.op_stack[-1].token.origin if c.op_stack else None)

    while c.op_stack:
        if c.op_stack[-1].op.token_type == Token.LPAREN:
            raise PatsyError("Unmatched '('", c.op_stack[-1].token)
        _run_op(c)

    assert len(c.noun_stack) == 1
    return c.noun_stack.pop()
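
# A worked example, using the same toy grammar as the smoke test below: in
# "a + -b * (c + d)", "*" binds more tightly than "+" and the unary "-"
# more tightly still, so the result is a "+" node whose second argument is
# a "*" node with the "-" subtree as its left operand.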

# Much more thorough tests are in parse_formula.py; this is just a smoke test:
def test_infix_parse():
    ops = [Operator("+", 2, 10),
           Operator("*", 2, 20),
           Operator("-", 1, 30)]
    atomic = ["ATOM1", "ATOM2"]
    # a + -b * (c + d)
    mock_origin = Origin("asdf", 2, 3)
    tokens = [Token("ATOM1", mock_origin, "a"),
              Token("+", mock_origin, "+"),
              Token("-", mock_origin, "-"),
              Token("ATOM2", mock_origin, "b"),
              Token("*", mock_origin, "*"),
              Token(Token.LPAREN, mock_origin, "("),
              Token("ATOM1", mock_origin, "c"),
              Token("+", mock_origin, "+"),
              Token("ATOM2", mock_origin, "d"),
              Token(Token.RPAREN, mock_origin, ")")]
    tree = infix_parse(tokens, ops, atomic)
    def te(tree, type, extra):
        assert tree.type == type
        assert tree.token.extra == extra
    te(tree, "+", "+")
    te(tree.args[0], "ATOM1", "a")
    assert tree.args[0].args == []
    te(tree.args[1], "*", "*")
    te(tree.args[1].args[0], "-", "-")
    assert len(tree.args[1].args[0].args) == 1
    te(tree.args[1].args[0].args[0], "ATOM2", "b")
    te(tree.args[1].args[1], "+", "+")
    te(tree.args[1].args[1].args[0], "ATOM1", "c")
    te(tree.args[1].args[1].args[1], "ATOM2", "d")

    from nose.tools import assert_raises
    # No ternary ops
    assert_raises(ValueError,
                  infix_parse, [], [Operator("+", 3, 10)], ["ATOMIC"])

    # smoke test just to make sure there are no egregious bugs in 'trace'
    infix_parse(tokens, ops, atomic, trace=True)