This file is indexed.

/usr/share/pyshared/rdflib/sparql/parser.py is in python-rdflib 2.4.2-1ubuntu1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
""" SPARQL Lexer, Parser and Function-Mapper
By Shawn Brown <http://shawnbrown.com/contact>

TO DO:
  swap current parser functions for Michelp's pyparsing setup
  add mapping for FILTER/constraints
  typed literals
  integer, double or boolean abbreviations
  language tags (e.g., @fr)
  nested OPTIONALs ???
  blank node and RDF collection syntax ???
  GRAPH statements ???

CURRENTLY SUPPORTED:
  Simple SELECT queries
  Predicate-object and object list shorthand
    (e.g., ?x  foaf:name  ?name ; foaf:mbox  ?mbox ; vcard:TITLE  ?title)
  Multi-line/triple-quoted literals
  BASE, PREFIX, SELECT, WHERE, UNION, OPTIONAL, multiple UNIONs and multiple
    OPTIONALs (but not nested OPTIONALs)

USAGE:
    #from sparql_lpm import doSPARQL
    from rdflib.sparql.parser import doSPARQL
    ...load graph...
    ...define SPARQL query as string...
    result = doSPARQL(queryStr, sparqlGr)

"""

import base64
import re
from rdflib.URIRef import URIRef
from rdflib.sparql.graphPattern import GraphPattern

def _escape(text):
    """ base64-encode text into a single-line token

    The result only contains characters of the base64 alphabet, so it can be
    embedded between quote or angle-bracket delimiters without clashing with
    the punctuation the lexer later splits on.

    text -- byte string, or unicode text (encoded as UTF-8 before encoding)
    returns the encoded token as the native str type
    """
    if not isinstance(text, bytes):
        text = text.encode("utf-8")
    # b64encode() never inserts line breaks; the encodestring() alias used
    # previously wrapped its output (so newlines had to be stripped) and was
    # removed from the standard library in Python 3.9
    encoded = base64.b64encode(text)
    if not isinstance(encoded, str):
        # Python 3 hands back bytes; callers concatenate with str delimiters
        encoded = encoded.decode("ascii")
    return encoded
def _unescape(text):
    """ decode a base64 token (as produced by _escape) back into text

    text -- base64 token
    returns the decoded value as the native str type
    """
    # b64decode() replaces the decodestring() alias, which was removed from
    # the standard library in Python 3.9
    decoded = base64.b64decode(text)
    if not isinstance(decoded, str):
        # Python 3 hands back bytes; callers expect text
        decoded = decoded.decode("utf-8")
    return decoded

def _escapeLiterals(query):
    """ replace every quoted literal by its _escape()d form in single quotes

    A literal is only recognised when it is directly followed (optionally
    after whitespace) by one of the punctuation marks . , ; or }.
    """
    # opening delimiter, lazily matched body, same delimiter again.
    # NOTE: inside a character class "\1" is a numeric character escape, not
    # a back-reference, so the body class matches practically any character
    literal = r"(\"\"\"|'''|[\"'])([^\1]*?[^\\]?)\1"
    # punctuation that has to follow the closing delimiter (kept as-is)
    terminator = r"(\s*[.,;\}])"

    def encode(match):
        return "".join(("'", _escape(match.group(2)), "'", match.group(3)))

    return re.compile(literal + terminator).sub(encode, query)

def _resolveShorthand(query):
    """ expand syntactic shorthand (2.8 Other Syntactic Forms) into full triples

    Handles predicate-object lists (";") and object lists (","); each rule is
    re-applied until it no longer changes the query, predicate-object lists
    first.
    """
    rules = (
        # 2.8.1 Predicate-Object Lists
        r"(\{.*?)([^ ]+ )([^ ]+ [^ ]+)\s?; ([^ ]+ [^ ]+\s?[,;\.\}])",
        # 2.8.2 Object Lists
        r"(\{.*?)([^ ]+ [^ ]+ )([^ ]+\s?), ([^ ]+\s?[,\.\}])",
    )
    for rule in rules:
        regex = re.compile(rule)
        while True:
            # repeat the shared part (group 2) in front of the list item
            query, replaced = regex.subn(r"\1\2\3 . \2\4", query)
            if replaced == 0:
                break
    # TO DO: look at adding all that other crazy stuff!!!
    return query

def _resolvePrefixes(query):
    """ resolve prefixed IRIs, remove PREFIX statements

    Every IRI in the returned query -- whether written out as <...> or given
    as a prefixed name -- is replaced by <ESCAPED>, where ESCAPED is the
    _escape()d full IRI.

    query -- whitespace-normalised query string
    returns the rewritten query string
    """
    # user-declared prefixes come first so they win over the built-in ones
    prefixes = re.findall(r"PREFIX ([\w\d]+:) <([^<>]+)>", query)
    prefixes.extend([
        ("rdf:", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"),
        ("rdfs:", "http://www.w3.org/2000/01/rdf-schema#"),
        ("xsd:", "http://www.w3.org/2001/XMLSchema#"),
        ("fn:", "http://www.w3.org/2004/07/xpath-functions")])
    # the colon-only prefix goes last
    matches = re.search("PREFIX : <([^<>]+)>", query)
    if matches is not None:
        prefixes.append((":", matches.group(1)))
    # remove PREFIX statements
    query = re.sub(r"PREFIX [\w\d]*:[ ]?<[^<>]+>[ ]?", "", query)
    # escape the IRIs that are already written out in full
    query = re.sub("<([^<>]+)>",
                   lambda m: "<" + _escape(m.group(1)) + ">", query)
    # resolve prefixed IRIs
    for prefix, namespace in prefixes:
        def expand(m, namespace=namespace):
            return "<" + _escape(namespace + m.group(1)) + ">" # escaped too
        # the look-behind keeps a prefix from matching the tail of a longer
        # name (e.g. "df:" inside "rdf:type"), which used to corrupt the
        # longer prefixed name
        pattern = r"(?<![\w\d])" + re.escape(prefix) + r"([^ .\}]+)"
        query = re.sub(pattern, expand, query)
    return query

def _resolveBase(query):
    """ prepend the BASE IRI to relative IRIs and drop the BASE statement

    Returns the query untouched when it has no BASE statement.
    """
    declaration = re.compile(r"BASE <([^<>]+)>\s?")
    found = declaration.search(query)
    if found is None:
        return query
    baseIRI = found.group(1)

    def absolutize(match):
        return "<" + baseIRI + match.group(1) + ">"

    # an IRI without ":" (and without blanks) is treated as relative
    resolved = re.sub("<([^<>: ]+)>", absolutize, query)
    # remove BASE statement
    return declaration.sub("", resolved)

def _parseSelect(query):
    """ return the SELECTed variables as a tuple of strings (with their ? or $
        marker), or None when no SELECT clause with variables is found """
    variable = "[?$][\\w\\d]+" # SELECT variable pattern
    clause = re.search("SELECT(?: " + variable + ")+", query)
    if clause is None:
        return None
    return tuple(re.findall(variable, clause.group(0)))

class _StackManager:
    """ manages token stack for _parser()

    Walks over the given tokens one at a time, silently skipping blank ("")
    tokens.  token() returns the current token, or None once the tokens are
    used up (or when there were none to begin with).
    """
    def __init__(self, tokenList):
        # tokenList -- iterable of token strings
        self.stack = iter(tokenList)
        self.current = None
        # position on the first real token; shares the blank-skipping and
        # end-of-input handling with next(), so an empty token list no
        # longer raises StopIteration from the constructor
        self.next()
    def next(self):
        """ advance to the next non-blank token (None when exhausted) """
        # iterating (instead of recursing once per blank token) keeps long
        # runs of blanks from growing the call stack
        for candidate in self.stack:
            if candidate != "":
                self.current = candidate
                return
        self.current = None
    def token(self):
        """ return the current token without advancing """
        return self.current

#
# The following classes, _listTypes dictionary and _makeList() function are
# used to test for recognized keywords and to create "typed" lists for nested
# statements when parsing the SPARQL query's WHERE statement
#
class Where(list):
    """ typed list for the contents of a WHERE group """

class Union(list):
    """ typed list for the contents of a UNION group """

class Optional(list):
    """ typed list for the contents of an OPTIONAL group """

# recognized keyword -> zero-argument factory returning a new, empty typed list
_listTypes = {
    "WHERE": Where,
    "UNION": Union,
    "OPTIONAL": Optional,
}

def _makeList(keyword):
    """ return a new, empty typed list for a recognized keyword, else None """
    factory = _listTypes.get(keyword)
    if factory is None:
        return None
    return factory()

def _parser(stack, listType="WHERE"):
    """ simple recursive descent SPARQL parser

    stack    -- token source offering token() (current token, None at the
                end) and next() (advance), e.g. a _StackManager
    listType -- keyword naming the typed list that collects this group

    Returns the typed list for this group.  Its items are statement strings
    (tokens joined by single blanks) and nested typed lists, one per
    "{ ... }" group, typed after the keyword that preceded the group (or
    after listType when there was none).
    """
    typedList = _makeList(listType)
    nestedType = listType
    while stack.token() is not None:
        token = stack.token()
        if _makeList(token) is not None:
            # keyword: remember it as the type of the group that follows
            nestedType = token
        elif token == "{":
            stack.next() # iterate to next token
            typedList.append(_parser(stack, nestedType))
            nestedType = listType # reset nestedType
        elif token == "}":
            # the closing brace is consumed by the caller's stack.next()
            return typedList
        elif token != ".":
            words = []
            # A keyword also ends the statement: the "." in front of
            # OPTIONAL/UNION may be left out, and swallowing the keyword
            # would turn the following group into a plain nested group.
            while (token is not None and token not in (".", "{", "}")
                    and _makeList(token) is None):
                words.append(token)
                stack.next()
                token = stack.token()
            typedList.append(" ".join(words).strip())
            # the delimiter is still the current token; handle it above
            continue
        stack.next()
    return typedList

def _parseWhere(query):
    """ feed the given token list through _parser() via a _StackManager and
        return the resulting nested list of typed lists """
    return _parser(_StackManager(query))

def _findStatements(stmntType, stmntList):
    """ recurse over nested list, compile & return flat list of matching
        statement strings used by _getStatements()

        stmntType -- keyword naming the typed list to descend into
        stmntList -- list of statement strings and nested typed lists

        Statement strings are collected in order; nested lists of the
        requested type are flattened into the result, nested lists of any
        other type are skipped. """
    statements = []
    wanted = type(_makeList(stmntType))
    # accept unicode statements as well: with a unicode query every statement
    # is unicode, and a plain "is str" check silently dropped all of them
    textTypes = (str, type(u""))
    for stmnt in stmntList:
        if isinstance(stmnt, textTypes):
            statements.append(stmnt)
        elif type(stmnt) is wanted:
            statements.extend(_findStatements(stmntType, stmnt))
    return statements

def _getStatements(stmntType, stmntList):
    """ return one flat statement list (see _findStatements) for every item
        of stmntList that is a typed list of the given type """
    wanted = type(_makeList(stmntType))
    return [_findStatements(stmntType, group)
            for group in stmntList
            if type(group) == wanted]

def _buildGraphPattern(triples):
    """ turn a list of "subject predicate object" statement strings into a
        GraphPattern

        Terms wrapped in <...> are unescaped and become URIRef objects; an
        object wrapped in quotes is unescaped to its literal text; all other
        terms (e.g. variables) stay plain strings. """
    def isIRI(term):
        return term[0] == "<" and term[-1] == ">"

    def isLit(term):
        return ((term[0] == "'" and term[-1] == "'")
                or (term[0] == '"' and term[-1] == '"'))

    def toIRI(term):
        return URIRef(_unescape(term[1:-1]))

    # split strings into tuples of strings
    pieces = [tuple(re.split(" ", statement)) for statement in triples]
    # convert tuples of strings into tuples of RDFLib objects
    converted = []
    for parts in pieces:
        sub, pred, obj = parts[0], parts[1], parts[2]
        if isIRI(sub):
            sub = toIRI(sub)
        if isIRI(pred):
            pred = toIRI(pred)
        # only the object position may hold a literal
        if isIRI(obj):
            obj = toIRI(obj)
        elif isLit(obj):
            obj = _unescape(obj[1:-1])
        converted.append((sub, pred, obj))
    return GraphPattern(converted)

def _buildQueryArgs(query):
    """ lex and parse a SPARQL query string into query arguments

        Returns a dictionary with the keys "select" (tuple of variables or
        None), "where" and "optional" (lists of GraphPattern objects). """
    # --- lexer ---
    lexed = _escapeLiterals(query) # are unescaped in _buildGraphPattern()
    lexed = re.sub(r"\s+", " ", lexed).strip() # normalize whitespace
    lexed = _resolveShorthand(lexed) # resolve pred-obj and obj lists
    lexed = _resolveBase(lexed) # resolve relative IRIs
    lexed = _resolvePrefixes(lexed) # resolve prefixes
    lexed = re.sub(r"\s*([.;,\{\}])\s*", r" \1 ", lexed) # normalize punctuation
    # keep only what sits between the first "{" and the last "}"
    opening = lexed.find("{")
    closing = lexed.rfind("}")
    tokens = lexed[opening + 1:closing].strip().split(" ")

    # --- parser ---
    select = _parseSelect(lexed) # tuple of select variables
    whereObj = _parseWhere(tokens) # nested list of typed lists

    # --- map parsed object to lists of RDFLib graphPattern objects ---
    groups = _getStatements("WHERE", [whereObj]) # pass whereObj as nested list
    groups.extend(_getStatements("UNION", whereObj))
    where = [_buildGraphPattern(group) for group in groups]
    optional = [_buildGraphPattern(group)
                for group in _getStatements("OPTIONAL", whereObj)]
    return { "select":select, "where":where, "optional":optional }

def doSPARQL(query, sparqlGr):
    """ Parse the SPARQL query string and run it against the given SPARQL
        graph; returns whatever sparqlGr.query() returns. """
    args = _buildQueryArgs(query)
    select, where, optional = args["select"], args["where"], args["optional"]
    return sparqlGr.query(select, where, optional)


# Demo driver: when run as a script, push every sample query below through
# _buildQueryArgs() and print the resulting select/where/optional arguments.
# No graph is loaded and no query is actually executed.
if __name__ == "__main__":
    # sample queries, one per supported feature
    testCases = [
# basic
"""
SELECT ?name
WHERE { ?a <http://xmlns.com/foaf/0.1/name> ?name }
""",
# simple prefix
"""
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
SELECT ?name
WHERE { ?a foaf:name ?name }
""",
# base statement
"""
BASE <http://xmlns.com/foaf/0.1/>
SELECT ?name
WHERE { ?a <name> ?name }
""",
# prefix and colon-only prefix
"""
PREFIX : <http://xmlns.com/foaf/0.1/>
PREFIX vcard: <http://www.w3.org/2001/vcard-rdf/3.0#>
SELECT ?name ?title
WHERE {
    ?a :name ?name .
    ?a vcard:TITLE ?title
}
""",
# predicate-object list notation
"""
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
SELECT ?name ?mbox
WHERE {
    ?x  foaf:name  ?name ;
        foaf:mbox  ?mbox .
}
""",
# object list notation
"""
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
SELECT ?x
WHERE {
    ?x foaf:nick  "Alice" ,
                  "Alice_" .
}
""",
# escaped literals
"""
PREFIX tag: <http://xmlns.com/foaf/0.1/>
PREFIX vcard: <http://www.w3.org/2001/vcard-rdf/3.0#>
SELECT ?name
WHERE {
    ?a tag:name ?name ;
       vcard:TITLE "escape test vcard:TITLE " ;
       <tag://test/escaping> "This is a ''' Test \"\"\"" ;
       <tag://test/escaping> ?d
}
""",
# key word as variable
"""
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
SELECT ?PREFIX ?WHERE
WHERE {
    ?x  foaf:name  ?PREFIX ;
        foaf:mbox  ?WHERE .
}
""",
# key word as prefix
"""
PREFIX WHERE: <http://xmlns.com/foaf/0.1/>
SELECT ?name ?mbox
WHERE {
    ?x  WHERE:name  ?name ;
        WHERE:mbox  ?mbox .
}
""",
# some test cases from grammar.py
"SELECT ?title WHERE { <http://example.org/book/book1> <http://purl.org/dc/elements/1.1/title> ?title . }",

"""PREFIX foaf: <http://xmlns.com/foaf/0.1/>
SELECT ?name ?mbox
WHERE { ?person foaf:name ?name .
OPTIONAL { ?person foaf:mbox ?mbox}
}""",

"""PREFIX foaf: <http://xmlns.com/foaf/0.1/>
SELECT ?name ?name2
WHERE { ?person foaf:name ?name .
OPTIONAL { ?person foaf:knows ?p2 . ?p2 foaf:name   ?name2 . }
}""",

"""PREFIX foaf: <http://xmlns.com/foaf/0.1/>
#PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
SELECT ?name ?mbox
WHERE
{
{ ?person rdf:type foaf:Person } .
OPTIONAL { ?person foaf:name  ?name } .
OPTIONAL {?person foaf:mbox  ?mbox} .
}"""
    ]

    # the output starts with a plain-text Content-type header line
    print "Content-type: text/plain\n\n"
    for query in testCases:
        print "\n-----\n"
        # echo the query the way it would be typed at the interactive prompt
        print '>>> query = """' + query.replace("\n", "\n... ") + '"""'
        print ">>> result = doSPARQL(query, sparqlGr)\n"
        # only the arguments are built here; the graph query itself is not run
        result = _buildQueryArgs(query);
        print "select = ", result["select"], "\n"
        print "where = ", result["where"], "\n"
        print "optional = ", result["optional"], "\n"
        print "result = sparqlGr.query(select, where, optional)"