/usr/share/pyshared/rdflib/sparql/parser.py is in python-rdflib 2.4.2-1ubuntu1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
""" SPARQL Lexer, Parser and Function-Mapper
By Shawn Brown <http://shawnbrown.com/contact>
TO DO:
swap current parser functions for Michelp's pyparsing setup
add mapping for FILTER/constraints
typed literals
integer, double or boolean abbreviations
language tags (e.g., @fr)
nested OPTIONALs ???
blank node and RDF collection syntax ???
GRAPH statements ???
CURRENTLY SUPPORTED:
Simple SELECT queries
Predicate-object and object list shorthand
(e.g., ?x foaf:name ?name ; foaf:mbox ?mbox ; vcard:TITLE ?title)
Multi-line/triple-quoted literals
BASE, PREFIX, SELECT, WHERE, UNION, OPTIONAL, multiple UNIONs and multiple
OPTIONALs (but not nested OPTIONALs)
USAGE:
#from sparql_lpm import doSPARQL
from rdflib.sparql.parser import doSPARQL
...load graph...
...define SPARQL query as string...
result = doSPARQL(queryStr, sparqlGr)
"""
import base64
import re
from rdflib.URIRef import URIRef
from rdflib.sparql.graphPattern import GraphPattern
def _escape(text):
    """ Return `text` base64-encoded as a single-line native string.

    The encoded form contains only base64 alphabet characters, so none of
    the regular expressions applied to the query afterwards can match
    inside an escaped literal or IRI.
    """
    # b64encode() needs bytes on Python 3; UTF-8 also lets non-ASCII text
    # through instead of failing on an implicit ASCII conversion.
    if not isinstance(text, bytes):
        text = text.encode("utf-8")
    # Unlike the legacy encodestring(), b64encode() never inserts newlines,
    # so no post-processing of the result is required.
    encoded = base64.b64encode(text)
    if not isinstance(encoded, str):  # Python 3 hands back bytes
        encoded = encoded.decode("ascii")
    return encoded
def _unescape(text):
    """ Return the native string that was base64-encoded into `text`. """
    decoded = base64.b64decode(text)
    if not isinstance(decoded, str):  # Python 3 hands back bytes
        decoded = decoded.decode("utf-8")
    return decoded
def _escapeLiterals(query):
    """ Replace every quoted literal in `query` by its escaped form.

    A literal is text enclosed in single, double or triple quotes that is
    followed by optional whitespace and one of ``. , ; }``.  Each one is
    rewritten as a single-quoted, _escape()d token; the trailing
    whitespace and punctuation are kept unchanged.
    """
    # NOTE: inside a character class "\1" is the control character chr(1),
    # not a back-reference, so "[^\1]*?" effectively matches any character
    # including newlines.  That is what lets multi-line literals match.
    literal = r"(\"\"\"|'''|[\"'])([^\1]*?[^\\]?)\1"
    terminator = r"(\s*[.,;\}])"

    def escapeMatch(match):
        # group 2 is the literal's content, group 3 the trailing punctuation
        return "'" + _escape(match.group(2)) + "'" + match.group(3)

    return re.sub(literal + terminator, escapeMatch, query)
def _resolveShorthand(query):
    """ Resolve some of the syntactic shorthand (2.8 Other Syntactic Forms).

    Predicate-object lists (``s p1 o1 ; p2 o2``) and object lists
    (``s p o1 , o2``) are expanded into separate statements joined by
    `` . ``.  The patterns expect single spaces between tokens.
    """
    def expand(pattern, text):
        # Each pass expands one list item per matching group pattern, so
        # repeat until a pass makes no substitution.  subn() reports the
        # count, which avoids scanning the text twice per pass.
        regex = re.compile(pattern)
        while True:
            text, count = regex.subn(r"\1\2\3 . \2\4", text)
            if not count:
                return text

    # 2.8.1 Predicate-Object Lists: group 2 is the shared subject
    predObjList = r"(\{.*?)([^ ]+ )([^ ]+ [^ ]+)\s?; ([^ ]+ [^ ]+\s?[,;\.\}])"
    query = expand(predObjList, query)
    # 2.8.2 Object Lists: group 2 is the shared subject and predicate
    objList = r"(\{.*?)([^ ]+ [^ ]+ )([^ ]+\s?), ([^ ]+\s?[,\.\}])"
    query = expand(objList, query)
    # TO DO: look at adding all that other crazy stuff!!!
    return query
def _resolvePrefixes(query):
    """ Resolve prefixed IRIs and remove PREFIX statements.

    Every IRI in the returned query, whether written in full or produced
    by expanding a prefixed name, is wrapped in ``<...>`` with its content
    passed through _escape().
    """
    # collect (prefix, namespace) tuples from the PREFIX statements
    prefixes = re.findall(r"PREFIX ([\w\d]+:) <([^<>]+)>", query)
    # Built-in prefixes come after the declared ones, so a declaration of
    # the same prefix is applied first and wins.
    # NOTE(review): the "fn:" namespace has no trailing "#" or "/", so local
    # names are appended directly to it -- confirm this is intended.
    prefixes.extend([
        ("rdf:", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"),
        ("rdfs:", "http://www.w3.org/2000/01/rdf-schema#"),
        ("xsd:", "http://www.w3.org/2001/XMLSchema#"),
        ("fn:", "http://www.w3.org/2004/07/xpath-functions")])
    # the colon-only PREFIX is handled last
    matches = re.search(r"PREFIX : <([^<>]+)>", query)
    if matches is not None:
        prefixes.append((":", matches.group(1)))
    # remove PREFIX statements
    query = re.sub(r"PREFIX [\w\d]*:[ ]?<[^<>]+>[ ]?", "", query)

    # escape IRIs that are already written in full
    def escapeIri(match):
        return "<" + _escape(match.group(1)) + ">"
    query = re.sub(r"<([^<>]+)>", escapeIri, query)

    # resolve prefixed IRIs (the result is escaped too)
    for prefix, namespace in prefixes:
        def expand(match, namespace=namespace):
            return "<" + _escape(namespace + match.group(1)) + ">"
        # The look-behind stops a short prefix from matching the tail of a
        # longer one (e.g. "s:" inside "rdfs:label").
        pattern = r"(?<!\w)" + re.escape(prefix) + r"([^ .\}]+)"
        query = re.sub(pattern, expand, query)
    return query
def _resolveBase(query):
    """ Resolve relative IRIs using the BASE IRI and remove the BASE statement.

    An IRI counts as relative when the text between ``<`` and ``>``
    contains no colon and no space.  The query is returned unchanged when
    it has no BASE statement.
    """
    baseDecl = re.compile(r"BASE <([^<>]+)>\s?")
    found = baseDecl.search(query)
    if found is None:
        return query
    baseIri = found.group(1)

    def absolutize(match):
        return "<" + baseIri + match.group(1) + ">"

    # resolve relative IRIs; the BASE IRI itself contains a colon in any
    # absolute form, so it is not rewritten here
    query = re.sub(r"<([^<>: ]+)>", absolutize, query)
    # remove BASE statement
    return baseDecl.sub("", query)
def _parseSelect(query):
    """ Return a tuple of the SELECTed variables, or None.

    Variables keep their leading ``?`` or ``$``.  None is returned when
    the query has no ``SELECT`` followed by at least one variable.
    """
    var = r"[?$][\w\d]+"  # SELECT variable pattern
    clause = re.search("SELECT(?: " + var + ")+", query)
    if clause is None:
        return None
    return tuple(re.findall(var, clause.group(0)))
class _StackManager(object):
    """ Manages the token stack for _parser().

    Tokens are consumed from left to right.  token() returns the current
    token, or None once the tokens are exhausted; next() advances to the
    following non-blank token.
    """

    def __init__(self, tokenList):
        self.stack = iter(tokenList)
        self.current = None
        # Position on the first non-blank token.  An empty token list
        # leaves the current token as None instead of raising
        # StopIteration from the constructor.
        self.next()

    def next(self):
        """ Advance to the next non-blank token (None when exhausted). """
        # A for loop over the shared iterator consumes tokens one at a
        # time and simply ends when none are left, so blanks are skipped
        # without recursion.
        for token in self.stack:
            if token != "":
                self.current = token
                return
        self.current = None

    def token(self):
        """ Return the current token, or None when there is none. """
        return self.current
#
# The following classes, _listTypes dictionary and _makeList() function are
# used to test for recognized keywords and to create "typed" lists for nested
# statements when parsing the SPARQL query's WHERE statement
#
class Where(list):
    """ List of the statements and nested groups of a WHERE group. """


class Union(list):
    """ List of the statements and nested groups of a UNION group. """


class Optional(list):
    """ List of the statements and nested groups of an OPTIONAL group. """


# Maps each recognized keyword to a zero-argument factory that returns a
# new, empty list of the matching type.
_listTypes = {
    "OPTIONAL": Optional,
    "UNION": Union,
    "WHERE": Where,
}
def _makeList(keyword):
    """ Return a new, empty list of the type registered for `keyword`,
    or None when `keyword` is not a recognized keyword. """
    factory = _listTypes.get(keyword)
    if factory is None:
        return None
    return factory()
def _parser(stack, listType="WHERE"):
    """ Simple recursive descent SPARQL parser.

    Consumes tokens from `stack` (an object offering token() and next())
    and returns a typed list for `listType`.  The list holds statement
    strings and, for every ``{ ... }`` group, a nested typed list.  A
    group is typed by the keyword (WHERE, UNION, OPTIONAL) seen just
    before it, or by `listType` when no keyword precedes it.
    """
    typedList = _makeList(listType)
    nestedType = listType
    while stack.token() is not None:
        token = stack.token()
        if _makeList(token) is not None:
            # keyword: remember it as the type of the next group
            nestedType = token
        elif token == "{":
            stack.next()  # step inside the group
            typedList.append(_parser(stack, nestedType))
            nestedType = listType  # reset nestedType
        elif token == "}":
            # end of this group; the caller steps past the brace
            return typedList
        elif token != ".":
            # Collect one statement.  It ends at ".", "{", "}", the end of
            # input, or a keyword.  Stopping at a keyword means a group
            # that follows a statement with no "." in between (e.g.
            # "?a ?b ?c OPTIONAL { ... }") still gets its keyword type.
            words = []
            while (token is not None and token not in (".", "{", "}")
                   and _makeList(token) is None):
                words.append(token)
                stack.next()
                token = stack.token()
            typedList.append(" ".join(words).strip())
            continue  # the delimiter is handled by the next iteration
        stack.next()
    return typedList
def _parseWhere(query):
    """ Wrap the token sequence `query` in a _StackManager and return the
    nested typed lists that _parser() builds from it. """
    return _parser(_StackManager(query))
def _findStatements(stmntType, stmntList):
    """ Recurse over a nested list and return a flat list of statements.

    Used by _getStatements().  Statements found directly in `stmntList`
    are collected, together with those of nested lists whose type matches
    `stmntType`; nested lists of any other type are skipped.
    """
    wanted = type(_makeList(stmntType))
    statements = []
    for stmnt in stmntList:
        if isinstance(stmnt, list):
            if isinstance(stmnt, wanted):
                statements.extend(_findStatements(stmntType, stmnt))
        else:
            # Anything that is not a nested list is a statement.  Testing
            # for "not a list" rather than for exactly str keeps unicode
            # statements from being silently dropped on Python 2.
            statements.append(stmnt)
    return statements
def _getStatements(stmntType, stmntList):
    """ Get the statements of the given type from the given list.

    Returns one flat statement list (built by _findStatements()) for each
    item of `stmntList` that is a typed list for `stmntType`.
    """
    wanted = type(_makeList(stmntType))
    return [_findStatements(stmntType, item)
            for item in stmntList
            if isinstance(item, wanted)]
def _buildGraphPattern(triples):
    """ Build a GraphPattern from a list of statement strings.

    Each statement is split on single spaces and its first three terms are
    used as subject, predicate and object; any further terms are ignored.
    Escaped IRIs (``<...>``) become URIRef objects, escaped literals in
    object position are unescaped to plain strings, and every other term
    (e.g. a variable) is passed through unchanged.

    Raises IndexError when a statement has fewer than three terms.
    """
    def isIRI(term):
        return term.startswith("<") and term.endswith(">")

    def isLit(term):
        return ((term.startswith("'") and term.endswith("'"))
                or (term.startswith('"') and term.endswith('"')))

    resolved = []
    for statement in triples:
        terms = statement.split(" ")
        sub, pred, obj = terms[0], terms[1], terms[2]
        # unescape and define objects for IRIs and literals
        if isIRI(sub):
            sub = URIRef(_unescape(sub[1:-1]))
        if isIRI(pred):
            pred = URIRef(_unescape(pred[1:-1]))
        if isIRI(obj):
            obj = URIRef(_unescape(obj[1:-1]))
        elif isLit(obj):
            # NOTE(review): literals are handed on as plain strings, not as
            # rdflib Literal objects -- confirm GraphPattern accepts that.
            obj = _unescape(obj[1:-1])
        resolved.append((sub, pred, obj))
    return GraphPattern(resolved)
def _buildQueryArgs(query):
    """ Turn a SPARQL query string into the arguments of a graph query.

    Returns a dict with three keys: "select" (tuple of variable names, or
    None), "where" (list of graph patterns built from the WHERE and UNION
    groups) and "optional" (list of graph patterns built from the
    OPTIONAL groups).
    """
    # query lexer -- the order of these steps matters
    query = _escapeLiterals(query)  # are unescaped in _buildGraphPattern()
    query = re.sub(r"\s+", " ", query).strip()  # normalize whitespace
    query = _resolveShorthand(query)  # resolve pred-obj and obj lists
    query = _resolveBase(query)  # resolve relative IRIs
    query = _resolvePrefixes(query)  # resolve prefixes
    query = re.sub(r"\s*([.;,\{\}])\s*", r" \1 ", query)  # normalize punctuation
    # keep only the text between the first "{" and the last "}"
    body = query[query.find("{") + 1:query.rfind("}")].strip()
    tokens = body.split(" ")  # split into token stack

    # query parser
    select = _parseSelect(query)  # tuple of select variables
    whereObj = _parseWhere(tokens)  # nested list of typed lists

    # map parsed object to lists of RDFLib graphPattern objects; list
    # comprehensions give real lists on Python 2 and Python 3 alike
    where = _getStatements("WHERE", [whereObj])  # pass whereObj as nested list
    where.extend(_getStatements("UNION", whereObj))
    where = [_buildGraphPattern(stmnts) for stmnts in where]
    optional = [_buildGraphPattern(stmnts)
                for stmnts in _getStatements("OPTIONAL", whereObj)]

    return {"select": select, "where": where, "optional": optional}
def doSPARQL(query, sparqlGr):
    """ Run the SPARQL query string `query` against `sparqlGr` and return
    whatever sparqlGr.query() returns for it. """
    args = _buildQueryArgs(query)
    select = args["select"]
    where = args["where"]
    optional = args["optional"]
    return sparqlGr.query(select, where, optional)
if __name__ == "__main__":
    # Self-test: print the query arguments built for a set of sample
    # queries.  No graph is queried; only _buildQueryArgs() is exercised.
    # Each print() call takes a single string so the output is the same
    # on Python 2 and Python 3.
    testCases = [
        # basic
        """
SELECT ?name
WHERE { ?a <http://xmlns.com/foaf/0.1/name> ?name }
""",
        # simple prefix
        """
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
SELECT ?name
WHERE { ?a foaf:name ?name }
""",
        # base statement
        """
BASE <http://xmlns.com/foaf/0.1/>
SELECT ?name
WHERE { ?a <name> ?name }
""",
        # prefix and colon-only prefix
        """
PREFIX : <http://xmlns.com/foaf/0.1/>
PREFIX vcard: <http://www.w3.org/2001/vcard-rdf/3.0#>
SELECT ?name ?title
WHERE {
?a :name ?name .
?a vcard:TITLE ?title
}
""",
        # predicate-object list notation
        """
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
SELECT ?name ?mbox
WHERE {
?x foaf:name ?name ;
foaf:mbox ?mbox .
}
""",
        # object list notation
        """
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
SELECT ?x
WHERE {
?x foaf:nick "Alice" ,
"Alice_" .
}
""",
        # escaped literals
        """
PREFIX tag: <http://xmlns.com/foaf/0.1/>
PREFIX vcard: <http://www.w3.org/2001/vcard-rdf/3.0#>
SELECT ?name
WHERE {
?a tag:name ?name ;
vcard:TITLE "escape test vcard:TITLE " ;
<tag://test/escaping> "This is a ''' Test \"\"\"" ;
<tag://test/escaping> ?d
}
""",
        # key word as variable
        """
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
SELECT ?PREFIX ?WHERE
WHERE {
?x foaf:name ?PREFIX ;
foaf:mbox ?WHERE .
}
""",
        # key word as prefix
        """
PREFIX WHERE: <http://xmlns.com/foaf/0.1/>
SELECT ?name ?mbox
WHERE {
?x WHERE:name ?name ;
WHERE:mbox ?mbox .
}
""",
        # some test cases from grammar.py
        "SELECT ?title WHERE { <http://example.org/book/book1> <http://purl.org/dc/elements/1.1/title> ?title . }",
        """PREFIX foaf: <http://xmlns.com/foaf/0.1/>
SELECT ?name ?mbox
WHERE { ?person foaf:name ?name .
OPTIONAL { ?person foaf:mbox ?mbox}
}""",
        """PREFIX foaf: <http://xmlns.com/foaf/0.1/>
SELECT ?name ?name2
WHERE { ?person foaf:name ?name .
OPTIONAL { ?person foaf:knows ?p2 . ?p2 foaf:name ?name2 . }
}""",
        """PREFIX foaf: <http://xmlns.com/foaf/0.1/>
#PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
SELECT ?name ?mbox
WHERE
{
{ ?person rdf:type foaf:Person } .
OPTIONAL { ?person foaf:name ?name } .
OPTIONAL {?person foaf:mbox ?mbox} .
}"""
    ]
    print("Content-type: text/plain\n\n")
    for query in testCases:
        print("\n-----\n")
        print('>>> query = """' + query.replace("\n", "\n... ") + '"""')
        print(">>> result = doSPARQL(query, sparqlGr)\n")
        result = _buildQueryArgs(query)
        # The spacing inside these format strings reproduces what the
        # comma-separated Python 2 print statements used to emit.
        print("select =  %s \n" % (result["select"],))
        print("where =  %s \n" % (result["where"],))
        print("optional =  %s \n" % (result["optional"],))
        print("result = sparqlGr.query(select, where, optional)")
|