/usr/lib/hugs/packages/HaXml/Text/XML/HaXml/Xtract/Lex.hs is in libhugs-haxml-bundled 98.200609.21-5.4+b3.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 | -- | This is another hand-written lexer, this time for the Xtract
-- command-language. The entry point is lexXtract. You don't
-- normally need to use this module directly - the lexer is called
-- automatically by the parser. (We only expose this interface
-- for debugging purposes.)
--
-- The Xtract command language is very like the XPath specification.
module Text.XML.HaXml.Xtract.Lex
( lexXtract
, Posn(..)
, TokenT(..)
, Token
) where
import Char
import List(isPrefixOf)
type Token = Either String (Posn, TokenT)
data Posn = Pn Int -- char index only
deriving Eq
instance Show Posn where
showsPrec p (Pn c) = showString "char pos " . shows c
data TokenT =
Symbol String
| TokString String -- begins with letter
| TokNum Integer -- begins with digit
deriving Eq
instance Show TokenT where
showsPrec p (Symbol s) = showString s
showsPrec p (TokString s) = showString s
showsPrec p (TokNum n) = shows n
emit :: TokenT -> Posn -> Token
emit tok p = forcep p `seq` Right (p,tok)
where forcep (Pn n) = n
lexerror :: String -> Posn -> [Token]
lexerror s p = [Left ("Lexical error in selection pattern at "++show p++": "
++s++"\n")]
addcol :: Int -> Posn -> Posn
addcol n (Pn c) = Pn (c+n)
newline, tab :: Posn -> Posn
newline (Pn c) = Pn (c+1)
tab (Pn c) = Pn (((c`div`8)+1)*8)
white :: Char -> Posn -> Posn
white '\t' = tab
white ' ' = addcol 1
white '\n' = addcol 1
white '\r' = addcol 1
white '\xa0' = addcol 1
blank :: (Posn->String->[Token]) -> Posn-> String-> [Token]
blank k p [] = []
blank k p (' ': s) = blank k (addcol 1 p) s
blank k p ('\t':s) = blank k (tab p) s
blank k p ('\n':s) = blank k (newline p) s
blank k p ('\r':s) = blank k p s
blank k p ('\xa0': s) = blank k (addcol 1 p) s
blank k p s = k p s
----
lexXtract :: String -> [Token]
lexXtract = selAny (Pn 1)
syms = "/[]()@,=*&|~$+-<>"
selAny :: Posn -> String -> [Token]
selAny p [] = []
selAny p ('/':ss)
| '/' == head ss = emit (Symbol "//") p: selAny (addcol 2 p) (tail ss)
selAny p ('!':ss)
| '=' == head ss = emit (Symbol "!=") p: selAny (addcol 2 p) (tail ss)
selAny p ('<':ss)
| '=' == head ss = emit (Symbol "<=") p: selAny (addcol 2 p) (tail ss)
selAny p ('>':ss)
| '=' == head ss = emit (Symbol ">=") p: selAny (addcol 2 p) (tail ss)
selAny p ('\'':ss) = emit (Symbol "'") p:
accumulateUntil '\'' (Symbol "'") [] p (addcol 1 p) ss selAny
selAny p ('"':ss) = emit (Symbol "\"") p:
accumulateUntil '"' (Symbol "\"") [] p (addcol 1 p) ss selAny
selAny p ('_':ss) = gatherName "_" p (addcol 1 p) ss (blank selAny)
selAny p (':':ss) = gatherName ":" p (addcol 1 p) ss (blank selAny)
selAny p ('.':ss)
| "=." `isPrefixOf` ss = emit (Symbol ".=.") p: selAny (addcol 3 p) (drop 2 ss)
| "!=." `isPrefixOf` ss = emit (Symbol ".!=.") p: selAny (addcol 4 p) (drop 3 ss)
| "<." `isPrefixOf` ss = emit (Symbol ".<.") p: selAny (addcol 3 p) (drop 2 ss)
| "<=." `isPrefixOf` ss = emit (Symbol ".<=.") p: selAny (addcol 4 p) (drop 3 ss)
| ">." `isPrefixOf` ss = emit (Symbol ".>.") p: selAny (addcol 3 p) (drop 2 ss)
| ">=." `isPrefixOf` ss = emit (Symbol ".>=.") p: selAny (addcol 4 p) (drop 3 ss)
| "/" `isPrefixOf` ss = emit (Symbol "./") p: selAny (addcol 2 p) (drop 1 ss)
selAny p (s:ss)
| s `elem` syms = emit (Symbol [s]) p: selAny (addcol 1 p) ss
| isSpace s = blank selAny p (s:ss)
| isAlpha s = gatherName [s] p (addcol 1 p) ss (blank selAny)
| isDigit s = gatherNum [s] p (addcol 1 p) ss (blank selAny)
| otherwise = lexerror "unrecognised pattern" p
gatherName acc pos p (s:ss) k
| isAlphaNum s || s `elem` "-_:" = gatherName (s:acc) pos (addcol 1 p) ss k
gatherName acc pos p ss k =
emit (TokString (reverse acc)) pos: k p ss
gatherNum acc pos p (s:ss) k
| isHexDigit s = gatherNum (s:acc) pos (addcol 1 p) ss k
gatherNum acc pos p ss k =
emit (TokNum (read (reverse acc))) pos: k p ss
accumulateUntil c tok acc pos p [] k =
lexerror ("found end of pattern while looking for "++c
:" to match opening quote at "++show pos) p
accumulateUntil c tok acc pos p (s:ss) k
| c==s = emit (TokString (reverse acc)) pos:
emit tok p: k (addcol 1 p) ss
| isSpace s = accumulateUntil c tok (s:acc) pos (white s p) ss k
| otherwise = accumulateUntil c tok (s:acc) pos (addcol 1 p) ss k
|