# /usr/share/pyshared/ZSI/parse.py

# $Header$
'''SOAP messaging parsing.
'''

from xml.dom import expatbuilder
from ZSI import _copyright, _children, _attrs, _child_elements, _stringtypes, \
        _backtrace, EvaluateException, ParseException, _valid_encoding, \
        _Node, _find_attr, _resolve_prefix
from ZSI.TC import AnyElement
import types

from ZSI.wstools.Namespaces import SOAP, XMLNS
from ZSI.wstools.Utility import SplitQName

# Small attribute accessors for DOM elements.

def _find_actor(E):
    '''Return E's SOAP-ENV "actor" attribute; a false value becomes None.'''
    return E.getAttributeNS(SOAP.ENV, "actor") or None

def _find_mu(E):
    '''Return E's SOAP-ENV "mustUnderstand" attribute.'''
    return E.getAttributeNS(SOAP.ENV, "mustUnderstand")

def _find_root(E):
    '''Return E's SOAP-ENC "root" attribute.'''
    return E.getAttributeNS(SOAP.ENC, "root")

def _find_id(E):
    '''Return E's "id" attribute as located by _find_attr.'''
    return _find_attr(E, 'id')

class DefaultReader:
    """Default DOM reader, backed by xml.dom.expatbuilder.

    ParsedSoap instantiates its reader class and then calls one of:
        fromString -- expatbuilder.parseString, for string input
        fromStream -- expatbuilder.parse, for any other input
    """
    # staticmethod keeps the plain functions from being bound to the
    # reader instance when they are looked up through it.
    fromString = staticmethod(expatbuilder.parseString)
    fromStream = staticmethod(expatbuilder.parse)

class ParsedSoap:
    '''A Parsed SOAP object.
        Convert the text to a DOM tree and parse SOAP elements.
        Instance data:
            reader -- the DOM reader
            dom -- the DOM object
            ns_cache -- dictionary (by id(node)) of namespace dictionaries
            id_cache -- dictionary (by XML ID attr) of elements
            envelope -- the node holding the SOAP Envelope
            header -- the node holding the SOAP Header (or None)
            body -- the node holding the SOAP Body
            body_root -- the serialization root in the SOAP Body
            data_elements -- list of non-root elements in the SOAP Body
            trailer_elements -- list of elements following the SOAP body
    '''
    defaultReaderClass = DefaultReader

    def __init__(self, input, readerclass=None, keepdom=False,
    trailers=False, resolver=None,  envelope=True, **kw):
        '''Parse the input into a DOM and locate the SOAP structure.

        Arguments:
            input -- the message; handed to the reader's fromString when
                its type is one of _stringtypes, else to fromStream
        Keyword arguments:
            readerclass -- factory class to create a reader (falls back
                to defaultReaderClass)
            keepdom -- do not release the DOM in __del__
            trailers -- allow trailer elements after the Body
                (default is False)
            resolver -- function (bound method) to resolve URI's
            envelope -- look for a SOAP envelope.  When it is False the
                single document element becomes body_root and none of
                the other SOAP attributes (envelope, header, body,
                data_elements, ...) is set.
            kw -- accepted and ignored

        Raises ParseException when the document does not have the
        expected structure.  Errors raised by the reader propagate
        unchanged.
        '''

        self.readerclass = readerclass or self.defaultReaderClass
        self.keepdom = keepdom

        # Reader failures are deliberately not wrapped in a
        # ParseException; the caller sees the reader's own exception.
        self.reader = self.readerclass()
        if type(input) in _stringtypes:
            self.dom = self.reader.fromString(input)
        else:
            self.dom = self.reader.fromStream(input)

        # Seed the namespace cache with the bindings of the document node.
        self.ns_cache = {
            id(self.dom): {
                'xml': XMLNS.XML,
                'xmlns': XMLNS.BASE,
                '': ''
            }
        }
        self.trailers, self.resolver, self.id_cache = trailers, resolver, {}

        # Exactly one child element
        top = [ E for E in _children(self.dom)
                if E.nodeType == _Node.ELEMENT_NODE ]
        if len(top) == 0:
            raise ParseException("Document has no Envelope", 0)
        if len(top) != 1:
            raise ParseException("Document has extra child elements", 0)

        if envelope is False:
            self.body_root = top[0]
            return

        # And that one child must be the Envelope
        env = top[0]
        if env.localName != "Envelope" \
        or env.namespaceURI != SOAP.ENV:
            raise ParseException('Document has "' + env.localName + \
                '" element, not Envelope', 0)
        self._check_for_legal_children("Envelope", env)
        # Apart from xmlns and id, Envelope attributes must carry a prefix.
        for a in _attrs(env):
            name = a.nodeName
            if name.find(":") == -1 and name not in [ "xmlns", "id" ]:
                raise ParseException('Unqualified attribute "' + \
                        name + '" in Envelope', 0)
        self.envelope = env
        if not _valid_encoding(self.envelope):
            raise ParseException("Envelope has invalid encoding", 0)

        # Get Envelope's child elements.
        c = [ E for E in _children(self.envelope)
                if E.nodeType == _Node.ELEMENT_NODE ]
        if len(c) == 0:
            raise ParseException("Envelope is empty (no Body)", 0)

        # Envelope's first child might be the header; if so, nip it off.
        first = c[0]
        if first.localName == "Header" \
        and first.namespaceURI == SOAP.ENV:
            self._check_for_legal_children("Header", first)
            self._check_for_pi_nodes(_children(first), 1)
            self.header = c.pop(0)
            self.header_elements = _child_elements(self.header)
        else:
            self.header, self.header_elements = None, []

        # Now the first child must be the body
        if len(c) == 0:
            raise ParseException("Envelope has header but no Body", 0)
        body = c.pop(0)
        if body.localName != "Body" \
        or body.namespaceURI != SOAP.ENV:
            if self.header:
                raise ParseException('Header followed by "' + \
                        body.localName + \
                        '" element, not Body', 0, body, self.dom)
            else:
                raise ParseException('Document has "' + \
                        body.localName + \
                        '" element, not Body', 0, body, self.dom)
        self._check_for_legal_children("Body", body, 0)
        self._check_for_pi_nodes(_children(body), 0)
        self.body = body
        if not _valid_encoding(self.body):
            raise ParseException("Body has invalid encoding", 0)

        # Trailer elements: whatever is left in c after the Body.
        if not self.trailers:
            if len(c):
                raise ParseException("Element found after Body",
                        0, body, self.dom)
            # Don't set self.trailer_elements = []; if user didn't ask
            # for trailers we *want* to throw an exception.
        else:
            self.trailer_elements = c
            for trailer in self.trailer_elements:
                if not trailer.namespaceURI:
                    raise ParseException('Unqualified trailer element',
                            0, trailer, self.dom)

        # Find the serialization root.  Body children either claim to be
        # the root (root="1"), decline (root="0"), or say nothing (maybe).
        self.body_root, maybe = None, []
        for child in _child_elements(self.body):
            root = _find_root(child)
            if root == "1":
                if self.body_root:
                    raise ParseException("Multiple seralization roots found",
                            0, child, self.dom)
                self.body_root = child
            elif root == "0":
                # Explicitly not the root; never a fallback candidate.
                pass
            elif not root:
                maybe.append(child)
            else:
                raise ParseException('Illegal value for root attribute',
                        0, child, self.dom)

        # If we didn't find a root, get the first one that didn't
        # say "not me", unless they all said "not me."
        if self.body_root is None:
            if len(maybe):
                self.body_root = maybe[0]
            else:
                raise ParseException('No serialization root found',
                        0, self.body, self.dom)
        if not _valid_encoding(self.body_root):
            # Report the root itself, not whichever Body child the loop
            # above happened to visit last.
            raise ParseException("Invalid encoding", 0,
                    self.body_root, self.dom)

        # Now get all the non-roots (in order!).
        rootid = id(self.body_root)
        self.data_elements = [ E for E in _child_elements(self.body)
                                if id(E) != rootid ]
        self._check_for_pi_nodes(self.data_elements, 0)

    def __del__(self):
        try:
            if not self.keepdom:
                self.reader.releaseNode(self.dom)
        except:
            pass

    def _check_for_legal_children(self, name, elt, mustqualify=1):
        '''Verify that elt holds only elements, comments and blank text.

        name -- label for elt used in error messages; "Header" also marks
            the resulting exception as a header error
        elt -- the node whose children are checked
        mustqualify -- when true, child elements need a namespace URI

        Raises ParseException on the first offending child.
        '''
        in_header = (name == "Header")
        for child in _children(elt):
            kind = child.nodeType
            if kind == _Node.COMMENT_NODE:
                continue
            if kind == _Node.ELEMENT_NODE:
                if mustqualify and not child.namespaceURI:
                    raise ParseException('Unqualified element "' + \
                            child.nodeName + '" in ' + name,
                            in_header, elt, self.dom)
                continue
            # Anything else is only tolerated if it is whitespace text.
            if kind == _Node.TEXT_NODE and child.nodeValue.strip() == "":
                continue
            raise ParseException("Non-element child in " + name,
                    in_header, elt, self.dom)

    def _check_for_pi_nodes(self, list, inheader):
        '''Reject processing instructions and DTDs anywhere under the nodes.

        list -- the nodes to inspect (the sequence itself is not modified)
        inheader -- passed through to ParseException

        Raises ParseException for the first PI or document type node met.
        '''
        # Work on a copy and use it as a stack of nodes still to visit.
        pending = list[:]
        while pending:
            node = pending.pop()
            kind = node.nodeType
            if kind == _Node.DOCUMENT_TYPE_NODE:
                raise ParseException('Found DTD', inheader,
                        node.parentNode, self.dom)
            if kind == _Node.PROCESSING_INSTRUCTION_NODE:
                raise ParseException('Found processing instruction "<?' + \
                        node.nodeName + '...>"',
                        inheader, node.parentNode, self.dom)
            pending += _children(node)

    def Backtrace(self, elt):
        '''Return a human-readable "backtrace" from the document root to
        the specified element.

        elt -- the element to locate; the work is delegated to
            _backtrace with this instance's DOM as the root.
        '''
        return _backtrace(elt, self.dom)

    def FindLocalHREF(self, href, elt, headers=1):
        '''Find the element a local ("#fragment") HREF refers to.

        href -- the reference; must start with "#"
        elt -- the referring element, used only for error backtraces
        headers -- when true, the header elements are searched as well

        Returns the element whose id (as reported by _find_id) equals the
        fragment.  Every id met during the search is stored in id_cache,
        so later lookups can be answered without walking the tree.

        Raises EvaluateException when href is not a local reference
        (including an empty href) or when no element carries the id.
        '''
        if not href.startswith('#'):
            raise EvaluateException(
                'Absolute HREF ("%s") not implemented' % href,
                self.Backtrace(elt))
        frag = href[1:]
        # Already found?
        cached = self.id_cache.get(frag)
        if cached: return cached
        # Do a breadth-first search, in the data first.  Most likely
        # to find multi-ref targets shallow in the data area.  The queue
        # is consumed front to back by index; new children go on the end.
        queue = self.data_elements[:] + [self.body_root]
        if headers: queue.extend(self.header_elements)
        pos = 0
        while pos < len(queue):
            node = queue[pos]
            pos += 1
            if node.nodeType == _Node.ELEMENT_NODE:
                nodeid = _find_id(node)
                if nodeid:
                    self.id_cache[nodeid] = node
                    if nodeid == frag: return node
            queue += _children(node)
        raise EvaluateException('''Can't find node for HREF "%s"''' % href,
                self.Backtrace(elt))

    def ResolveHREF(self, uri, tc, **keywords):
        r = getattr(tc, 'resolver', self.resolver)
        if not r:
            raise EvaluateException('No resolver for "' + uri + '"')
        try:
            if type(uri) == types.UnicodeType: uri = str(uri)
            retval = r(uri, tc, self, **keywords)
        except Exception, e:
            raise EvaluateException('''Can't resolve "''' + uri + '" (' + \
                str(e.__class__) + "): " + str(e))
        return retval

    def GetMyHeaderElements(self, actorlist=None):
        '''Return the header elements addressed to the given actor(s).

        actorlist -- optional iterable of actor URIs.  Elements without
            an actor and those for SOAP.ACTOR_NEXT are always included.
        '''
        wanted = [None, SOAP.ACTOR_NEXT]
        if actorlist is not None:
            wanted = list(actorlist) + wanted
        mine = []
        for hdr in self.header_elements:
            if _find_actor(hdr) in wanted:
                mine.append(hdr)
        return mine

    def GetElementNSdict(self, elt):
        '''Return a prefix -> namespace URI dictionary for the element.

        The result combines the parent's dictionary (obtained
        recursively) with the element's own namespace declarations; the
        default namespace is stored under the empty string.  Results are
        cached in ns_cache by id(elt), and callers always get a copy.
        '''
        key = id(elt)
        nsdict = self.ns_cache.get(key)
        if nsdict:
            return nsdict.copy()

        # Start from the parent's bindings (a private copy), then apply
        # this element's own declarations on top.
        if elt != self.dom:
            nsdict = self.GetElementNSdict(elt.parentNode)
        for attr in _attrs(elt):
            if attr.namespaceURI != XMLNS.BASE:
                continue
            if attr.localName == "xmlns":
                prefix = ''
            else:
                prefix = attr.localName
            nsdict[prefix] = attr.nodeValue
        self.ns_cache[key] = nsdict
        return nsdict.copy()

    def GetDomAndReader(self):
        '''Return the tuple (dom, reader) held by this instance.

        Unless keepdom is true, __del__ asks the reader to release the
        DOM when the ParsedSoap instance is deleted.  If keepdom is true,
        releasing the tree is left to the caller, who needs the reader
        for that: reader.releaseNode(dom).
        '''
        return (self.dom, self.reader)

    def IsAFault(self):
        '''Tell whether the body root is a SOAP-ENV Fault element.

        Returns 0 when body_root is false, otherwise the result of the
        namespace/local-name comparison.
        '''
        root = self.body_root
        if root:
            return root.namespaceURI == SOAP.ENV and root.localName == 'Fault'
        return 0

    def Parse(self, how):
        '''Parse the message.
        '''
        if type(how) == types.ClassType: how = how.typecode
        return how.parse(self.body_root, self)

    def WhatMustIUnderstand(self):
        '''List the header elements whose mustUnderstand attribute is "1".

        Returns a list of (namespaceURI, localName) tuples in header
        order.
        '''
        required = []
        for hdr in self.header_elements:
            if _find_mu(hdr) == "1":
                required.append((hdr.namespaceURI, hdr.localName))
        return required

    def WhatActorsArePresent(self):
        '''List the actor URIs found on the header elements.

        Elements without an actor and those addressed to the special
        "next" actor (SOAP.ACTOR_NEXT) are left out.  Duplicates are
        kept, in header order.
        '''
        ignored = [ None, SOAP.ACTOR_NEXT ]
        actors = [ _find_actor(hdr) for hdr in self.header_elements ]
        return [ actor for actor in actors if actor not in ignored ]

    def ParseHeaderElements(self, ofwhat):
        '''Parse the header elements matching the given typecodes.

        ofwhat -- list of typecodes; each one is matched against the
            header_elements by its nspname/pname.

        Returns a dictionary keyed by (nspname, pname).  The value is
        the list of parsed pyobjs; for a typecode whose maxOccurs is 1
        it is the single pyobj instead, or None when nothing matched.

        Raises EvaluateException when a typecode is an AnyElement, or
        when the number of matching elements is below minOccurs or above
        maxOccurs.
        '''
        d = {}
        # Work on a copy; a matched slot is set to None so that an
        # element is consumed by at most one typecode.
        remaining = self.header_elements[:]
        for what in ofwhat:
            if isinstance(what, AnyElement):
                raise EvaluateException(
                    'not supporting <any> as child of SOAP-ENC:Header')

            namespaceURI, tagName = what.nspname, what.pname
            v = []
            for j, c_elt in enumerate(remaining):
                if not c_elt:
                    continue
                prefix, name = SplitQName(c_elt.tagName)
                nsuri = _resolve_prefix(c_elt, prefix)
                if tagName == name and namespaceURI == nsuri:
                    v.append(what.parse(c_elt, self))
                    remaining[j] = None

            # The former test "minOccurs > len(v) > maxOccurs" could never
            # be true, so the constraints were not enforced at all.
            # NOTE(review): maxOccurs is formatted with %s below, so it is
            # presumably allowed to be a non-numeric "no limit" marker;
            # a string value is treated as unbounded -- confirm against
            # the typecode definitions.
            count = len(v)
            over = type(what.maxOccurs) not in _stringtypes \
                    and count > what.maxOccurs
            if count < what.minOccurs or over:
                raise EvaluateException(
                    'number of occurances(%d) doesnt fit constraints (%d,%s)'
                    % (count, what.minOccurs, what.maxOccurs))

            if what.maxOccurs == 1:
                if count == 0: v = None
                else: v = v[0]
            d[(what.nspname, what.pname)] = v
        return d


# Running the module directly just shows the copyright notice.
if __name__ == '__main__':
    print(_copyright)
# python-zsi 2.1~a1-3 / usr / share / pyshared / ZSI / parse.py