examples/xmlutils.py
author robin
Fri, 23 Dec 2016 12:55:22 +0000
changeset 56 51219ad2b0bd
parent 11 a0a1adbc7b13
permissions -rw-r--r--
speed up windows 3.6 build
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
1
7cf042be50ba Move pyRXP into its own universe
rgbecker
parents:
diff changeset
     1
"Some XML helper classes."
7cf042be50ba Move pyRXP into its own universe
rgbecker
parents:
diff changeset
     2
import os, string, sys
7cf042be50ba Move pyRXP into its own universe
rgbecker
parents:
diff changeset
     3
from types import StringType, ListType, TupleType
7cf042be50ba Move pyRXP into its own universe
rgbecker
parents:
diff changeset
     4
import pyRXP
7cf042be50ba Move pyRXP into its own universe
rgbecker
parents:
diff changeset
     5
# Refuse to load against an old pyRXP: the import aborts with the message
# below unless pyRXP.version compares >= '0.5'.
# NOTE(review): if pyRXP.version is a string this is a lexicographic
# comparison rather than a numeric one (e.g. '0.10' sorts before '0.5'),
# and the check disappears entirely when Python runs with -O -- confirm
# that both are acceptable.
assert pyRXP.version>='0.5', 'get the latest pyRXP!'
7cf042be50ba Move pyRXP into its own universe
rgbecker
parents:
diff changeset
     6
    
7cf042be50ba Move pyRXP into its own universe
rgbecker
parents:
diff changeset
     7
7cf042be50ba Move pyRXP into its own universe
rgbecker
parents:
diff changeset
     8
# Module-wide switch consulted by TagWrapper.__init__: when true, a node's
# child list is passed through ignoreWhitespace() before being stored;
# when false, the child list is kept exactly as supplied.
IGNOREWHITESPACE = 1
7cf042be50ba Move pyRXP into its own universe
rgbecker
parents:
diff changeset
     9
7cf042be50ba Move pyRXP into its own universe
rgbecker
parents:
diff changeset
    10
def ignoreWhitespace(list):
    """Return a new child list with whitespace-only text removed.

    Every text item (str, bytes or unicode) has its leading and trailing
    whitespace stripped; items that are empty after stripping are dropped.
    Any other item (for example a nested tag tuple) is copied through
    unchanged.  The relative order of the surviving items is preserved and
    the input sequence itself is never modified.

    The parameter name shadows the ``list`` builtin; it is kept as it is so
    that existing keyword callers continue to work.
    """
    # Recognise every flavour of text a parser may hand back, on both
    # Python 2 (str / unicode) and Python 3 (str / bytes).
    text_types = (str, bytes, type(u''))
    newlist = []
    for elem in list:
        if isinstance(elem, text_types):
            elem = elem.strip()
            # Truthiness rather than == '' so that empty bytes are
            # dropped as well.
            if not elem:
                continue
        newlist.append(elem)
    return newlist
7cf042be50ba Move pyRXP into its own universe
rgbecker
parents:
diff changeset
    22
7cf042be50ba Move pyRXP into its own universe
rgbecker
parents:
diff changeset
    23
7cf042be50ba Move pyRXP into its own universe
rgbecker
parents:
diff changeset
    24
class TagWrapper:
    """Lazy utility for navigating XML.

    Wraps a single node given as a 4-item sequence
    ``(tagName, attrs, children, spare)`` and lets it be explored with
    ordinary Python syntax.  The following Python code works:

    tag.attribute      # returns given attribute
    tag.child          # returns first child with matching tag name
    for child in tag:  # iterates over them
    tag[3]             # returns fourth child
    len(tag)           # no of children

    Child nodes are wrapped on demand: every access builds a fresh
    TagWrapper around the underlying child, while text children are
    returned as they are.
    """

    # Types treated as text children (as opposed to nested tag nodes).
    # Covers str/unicode on Python 2 and str/bytes on Python 3.
    _TEXT_TYPES = (str, bytes, type(u''))

    def __init__(self, node, returnEmptyTagContentAsString=1):
        # Unpacking also checks that the node really has four items.
        tagName, attrs, children, spare = node
        self.tagName = tagName

        # this option affects tags with no content like <Surname></Surname>.
        # Can either return a None object, which is a pain in a prep file
        # as you have to  put if expressions around everything, or
        # an empty string so prep files can just do {{xml.wherever.Surname}}.
        self.returnEmptyTagContentAsString = returnEmptyTagContentAsString

        if attrs is None:
            self._attrs = {}
        else:
            self._attrs = attrs  # share the dictionary

        if children is None:
            self._children = []
        elif IGNOREWHITESPACE:
            self._children = ignoreWhitespace(children)
        else:
            self._children = children

    def __repr__(self):
        return 'TagWrapper<%s>' % self.tagName

    def __str__(self):
        "Return str() of the first child, or the empty-tag placeholder."
        if len(self):
            return str(self[0])
        if self.returnEmptyTagContentAsString:
            return ''
        # NOTE(review): str(tag) raises TypeError when __str__ returns a
        # non-string, so this branch only yields None to callers invoking
        # tag.__str__() directly.  Left unchanged pending a decision on
        # the intended behaviour.
        return None

    def __len__(self):
        return len(self._children)

    def _value(self,name,default):
        "Return item 0 of the attribute/child called name, else default."
        try:
            return getattr(self,name)[0]
        except (AttributeError, IndexError):
            return default

    def __getattr__(self, attr):
        "Try various priorities"
        # Read the instance dictionary directly: going through
        # self._attrs here would re-enter __getattr__ and recurse without
        # end whenever the wrapper has not been initialised yet (for
        # example while copy or pickle is rebuilding an instance).
        try:
            attrs = self.__dict__['_attrs']
            children = self.__dict__['_children']
        except KeyError:
            raise AttributeError(attr)

        # XML attributes take priority over child tags.
        if attr in attrs:
            return attrs[attr]

        # first child tag whose name matches?
        for child in children:
            if isinstance(child, self._TEXT_TYPES):
                continue
            if child[0] == attr:
                # The child inherits this wrapper's empty-content policy.
                return TagWrapper(child, self.returnEmptyTagContentAsString)

        # not found, barf
        msg = '"%s" not found in attributes of tag <%s> or its children' % (attr, self.tagName)
        raise AttributeError(msg)

    def keys(self):
        "return list of valid keys"
        # list(...) gives an independent list on Python 2 and 3 alike, so
        # appending never touches (or fails on) the attribute dictionary.
        result = list(self._attrs)
        for child in self._children:
            if not isinstance(child, self._TEXT_TYPES):
                result.append(child[0])
        return result

    def has_key(self,k):
        return k in self.keys()

    def __getitem__(self, idx):
        "Return child number idx: text as it is, tag nodes wrapped."
        try:
            child = self._children[idx]
        except IndexError:
            # Still an IndexError, so plain "for child in tag" iteration
            # terminates normally.
            raise IndexError('%s no index %s' % (self.__repr__(), repr(idx)))
        if isinstance(child, self._TEXT_TYPES):
            return child
        # NOTE(review): unlike __getattr__, the wrapper built here does
        # not inherit returnEmptyTagContentAsString; kept as in the
        # original.
        return TagWrapper(child)

    def _namedChildren(self,name):
        "List the text children (name is None) or the child tags called name."
        R = []
        for c in self:
            if isinstance(c, self._TEXT_TYPES):
                if name is None: R.append(c)
            elif name == c.tagName: R.append(c)
        return R
7cf042be50ba Move pyRXP into its own universe
rgbecker
parents:
diff changeset
   124
7cf042be50ba Move pyRXP into its own universe
rgbecker
parents:
diff changeset
   125
def xml2doctree(xml):
    """Parse *xml* with a freshly configured pyRXP parser.

    A new ``pyRXP.Parser`` is built on every call with the four flags
    listed below, and whatever its ``parse`` method returns is handed
    straight back to the caller.
    NOTE(review): the result is presumably the (tagName, attrs, children,
    spare) tuple tree that TagWrapper consumes -- confirm against the
    pyRXP documentation.
    """
    parserFlags = {
        'ErrorOnValidityErrors': 1,
        'NoNoDTDWarning': 1,
        'ExpandCharacterEntities': 0,
        'ExpandGeneralEntities': 0,
    }
    parser = pyRXP.Parser(**parserFlags)
    return parser.parse(xml)
7cf042be50ba Move pyRXP into its own universe
rgbecker
parents:
diff changeset
   132
7cf042be50ba Move pyRXP into its own universe
rgbecker
parents:
diff changeset
   133
7cf042be50ba Move pyRXP into its own universe
rgbecker
parents:
diff changeset
   134
if __name__=='__main__':
    import os
    # Smoke test: parse the RML manual expected in the current directory.
    # The with-block closes the file as soon as it has been read instead
    # of leaving the handle open until garbage collection.
    with open('rml_manual.xml','r') as xmlFile:
        xml = xmlFile.read()
    parsed = xml2doctree(xml)