examples/expattree.py
author robin
Fri, 23 Dec 2016 12:55:22 +0000
changeset 56 51219ad2b0bd
parent 11 a0a1adbc7b13
permissions -rw-r--r--
speed up windows 3.6 build
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
1
7cf042be50ba Move pyRXP into its own universe
rgbecker
parents:
diff changeset
     1
# uses pyexpat to build the tree. Yuk, globals,
7cf042be50ba Move pyRXP into its own universe
rgbecker
parents:
diff changeset
     2
# but want a quick speed comparison with pyRXP
7cf042be50ba Move pyRXP into its own universe
rgbecker
parents:
diff changeset
     3
7cf042be50ba Move pyRXP into its own universe
rgbecker
parents:
diff changeset
     4
import xml.parsers.expat
7cf042be50ba Move pyRXP into its own universe
rgbecker
parents:
diff changeset
     5
7cf042be50ba Move pyRXP into its own universe
rgbecker
parents:
diff changeset
     6
class ExpatTreeParser:
    """Crude and incomplete tree-builder based on expat.

    Need to add a few more handlers before it accurately
    deals with all relevant elements; but close enough
    for benchmark comparisons.  Strings are stored exactly
    as expat passes them to the handlers; we don't want to
    penalize expat for this, so they are left untouched.

    Every node is a 4-tuple ``(name, attrs, children, None)``.
    ``children`` is a list holding child node tuples and
    character-data strings in the order the handlers received them.
    """

    def __init__(self):
        self._reset()

    def _reset(self):
        """Discard any previous tree and start again from an empty fake root."""
        # fake top node makes it easy to initialize
        self.curNode = ('_FAKE_ROOT_', {}, [], None)
        self.nodestack = [self.curNode]

    def handleStartElement(self, name, attrs):
        """Open a new node, attach it to the current node and descend into it."""
        newNode = (name, attrs, [], None)
        self.nodestack.append(newNode)
        self.curNode[2].append(newNode)
        self.curNode = newNode

    def handleCharData(self, data):
        """Append a chunk of character data to the current node's children."""
        self.curNode[2].append(data)

    def handleEndElement(self, name):
        """Close the current node and make its parent current again."""
        self.nodestack.pop()
        self.curNode = self.nodestack[-1]

    def parse(self, data):
        """Parse a complete XML document and return its root node tuple.

        ``data`` is handed to expat in a single call and treated as the
        whole document.

        Raises ``xml.parsers.expat.ExpatError`` when the document is not
        well formed, including when it is empty or truncated.
        """
        # Start from a clean slate so that a reused instance does not
        # return the tree built by an earlier call.
        self._reset()
        p = xml.parsers.expat.ParserCreate()
        p.StartElementHandler = self.handleStartElement
        p.EndElementHandler = self.handleEndElement
        p.CharacterDataHandler = self.handleCharData
        # isfinal=True tells expat this is the last (and only) chunk;
        # without it an incomplete document is never reported as an error.
        p.Parse(data, True)
        # The document element is the first child of our fake top node.
        # Read it from the bottom of the stack rather than from curNode so
        # the result does not depend on where the handlers left off.
        return self.nodestack[0][2][0]
7cf042be50ba Move pyRXP into its own universe
rgbecker
parents:
diff changeset
    43
7cf042be50ba Move pyRXP into its own universe
rgbecker
parents:
diff changeset
    44
def expattree(data):
    """Build a tree from ``data`` using a throwaway ExpatTreeParser.

    Returns whatever ``ExpatTreeParser.parse`` returns for ``data``.
    """
    builder = ExpatTreeParser()
    return builder.parse(data)
7cf042be50ba Move pyRXP into its own universe
rgbecker
parents:
diff changeset
    46
7cf042be50ba Move pyRXP into its own universe
rgbecker
parents:
diff changeset
    47