src/reportlab/lib/xmllib.py
author rptlab
Tue, 30 Apr 2013 14:28:14 +0100
branchpy33
changeset 3723 99aa837b6703
parent 3721 0c93dd8ff567
child 3884 3bc59a4c3c21
permissions -rw-r--r--
second stage of port to Python 3.3; working hello world
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
190
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
     1
# A parser for XML, using the derived class as static DTD.
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
     2
# Author: Sjoerd Mullender.
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
     3
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
     4
# sgmlop support added by fredrik@pythonware.com (May 19, 1998)
3029
eded59f94021 adding docstrings to lib
andy
parents: 2964
diff changeset
     5
__version__=''' $Id$ '''
eded59f94021 adding docstrings to lib
andy
parents: 2964
diff changeset
     6
__doc__='''From before xmllib was in the Python standard library.
eded59f94021 adding docstrings to lib
andy
parents: 2964
diff changeset
     7
eded59f94021 adding docstrings to lib
andy
parents: 2964
diff changeset
     8
Probably ought to be removed'''
190
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
     9
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    10
import re
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    11
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    12
try:
1677
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1143
diff changeset
    13
    import sgmlop   # this works for both builtin on the path or relative
2053
2a962a30dc46 Removed checkImportError
rgbecker
parents: 1683
diff changeset
    14
except ImportError:
190
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    15
    sgmlop = None
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    16
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    17
# standard entity defs
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    18
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    19
# Maps the name of each predefined XML entity to the single character
# it stands for.
ENTITYDEFS = dict(
    lt='<',
    gt='>',
    amp='&',
    quot='"',
    apos="'",
)
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    26
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    27
# XML parser base class -- find tags and call handler functions.
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    28
# Usage: p = XMLParser(); p.feed(data); ...; p.close().
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    29
# The dtd is defined by deriving a class which defines methods with
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    30
# special names to handle tags: start_foo and end_foo to handle <foo>
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    31
# and </foo>, respectively.  The data between tags is passed to the
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    32
# parser by calling self.handle_data() with some data as argument (the
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    33
# data may be split up in arbitrary chunks).  Entity references are
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    34
# passed by calling self.handle_entityref() with the entity reference
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    35
# as argument.
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    36
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    37
# --------------------------------------------------------------------
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    38
# original re-based XML parser
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    39
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    40
_S = '[ \t\r\n]+'
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    41
_opS = '[ \t\r\n]*'
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    42
_Name = '[a-zA-Z_:][-a-zA-Z0-9._:]*'
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    43
interesting = re.compile('[&<]')
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    44
incomplete = re.compile('&(' + _Name + '|#[0-9]*|#x[0-9a-fA-F]*)?|'
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    45
                           '<([a-zA-Z_:][^<>]*|'
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    46
                              '/([a-zA-Z_:][^<>]*)?|'
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    47
                              '![^<>]*|'
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    48
                              '\?[^<>]*)?')
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    49
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    50
ref = re.compile('&(' + _Name + '|#[0-9]+|#x[0-9a-fA-F]+);?')
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    51
entityref = re.compile('&(?P<name>' + _Name + ')[^-a-zA-Z0-9._:]')
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    52
charref = re.compile('&#(?P<char>[0-9]+[^0-9]|x[0-9a-fA-F]+[^0-9a-fA-F])')
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    53
space = re.compile(_S)
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    54
newline = re.compile('\n')
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    55
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    56
starttagopen = re.compile('<' + _Name)
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    57
endtagopen = re.compile('</')
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    58
starttagend = re.compile(_opS + '(?P<slash>/?)>')
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    59
endbracket = re.compile('>')
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    60
tagfind = re.compile(_Name)
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    61
cdataopen = re.compile('<!\[CDATA\[')
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    62
cdataclose = re.compile('\]\]>')
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    63
special = re.compile('<!(?P<special>[^<>]*)>')
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    64
procopen = re.compile('<\?(?P<proc>' + _Name + ')' + _S)
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    65
procclose = re.compile('\?>')
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    66
commentopen = re.compile('<!--')
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    67
commentclose = re.compile('-->')
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    68
doubledash = re.compile('--')
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    69
attrfind = re.compile(
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    70
    _opS + '(?P<name>' + _Name + ')'
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    71
    '(' + _opS + '=' + _opS +
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    72
    '(?P<value>\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9.:+*%?!()_#=~]+))')
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    73
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    74
class SlowXMLParser:
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    75
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    76
    # Interface -- initialize and reset this instance
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    77
    def __init__(self, verbose=0):
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    78
        self.verbose = verbose
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    79
        self.reset()
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    80
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    81
    # Interface -- reset this instance.  Loses all unprocessed data
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    82
    def reset(self):
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    83
        self.rawdata = ''
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    84
        self.stack = []
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    85
        self.lasttag = '???'
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    86
        self.nomoretags = 0
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    87
        self.literal = 0
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    88
        self.lineno = 1
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    89
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    90
    # For derived classes only -- enter literal mode (CDATA) till EOF
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    91
    def setnomoretags(self):
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    92
        self.nomoretags = self.literal = 1
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    93
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    94
    # For derived classes only -- enter literal mode (CDATA)
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    95
    def setliteral(self, *args):
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    96
        self.literal = 1
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    97
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    98
    # Interface -- feed some data to the parser.  Call this as
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
    99
    # often as you want, with as little or as much text as you
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   100
    # want (may include '\n').  (This just saves the text, all the
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   101
    # processing is done by goahead().)
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   102
    def feed(self, data):
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   103
        self.rawdata = self.rawdata + data
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   104
        self.goahead(0)
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   105
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   106
    # Interface -- handle the remaining data
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   107
    def close(self):
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   108
        self.goahead(1)
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   109
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   110
    # Interface -- translate references
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   111
    def translate_references(self, data):
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   112
        newdata = []
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   113
        i = 0
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   114
        while 1:
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   115
            res = ref.search(data, i)
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   116
            if res is None:
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   117
                newdata.append(data[i:])
3723
99aa837b6703 second stage of port to Python 3.3; working hello world
rptlab
parents: 3721
diff changeset
   118
                return ''.join(newdata)
190
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   119
            if data[res.end(0) - 1] != ';':
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   120
                self.syntax_error(self.lineno,
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   121
                                  '; missing after entity/char reference')
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   122
            newdata.append(data[i:res.start(0)])
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   123
            str = res.group(1)
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   124
            if str[0] == '#':
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   125
                if str[1] == 'x':
3723
99aa837b6703 second stage of port to Python 3.3; working hello world
rptlab
parents: 3721
diff changeset
   126
                    newdata.append(chr(int(str[2:], 16)))
190
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   127
                else:
3723
99aa837b6703 second stage of port to Python 3.3; working hello world
rptlab
parents: 3721
diff changeset
   128
                    newdata.append(chr(int(str[1:])))
190
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   129
            else:
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   130
                try:
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   131
                    newdata.append(self.entitydefs[str])
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   132
                except KeyError:
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   133
                    # can't do it, so keep the entity ref in
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   134
                    newdata.append('&' + str + ';')
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   135
            i = res.end(0)
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   136
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   137
    # Internal -- handle data as far as reasonable.  May leave state
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   138
    # and data to be processed by a subsequent call.  If 'end' is
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   139
    # true, force handling all data as if followed by EOF marker.
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   140
    def goahead(self, end):
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   141
        rawdata = self.rawdata
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   142
        i = 0
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   143
        n = len(rawdata)
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   144
        while i < n:
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   145
            if self.nomoretags:
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   146
                data = rawdata[i:n]
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   147
                self.handle_data(data)
3723
99aa837b6703 second stage of port to Python 3.3; working hello world
rptlab
parents: 3721
diff changeset
   148
                self.lineno = self.lineno + '\n'.count(data)
190
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   149
                i = n
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   150
                break
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   151
            res = interesting.search(rawdata, i)
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   152
            if res:
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   153
                    j = res.start(0)
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   154
            else:
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   155
                    j = n
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   156
            if i < j:
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   157
                data = rawdata[i:j]
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   158
                self.handle_data(data)
3723
99aa837b6703 second stage of port to Python 3.3; working hello world
rptlab
parents: 3721
diff changeset
   159
                self.lineno = self.lineno + '\n'.count(data)
190
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   160
            i = j
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   161
            if i == n: break
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   162
            if rawdata[i] == '<':
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   163
                if starttagopen.match(rawdata, i):
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   164
                    if self.literal:
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   165
                        data = rawdata[i]
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   166
                        self.handle_data(data)
3723
99aa837b6703 second stage of port to Python 3.3; working hello world
rptlab
parents: 3721
diff changeset
   167
                        self.lineno = self.lineno + '\n'.count(data)
190
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   168
                        i = i+1
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   169
                        continue
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   170
                    k = self.parse_starttag(i)
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   171
                    if k < 0: break
3723
99aa837b6703 second stage of port to Python 3.3; working hello world
rptlab
parents: 3721
diff changeset
   172
                    self.lineno = self.lineno + '\n'.count(rawdata[i:k])
190
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   173
                    i = k
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   174
                    continue
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   175
                if endtagopen.match(rawdata, i):
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   176
                    k = self.parse_endtag(i)
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   177
                    if k < 0: break
3723
99aa837b6703 second stage of port to Python 3.3; working hello world
rptlab
parents: 3721
diff changeset
   178
                    self.lineno = self.lineno + '\n'.count(rawdata[i:k])
190
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   179
                    i =  k
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   180
                    self.literal = 0
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   181
                    continue
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   182
                if commentopen.match(rawdata, i):
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   183
                    if self.literal:
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   184
                        data = rawdata[i]
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   185
                        self.handle_data(data)
3723
99aa837b6703 second stage of port to Python 3.3; working hello world
rptlab
parents: 3721
diff changeset
   186
                        self.lineno = self.lineno + '\n'.count(data)
190
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   187
                        i = i+1
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   188
                        continue
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   189
                    k = self.parse_comment(i)
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   190
                    if k < 0: break
3723
99aa837b6703 second stage of port to Python 3.3; working hello world
rptlab
parents: 3721
diff changeset
   191
                    self.lineno = self.lineno + '\n'.count(rawdata[i:k])
190
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   192
                    i = k
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   193
                    continue
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   194
                if cdataopen.match(rawdata, i):
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   195
                    k = self.parse_cdata(i)
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   196
                    if k < 0: break
3723
99aa837b6703 second stage of port to Python 3.3; working hello world
rptlab
parents: 3721
diff changeset
   197
                    self.lineno = self.lineno + '\n'.count(rawdata[i:i])
190
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   198
                    i = k
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   199
                    continue
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   200
                res = procopen.match(rawdata, i)
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   201
                if res:
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   202
                    k = self.parse_proc(i, res)
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   203
                    if k < 0: break
3723
99aa837b6703 second stage of port to Python 3.3; working hello world
rptlab
parents: 3721
diff changeset
   204
                    self.lineno = self.lineno + '\n'.count(rawdata[i:k])
190
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   205
                    i = k
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   206
                    continue
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   207
                res = special.match(rawdata, i)
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   208
                if res:
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   209
                    if self.literal:
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   210
                        data = rawdata[i]
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   211
                        self.handle_data(data)
3723
99aa837b6703 second stage of port to Python 3.3; working hello world
rptlab
parents: 3721
diff changeset
   212
                        self.lineno = self.lineno + '\n'.count(data)
190
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   213
                        i = i+1
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   214
                        continue
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   215
                    self.handle_special(res.group('special'))
3723
99aa837b6703 second stage of port to Python 3.3; working hello world
rptlab
parents: 3721
diff changeset
   216
                    self.lineno = self.lineno + '\n'.count(res.group(0))
190
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   217
                    i = res.end(0)
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   218
                    continue
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   219
            elif rawdata[i] == '&':
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   220
                res = charref.match(rawdata, i)
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   221
                if res is not None:
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   222
                    i = res.end(0)
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   223
                    if rawdata[i-1] != ';':
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   224
                        self.syntax_error(self.lineno, '; missing in charref')
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   225
                        i = i-1
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   226
                    self.handle_charref(res.group('char')[:-1])
3723
99aa837b6703 second stage of port to Python 3.3; working hello world
rptlab
parents: 3721
diff changeset
   227
                    self.lineno = self.lineno + '\n'.count(res.group(0))
190
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   228
                    continue
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   229
                res = entityref.match(rawdata, i)
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   230
                if res is not None:
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   231
                    i = res.end(0)
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   232
                    if rawdata[i-1] != ';':
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   233
                        self.syntax_error(self.lineno, '; missing in entityref')
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   234
                        i = i-1
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   235
                    self.handle_entityref(res.group('name'))
3723
99aa837b6703 second stage of port to Python 3.3; working hello world
rptlab
parents: 3721
diff changeset
   236
                    self.lineno = self.lineno + '\n'.count(res.group(0))
190
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   237
                    continue
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   238
            else:
3721
0c93dd8ff567 initial changes from 2to3-3.3
rptlab
parents: 3326
diff changeset
   239
                raise RuntimeError('neither < nor & ??')
190
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   240
            # We get here only if incomplete matches but
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   241
            # nothing else
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   242
            res = incomplete.match(rawdata, i)
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   243
            if not res:
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   244
                data = rawdata[i]
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   245
                self.handle_data(data)
3723
99aa837b6703 second stage of port to Python 3.3; working hello world
rptlab
parents: 3721
diff changeset
   246
                self.lineno = self.lineno + '\n'.count(data)
190
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   247
                i = i+1
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   248
                continue
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   249
            j = res.end(0)
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   250
            if j == n:
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   251
                break # Really incomplete
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   252
            self.syntax_error(self.lineno, 'bogus < or &')
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   253
            data = res.group(0)
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   254
            self.handle_data(data)
3723
99aa837b6703 second stage of port to Python 3.3; working hello world
rptlab
parents: 3721
diff changeset
   255
            self.lineno = self.lineno + '\n'.count(data)
190
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   256
            i = j
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   257
        # end while
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   258
        if end and i < n:
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   259
            data = rawdata[i:n]
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   260
            self.handle_data(data)
3723
99aa837b6703 second stage of port to Python 3.3; working hello world
rptlab
parents: 3721
diff changeset
   261
            self.lineno = self.lineno + '\n'.count(data)
190
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   262
            i = n
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   263
        self.rawdata = rawdata[i:]
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   264
        # XXX if end: check for empty stack
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   265
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   266
    # Internal -- parse comment, return length or -1 if not terminated
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   267
    def parse_comment(self, i):
        """Process the comment that begins at ``self.rawdata[i]``.

        The text between the ``<!--`` opener and the first
        ``commentclose`` match is passed to ``self.handle_comment``.  A
        syntax error is reported when ``doubledash`` matches before the
        start of that closing match.

        Returns the index just past the ``commentclose`` match, or -1
        when no match is found in the buffered data.  Raises
        RuntimeError if the data at ``i`` does not start with ``<!--``.
        """
        opener = '<!--'
        text = self.rawdata
        body_start = i + len(opener)
        if text[i:body_start] != opener:
            raise RuntimeError('unexpected call to handle_comment')
        closing = commentclose.search(text, body_start)
        if not closing:
            return -1
        body_end = closing.start(0)
        # The original author notes this search cannot fail here because
        # doubledash matches a subset of what commentclose matches.
        first_dashes = doubledash.search(text, body_start)
        if first_dashes.start(0) < body_end:
            self.syntax_error(self.lineno, "'--' inside comment")
        self.handle_comment(text[body_start:body_end])
        return closing.end(0)
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   279
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   280
    # Internal -- handle CDATA tag, return length or -1 if not terminated
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   281
    def parse_cdata(self, i):
        """Process the CDATA section that begins at ``self.rawdata[i]``.

        The text between the ``<![CDATA[`` opener and the first
        ``cdataclose`` match is passed to ``self.handle_cdata``.

        Returns the index just past the ``cdataclose`` match, or -1 when
        no match is found in the buffered data.  Raises RuntimeError if
        the data at ``i`` does not start with ``<![CDATA[``.
        """
        opener = '<![CDATA['
        text = self.rawdata
        body_start = i + len(opener)
        if text[i:body_start] != opener:
            raise RuntimeError('unexpected call to handle_cdata')
        closing = cdataclose.search(text, body_start)
        if closing:
            self.handle_cdata(text[body_start:closing.start(0)])
            return closing.end(0)
        # No terminator buffered yet.
        return -1
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   290
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   291
    def parse_proc(self, i, res):
        """Process a processing instruction whose opener already matched.

        ``res`` is the match object for the opener; its ``proc`` group
        supplies the name handed to ``self.handle_proc``, together with
        the text between the end of the opener and the start of the
        first ``procclose`` match.  ``i`` is accepted but not used.

        Returns the index just past the ``procclose`` match, or -1 when
        no match is found in the buffered data.  Raises RuntimeError if
        ``res`` is falsy.
        """
        text = self.rawdata
        if not res:
            raise RuntimeError('unexpected call to parse_proc')
        target = res.group('proc')
        closing = procclose.search(text, res.end(0))
        if not closing:
            return -1
        # closing.pos is where the search began, i.e. the end of the opener.
        self.handle_proc(target, text[closing.pos:closing.start(0)])
        return closing.end(0)
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   301
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   302
    # Internal -- handle starttag, return length or -1 if not terminated
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   303
    def parse_starttag(self, i):
        """Process the start tag whose opening bracket is at ``self.rawdata[i]``.

        The tag name is read with ``tagfind`` and attributes are read
        repeatedly with ``attrfind`` until the closing bracket or the
        first non-matching position.  Attribute values pass through
        ``self.translate_references`` before being stored.  Problems
        (missing or unquoted values, attributes not listed in an
        optional ``<tag>_attributes`` member, duplicates, trailing
        garbage) are reported through ``self.syntax_error`` and parsing
        carries on.  ``self.finish_starttag`` is then called, followed
        by ``self.finish_endtag`` when the tag ends with a slash.

        Returns the index just past the ``endbracket`` match, or -1 when
        no closing bracket is found in the buffered data.  Raises
        RuntimeError if no tag name matches right after the bracket.
        """
        text = self.rawdata
        # i is the index of the opening bracket
        closing = endbracket.search(text, i+1)
        if not closing:
            return -1
        limit = closing.start(0)
        # Everything between i+1 and limit is the tag name plus attributes.
        attributes = {}
        name_match = tagfind.match(text, i+1)
        if not name_match:
            raise RuntimeError('unexpected call to parse_starttag')
        pos = name_match.end(0)
        tag = name_match.group(0)
        declared_key = tag + '_attributes'
        allowed = getattr(self, declared_key) if hasattr(self, declared_key) else None
        self.lasttag = tag
        while pos < limit:
            found = attrfind.match(text, pos)
            if not found:
                break
            attrname, attrvalue = found.group('name', 'value')
            if attrvalue is None:
                self.syntax_error(self.lineno, 'no attribute value specified')
                # Fall back to using the attribute name as its value.
                attrvalue = attrname
            elif attrvalue[:1] in ("'", '"') and attrvalue[-1:] == attrvalue[:1]:
                # Strip the matching pair of quotes.
                attrvalue = attrvalue[1:-1]
            else:
                self.syntax_error(self.lineno, 'attribute value not quoted')
            if allowed is not None and attrname not in allowed:
                self.syntax_error(self.lineno,
                                  'unknown attribute %s of element %s' %
                                  (attrname, tag))
            if attrname in attributes:
                self.syntax_error(self.lineno, 'attribute specified twice')
            attributes[attrname] = self.translate_references(attrvalue)
            pos = found.end(0)
        tail = starttagend.match(text, pos)
        if not tail:
            self.syntax_error(self.lineno, 'garbage in start tag')
        self.finish_starttag(tag, attributes)
        if tail and tail.group('slash') == '/':
            # Empty-element form: close the tag immediately.
            self.finish_endtag(tag)
        return closing.end(0)
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   349
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   350
    # Internal -- parse endtag
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   351
    def parse_endtag(self, i):
        """Process the end tag whose opening bracket is at ``self.rawdata[i]``.

        The tag name is looked for at ``i+2`` with ``tagfind``; if none
        matches, a syntax error is reported and an empty name is used.
        Anything other than a ``space`` match between the name and the
        closing bracket is reported as garbage.  ``self.finish_endtag``
        is always called with the resulting name.

        Returns the index just past the ``endbracket`` match, or -1 when
        no closing bracket is found in the buffered data.
        """
        text = self.rawdata
        closing = endbracket.search(text, i+1)
        if not closing:
            return -1
        bracket_at = closing.start(0)
        name_match = tagfind.match(text, i+2)
        if name_match:
            tag = name_match.group(0)
            pos = name_match.end(0)
        else:
            self.syntax_error(self.lineno, 'no name specified in end tag')
            tag = ''
            pos = i+2
        if pos != bracket_at:
            # only white space may sit between the name and the bracket
            padding = space.match(text, pos)
            if padding is None or padding.end(0) != bracket_at:
                self.syntax_error(self.lineno, 'garbage in end tag')
        self.finish_endtag(tag)
        return closing.end(0)
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   371
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   372
    # Internal -- finish processing of start tag
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   373
    # Return -1 for unknown tag, 1 for balanced tag
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   374
    def finish_starttag(self, tag, attrs):
        """Push ``tag`` on the element stack and dispatch to its handler.

        If the instance has a ``start_<tag>`` attribute it is passed to
        ``self.handle_starttag`` and 1 is returned; otherwise
        ``self.unknown_starttag`` is called and -1 is returned.
        """
        self.stack.append(tag)
        # A private sentinel distinguishes "no such attribute" from an
        # attribute that exists but happens to be None.
        missing = object()
        handler = getattr(self, 'start_' + tag, missing)
        if handler is missing:
            self.unknown_starttag(tag, attrs)
            return -1
        self.handle_starttag(tag, handler, attrs)
        return 1
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   384
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   385
    # Internal -- finish processing of end tag
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   386
    def finish_endtag(self, tag):
        """Close ``tag`` together with every element still open inside it.

        An empty ``tag`` closes only the innermost open element; if nothing
        is open, ``unknown_endtag`` is called with the empty tag.  A
        non-empty ``tag`` that is not on the stack closes nothing:
        ``unknown_endtag`` is called only when the instance has no
        ``end_<tag>`` attribute.  Otherwise the stack is unwound from the
        top down to the innermost occurrence of ``tag``; each popped element
        goes to ``handle_endtag`` when a truthy ``end_<name>`` attribute
        exists, else to ``unknown_endtag``.
        """
        open_tags = self.stack
        if tag:
            if tag not in open_tags:
                # Unbalanced end tag: stay silent when a handler exists.
                try:
                    getattr(self, 'end_' + tag)
                except AttributeError:
                    self.unknown_endtag(tag)
                return
            # Index of the innermost (last) occurrence of tag on the stack.
            depth = len(open_tags) - 1 - open_tags[::-1].index(tag)
        else:
            depth = len(open_tags) - 1
            if depth < 0:
                self.unknown_endtag(tag)
                return
        while len(self.stack) > depth:
            current = self.stack[-1]
            handler = getattr(self, 'end_' + current, None)
            if handler:
                self.handle_endtag(current, handler)
            else:
                self.unknown_endtag(current)
            del self.stack[-1]
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   413
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   414
    # Overridable -- handle start tag
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   415
    def handle_starttag(self, tag, method, attrs):
        """Dispatch a start tag by calling ``method`` with ``attrs``.

        ``tag`` is not used by this default implementation; it is there for
        overriding classes.
        """
        start_handler = method
        start_handler(attrs)
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   417
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   418
    # Overridable -- handle end tag
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   419
    def handle_endtag(self, tag, method):
        """Dispatch an end tag by calling ``method`` with no arguments.

        ``tag`` is not used by this default implementation; it is there for
        overriding classes.
        """
        end_handler = method
        end_handler()
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   421
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   422
    # Example -- handle character reference, no need to override
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   423
    def handle_charref(self, name):
        """Translate a numeric character reference and pass it to handle_data.

        ``name`` is parsed as hexadecimal when it starts with ``'x'`` (the
        ``'x'`` itself is skipped) and as decimal otherwise.  A value that
        fails to parse (``int_error``) or lies outside 0..255 is reported
        through ``unknown_charref`` instead.
        """
        try:
            code = int(name[1:], 16) if name[0] == 'x' else int(name)
        except int_error:
            self.unknown_charref(name)
            return
        if 0 <= code <= 255:
            self.handle_data(chr(code))
        else:
            # Only code points 0..255 are translated here.
            self.unknown_charref(name)
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   436
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   437
    # Definition of entities -- derived classes may override
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   438
    # Class-level default bound to the module-level ENTITYDEFS object; it is
    # shared by all instances unless a subclass or instance rebinds it.
    entitydefs = ENTITYDEFS
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   439
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   440
    # Example -- handle entity reference, no need to override
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   441
    def handle_entityref(self, name):
        """Expand a named entity reference via ``self.entitydefs``.

        A known ``name`` has its replacement text passed to ``handle_data``;
        an unknown one is reported through ``unknown_entityref``.
        """
        definitions = self.entitydefs
        if name not in definitions:
            self.unknown_entityref(name)
            return
        self.handle_data(definitions[name])
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   448
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   449
    # Example -- handle data, should be overridden
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   450
    def handle_data(self, data):
        """Receive character data; does nothing here, meant to be overridden."""
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   452
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   453
    # Example -- handle cdata, could be overridden
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   454
    def handle_cdata(self, data):
        """Receive CDATA content; does nothing here, may be overridden."""
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   456
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   457
    # Example -- handle comment, could be overridden
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   458
    def handle_comment(self, data):
        """Receive comment text; does nothing here, may be overridden."""
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   460
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   461
    # Example -- handle processing instructions, could be overridden
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   462
    def handle_proc(self, name, data):
        """Receive a processing instruction; does nothing here, may be overridden."""
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   464
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   465
    # Example -- handle special instructions, could be overridden
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   466
    def handle_special(self, data):
        """Receive a special instruction; does nothing here, may be overridden."""
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   468
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   469
    # Example -- handle relatively harmless syntax errors, could be overridden
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   470
    def syntax_error(self, lineno, message):
        """Report a syntax error by raising ``RuntimeError``.

        The exception text carries ``lineno`` and ``message``.  Subclasses
        may override this to handle the error differently.
        """
        details = (lineno, message)
        raise RuntimeError('Syntax error at line %d: %s' % details)
190
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   472
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   473
    # To be overridden -- handlers for unknown objects
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   474
    def unknown_starttag(self, tag, attrs):
        """Hook for a start tag with no ``start_<tag>`` handler; no-op here."""
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   475
    def unknown_endtag(self, tag):
        """Hook for an end tag that could not be dispatched; no-op here."""
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   476
    def unknown_charref(self, ref):
        """Hook for a character reference that was not translated; no-op here."""
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   477
    def unknown_entityref(self, ref):
        """Hook for an entity reference missing from ``entitydefs``; no-op here."""
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   478
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   479
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   480
# --------------------------------------------------------------------
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   481
# accelerated XML parser
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   482
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   483
class FastXMLParser:
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   484
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   485
    # Interface -- initialize and reset this instance
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   486
    def __init__(self, verbose=0):
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   487
        self.verbose = verbose
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   488
        self.reset()
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   489
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   490
    # Interface -- reset this instance.  Loses all unprocessed data
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   491
    def reset(self):
        """Reset this instance, discarding any unprocessed data.

        Re-initialises the bookkeeping attributes, creates a fresh
        ``sgmlop.XMLParser``, rebinds ``self.feed`` to that parser's ``feed``
        and registers this instance with the parser.
        """
        # Bookkeeping state.
        self.rawdata = ''
        self.stack = []
        self.lasttag = '???'
        self.nomoretags = self.literal = 0
        self.lineno = 1
        # The accelerated parser; the instance-level ``feed`` shadows the
        # class-level method of the same name.
        parser = sgmlop.XMLParser()
        self.parser = parser
        self.feed = parser.feed
        parser.register(self)
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   501
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   502
    # For derived classes only -- enter literal mode (CDATA) till EOF
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   503
    def setnomoretags(self):
        """Set both the ``nomoretags`` and ``literal`` flags to 1.

        Intended for derived classes: enter literal mode (CDATA) till EOF.
        """
        self.nomoretags = 1
        self.literal = 1
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   505
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   506
    # For derived classes only -- enter literal mode (CDATA)
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   507
    def setliteral(self, *args):
        """Set the ``literal`` flag to 1; any positional arguments are ignored.

        Intended for derived classes: enter literal mode (CDATA).
        """
        setattr(self, 'literal', 1)
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   509
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   510
    # Interface -- feed some data to the parser.  Call this as
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   511
    # often as you want, with as little or as much text as you
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   512
    # want (may include '\n').  (This just saves the text, all the
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   513
    # processing is done by goahead().)
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   514
    def feed(self, data):
        """Forward ``data`` to the underlying parser's ``feed``.

        ``reset()`` rebinds ``self.feed`` directly to ``self.parser.feed``,
        so this class-level method is shadowed on reset instances.
        """
        parser = self.parser
        parser.feed(data)
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   516
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   517
    # Interface -- handle the remaining data
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   518
    def close(self):
        """Handle the remaining data and release the underlying parser.

        Calls ``close()`` on ``self.parser`` and then sets ``self.parser`` to
        ``None``, even when the parser's ``close()`` raises (the exception
        still propagates).  Calling ``close()`` again on an already-closed
        instance is a no-op rather than an ``AttributeError`` on ``None``.
        """
        parser = self.parser
        if parser is None:
            # Already closed: a previous close() dropped the parser.
            return
        try:
            parser.close()
        finally:
            self.parser = None
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   523
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   524
    # Interface -- translate references
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   525
    def translate_references(self, data):
        """Return ``data`` with entity and character references expanded.

        Matches of the module-level ``ref`` pattern are handled left to
        right.  ``#x...`` and ``#...`` references become ``chr()`` of the
        hexadecimal or decimal number; names found in ``self.entitydefs``
        become their replacement text; unknown names are re-emitted as
        ``&name;``.  When a match does not end in ``';'``, ``syntax_error``
        is called before the reference is processed.
        """
        pieces = []
        pos = 0
        while True:
            match = ref.search(data, pos)
            if match is None:
                # No further references: keep the tail and finish.
                pieces.append(data[pos:])
                return ''.join(pieces)
            start, end = match.start(0), match.end(0)
            if data[end - 1] != ';':
                self.syntax_error(self.lineno,
                                  '; missing after entity/char reference')
            pieces.append(data[pos:start])
            name = match.group(1)
            if name[0] != '#':
                try:
                    pieces.append(self.entitydefs[name])
                except KeyError:
                    # can't do it, so keep the entity ref in
                    pieces.append('&' + name + ';')
            elif name[1] == 'x':
                pieces.append(chr(int(name[2:], 16)))
            else:
                pieces.append(chr(int(name[1:])))
            pos = end
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   550
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   551
    # Internal -- finish processing of start tag
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   552
    # Return -1 for unknown tag, 1 for balanced tag
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   553
    def finish_starttag(self, tag, attrs):
        """Push ``tag`` on the stack and dispatch its start handler.

        Returns 1 after passing the ``start_<tag>`` attribute to
        ``handle_starttag``, or -1 after calling ``unknown_starttag`` when no
        such attribute exists.  The tag is pushed in both cases.
        """
        self.stack.append(tag)
        try:
            handler = getattr(self, 'start_' + tag)
        except AttributeError:
            self.unknown_starttag(tag, attrs)
            return -1
        self.handle_starttag(tag, handler, attrs)
        return 1
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   563
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   564
    # Internal -- finish processing of end tag
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   565
    def finish_endtag(self, tag):
        """Close ``tag`` together with every element still open inside it.

        An empty ``tag`` closes only the innermost open element; if nothing
        is open, ``unknown_endtag`` is called with the empty tag.  A
        non-empty ``tag`` that is not on the stack closes nothing:
        ``unknown_endtag`` is called only when the instance has no
        ``end_<tag>`` attribute.  Otherwise the stack is unwound from the
        top down to the innermost occurrence of ``tag``; each popped element
        goes to ``handle_endtag`` when a truthy ``end_<name>`` attribute
        exists, else to ``unknown_endtag``.
        """
        open_tags = self.stack
        if tag:
            if tag not in open_tags:
                # Unbalanced end tag: stay silent when a handler exists.
                try:
                    getattr(self, 'end_' + tag)
                except AttributeError:
                    self.unknown_endtag(tag)
                return
            # Index of the innermost (last) occurrence of tag on the stack.
            depth = len(open_tags) - 1 - open_tags[::-1].index(tag)
        else:
            depth = len(open_tags) - 1
            if depth < 0:
                self.unknown_endtag(tag)
                return
        while len(self.stack) > depth:
            current = self.stack[-1]
            handler = getattr(self, 'end_' + current, None)
            if handler:
                self.handle_endtag(current, handler)
            else:
                self.unknown_endtag(current)
            del self.stack[-1]
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   592
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   593
    # Overridable -- handle start tag
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   594
    def handle_starttag(self, tag, method, attrs):
        """Invoke the handler chosen for a start tag.

        The default implementation simply calls ``method(attrs)``; ``tag``
        is accepted but not used here.  Subclasses may override this hook to
        wrap or trace the call.
        # NOTE(review): presumably ``method`` is the ``start_<tag>`` handler
        # resolved by the caller -- the call site is not visible here.
        """
        method(attrs)
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   596
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   597
    # Overridable -- handle end tag
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   598
    def handle_endtag(self, tag, method):
        """Invoke the handler chosen for an end tag.

        The default implementation calls ``method()`` with no arguments;
        ``tag`` is accepted but not used here.  Subclasses may override this
        hook to wrap or trace the call.
        """
        method()
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   600
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   601
    # Example -- handle character reference, no need to override
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   602
    def handle_charref(self, name):
        """Translate a numeric character reference into character data.

        ``name`` is the reference text: decimal digits, or ``x`` followed by
        hexadecimal digits.  The decoded character is passed to
        ``handle_data()``.  A reference that cannot be parsed as a number,
        or whose value lies outside 0..255, is reported through
        ``unknown_charref()`` instead.
        """
        try:
            # Compare a slice rather than name[0]: an empty reference then
            # falls through to int('') -> ValueError -> unknown_charref
            # instead of escaping as an uncaught IndexError.
            if name[:1] == 'x':
                n = int(name[1:], 16)
            else:
                n = int(name)
        except ValueError:
            self.unknown_charref(name)
            return
        if not 0 <= n <= 255:
            self.unknown_charref(name)
            return
        self.handle_data(chr(n))
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   615
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   616
    # Definition of entities -- derived classes may override
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   617
    # Class-level table; handle_entityref() looks names up via self.entitydefs.
    entitydefs = ENTITYDEFS
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   618
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   619
    # Example -- handle entity reference, no need to override
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   620
    def handle_entityref(self, name):
        """Expand a named entity reference.

        If ``name`` is a key of ``self.entitydefs`` its value is forwarded
        to ``handle_data()``; otherwise ``unknown_entityref(name)`` is
        called.
        """
        known = self.entitydefs
        if name not in known:
            self.unknown_entityref(name)
            return
        self.handle_data(known[name])
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   627
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   628
    # Example -- handle data, should be overridden
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   629
    def handle_data(self, data):
        """Receive character data; the default implementation ignores it.

        Subclasses are expected to override this to do something with
        ``data``.
        """
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   631
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   632
    # Example -- handle cdata, could be overridden
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   633
    def handle_cdata(self, data):
        """Receive CDATA content; the default implementation ignores it.

        Subclasses may override this hook.
        """
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   635
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   636
    # Example -- handle comment, could be overridden
396
9e1229009952 Uncommented methods in fast parser
rgbecker
parents: 193
diff changeset
   637
    def handle_comment(self, data):
        """Receive comment text; the default implementation ignores it.

        Subclasses may override this hook.
        """
190
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   639
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   640
    # Example -- handle processing instructions, could be overridden
396
9e1229009952 Uncommented methods in fast parser
rgbecker
parents: 193
diff changeset
   641
    def handle_proc(self, name, data):
        """Receive a processing instruction; the default ignores it.

        Subclasses may override this hook.
        # NOTE(review): presumably ``name`` is the instruction target and
        # ``data`` its remaining text -- confirm against the caller.
        """
190
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   643
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   644
    # Example -- handle special instructions, could be overridden
396
9e1229009952 Uncommented methods in fast parser
rgbecker
parents: 193
diff changeset
   645
    def handle_special(self, data):
        """Receive a special instruction; the default implementation ignores it.

        Subclasses may override this hook.
        """
190
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   647
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   648
    # Example -- handle relatively harmless syntax errors, could be overridden
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   649
    def syntax_error(self, lineno, message):
        """Report a syntax error by raising RuntimeError.

        The exception text includes ``lineno`` (formatted with ``%d``) and
        ``message``.  Subclasses may override this to be more lenient.
        """
        text = 'Syntax error at line %d: %s' % (lineno, message)
        raise RuntimeError(text)
190
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   651
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   652
    # To be overridden -- handlers for unknown objects
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   653
    def unknown_starttag(self, tag, attrs):
        """Hook for a start tag with no dedicated handler; does nothing."""
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   654
    def unknown_endtag(self, tag):
        """Hook for an end tag with no dedicated handler; does nothing."""
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   655
    def unknown_charref(self, ref):
        """Hook for an unusable character reference; does nothing."""
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   656
    def unknown_entityref(self, ref):
        """Hook for an undefined entity reference; does nothing."""
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   657
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   658
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   659
#sgmlop = None
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   660
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   661
# pick a suitable parser
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   662
# Use the accelerated parser when sgmlop is truthy, else the pure-Python one.
XMLParser = FastXMLParser if sgmlop else SlowXMLParser
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   666
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   667
# --------------------------------------------------------------------
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   668
# test stuff
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   669
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   670
class TestXMLParser(XMLParser):
    """Diagnostic parser that prints the events it receives.

    Character data is accumulated in ``self.testdata`` and written out by
    ``flush()``, either once its repr reaches 70 characters or when one of
    the handlers that call ``flush()`` is about to print its own line.
    """

    def __init__(self, verbose=0):
        # The buffer is created before the base initialiser runs.
        self.testdata = ""
        XMLParser.__init__(self, verbose)

    def handle_data(self, data):
        """Buffer character data, flushing once its repr is 70+ chars."""
        self.testdata += data
        if len(repr(self.testdata)) >= 70:
            self.flush()

    def flush(self):
        """Print and clear any buffered character data."""
        pending = self.testdata
        if not pending:
            return
        self.testdata = ""
        print('data:', repr(pending))

    def handle_cdata(self, data):
        """Print a CDATA section after flushing pending data."""
        self.flush()
        print('cdata:', repr(data))

    def handle_proc(self, name, data):
        """Print a processing instruction after flushing pending data."""
        self.flush()
        print('processing:',name,repr(data))

    def handle_special(self, data):
        """Print a special instruction after flushing pending data."""
        self.flush()
        print('special:',repr(data))

    def handle_comment(self, data):
        """Print a comment, eliding the middle when its repr exceeds 68 chars."""
        self.flush()
        shown = repr(data)
        if len(shown) > 68:
            head, tail = shown[:32], shown[-32:]
            shown = head + '...' + tail
        print('comment:', shown)

    def syntax_error(self, lineno, message):
        """Print the error instead of raising; pending data is not flushed."""
        print('error at line %d:' % lineno, message)

    def unknown_starttag(self, tag, attrs):
        """Print a start tag together with any attributes."""
        self.flush()
        if attrs:
            print('start tag: <' + tag, end=' ')
            for key, val in attrs.items():
                print(key + '=' + '"' + val + '"', end=' ')
            print('>')
        else:
            print('start tag: <' + tag + '>')

    def unknown_endtag(self, tag):
        """Print an end tag."""
        self.flush()
        print('end tag: </' + tag + '>')

    def unknown_entityref(self, ref):
        """Print a warning for an undefined entity reference."""
        self.flush()
        print('*** unknown entity ref: &' + ref + ';')

    def unknown_charref(self, ref):
        """Print a warning for an unusable character reference."""
        self.flush()
        print('*** unknown char ref: &#' + ref + ';')

    def close(self):
        """Close the underlying parser, then emit any remaining data."""
        XMLParser.close(self)
        self.flush()
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   734
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   735
def test(args = None):
    """Command-line driver: parse a file and feed it to a parser.

    ``args`` defaults to ``sys.argv[1:]`` when empty or None.  A leading
    ``-s`` selects the plain ``XMLParser`` instead of ``TestXMLParser``.
    The next argument is the file to read (default ``test.xml``); ``-``
    means standard input.  If the file cannot be opened the error is
    printed and the process exits with status 1.
    """
    import sys

    if not args:
        args = sys.argv[1:]

    if args and args[0] == '-s':
        args = args[1:]
        klass = XMLParser
    else:
        klass = TestXMLParser

    filename = args[0] if args else 'test.xml'

    if filename == '-':
        data = sys.stdin.read()
    else:
        try:
            f = open(filename, 'r')
        except IOError as msg:
            print(filename, ":", msg)
            sys.exit(1)
        # The context manager closes the file even if read() raises.
        with f:
            data = f.read()

    x = klass()
    # Feed one character at a time to exercise incremental parsing.
    for c in data:
        x.feed(c)
    x.close()
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   769
74df7a489c81 Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff changeset
   770
193
8a23e98a8c3f Avoid autotests
rgbecker
parents: 190
diff changeset
   771
# Script entry point; the trailing marker comment on the guard is preserved.
if __name__ == '__main__': #NO_REPORTLAB_TEST
    test()