author  rptlab 
Tue, 30 Apr 2013 14:28:14 +0100  
branch  py33 
changeset 3723  99aa837b6703 
parent 3721  0c93dd8ff567 
child 3884  3bc59a4c3c21 
permissions  rwrr 
190
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

1 
# A parser for XML, using the derived class as static DTD. 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

2 
# Author: Sjoerd Mullender. 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

3 

74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

4 
# sgmlop support added by fredrik@pythonware.com (May 19, 1998) 
3029  5 
__version__=''' $Id$ ''' 
6 
__doc__='''From before xmllib was in the Python standard library. 

7 

8 
Probably ought to be removed''' 

190
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

9 

74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

10 
import re 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

11 

74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

12 
try: 
1677
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1143
diff
changeset

13 
import sgmlop # this works for both builtin on the path or relative 
2053  14 
except ImportError: 
190
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

15 
sgmlop = None 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

16 

74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

17 
# standard entity defs 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

18 

74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

19 
# The five predefined XML entities, mapping entity name -> replacement text.
ENTITYDEFS = {
    'lt': '<',
    'gt': '>',
    'amp': '&',
    'quot': '"',
    'apos': '\'',
}
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

26 

74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

27 
# XML parser base class -- find tags and call handler functions.
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

28 
# Usage: p = XMLParser(); p.feed(data); ...; p.close(). 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

29 
# The dtd is defined by deriving a class which defines methods with 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

30 
# special names to handle tags: start_foo and end_foo to handle <foo> 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

31 
# and </foo>, respectively. The data between tags is passed to the 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

32 
# parser by calling self.handle_data() with some data as argument (the 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

33 
# data may be split up in arbitrary chunks). Entity references are
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

34 
# passed by calling self.handle_entityref() with the entity reference 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

35 
# as argument. 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

36 

74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

37 
#  
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

38 
# original re-based XML parser
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

39 

74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

40 
# Pattern fragments shared by the compiled expressions below.
_S = '[ \t\r\n]+'                       # mandatory whitespace
_opS = '[ \t\r\n]*'                     # optional whitespace
_Name = '[a-zA-Z_:][-a-zA-Z0-9._:]*'    # tag / attribute / entity name

# Start of anything that is not plain character data.
interesting = re.compile('[&<]')
# A reference or markup construct that has been started but not finished.
incomplete = re.compile('&(' + _Name + '|#[0-9]*|#x[0-9a-fA-F]*)?|'
                        '<([a-zA-Z_:][^<>]*|'
                        '/([a-zA-Z_:][^<>]*)?|'
                        '![^<>]*|'
                        r'\?[^<>]*)?')

# Entity or character reference; the trailing ';' is optional so that a
# missing terminator can be detected and reported by the caller.
ref = re.compile('&(' + _Name + '|#[0-9]+|#x[0-9a-fA-F]+);?')
# Entity reference plus the single character that follows the name.
entityref = re.compile('&(?P<name>' + _Name + ')[^-a-zA-Z0-9._:]')
# Decimal or hex character reference plus the character that follows it.
charref = re.compile('&#(?P<char>[0-9]+[^0-9]|x[0-9a-fA-F]+[^0-9a-fA-F])')
space = re.compile(_S)
newline = re.compile('\n')

starttagopen = re.compile('<' + _Name)
endtagopen = re.compile('</')
# End of a start tag; group 'slash' is '/' for an empty-element tag.
starttagend = re.compile(_opS + '(?P<slash>/?)>')
endbracket = re.compile('>')
tagfind = re.compile(_Name)
cdataopen = re.compile(r'<!\[CDATA\[')
cdataclose = re.compile(r'\]\]>')
# Any other '<!...>' declaration; group 'special' is the text inside.
special = re.compile('<!(?P<special>[^<>]*)>')
# Processing instruction opener; group 'proc' is the target name.
procopen = re.compile(r'<\?(?P<proc>' + _Name + ')' + _S)
procclose = re.compile(r'\?>')
commentopen = re.compile('<!--')
commentclose = re.compile('-->')
doubledash = re.compile('--')
# One attribute: a name, optionally followed by '=' and a single-quoted,
# double-quoted or unquoted value (group 'value' keeps the quotes).
attrfind = re.compile(
    _opS + '(?P<name>' + _Name + ')'
    '(' + _opS + '=' + _opS +
    '(?P<value>\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9.:+*%?!()_#=~]+))')
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

73 

74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

74 
class SlowXMLParser: 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

75 

74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

76 
# Interface -- initialize and reset this instance
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

77 
def __init__(self, verbose=0):
    """Store the *verbose* setting and put the parser in its initial state.

    The initial state is established by calling ``self.reset()``.
    """
    self.verbose = verbose
    self.reset()
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

80 

74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

81 
# Interface -- reset this instance. Loses all unprocessed data
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

82 
def reset(self):
    """Return the parser to its initial state, dropping any buffered input."""
    self.rawdata = ''       # text fed in but not yet consumed
    self.stack = []
    self.lasttag = '???'    # placeholder value
    self.nomoretags = 0     # set by setnomoretags()
    self.literal = 0        # set by setliteral() / setnomoretags()
    self.lineno = 1         # line numbers are 1-based
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

89 

74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

90 
# For derived classes only -- enter literal mode (CDATA) till EOF
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

91 
def setnomoretags(self):
    """Turn on both the ``nomoretags`` and ``literal`` flags."""
    self.nomoretags = self.literal = 1
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

93 

74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

94 
# For derived classes only -- enter literal mode (CDATA)
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

95 
def setliteral(self, *args):
    """Turn on the ``literal`` flag.

    Any positional arguments are accepted and ignored.
    """
    self.literal = 1
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

97 

74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

98 
# Interface -- feed some data to the parser. Call this as
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

99 
# often as you want, with as little or as much text as you 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

100 
# want (may include '\n'). (This just saves the text, all the 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

101 
# processing is done by goahead().) 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

102 
def feed(self, data):
    """Append *data* to the pending input and process what can be processed.

    The text is added to ``self.rawdata`` and ``self.goahead(0)`` is then
    called to do the actual parsing.
    """
    self.rawdata = self.rawdata + data
    self.goahead(0)
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

105 

74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

106 
# Interface -- handle the remaining data
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

107 
def close(self):
    """Process whatever input is still buffered by calling ``goahead(1)``."""
    self.goahead(1)
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

109 

74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

110 
# Interface -- translate references
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

111 
def translate_references(self, data):
    """Return *data* with entity and character references expanded.

    Character references (``&#NN;`` and ``&#xHH;``) are replaced by the
    corresponding character.  Entity references are looked up in
    ``self.entitydefs``; names not found there are left in the output as
    ``&name;``.  A reference that is not terminated by ``;`` is reported
    through ``self.syntax_error`` and then handled like a terminated one.
    """
    pieces = []
    pos = 0
    while True:
        res = ref.search(data, pos)
        if res is None:
            # No further references: keep the tail unchanged and finish.
            pieces.append(data[pos:])
            return ''.join(pieces)
        # The pattern makes the ';' optional, so check the last matched
        # character to detect a missing terminator.
        if data[res.end(0) - 1] != ';':
            self.syntax_error(self.lineno,
                              '; missing after entity/char reference')
        pieces.append(data[pos:res.start(0)])
        name = res.group(1)
        if name[0] == '#':
            if name[1] == 'x':
                pieces.append(chr(int(name[2:], 16)))
            else:
                pieces.append(chr(int(name[1:])))
        else:
            try:
                pieces.append(self.entitydefs[name])
            except KeyError:
                # can't do it, so keep the entity ref in
                pieces.append('&' + name + ';')
        pos = res.end(0)
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

136 

74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

137 
# Internal -- handle data as far as reasonable. May leave state
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

138 
# and data to be processed by a subsequent call. If 'end' is 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

139 
# true, force handling all data as if followed by EOF marker. 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

140 
def goahead(self, end): 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

141 
rawdata = self.rawdata 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

142 
i = 0 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

143 
n = len(rawdata) 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

144 
while i < n: 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

145 
if self.nomoretags: 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

146 
data = rawdata[i:n] 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

147 
self.handle_data(data) 
3723
99aa837b6703
second stage of port to Python 3.3; working hello world
rptlab
parents:
3721
diff
changeset

148 
self.lineno = self.lineno + '\n'.count(data) 
190
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

149 
i = n 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

150 
break 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

151 
res = interesting.search(rawdata, i) 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

152 
if res: 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

153 
j = res.start(0) 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

154 
else: 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

155 
j = n 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

156 
if i < j: 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

157 
data = rawdata[i:j] 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

158 
self.handle_data(data) 
3723
99aa837b6703
second stage of port to Python 3.3; working hello world
rptlab
parents:
3721
diff
changeset

159 
self.lineno = self.lineno + '\n'.count(data) 
190
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

160 
i = j 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

161 
if i == n: break 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

162 
if rawdata[i] == '<': 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

163 
if starttagopen.match(rawdata, i): 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

164 
if self.literal: 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

165 
data = rawdata[i] 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

166 
self.handle_data(data) 
3723
99aa837b6703
second stage of port to Python 3.3; working hello world
rptlab
parents:
3721
diff
changeset

167 
self.lineno = self.lineno + '\n'.count(data) 
190
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

168 
i = i+1 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

169 
continue 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

170 
k = self.parse_starttag(i) 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

171 
if k < 0: break 
3723
99aa837b6703
second stage of port to Python 3.3; working hello world
rptlab
parents:
3721
diff
changeset

172 
self.lineno = self.lineno + '\n'.count(rawdata[i:k]) 
190
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

173 
i = k 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

174 
continue 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

175 
if endtagopen.match(rawdata, i): 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

176 
k = self.parse_endtag(i) 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

177 
if k < 0: break 
3723
99aa837b6703
second stage of port to Python 3.3; working hello world
rptlab
parents:
3721
diff
changeset

178 
self.lineno = self.lineno + '\n'.count(rawdata[i:k]) 
190
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

179 
i = k 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

180 
self.literal = 0 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

181 
continue 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

182 
if commentopen.match(rawdata, i): 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

183 
if self.literal: 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

184 
data = rawdata[i] 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

185 
self.handle_data(data) 
3723
99aa837b6703
second stage of port to Python 3.3; working hello world
rptlab
parents:
3721
diff
changeset

186 
self.lineno = self.lineno + '\n'.count(data) 
190
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

187 
i = i+1 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

188 
continue 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

189 
k = self.parse_comment(i) 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

190 
if k < 0: break 
3723
99aa837b6703
second stage of port to Python 3.3; working hello world
rptlab
parents:
3721
diff
changeset

191 
self.lineno = self.lineno + '\n'.count(rawdata[i:k]) 
190
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

192 
i = k 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

193 
continue 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

194 
if cdataopen.match(rawdata, i): 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

195 
k = self.parse_cdata(i) 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

196 
if k < 0: break 
3723
99aa837b6703
second stage of port to Python 3.3; working hello world
rptlab
parents:
3721
diff
changeset

197 
self.lineno = self.lineno + '\n'.count(rawdata[i:i]) 
190
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

198 
i = k 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

199 
continue 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

200 
res = procopen.match(rawdata, i) 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

201 
if res: 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

202 
k = self.parse_proc(i, res) 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

203 
if k < 0: break 
3723
99aa837b6703
second stage of port to Python 3.3; working hello world
rptlab
parents:
3721
diff
changeset

204 
self.lineno = self.lineno + '\n'.count(rawdata[i:k]) 
190
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

205 
i = k 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

206 
continue 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

207 
res = special.match(rawdata, i) 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

208 
if res: 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

209 
if self.literal: 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

210 
data = rawdata[i] 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

211 
self.handle_data(data) 
3723
99aa837b6703
second stage of port to Python 3.3; working hello world
rptlab
parents:
3721
diff
changeset

212 
self.lineno = self.lineno + '\n'.count(data) 
190
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

213 
i = i+1 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

214 
continue 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

215 
self.handle_special(res.group('special')) 
3723
99aa837b6703
second stage of port to Python 3.3; working hello world
rptlab
parents:
3721
diff
changeset

216 
self.lineno = self.lineno + '\n'.count(res.group(0)) 
190
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

217 
i = res.end(0) 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

218 
continue 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

219 
elif rawdata[i] == '&': 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

220 
res = charref.match(rawdata, i) 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

221 
if res is not None: 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

222 
i = res.end(0) 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

223 
if rawdata[i1] != ';': 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

224 
self.syntax_error(self.lineno, '; missing in charref') 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

225 
i = i1 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

226 
self.handle_charref(res.group('char')[:1]) 
3723
99aa837b6703
second stage of port to Python 3.3; working hello world
rptlab
parents:
3721
diff
changeset

227 
self.lineno = self.lineno + '\n'.count(res.group(0)) 
190
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

228 
continue 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

229 
res = entityref.match(rawdata, i) 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

230 
if res is not None: 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

231 
i = res.end(0) 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

232 
if rawdata[i1] != ';': 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

233 
self.syntax_error(self.lineno, '; missing in entityref') 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

234 
i = i1 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

235 
self.handle_entityref(res.group('name')) 
3723
99aa837b6703
second stage of port to Python 3.3; working hello world
rptlab
parents:
3721
diff
changeset

236 
self.lineno = self.lineno + '\n'.count(res.group(0)) 
190
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

237 
continue 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

238 
else: 
3721  239 
raise RuntimeError('neither < nor & ??') 
190
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

240 
# We get here only if incomplete matches but 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

241 
# nothing else 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

242 
res = incomplete.match(rawdata, i) 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

243 
if not res: 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

244 
data = rawdata[i] 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

245 
self.handle_data(data) 
3723
99aa837b6703
second stage of port to Python 3.3; working hello world
rptlab
parents:
3721
diff
changeset

246 
self.lineno = self.lineno + '\n'.count(data) 
190
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

247 
i = i+1 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

248 
continue 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

249 
j = res.end(0) 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

250 
if j == n: 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

251 
break # Really incomplete 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

252 
self.syntax_error(self.lineno, 'bogus < or &') 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

253 
data = res.group(0) 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

254 
self.handle_data(data) 
3723
99aa837b6703
second stage of port to Python 3.3; working hello world
rptlab
parents:
3721
diff
changeset

255 
self.lineno = self.lineno + '\n'.count(data) 
190
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

256 
i = j 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

257 
# end while 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

258 
if end and i < n: 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

259 
data = rawdata[i:n] 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

260 
self.handle_data(data) 
3723
99aa837b6703
second stage of port to Python 3.3; working hello world
rptlab
parents:
3721
diff
changeset

261 
self.lineno = self.lineno + '\n'.count(data) 
190
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

262 
i = n 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

263 
self.rawdata = rawdata[i:] 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

264 
# XXX if end: check for empty stack 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

265 

74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

266 
# Internal -- parse comment, return length or -1 if not terminated
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

267 
def parse_comment(self, i):
    """Parse the comment that starts at ``self.rawdata[i]``.

    Passes the comment text to ``handle_comment`` and returns the index
    just past the match of ``commentclose``.  Returns -1 when no closing
    match is present in the buffered data yet.  Raises RuntimeError when
    the data at ``i`` does not start with ``<!--``.
    """
    rawdata = self.rawdata
    if rawdata[i:i+4] != '<!--':
        raise RuntimeError('unexpected call to handle_comment')
    res = commentclose.search(rawdata, i+4)
    if not res:
        return -1
    # doubledash search will succeed because it's a subset of commentclose
    if doubledash.search(rawdata, i+4).start(0) < res.start(0):
        self.syntax_error(self.lineno, "'--' inside comment")
    self.handle_comment(rawdata[i+4:res.start(0)])
    return res.end(0)
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

279 

74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

280 
# Internal -- handle CDATA tag, return length or -1 if not terminated
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

281 
def parse_cdata(self, i):
    """Parse the CDATA section that starts at ``self.rawdata[i]``.

    Passes the section body to ``handle_cdata`` and returns the index just
    past the match of ``cdataclose``.  Returns -1 when no closing match is
    present in the buffered data yet.  Raises RuntimeError when the data
    at ``i`` does not start with ``<![CDATA[``.
    """
    rawdata = self.rawdata
    if rawdata[i:i+9] != '<![CDATA[':
        raise RuntimeError('unexpected call to handle_cdata')
    res = cdataclose.search(rawdata, i+9)
    if not res:
        return -1
    self.handle_cdata(rawdata[i+9:res.start(0)])
    return res.end(0)
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

290 

74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

291 
def parse_proc(self, i, res):
    """Handle a processing instruction whose opening has already matched.

    ``res`` is the match object for the opening; its ``proc`` group is the
    instruction name.  ``i`` is not used by this method.  Calls
    ``handle_proc(name, body)`` and returns the index just past the match
    of ``procclose``, or -1 when no closing match is buffered yet.
    Raises RuntimeError when ``res`` is false.
    """
    rawdata = self.rawdata
    if not res:
        raise RuntimeError('unexpected call to parse_proc')
    name = res.group('proc')
    closing = procclose.search(rawdata, res.end(0))
    if not closing:
        return -1
    # closing.pos is where the search started, i.e. the end of the opening
    # match, so the slice is everything between opening and closing.
    self.handle_proc(name, rawdata[closing.pos:closing.start(0)])
    return closing.end(0)
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

301 

74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

302 
# Internal -- handle starttag, return length or -1 if not terminated
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

303 
def parse_starttag(self, i):
    """Parse the start tag that begins at ``self.rawdata[i]``.

    Collects the attributes into a dict (values run through
    ``translate_references``), records the tag name in ``self.lasttag``,
    then calls ``finish_starttag`` and, when ``starttagend`` reports a
    ``slash`` group of ``/``, ``finish_endtag`` as well.

    Returns the index just past the match of ``endbracket``, or -1 when
    the tag is not terminated in the buffered data.  Raises RuntimeError
    when no tag name matches at ``i + 1``.  Malformed attributes are
    reported through ``syntax_error``.
    """
    rawdata = self.rawdata
    # i points to start of tag
    end = endbracket.search(rawdata, i+1)
    if not end:
        return -1
    j = end.start(0)
    # Now parse the data between i+1 and j into a tag and attrs
    attrdict = {}
    res = tagfind.match(rawdata, i+1)
    if not res:
        raise RuntimeError('unexpected call to parse_starttag')
    k = res.end(0)
    tag = res.group(0)
    # An optional <tag>_attributes attribute restricts the allowed names.
    attrlist = getattr(self, tag + '_attributes', None)
    self.lasttag = tag
    while k < j:
        res = attrfind.match(rawdata, k)
        if not res:
            break
        attrname, attrvalue = res.group('name', 'value')
        if attrvalue is None:
            self.syntax_error(self.lineno, 'no attribute value specified')
            attrvalue = attrname
        elif (attrvalue[:1] == "'" == attrvalue[-1:] or
              attrvalue[:1] == '"' == attrvalue[-1:]):
            # Strip the matching pair of surrounding quotes.
            attrvalue = attrvalue[1:-1]
        else:
            self.syntax_error(self.lineno, 'attribute value not quoted')
        if attrlist is not None and attrname not in attrlist:
            self.syntax_error(self.lineno,
                              'unknown attribute %s of element %s' %
                              (attrname, tag))
        if attrname in attrdict:
            self.syntax_error(self.lineno, 'attribute specified twice')
        attrdict[attrname] = self.translate_references(attrvalue)
        k = res.end(0)
    res = starttagend.match(rawdata, k)
    if not res:
        self.syntax_error(self.lineno, 'garbage in start tag')
    self.finish_starttag(tag, attrdict)
    if res and res.group('slash') == '/':
        self.finish_endtag(tag)
    return end.end(0)
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

349 

74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

350 
# Internal  parse endtag 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

351 
def parse_endtag(self, i):
    """Parse the end tag that begins at ``self.rawdata[i]``.

    Calls ``finish_endtag`` with the tag name, or with ``''`` when no name
    matches at ``i + 2`` (reported through ``syntax_error``).  Anything
    other than a ``space`` match between the name and the closing bracket
    is reported as 'garbage in end tag'.

    Returns the index just past the match of ``endbracket``, or -1 when
    the tag is not terminated in the buffered data.
    """
    rawdata = self.rawdata
    end = endbracket.search(rawdata, i+1)
    if not end:
        return -1
    res = tagfind.match(rawdata, i+2)
    if not res:
        self.syntax_error(self.lineno, 'no name specified in end tag')
        tag = ''
        k = i+2
    else:
        tag = res.group(0)
        k = res.end(0)
    if k != end.start(0):
        # check that there is only white space at end of tag
        res = space.match(rawdata, k)
        if res is None or res.end(0) != end.start(0):
            self.syntax_error(self.lineno, 'garbage in end tag')
    self.finish_endtag(tag)
    return end.end(0)
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

371 

74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

372 
# Internal  finish processing of start tag 
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

373 
# Return -1 for unknown tag, 1 for balanced tag
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
changeset

374 
def finish_starttag(self, tag, attrs):
    """Push ``tag`` on the open-element stack and dispatch its handler.

    When a ``start_<tag>`` attribute exists it is passed to
    ``handle_starttag`` and 1 is returned; otherwise ``unknown_starttag``
    is called and -1 is returned.
    """
    self.stack.append(tag)
    try:
        method = getattr(self, 'start_' + tag)
    except AttributeError:
        self.unknown_starttag(tag, attrs)
        return -1
    else:
        self.handle_starttag(tag, method, attrs)
        return 1
# Internal  finish processing of end tag 
def finish_endtag(self, tag):
    """Close open elements in response to an end tag.

    An empty ``tag`` closes only the innermost open element.  A named tag
    closes every element from the top of the stack down to and including
    its most recent occurrence.  Each closed element is dispatched to
    ``handle_endtag`` when an ``end_<name>`` attribute exists, otherwise
    to ``unknown_endtag``.  A named tag that is not open closes nothing
    and is reported to ``unknown_endtag`` only when it has no
    ``end_<tag>`` handler.
    """
    if not tag:
        found = len(self.stack) - 1
        if found < 0:
            self.unknown_endtag(tag)
            return
    else:
        if tag not in self.stack:
            if not hasattr(self, 'end_' + tag):
                self.unknown_endtag(tag)
            return
        # Index of the innermost (last) occurrence of tag on the stack.
        found = len(self.stack) - 1 - self.stack[::-1].index(tag)
    while len(self.stack) > found:
        tag = self.stack[-1]
        method = getattr(self, 'end_' + tag, None)
        if method:
            self.handle_endtag(tag, method)
        else:
            self.unknown_endtag(tag)
        del self.stack[-1]
def handle_starttag(self, tag, method, attrs):
    """Invoke the start-tag handler ``method`` with the attribute dict."""
    method(attrs)


def handle_endtag(self, tag, method):
    """Invoke the end-tag handler ``method`` with no arguments."""
    method()
def handle_charref(self, name):
    """Handle a character reference given without its ``&#`` and ``;``.

    ``name`` is decimal digits, or ``x`` followed by hex digits.  A value
    in 0..255 is converted with ``chr`` and passed to ``handle_data``;
    anything unparsable or out of that range goes to ``unknown_charref``.
    """
    try:
        # name[:1] rather than name[0] so an empty name falls through to
        # int('') and is reported as unknown instead of raising IndexError.
        if name[:1] == 'x':
            n = int(name[1:], 16)
        else:
            n = int(name)
    except ValueError:
        self.unknown_charref(name)
        return
    if not 0 <= n <= 255:
        self.unknown_charref(name)
        return
    self.handle_data(chr(n))
entitydefs = ENTITYDEFS 
def handle_entityref(self, name):
    """Handle an entity reference given without its ``&`` and ``;``.

    The replacement text from ``self.entitydefs`` is passed to
    ``handle_data``; a name missing from that table goes to
    ``unknown_entityref``.
    """
    table = self.entitydefs
    if name in table:
        self.handle_data(table[name])
    else:
        self.unknown_entityref(name)
def handle_data(self, data):
    """Receive character data; does nothing here."""
    pass


def handle_cdata(self, data):
    """Receive the body of a CDATA section; does nothing here."""
    pass


def handle_comment(self, data):
    """Receive the text of a comment; does nothing here."""
    pass


def handle_proc(self, name, data):
    """Receive a processing instruction; does nothing here."""
    pass


def handle_special(self, data):
    """Receive the text of a special declaration; does nothing here."""
    pass
def syntax_error(self, lineno, message):
    """Report a syntax error by raising RuntimeError with line and message."""
    raise RuntimeError('Syntax error at line %d: %s' % (lineno, message))
def unknown_starttag(self, tag, attrs):
    """Called for a start tag with no handler; ignores it."""
    return None


def unknown_endtag(self, tag):
    """Called for an end tag with no handler; ignores it."""
    return None


def unknown_charref(self, ref):
    """Called for an unresolvable character reference; ignores it."""
    return None


def unknown_entityref(self, ref):
    """Called for an unresolvable entity reference; ignores it."""
    return None
# accelerated XML parser 
def __init__(self, verbose=0):
    """Store the ``verbose`` flag and put the parser in its initial state."""
    self.verbose = verbose
    self.reset()


def reset(self):
    """Reset all parsing state and create a fresh ``sgmlop.XMLParser``.

    The instance registers itself with the new parser, and ``self.feed``
    is rebound to the parser's own ``feed`` so that data goes straight to
    it.
    """
    self.rawdata = ''
    self.stack = []
    self.lasttag = '???'
    self.nomoretags = 0
    self.literal = 0
    self.lineno = 1
    self.parser = sgmlop.XMLParser()
    self.feed = self.parser.feed
    self.parser.register(self)
def setnomoretags(self):
    """Set both the ``nomoretags`` and ``literal`` flags to 1."""
    self.nomoretags = self.literal = 1


def setliteral(self, *args):
    """Set the ``literal`` flag to 1; any positional arguments are ignored."""
    self.literal = 1
# often as you want, with as little or as much text as you 
# want (may include '\n'). (This just saves the text, all the 
# processing is done by goahead().) 
def feed(self, data):  # overridden by reset
    """Forward ``data`` to the underlying parser's ``feed``."""
    self.parser.feed(data)
def close(self):
    """Close the underlying parser and drop the reference to it.

    ``self.parser`` is set to None even when the parser's ``close``
    raises.  Calling ``close`` again afterwards is a no-op.
    """
    parser = self.parser
    if parser is None:
        # Already closed: nothing left to release.
        return
    try:
        parser.close()
    finally:
        self.parser = None
def translate_references(self, data):
    """Return ``data`` with character and entity references expanded.

    Each match of ``ref`` is replaced: ``#NNN`` and ``#xHH`` forms by the
    corresponding character, named entities by their ``self.entitydefs``
    value.  Unknown entity names are left in place as ``&name;``.  A
    reference whose last matched character is not ``;`` is reported
    through ``syntax_error``.
    """
    newdata = []
    i = 0
    while 1:
        res = ref.search(data, i)
        if res is None:
            newdata.append(data[i:])
            return ''.join(newdata)
        if data[res.end(0) - 1] != ';':
            self.syntax_error(self.lineno,
                              '; missing after entity/char reference')
        newdata.append(data[i:res.start(0)])
        name = res.group(1)
        if name[0] == '#':
            if name[1] == 'x':
                newdata.append(chr(int(name[2:], 16)))
            else:
                newdata.append(chr(int(name[1:])))
        else:
            try:
                newdata.append(self.entitydefs[name])
            except KeyError:
                # can't do it, so keep the entity ref in
                newdata.append('&' + name + ';')
        i = res.end(0)
# Return -1 for unknown tag, 1 for balanced tag
def finish_starttag(self, tag, attrs):
    """Push ``tag`` on the open-element stack and dispatch its handler.

    When a ``start_<tag>`` attribute exists it is passed to
    ``handle_starttag`` and 1 is returned; otherwise ``unknown_starttag``
    is called and -1 is returned.
    """
    self.stack.append(tag)
    try:
        method = getattr(self, 'start_' + tag)
    except AttributeError:
        self.unknown_starttag(tag, attrs)
        return -1
    else:
        self.handle_starttag(tag, method, attrs)
        return 1
def finish_endtag(self, tag):
    """Close open elements in response to an end tag.

    An empty ``tag`` closes only the innermost open element.  A named tag
    closes every element from the top of the stack down to and including
    its most recent occurrence.  Each closed element is dispatched to
    ``handle_endtag`` when an ``end_<name>`` attribute exists, otherwise
    to ``unknown_endtag``.  A named tag that is not open closes nothing
    and is reported to ``unknown_endtag`` only when it has no
    ``end_<tag>`` handler.
    """
    if not tag:
        found = len(self.stack) - 1
        if found < 0:
            self.unknown_endtag(tag)
            return
    else:
        if tag not in self.stack:
            if not hasattr(self, 'end_' + tag):
                self.unknown_endtag(tag)
            return
        # Index of the innermost (last) occurrence of tag on the stack.
        found = len(self.stack) - 1 - self.stack[::-1].index(tag)
    while len(self.stack) > found:
        tag = self.stack[-1]
        method = getattr(self, 'end_' + tag, None)
        if method:
            self.handle_endtag(tag, method)
        else:
            self.unknown_endtag(tag)
        del self.stack[-1]
def handle_starttag(self, tag, method, attrs):
    """Invoke the start-tag handler ``method`` with the attribute dict."""
    method(attrs)


def handle_endtag(self, tag, method):
    """Invoke the end-tag handler ``method`` with no arguments."""
    method()
def handle_charref(self, name):
    """Handle a character reference given without its ``&#`` and ``;``.

    ``name`` is decimal digits, or ``x`` followed by hex digits.  A value
    in 0..255 is converted with ``chr`` and passed to ``handle_data``;
    anything unparsable or out of that range goes to ``unknown_charref``.
    """
    try:
        # name[:1] rather than name[0] so an empty name falls through to
        # int('') and is reported as unknown instead of raising IndexError.
        if name[:1] == 'x':
            n = int(name[1:], 16)
        else:
            n = int(name)
    except ValueError:
        self.unknown_charref(name)
        return
    if not 0 <= n <= 255:
        self.unknown_charref(name)
        return
    self.handle_data(chr(n))
entitydefs = ENTITYDEFS 
def handle_entityref(self, name):
    """Handle an entity reference given without its ``&`` and ``;``.

    The replacement text from ``self.entitydefs`` is passed to
    ``handle_data``; a name missing from that table goes to
    ``unknown_entityref``.
    """
    table = self.entitydefs
    if name in table:
        self.handle_data(table[name])
    else:
        self.unknown_entityref(name)
def handle_data(self, data):
    """Receive character data; does nothing here."""
    pass


def handle_cdata(self, data):
    """Receive the body of a CDATA section; does nothing here."""
    pass


def handle_comment(self, data):
    """Receive the text of a comment; does nothing here."""
    pass


def handle_proc(self, name, data):
    """Receive a processing instruction; does nothing here."""
    pass


def handle_special(self, data):
    """Receive the text of a special declaration; does nothing here."""
    pass

def syntax_error(self, lineno, message):
    """Report a syntax error by raising RuntimeError with line and message."""
    raise RuntimeError('Syntax error at line %d: %s' % (lineno, message))
def unknown_starttag(self, tag, attrs):
    """Called for a start tag with no handler; ignores it."""
    return None


def unknown_endtag(self, tag):
    """Called for an end tag with no handler; ignores it."""
    return None


def unknown_charref(self, ref):
    """Called for an unresolvable character reference; ignores it."""
    return None


def unknown_entityref(self, ref):
    """Called for an unresolvable entity reference; ignores it."""
    return None
# Use the sgmlop-backed parser when the optional sgmlop module imported;
# otherwise fall back to the pure-Python implementation.
if sgmlop:
    XMLParser = FastXMLParser
else:
    XMLParser = SlowXMLParser
self.testdata = "" 
XMLParser.__init__(self, verbose) 
self.testdata = self.testdata + data 
3326  678 
if len(repr(self.testdata)) >= 70: 
self.flush() 
data = self.testdata 
if data: 
self.testdata = "" 
changeset

686 

74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
parents:
diff
rgbecker
parents:
diff
changeset

def handle_proc(self, name, data): 
self.flush() 
diff
changeset

changeset

rgbecker
702 
703 
parents:
diff
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
rgbecker
rgbecker
190
else: 
715 
for name, value in attrs.items(): 
3721  716 
print(name + '=' + '"' + value + '"', end=' ') 
717 
print('>') 

718 

def unknown_endtag(self, tag): 
self.flush() 
3721  721 
print('end tag: </' + tag + '>') 
74df7a489c81
74df7a489c81
3721  725 
726 

def unknown_charref(self, ref): 
self.flush() 
3721  729 
print('*** unknown char ref: &#' + ref + ';') 
74df7a489c81
74df7a489c81
74df7a489c81
74df7a489c81
74df7a489c81
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
Version of xmllib from the PyXML distribution. This one can be accelerated.
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
rgbecker
rgbecker
rgbecker
rgbecker
rgbecker
parents:
parents:
parents:
parents:
parents:
diff
diff
diff
diff
diff
sys.exit(1) 
74df7a489c81
74df7a489c81
74df7a489c81
74df7a489c81
Version of xmllib from the PyXML distribution. This one can be accelerated.
Version of xmllib from the PyXML distribution. This one can be accelerated.
Version of xmllib from the PyXML distribution. This one can be accelerated.
Version of xmllib from the PyXML distribution. This one can be accelerated.
Version of xmllib from the PyXML distribution. This one can be accelerated.
rgbecker
193  771 
if __name__ == '__main__': #NO_REPORTLAB_TEST 
2053  772 
test() 