src/reportlab/lib/xmllib.py
branch py33
changeset 3723 99aa837b6703
parent 3721 0c93dd8ff567
child 3884 3bc59a4c3c21
legend: equal | deleted | inserted | replaced
3722:29c11b905751 3723:99aa837b6703
     6 __doc__='''From before xmllib was in the Python standard library.
     6 __doc__='''From before xmllib was in the Python standard library.
     7 
     7 
     8 Probably ought to be removed'''
     8 Probably ought to be removed'''
     9 
     9 
    10 import re
    10 import re
    11 import string
       
    12 
    11 
    13 try:
    12 try:
    14     import sgmlop   # this works for both builtin on the path or relative
    13     import sgmlop   # this works for both builtin on the path or relative
    15 except ImportError:
    14 except ImportError:
    16     sgmlop = None
    15     sgmlop = None
   114         i = 0
   113         i = 0
   115         while 1:
   114         while 1:
   116             res = ref.search(data, i)
   115             res = ref.search(data, i)
   117             if res is None:
   116             if res is None:
   118                 newdata.append(data[i:])
   117                 newdata.append(data[i:])
   119                 return string.join(newdata, '')
   118                 return ''.join(newdata)
   120             if data[res.end(0) - 1] != ';':
   119             if data[res.end(0) - 1] != ';':
   121                 self.syntax_error(self.lineno,
   120                 self.syntax_error(self.lineno,
   122                                   '; missing after entity/char reference')
   121                                   '; missing after entity/char reference')
   123             newdata.append(data[i:res.start(0)])
   122             newdata.append(data[i:res.start(0)])
   124             str = res.group(1)
   123             str = res.group(1)
   125             if str[0] == '#':
   124             if str[0] == '#':
   126                 if str[1] == 'x':
   125                 if str[1] == 'x':
   127                     newdata.append(chr(string.atoi(str[2:], 16)))
   126                     newdata.append(chr(int(str[2:], 16)))
   128                 else:
   127                 else:
   129                     newdata.append(chr(string.atoi(str[1:])))
   128                     newdata.append(chr(int(str[1:])))
   130             else:
   129             else:
   131                 try:
   130                 try:
   132                     newdata.append(self.entitydefs[str])
   131                     newdata.append(self.entitydefs[str])
   133                 except KeyError:
   132                 except KeyError:
   134                     # can't do it, so keep the entity ref in
   133                     # can't do it, so keep the entity ref in
   144         n = len(rawdata)
   143         n = len(rawdata)
   145         while i < n:
   144         while i < n:
   146             if self.nomoretags:
   145             if self.nomoretags:
   147                 data = rawdata[i:n]
   146                 data = rawdata[i:n]
   148                 self.handle_data(data)
   147                 self.handle_data(data)
   149                 self.lineno = self.lineno + string.count(data, '\n')
   148                 self.lineno = self.lineno + '\n'.count(data)
   150                 i = n
   149                 i = n
   151                 break
   150                 break
   152             res = interesting.search(rawdata, i)
   151             res = interesting.search(rawdata, i)
   153             if res:
   152             if res:
   154                     j = res.start(0)
   153                     j = res.start(0)
   155             else:
   154             else:
   156                     j = n
   155                     j = n
   157             if i < j:
   156             if i < j:
   158                 data = rawdata[i:j]
   157                 data = rawdata[i:j]
   159                 self.handle_data(data)
   158                 self.handle_data(data)
   160                 self.lineno = self.lineno + string.count(data, '\n')
   159                 self.lineno = self.lineno + '\n'.count(data)
   161             i = j
   160             i = j
   162             if i == n: break
   161             if i == n: break
   163             if rawdata[i] == '<':
   162             if rawdata[i] == '<':
   164                 if starttagopen.match(rawdata, i):
   163                 if starttagopen.match(rawdata, i):
   165                     if self.literal:
   164                     if self.literal:
   166                         data = rawdata[i]
   165                         data = rawdata[i]
   167                         self.handle_data(data)
   166                         self.handle_data(data)
   168                         self.lineno = self.lineno + string.count(data, '\n')
   167                         self.lineno = self.lineno + '\n'.count(data)
   169                         i = i+1
   168                         i = i+1
   170                         continue
   169                         continue
   171                     k = self.parse_starttag(i)
   170                     k = self.parse_starttag(i)
   172                     if k < 0: break
   171                     if k < 0: break
   173                     self.lineno = self.lineno + string.count(rawdata[i:k], '\n')
   172                     self.lineno = self.lineno + '\n'.count(rawdata[i:k])
   174                     i = k
   173                     i = k
   175                     continue
   174                     continue
   176                 if endtagopen.match(rawdata, i):
   175                 if endtagopen.match(rawdata, i):
   177                     k = self.parse_endtag(i)
   176                     k = self.parse_endtag(i)
   178                     if k < 0: break
   177                     if k < 0: break
   179                     self.lineno = self.lineno + string.count(rawdata[i:k], '\n')
   178                     self.lineno = self.lineno + '\n'.count(rawdata[i:k])
   180                     i =  k
   179                     i =  k
   181                     self.literal = 0
   180                     self.literal = 0
   182                     continue
   181                     continue
   183                 if commentopen.match(rawdata, i):
   182                 if commentopen.match(rawdata, i):
   184                     if self.literal:
   183                     if self.literal:
   185                         data = rawdata[i]
   184                         data = rawdata[i]
   186                         self.handle_data(data)
   185                         self.handle_data(data)
   187                         self.lineno = self.lineno + string.count(data, '\n')
   186                         self.lineno = self.lineno + '\n'.count(data)
   188                         i = i+1
   187                         i = i+1
   189                         continue
   188                         continue
   190                     k = self.parse_comment(i)
   189                     k = self.parse_comment(i)
   191                     if k < 0: break
   190                     if k < 0: break
   192                     self.lineno = self.lineno + string.count(rawdata[i:k], '\n')
   191                     self.lineno = self.lineno + '\n'.count(rawdata[i:k])
   193                     i = k
   192                     i = k
   194                     continue
   193                     continue
   195                 if cdataopen.match(rawdata, i):
   194                 if cdataopen.match(rawdata, i):
   196                     k = self.parse_cdata(i)
   195                     k = self.parse_cdata(i)
   197                     if k < 0: break
   196                     if k < 0: break
   198                     self.lineno = self.lineno + string.count(rawdata[i:i], '\n')
   197                     self.lineno = self.lineno + '\n'.count(rawdata[i:i])
   199                     i = k
   198                     i = k
   200                     continue
   199                     continue
   201                 res = procopen.match(rawdata, i)
   200                 res = procopen.match(rawdata, i)
   202                 if res:
   201                 if res:
   203                     k = self.parse_proc(i, res)
   202                     k = self.parse_proc(i, res)
   204                     if k < 0: break
   203                     if k < 0: break
   205                     self.lineno = self.lineno + string.count(rawdata[i:k], '\n')
   204                     self.lineno = self.lineno + '\n'.count(rawdata[i:k])
   206                     i = k
   205                     i = k
   207                     continue
   206                     continue
   208                 res = special.match(rawdata, i)
   207                 res = special.match(rawdata, i)
   209                 if res:
   208                 if res:
   210                     if self.literal:
   209                     if self.literal:
   211                         data = rawdata[i]
   210                         data = rawdata[i]
   212                         self.handle_data(data)
   211                         self.handle_data(data)
   213                         self.lineno = self.lineno + string.count(data, '\n')
   212                         self.lineno = self.lineno + '\n'.count(data)
   214                         i = i+1
   213                         i = i+1
   215                         continue
   214                         continue
   216                     self.handle_special(res.group('special'))
   215                     self.handle_special(res.group('special'))
   217                     self.lineno = self.lineno + string.count(res.group(0), '\n')
   216                     self.lineno = self.lineno + '\n'.count(res.group(0))
   218                     i = res.end(0)
   217                     i = res.end(0)
   219                     continue
   218                     continue
   220             elif rawdata[i] == '&':
   219             elif rawdata[i] == '&':
   221                 res = charref.match(rawdata, i)
   220                 res = charref.match(rawdata, i)
   222                 if res is not None:
   221                 if res is not None:
   223                     i = res.end(0)
   222                     i = res.end(0)
   224                     if rawdata[i-1] != ';':
   223                     if rawdata[i-1] != ';':
   225                         self.syntax_error(self.lineno, '; missing in charref')
   224                         self.syntax_error(self.lineno, '; missing in charref')
   226                         i = i-1
   225                         i = i-1
   227                     self.handle_charref(res.group('char')[:-1])
   226                     self.handle_charref(res.group('char')[:-1])
   228                     self.lineno = self.lineno + string.count(res.group(0), '\n')
   227                     self.lineno = self.lineno + '\n'.count(res.group(0))
   229                     continue
   228                     continue
   230                 res = entityref.match(rawdata, i)
   229                 res = entityref.match(rawdata, i)
   231                 if res is not None:
   230                 if res is not None:
   232                     i = res.end(0)
   231                     i = res.end(0)
   233                     if rawdata[i-1] != ';':
   232                     if rawdata[i-1] != ';':
   234                         self.syntax_error(self.lineno, '; missing in entityref')
   233                         self.syntax_error(self.lineno, '; missing in entityref')
   235                         i = i-1
   234                         i = i-1
   236                     self.handle_entityref(res.group('name'))
   235                     self.handle_entityref(res.group('name'))
   237                     self.lineno = self.lineno + string.count(res.group(0), '\n')
   236                     self.lineno = self.lineno + '\n'.count(res.group(0))
   238                     continue
   237                     continue
   239             else:
   238             else:
   240                 raise RuntimeError('neither < nor & ??')
   239                 raise RuntimeError('neither < nor & ??')
   241             # We get here only if incomplete matches but
   240             # We get here only if incomplete matches but
   242             # nothing else
   241             # nothing else
   243             res = incomplete.match(rawdata, i)
   242             res = incomplete.match(rawdata, i)
   244             if not res:
   243             if not res:
   245                 data = rawdata[i]
   244                 data = rawdata[i]
   246                 self.handle_data(data)
   245                 self.handle_data(data)
   247                 self.lineno = self.lineno + string.count(data, '\n')
   246                 self.lineno = self.lineno + '\n'.count(data)
   248                 i = i+1
   247                 i = i+1
   249                 continue
   248                 continue
   250             j = res.end(0)
   249             j = res.end(0)
   251             if j == n:
   250             if j == n:
   252                 break # Really incomplete
   251                 break # Really incomplete
   253             self.syntax_error(self.lineno, 'bogus < or &')
   252             self.syntax_error(self.lineno, 'bogus < or &')
   254             data = res.group(0)
   253             data = res.group(0)
   255             self.handle_data(data)
   254             self.handle_data(data)
   256             self.lineno = self.lineno + string.count(data, '\n')
   255             self.lineno = self.lineno + '\n'.count(data)
   257             i = j
   256             i = j
   258         # end while
   257         # end while
   259         if end and i < n:
   258         if end and i < n:
   260             data = rawdata[i:n]
   259             data = rawdata[i:n]
   261             self.handle_data(data)
   260             self.handle_data(data)
   262             self.lineno = self.lineno + string.count(data, '\n')
   261             self.lineno = self.lineno + '\n'.count(data)
   263             i = n
   262             i = n
   264         self.rawdata = rawdata[i:]
   263         self.rawdata = rawdata[i:]
   265         # XXX if end: check for empty stack
   264         # XXX if end: check for empty stack
   266 
   265 
   267     # Internal -- parse comment, return length or -1 if not terminated
   266     # Internal -- parse comment, return length or -1 if not terminated
   422 
   421 
   423     # Example -- handle character reference, no need to override
   422     # Example -- handle character reference, no need to override
   424     def handle_charref(self, name):
   423     def handle_charref(self, name):
   425         try:
   424         try:
   426             if name[0] == 'x':
   425             if name[0] == 'x':
   427                 n = string.atoi(name[1:], 16)
   426                 n = int(name[1:], 16)
   428             else:
   427             else:
   429                 n = string.atoi(name)
   428                 n = int(name)
   430         except string.atoi_error:
   429         except int_error:
   431             self.unknown_charref(name)
   430             self.unknown_charref(name)
   432             return
   431             return
   433         if not 0 <= n <= 255:
   432         if not 0 <= n <= 255:
   434             self.unknown_charref(name)
   433             self.unknown_charref(name)
   435             return
   434             return
   528         i = 0
   527         i = 0
   529         while 1:
   528         while 1:
   530             res = ref.search(data, i)
   529             res = ref.search(data, i)
   531             if res is None:
   530             if res is None:
   532                 newdata.append(data[i:])
   531                 newdata.append(data[i:])
   533                 return string.join(newdata, '')
   532                 return ''.join(newdata)
   534             if data[res.end(0) - 1] != ';':
   533             if data[res.end(0) - 1] != ';':
   535                 self.syntax_error(self.lineno,
   534                 self.syntax_error(self.lineno,
   536                                   '; missing after entity/char reference')
   535                                   '; missing after entity/char reference')
   537             newdata.append(data[i:res.start(0)])
   536             newdata.append(data[i:res.start(0)])
   538             str = res.group(1)
   537             str = res.group(1)
   539             if str[0] == '#':
   538             if str[0] == '#':
   540                 if str[1] == 'x':
   539                 if str[1] == 'x':
   541                     newdata.append(chr(string.atoi(str[2:], 16)))
   540                     newdata.append(chr(int(str[2:], 16)))
   542                 else:
   541                 else:
   543                     newdata.append(chr(string.atoi(str[1:])))
   542                     newdata.append(chr(int(str[1:])))
   544             else:
   543             else:
   545                 try:
   544                 try:
   546                     newdata.append(self.entitydefs[str])
   545                     newdata.append(self.entitydefs[str])
   547                 except KeyError:
   546                 except KeyError:
   548                     # can't do it, so keep the entity ref in
   547                     # can't do it, so keep the entity ref in
   601 
   600 
   602     # Example -- handle character reference, no need to override
   601     # Example -- handle character reference, no need to override
   603     def handle_charref(self, name):
   602     def handle_charref(self, name):
   604         try:
   603         try:
   605             if name[0] == 'x':
   604             if name[0] == 'x':
   606                 n = string.atoi(name[1:], 16)
   605                 n = int(name[1:], 16)
   607             else:
   606             else:
   608                 n = string.atoi(name)
   607                 n = int(name)
   609         except string.atoi_error:
   608         except ValueError:
   610             self.unknown_charref(name)
   609             self.unknown_charref(name)
   611             return
   610             return
   612         if not 0 <= n <= 255:
   611         if not 0 <= n <= 255:
   613             self.unknown_charref(name)
   612             self.unknown_charref(name)
   614             return
   613             return
   711         self.flush()
   710         self.flush()
   712         if not attrs:
   711         if not attrs:
   713             print('start tag: <' + tag + '>')
   712             print('start tag: <' + tag + '>')
   714         else:
   713         else:
   715             print('start tag: <' + tag, end=' ')
   714             print('start tag: <' + tag, end=' ')
   716             for name, value in list(attrs.items()):
   715             for name, value in attrs.items():
   717                 print(name + '=' + '"' + value + '"', end=' ')
   716                 print(name + '=' + '"' + value + '"', end=' ')
   718             print('>')
   717             print('>')
   719 
   718 
   720     def unknown_endtag(self, tag):
   719     def unknown_endtag(self, tag):
   721         self.flush()
   720         self.flush()