src/reportlab/platypus/paraparser.py
branch py33
changeset 3787 8f9be6d6f75c
parent 3731 b233dd0577ff
child 3809 cbd390e6d557
--- a/src/reportlab/platypus/paraparser.py	Mon Nov 18 15:17:31 2013 +0000
+++ b/src/reportlab/platypus/paraparser.py	Mon Nov 18 17:27:49 2013 +0000
@@ -16,9 +16,7 @@
 import unicodedata
 import reportlab.lib.sequencer
 from reportlab.lib.abag import ABag
-from reportlab.lib.utils import ImageReader
-
-from reportlab.lib import xmllib
+from reportlab.lib.utils import ImageReader, isPy3, annotateException
 
 from reportlab.lib.colors import toColor, white, black, red, Color
 from reportlab.lib.fonts import tt2ps, ps2tt
@@ -218,9 +216,7 @@
 
 def _applyAttributes(obj, attr):
     for k, v in attr.items():
-        if isisntance(v,(list,tuple)) and v[0]=='relative':
-            #AR 20/5/2000 - remove 1.5.2-ism
-            #v = v[1]+getattr(obj,k,0)
+        if isinstance(v,(list,tuple)) and v[0]=='relative':
             if hasattr(obj, k):
                 v = v[1]+getattr(obj,k)
             else:
@@ -231,259 +227,258 @@
 #with additions suggested by Christoph Zwerschke who also suggested the
 #numeric entity names that follow.
 greeks = {
-    'Aacute': '\xc3\x81',
-    'aacute': '\xc3\xa1',
-    'Acirc': '\xc3\x82',
-    'acirc': '\xc3\xa2',
-    'acute': '\xc2\xb4',
-    'AElig': '\xc3\x86',
-    'aelig': '\xc3\xa6',
-    'Agrave': '\xc3\x80',
-    'agrave': '\xc3\xa0',
-    'alefsym': '\xe2\x84\xb5',
-    'Alpha': '\xce\x91',
-    'alpha': '\xce\xb1',
-    'and': '\xe2\x88\xa7',
-    'ang': '\xe2\x88\xa0',
-    'Aring': '\xc3\x85',
-    'aring': '\xc3\xa5',
-    'asymp': '\xe2\x89\x88',
-    'Atilde': '\xc3\x83',
-    'atilde': '\xc3\xa3',
-    'Auml': '\xc3\x84',
-    'auml': '\xc3\xa4',
-    'bdquo': '\xe2\x80\x9e',
-    'Beta': '\xce\x92',
-    'beta': '\xce\xb2',
-    'brvbar': '\xc2\xa6',
-    'bull': '\xe2\x80\xa2',
-    'cap': '\xe2\x88\xa9',
-    'Ccedil': '\xc3\x87',
-    'ccedil': '\xc3\xa7',
-    'cedil': '\xc2\xb8',
-    'cent': '\xc2\xa2',
-    'Chi': '\xce\xa7',
-    'chi': '\xcf\x87',
-    'circ': '\xcb\x86',
-    'clubs': '\xe2\x99\xa3',
-    'cong': '\xe2\x89\x85',
-    'copy': '\xc2\xa9',
-    'crarr': '\xe2\x86\xb5',
-    'cup': '\xe2\x88\xaa',
-    'curren': '\xc2\xa4',
-    'dagger': '\xe2\x80\xa0',
-    'Dagger': '\xe2\x80\xa1',
-    'darr': '\xe2\x86\x93',
-    'dArr': '\xe2\x87\x93',
-    'deg': '\xc2\xb0',
-    'delta': '\xce\xb4',
-    'Delta': '\xe2\x88\x86',
-    'diams': '\xe2\x99\xa6',
-    'divide': '\xc3\xb7',
-    'Eacute': '\xc3\x89',
-    'eacute': '\xc3\xa9',
-    'Ecirc': '\xc3\x8a',
-    'ecirc': '\xc3\xaa',
-    'Egrave': '\xc3\x88',
-    'egrave': '\xc3\xa8',
-    'empty': '\xe2\x88\x85',
-    'emsp': '\xe2\x80\x83',
-    'ensp': '\xe2\x80\x82',
-    'Epsilon': '\xce\x95',
-    'epsilon': '\xce\xb5',
-    'epsiv': '\xce\xb5',
-    'equiv': '\xe2\x89\xa1',
-    'Eta': '\xce\x97',
-    'eta': '\xce\xb7',
-    'ETH': '\xc3\x90',
-    'eth': '\xc3\xb0',
-    'Euml': '\xc3\x8b',
-    'euml': '\xc3\xab',
-    'euro': '\xe2\x82\xac',
-    'exist': '\xe2\x88\x83',
-    'fnof': '\xc6\x92',
-    'forall': '\xe2\x88\x80',
-    'frac12': '\xc2\xbd',
-    'frac14': '\xc2\xbc',
-    'frac34': '\xc2\xbe',
-    'frasl': '\xe2\x81\x84',
-    'Gamma': '\xce\x93',
-    'gamma': '\xce\xb3',
-    'ge': '\xe2\x89\xa5',
-    'harr': '\xe2\x86\x94',
-    'hArr': '\xe2\x87\x94',
-    'hearts': '\xe2\x99\xa5',
-    'hellip': '\xe2\x80\xa6',
-    'Iacute': '\xc3\x8d',
-    'iacute': '\xc3\xad',
-    'Icirc': '\xc3\x8e',
-    'icirc': '\xc3\xae',
-    'iexcl': '\xc2\xa1',
-    'Igrave': '\xc3\x8c',
-    'igrave': '\xc3\xac',
-    'image': '\xe2\x84\x91',
-    'infin': '\xe2\x88\x9e',
-    'int': '\xe2\x88\xab',
-    'Iota': '\xce\x99',
-    'iota': '\xce\xb9',
-    'iquest': '\xc2\xbf',
-    'isin': '\xe2\x88\x88',
-    'Iuml': '\xc3\x8f',
-    'iuml': '\xc3\xaf',
-    'Kappa': '\xce\x9a',
-    'kappa': '\xce\xba',
-    'Lambda': '\xce\x9b',
-    'lambda': '\xce\xbb',
-    'lang': '\xe2\x8c\xa9',
-    'laquo': '\xc2\xab',
-    'larr': '\xe2\x86\x90',
-    'lArr': '\xe2\x87\x90',
-    'lceil': '\xef\xa3\xae',
-    'ldquo': '\xe2\x80\x9c',
-    'le': '\xe2\x89\xa4',
-    'lfloor': '\xef\xa3\xb0',
-    'lowast': '\xe2\x88\x97',
-    'loz': '\xe2\x97\x8a',
-    'lrm': '\xe2\x80\x8e',
-    'lsaquo': '\xe2\x80\xb9',
-    'lsquo': '\xe2\x80\x98',
-    'macr': '\xc2\xaf',
-    'mdash': '\xe2\x80\x94',
-    'micro': '\xc2\xb5',
-    'middot': '\xc2\xb7',
-    'minus': '\xe2\x88\x92',
-    'mu': '\xc2\xb5',
-    'Mu': '\xce\x9c',
-    'nabla': '\xe2\x88\x87',
-    'nbsp': '\xc2\xa0',
-    'ndash': '\xe2\x80\x93',
-    'ne': '\xe2\x89\xa0',
-    'ni': '\xe2\x88\x8b',
-    'notin': '\xe2\x88\x89',
-    'not': '\xc2\xac',
-    'nsub': '\xe2\x8a\x84',
-    'Ntilde': '\xc3\x91',
-    'ntilde': '\xc3\xb1',
-    'Nu': '\xce\x9d',
-    'nu': '\xce\xbd',
-    'Oacute': '\xc3\x93',
-    'oacute': '\xc3\xb3',
-    'Ocirc': '\xc3\x94',
-    'ocirc': '\xc3\xb4',
-    'OElig': '\xc5\x92',
-    'oelig': '\xc5\x93',
-    'Ograve': '\xc3\x92',
-    'ograve': '\xc3\xb2',
-    'oline': '\xef\xa3\xa5',
-    'omega': '\xcf\x89',
-    'Omega': '\xe2\x84\xa6',
-    'Omicron': '\xce\x9f',
-    'omicron': '\xce\xbf',
-    'oplus': '\xe2\x8a\x95',
-    'ordf': '\xc2\xaa',
-    'ordm': '\xc2\xba',
-    'or': '\xe2\x88\xa8',
-    'Oslash': '\xc3\x98',
-    'oslash': '\xc3\xb8',
-    'Otilde': '\xc3\x95',
-    'otilde': '\xc3\xb5',
-    'otimes': '\xe2\x8a\x97',
-    'Ouml': '\xc3\x96',
-    'ouml': '\xc3\xb6',
-    'para': '\xc2\xb6',
-    'part': '\xe2\x88\x82',
-    'permil': '\xe2\x80\xb0',
-    'perp': '\xe2\x8a\xa5',
-    'phis': '\xcf\x86',
-    'Phi': '\xce\xa6',
-    'phi': '\xcf\x95',
-    'piv': '\xcf\x96',
-    'Pi': '\xce\xa0',
-    'pi': '\xcf\x80',
-    'plusmn': '\xc2\xb1',
-    'pound': '\xc2\xa3',
-    'prime': '\xe2\x80\xb2',
-    'Prime': '\xe2\x80\xb3',
-    'prod': '\xe2\x88\x8f',
-    'prop': '\xe2\x88\x9d',
-    'Psi': '\xce\xa8',
-    'psi': '\xcf\x88',
-    'radic': '\xe2\x88\x9a',
-    'rang': '\xe2\x8c\xaa',
-    'raquo': '\xc2\xbb',
-    'rarr': '\xe2\x86\x92',
-    'rArr': '\xe2\x87\x92',
-    'rceil': '\xef\xa3\xb9',
-    'rdquo': '\xe2\x80\x9d',
-    'real': '\xe2\x84\x9c',
-    'reg': '\xc2\xae',
-    'rfloor': '\xef\xa3\xbb',
-    'Rho': '\xce\xa1',
-    'rho': '\xcf\x81',
-    'rlm': '\xe2\x80\x8f',
-    'rsaquo': '\xe2\x80\xba',
-    'rsquo': '\xe2\x80\x99',
-    'sbquo': '\xe2\x80\x9a',
-    'Scaron': '\xc5\xa0',
-    'scaron': '\xc5\xa1',
-    'sdot': '\xe2\x8b\x85',
-    'sect': '\xc2\xa7',
-    'shy': '\xc2\xad',
-    'sigmaf': '\xcf\x82',
-    'sigmav': '\xcf\x82',
-    'Sigma': '\xce\xa3',
-    'sigma': '\xcf\x83',
-    'sim': '\xe2\x88\xbc',
-    'spades': '\xe2\x99\xa0',
-    'sube': '\xe2\x8a\x86',
-    'sub': '\xe2\x8a\x82',
-    'sum': '\xe2\x88\x91',
-    'sup1': '\xc2\xb9',
-    'sup2': '\xc2\xb2',
-    'sup3': '\xc2\xb3',
-    'supe': '\xe2\x8a\x87',
-    'sup': '\xe2\x8a\x83',
-    'szlig': '\xc3\x9f',
-    'Tau': '\xce\xa4',
-    'tau': '\xcf\x84',
-    'there4': '\xe2\x88\xb4',
-    'thetasym': '\xcf\x91',
-    'thetav': '\xcf\x91',
-    'Theta': '\xce\x98',
-    'theta': '\xce\xb8',
-    'thinsp': '\xe2\x80\x89',
-    'THORN': '\xc3\x9e',
-    'thorn': '\xc3\xbe',
-    'tilde': '\xcb\x9c',
-    'times': '\xc3\x97',
-    'trade': '\xef\xa3\xaa',
-    'Uacute': '\xc3\x9a',
-    'uacute': '\xc3\xba',
-    'uarr': '\xe2\x86\x91',
-    'uArr': '\xe2\x87\x91',
-    'Ucirc': '\xc3\x9b',
-    'ucirc': '\xc3\xbb',
-    'Ugrave': '\xc3\x99',
-    'ugrave': '\xc3\xb9',
-    'uml': '\xc2\xa8',
-    'upsih': '\xcf\x92',
-    'Upsilon': '\xce\xa5',
-    'upsilon': '\xcf\x85',
-    'Uuml': '\xc3\x9c',
-    'uuml': '\xc3\xbc',
-    'weierp': '\xe2\x84\x98',
-    'Xi': '\xce\x9e',
-    'xi': '\xce\xbe',
-    'Yacute': '\xc3\x9d',
-    'yacute': '\xc3\xbd',
-    'yen': '\xc2\xa5',
-    'yuml': '\xc3\xbf',
-    'Yuml': '\xc5\xb8',
-    'Zeta': '\xce\x96',
-    'zeta': '\xce\xb6',
-    'zwj': '\xe2\x80\x8d',
-    'zwnj': '\xe2\x80\x8c',
-
+    'Aacute': b'\xc3\x81',
+    'aacute': b'\xc3\xa1',
+    'Acirc': b'\xc3\x82',
+    'acirc': b'\xc3\xa2',
+    'acute': b'\xc2\xb4',
+    'AElig': b'\xc3\x86',
+    'aelig': b'\xc3\xa6',
+    'Agrave': b'\xc3\x80',
+    'agrave': b'\xc3\xa0',
+    'alefsym': b'\xe2\x84\xb5',
+    'Alpha': b'\xce\x91',
+    'alpha': b'\xce\xb1',
+    'and': b'\xe2\x88\xa7',
+    'ang': b'\xe2\x88\xa0',
+    'Aring': b'\xc3\x85',
+    'aring': b'\xc3\xa5',
+    'asymp': b'\xe2\x89\x88',
+    'Atilde': b'\xc3\x83',
+    'atilde': b'\xc3\xa3',
+    'Auml': b'\xc3\x84',
+    'auml': b'\xc3\xa4',
+    'bdquo': b'\xe2\x80\x9e',
+    'Beta': b'\xce\x92',
+    'beta': b'\xce\xb2',
+    'brvbar': b'\xc2\xa6',
+    'bull': b'\xe2\x80\xa2',
+    'cap': b'\xe2\x88\xa9',
+    'Ccedil': b'\xc3\x87',
+    'ccedil': b'\xc3\xa7',
+    'cedil': b'\xc2\xb8',
+    'cent': b'\xc2\xa2',
+    'Chi': b'\xce\xa7',
+    'chi': b'\xcf\x87',
+    'circ': b'\xcb\x86',
+    'clubs': b'\xe2\x99\xa3',
+    'cong': b'\xe2\x89\x85',
+    'copy': b'\xc2\xa9',
+    'crarr': b'\xe2\x86\xb5',
+    'cup': b'\xe2\x88\xaa',
+    'curren': b'\xc2\xa4',
+    'dagger': b'\xe2\x80\xa0',
+    'Dagger': b'\xe2\x80\xa1',
+    'darr': b'\xe2\x86\x93',
+    'dArr': b'\xe2\x87\x93',
+    'deg': b'\xc2\xb0',
+    'delta': b'\xce\xb4',
+    'Delta': b'\xe2\x88\x86',
+    'diams': b'\xe2\x99\xa6',
+    'divide': b'\xc3\xb7',
+    'Eacute': b'\xc3\x89',
+    'eacute': b'\xc3\xa9',
+    'Ecirc': b'\xc3\x8a',
+    'ecirc': b'\xc3\xaa',
+    'Egrave': b'\xc3\x88',
+    'egrave': b'\xc3\xa8',
+    'empty': b'\xe2\x88\x85',
+    'emsp': b'\xe2\x80\x83',
+    'ensp': b'\xe2\x80\x82',
+    'Epsilon': b'\xce\x95',
+    'epsilon': b'\xce\xb5',
+    'epsiv': b'\xce\xb5',
+    'equiv': b'\xe2\x89\xa1',
+    'Eta': b'\xce\x97',
+    'eta': b'\xce\xb7',
+    'ETH': b'\xc3\x90',
+    'eth': b'\xc3\xb0',
+    'Euml': b'\xc3\x8b',
+    'euml': b'\xc3\xab',
+    'euro': b'\xe2\x82\xac',
+    'exist': b'\xe2\x88\x83',
+    'fnof': b'\xc6\x92',
+    'forall': b'\xe2\x88\x80',
+    'frac12': b'\xc2\xbd',
+    'frac14': b'\xc2\xbc',
+    'frac34': b'\xc2\xbe',
+    'frasl': b'\xe2\x81\x84',
+    'Gamma': b'\xce\x93',
+    'gamma': b'\xce\xb3',
+    'ge': b'\xe2\x89\xa5',
+    'harr': b'\xe2\x86\x94',
+    'hArr': b'\xe2\x87\x94',
+    'hearts': b'\xe2\x99\xa5',
+    'hellip': b'\xe2\x80\xa6',
+    'Iacute': b'\xc3\x8d',
+    'iacute': b'\xc3\xad',
+    'Icirc': b'\xc3\x8e',
+    'icirc': b'\xc3\xae',
+    'iexcl': b'\xc2\xa1',
+    'Igrave': b'\xc3\x8c',
+    'igrave': b'\xc3\xac',
+    'image': b'\xe2\x84\x91',
+    'infin': b'\xe2\x88\x9e',
+    'int': b'\xe2\x88\xab',
+    'Iota': b'\xce\x99',
+    'iota': b'\xce\xb9',
+    'iquest': b'\xc2\xbf',
+    'isin': b'\xe2\x88\x88',
+    'Iuml': b'\xc3\x8f',
+    'iuml': b'\xc3\xaf',
+    'Kappa': b'\xce\x9a',
+    'kappa': b'\xce\xba',
+    'Lambda': b'\xce\x9b',
+    'lambda': b'\xce\xbb',
+    'lang': b'\xe2\x8c\xa9',
+    'laquo': b'\xc2\xab',
+    'larr': b'\xe2\x86\x90',
+    'lArr': b'\xe2\x87\x90',
+    'lceil': b'\xef\xa3\xae',
+    'ldquo': b'\xe2\x80\x9c',
+    'le': b'\xe2\x89\xa4',
+    'lfloor': b'\xef\xa3\xb0',
+    'lowast': b'\xe2\x88\x97',
+    'loz': b'\xe2\x97\x8a',
+    'lrm': b'\xe2\x80\x8e',
+    'lsaquo': b'\xe2\x80\xb9',
+    'lsquo': b'\xe2\x80\x98',
+    'macr': b'\xc2\xaf',
+    'mdash': b'\xe2\x80\x94',
+    'micro': b'\xc2\xb5',
+    'middot': b'\xc2\xb7',
+    'minus': b'\xe2\x88\x92',
+    'mu': b'\xc2\xb5',
+    'Mu': b'\xce\x9c',
+    'nabla': b'\xe2\x88\x87',
+    'nbsp': b'\xc2\xa0',
+    'ndash': b'\xe2\x80\x93',
+    'ne': b'\xe2\x89\xa0',
+    'ni': b'\xe2\x88\x8b',
+    'notin': b'\xe2\x88\x89',
+    'not': b'\xc2\xac',
+    'nsub': b'\xe2\x8a\x84',
+    'Ntilde': b'\xc3\x91',
+    'ntilde': b'\xc3\xb1',
+    'Nu': b'\xce\x9d',
+    'nu': b'\xce\xbd',
+    'Oacute': b'\xc3\x93',
+    'oacute': b'\xc3\xb3',
+    'Ocirc': b'\xc3\x94',
+    'ocirc': b'\xc3\xb4',
+    'OElig': b'\xc5\x92',
+    'oelig': b'\xc5\x93',
+    'Ograve': b'\xc3\x92',
+    'ograve': b'\xc3\xb2',
+    'oline': b'\xef\xa3\xa5',
+    'omega': b'\xcf\x89',
+    'Omega': b'\xe2\x84\xa6',
+    'Omicron': b'\xce\x9f',
+    'omicron': b'\xce\xbf',
+    'oplus': b'\xe2\x8a\x95',
+    'ordf': b'\xc2\xaa',
+    'ordm': b'\xc2\xba',
+    'or': b'\xe2\x88\xa8',
+    'Oslash': b'\xc3\x98',
+    'oslash': b'\xc3\xb8',
+    'Otilde': b'\xc3\x95',
+    'otilde': b'\xc3\xb5',
+    'otimes': b'\xe2\x8a\x97',
+    'Ouml': b'\xc3\x96',
+    'ouml': b'\xc3\xb6',
+    'para': b'\xc2\xb6',
+    'part': b'\xe2\x88\x82',
+    'permil': b'\xe2\x80\xb0',
+    'perp': b'\xe2\x8a\xa5',
+    'phis': b'\xcf\x86',
+    'Phi': b'\xce\xa6',
+    'phi': b'\xcf\x95',
+    'piv': b'\xcf\x96',
+    'Pi': b'\xce\xa0',
+    'pi': b'\xcf\x80',
+    'plusmn': b'\xc2\xb1',
+    'pound': b'\xc2\xa3',
+    'prime': b'\xe2\x80\xb2',
+    'Prime': b'\xe2\x80\xb3',
+    'prod': b'\xe2\x88\x8f',
+    'prop': b'\xe2\x88\x9d',
+    'Psi': b'\xce\xa8',
+    'psi': b'\xcf\x88',
+    'radic': b'\xe2\x88\x9a',
+    'rang': b'\xe2\x8c\xaa',
+    'raquo': b'\xc2\xbb',
+    'rarr': b'\xe2\x86\x92',
+    'rArr': b'\xe2\x87\x92',
+    'rceil': b'\xef\xa3\xb9',
+    'rdquo': b'\xe2\x80\x9d',
+    'real': b'\xe2\x84\x9c',
+    'reg': b'\xc2\xae',
+    'rfloor': b'\xef\xa3\xbb',
+    'Rho': b'\xce\xa1',
+    'rho': b'\xcf\x81',
+    'rlm': b'\xe2\x80\x8f',
+    'rsaquo': b'\xe2\x80\xba',
+    'rsquo': b'\xe2\x80\x99',
+    'sbquo': b'\xe2\x80\x9a',
+    'Scaron': b'\xc5\xa0',
+    'scaron': b'\xc5\xa1',
+    'sdot': b'\xe2\x8b\x85',
+    'sect': b'\xc2\xa7',
+    'shy': b'\xc2\xad',
+    'sigmaf': b'\xcf\x82',
+    'sigmav': b'\xcf\x82',
+    'Sigma': b'\xce\xa3',
+    'sigma': b'\xcf\x83',
+    'sim': b'\xe2\x88\xbc',
+    'spades': b'\xe2\x99\xa0',
+    'sube': b'\xe2\x8a\x86',
+    'sub': b'\xe2\x8a\x82',
+    'sum': b'\xe2\x88\x91',
+    'sup1': b'\xc2\xb9',
+    'sup2': b'\xc2\xb2',
+    'sup3': b'\xc2\xb3',
+    'supe': b'\xe2\x8a\x87',
+    'sup': b'\xe2\x8a\x83',
+    'szlig': b'\xc3\x9f',
+    'Tau': b'\xce\xa4',
+    'tau': b'\xcf\x84',
+    'there4': b'\xe2\x88\xb4',
+    'thetasym': b'\xcf\x91',
+    'thetav': b'\xcf\x91',
+    'Theta': b'\xce\x98',
+    'theta': b'\xce\xb8',
+    'thinsp': b'\xe2\x80\x89',
+    'THORN': b'\xc3\x9e',
+    'thorn': b'\xc3\xbe',
+    'tilde': b'\xcb\x9c',
+    'times': b'\xc3\x97',
+    'trade': b'\xef\xa3\xaa',
+    'Uacute': b'\xc3\x9a',
+    'uacute': b'\xc3\xba',
+    'uarr': b'\xe2\x86\x91',
+    'uArr': b'\xe2\x87\x91',
+    'Ucirc': b'\xc3\x9b',
+    'ucirc': b'\xc3\xbb',
+    'Ugrave': b'\xc3\x99',
+    'ugrave': b'\xc3\xb9',
+    'uml': b'\xc2\xa8',
+    'upsih': b'\xcf\x92',
+    'Upsilon': b'\xce\xa5',
+    'upsilon': b'\xcf\x85',
+    'Uuml': b'\xc3\x9c',
+    'uuml': b'\xc3\xbc',
+    'weierp': b'\xe2\x84\x98',
+    'Xi': b'\xce\x9e',
+    'xi': b'\xce\xbe',
+    'Yacute': b'\xc3\x9d',
+    'yacute': b'\xc3\xbd',
+    'yen': b'\xc2\xa5',
+    'yuml': b'\xc3\xbf',
+    'Yuml': b'\xc5\xb8',
+    'Zeta': b'\xce\x96',
+    'zeta': b'\xce\xb6',
+    'zwj': b'\xe2\x80\x8d',
+    'zwnj': b'\xe2\x80\x8c',
     }
 
 #------------------------------------------------------------------------
@@ -509,10 +504,33 @@
             if not v:
                 u = '\0'
             else:
-                u = chr(v).encode('utf8')
+                if isPy3:
+                    u = chr(v)
+                else:
+                    u = chr(v).encode('utf8')
             _greek2Utf8[chr(k)] = u
     return ''.join(map(_greek2Utf8.__getitem__,data))
 
+
+def ugeCB(name):
+    '''undefined general entity handler: return the greeks entry (UTF-8 bytes) for a known entity name, otherwise the literal '&name;' text encoded as UTF-8'''
+    try:
+        return greeks[name]
+    except KeyError:    # only a missing name falls through; a bare except would also swallow KeyboardInterrupt/SystemExit
+        return ('&'+name+';').encode('utf8')
+
+try:
+    import pyRXPU
+    def makeParser():   # returns a newly constructed pyRXPU.Parser on every call
+        return pyRXPU.Parser(
+            ErrorOnUnquotedAttributeValues=0,
+            Validate=0,
+            srcName='Paragraph text',
+            ugeCB = ugeCB,  # the module-level ugeCB function is handed to the parser as its ugeCB callback
+            )
+except ImportError:     # no fallback parser exists, so importing this module fails when pyRXPU is missing
+    raise ImportError("pyRXPU not importable Alternate parser not yet implemented")
+
 #------------------------------------------------------------------
 # !!! NOTE !!! THIS TEXT IS NOW REPLICATED IN PARAGRAPH.PY !!!
 # The ParaFormatter will be able to format the following
@@ -555,7 +573,7 @@
 #
 # It will also be able to handle any MathML specified Greek characters.
 #------------------------------------------------------------------
-class ParaParser(xmllib.XMLParser):
+class ParaParser:
 
     #----------------------------------------------------------
     # First we will define all of the xml tag handler functions.
@@ -714,12 +732,6 @@
             return
         self.handle_data(chr(n).encode('utf8'))
 
-    def handle_entityref(self,name):
-        if name in greeks:
-            self.handle_data(greeks[name])
-        else:
-            xmllib.XMLParser.handle_entityref(self,name)
-
     def syntax_error(self,lineno,message):
         self._syntax_error(message)
 
@@ -995,7 +1007,6 @@
 
     def __init__(self,verbose=0):
         self.caseSensitive = 0
-        xmllib.XMLParser.__init__(self,verbose=verbose)
 
     def _iReset(self):
         self.fragList = []
@@ -1003,7 +1014,6 @@
 
     def _reset(self, style):
         '''reset the parser'''
-        xmllib.XMLParser.reset(self)
 
         # initialize list of string segments to empty
         self.errors = []
@@ -1063,24 +1073,17 @@
         If errors occur None will be returned and the
         self.errors holds a list of the error messages.
         """
-        # AR 20040612 - when we feed Unicode strings in, sgmlop
-        # tries to coerce to ASCII.  Must intercept, coerce to
-        # any 8-bit encoding which defines most of 256 points,
-        # and revert at end.  Yuk.  Preliminary step prior to
-        # removal of parser altogether.
-        enc = self._enc = 'utf8' #our legacy default
-        self._UNI = isinstance(text,str)
-        if self._UNI:
-            text = text.encode(enc)
-
         self._setup_for_parse(style)
-        # the xmlparser requires that all text be surrounded by xml
-        # tags, therefore we must throw some unused flags around the
-        # given string
         if not(len(text)>=6 and text[0]=='<' and _re_para.match(text)):
             text = "<para>"+text+"</para>"
-        self.feed(text)
-        self.close()    # force parsing to complete
+        try:
+            tt = makeParser()(text)
+        except Exception as exc:
+            if isPy3:
+                raise exc.__class__('paragraph text %s caused exception\n%s' % (ascii(text),str(exc))) from exc
+            else:
+                annotateException('paragraph text %s caused exception' % ascii(text))
+        self._tt_start(tt)
         return self._complete_parse()
 
     def _complete_parse(self):
@@ -1094,18 +1097,9 @@
         else:
             fragList = bFragList = None
 
-        if self._UNI:
-            #reconvert to unicode
-            if fragList:
-                for frag in fragList:
-                    frag.text = str(frag.text, self._enc)
-            if bFragList:
-                for frag in bFragList:
-                    frag.text = str(frag.text, self._enc)
-
         return style, fragList, bFragList
 
-    def _tt_parse(self,tt):
+    def _tt_handle(self,tt):
         tag = tt[0]
         try:
             start = getattr(self,'start_'+tag)
@@ -1120,11 +1114,14 @@
                 M[isinstance(c,(list,tuple))](c)
         end()
 
+    def _tt_start(self,tt):     # shared entry point: install the handler pair, then process tt
+        self._tt_handlers = self.handle_data,self._tt_handle    # (text handler, element handler); presumably indexed by isinstance(child,(list,tuple)) inside _tt_handle - TODO confirm
+        self._tt_handle(tt)
+
     def tt_parse(self,tt,style):
         '''parse from tupletree form'''
         self._setup_for_parse(style)
-        self._tt_handlers = self.handle_data,self._tt_parse
-        self._tt_parse(tt)
+        self._tt_start(tt)
         return self._complete_parse()
 
     def findSpanStyle(self,style):