author | robin |
Thu, 07 Aug 2014 15:56:46 +0100 | |
changeset 4129 | 1266500a2c22 |
parent 4116 | cf49463fc067 |
child 4130 | 49161ce56cad |
permissions | -rw-r--r-- |
3617 | 1 |
#Copyright ReportLab Europe Ltd. 2000-2012 |
494 | 2 |
#see license.txt for license details |
2332 | 3 |
#history http://www.reportlab.co.uk/cgi-bin/viewcvs.cgi/public/reportlab/trunk/reportlab/platypus/paraparser.py |
2321 | 4 |
__version__=''' $Id$ ''' |
3032 | 5 |
__doc__='''The parser used to process markup within paragraphs''' |
96 | 6 |
import string |
119 | 7 |
import re |
96 | 8 |
import sys |
9 |
import os |
|
10 |
import copy |
|
3187
2d5a6655556e
tableofcontents/paraparser: allow for format and offset parameters
rgbecker
parents:
3165
diff
changeset
|
11 |
import base64 |
3954
44dbe56eb858
first attempt at HTMLParser-based paraparser
Andy Robinson <andy@reportlab.com>
parents:
3937
diff
changeset
|
12 |
from pprint import pprint as pp |
44dbe56eb858
first attempt at HTMLParser-based paraparser
Andy Robinson <andy@reportlab.com>
parents:
3937
diff
changeset
|
13 |
|
3187
2d5a6655556e
tableofcontents/paraparser: allow for format and offset parameters
rgbecker
parents:
3165
diff
changeset
|
14 |
try: |
3721 | 15 |
import pickle as pickle |
3187
2d5a6655556e
tableofcontents/paraparser: allow for format and offset parameters
rgbecker
parents:
3165
diff
changeset
|
16 |
except: |
2d5a6655556e
tableofcontents/paraparser: allow for format and offset parameters
rgbecker
parents:
3165
diff
changeset
|
17 |
import pickle |
2693 | 18 |
import unicodedata |
279 | 19 |
import reportlab.lib.sequencer |
3954
44dbe56eb858
first attempt at HTMLParser-based paraparser
Andy Robinson <andy@reportlab.com>
parents:
3937
diff
changeset
|
20 |
|
518 | 21 |
from reportlab.lib.abag import ABag |
4004 | 22 |
from reportlab.lib.utils import ImageReader, isPy3, annotateException, encode_label, asUnicode, asBytes, uniChr |
248 | 23 |
from reportlab.lib.colors import toColor, white, black, red, Color |
96 | 24 |
from reportlab.lib.fonts import tt2ps, ps2tt |
119 | 25 |
from reportlab.lib.enums import TA_LEFT, TA_RIGHT, TA_CENTER, TA_JUSTIFY |
1940 | 26 |
from reportlab.lib.units import inch,mm,cm,pica |
3955 | 27 |
if isPy3: |
28 |
from html.parser import HTMLParser |
|
29 |
from html.entities import name2codepoint |
|
30 |
else: |
|
31 |
from HTMLParser import HTMLParser |
|
32 |
from htmlentitydefs import name2codepoint |
|
33 |
||
2410 | 34 |
_re_para = re.compile(r'^\s*<\s*para(?:\s+|>|/>)') |
96 | 35 |
|
1677
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
36 |
sizeDelta = 2 # amount to reduce font size by for super and sub script |
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
37 |
subFraction = 0.5 # fraction of font size that a sub script should be lowered |
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
38 |
superFraction = 0.5 # fraction of font size that a super script should be raised |
96 | 39 |
|
3165 | 40 |
DEFAULT_INDEX_NAME='_indexAdd' |
41 |
||
3434
3c14212cc997
platypus: preliminary working version of % height/width for <img> tag
rgbecker
parents:
3368
diff
changeset
|
42 |
def _convnum(s, unit=1, allowRelative=True): |
3c14212cc997
platypus: preliminary working version of % height/width for <img> tag
rgbecker
parents:
3368
diff
changeset
|
43 |
if s[0] in ('+','-') and allowRelative: |
2857
487dc2450eec
reprotlab: inline images horizontal positioning OK
rgbecker
parents:
2836
diff
changeset
|
44 |
try: |
487dc2450eec
reprotlab: inline images horizontal positioning OK
rgbecker
parents:
2836
diff
changeset
|
45 |
return ('relative',int(s)*unit) |
487dc2450eec
reprotlab: inline images horizontal positioning OK
rgbecker
parents:
2836
diff
changeset
|
46 |
except ValueError: |
487dc2450eec
reprotlab: inline images horizontal positioning OK
rgbecker
parents:
2836
diff
changeset
|
47 |
return ('relative',float(s)*unit) |
487dc2450eec
reprotlab: inline images horizontal positioning OK
rgbecker
parents:
2836
diff
changeset
|
48 |
else: |
487dc2450eec
reprotlab: inline images horizontal positioning OK
rgbecker
parents:
2836
diff
changeset
|
49 |
try: |
487dc2450eec
reprotlab: inline images horizontal positioning OK
rgbecker
parents:
2836
diff
changeset
|
50 |
return int(s)*unit |
487dc2450eec
reprotlab: inline images horizontal positioning OK
rgbecker
parents:
2836
diff
changeset
|
51 |
except ValueError: |
487dc2450eec
reprotlab: inline images horizontal positioning OK
rgbecker
parents:
2836
diff
changeset
|
52 |
return float(s)*unit |
487dc2450eec
reprotlab: inline images horizontal positioning OK
rgbecker
parents:
2836
diff
changeset
|
53 |
|
3434
3c14212cc997
platypus: preliminary working version of % height/width for <img> tag
rgbecker
parents:
3368
diff
changeset
|
54 |
def _num(s, unit=1, allowRelative=True):
    """Convert a string like '10cm' to an int or float (in points).

    The default unit is point, but optionally you can use other
    default units like mm.

    A single trailing unit suffix ('cm', 'in', 'pt', 'i', 'mm' or 'pica')
    overrides unit.  The remaining text is handed to _convnum, so the result
    is either a number or, when allowRelative is true and the text is
    signed, a ('relative', value) tuple.

    Only one suffix is consumed: stacked suffixes such as '10ptcm' are left
    for _convnum to reject instead of being silently stripped one by one.
    """
    suffixUnits = (
        ('cm', cm),
        ('in', inch),
        ('pt', 1),
        ('i', inch),
        ('mm', mm),
        ('pica', pica),
        )
    for suffix, suffixUnit in suffixUnits:
        if s.endswith(suffix):
            unit = suffixUnit
            s = s[:-len(suffix)]
            break   # exactly one unit suffix is allowed
    return _convnum(s,unit,allowRelative)
3c14212cc997
platypus: preliminary working version of % height/width for <img> tag
rgbecker
parents:
3368
diff
changeset
|
78 |
|
3c14212cc997
platypus: preliminary working version of % height/width for <img> tag
rgbecker
parents:
3368
diff
changeset
|
79 |
def _numpct(s,unit=1,allowRelative=False):
    """Convert s to a length, or to a _PCT when it ends with '%'.

    For a percentage the text before the '%' is converted by _convnum and
    wrapped in _PCT; unit is not applied in that case.  Anything else is
    delegated to _num with the given unit.

    NOTE(review): with allowRelative true a signed percentage makes _convnum
    return a tuple which is passed straight to _PCT - confirm callers never
    do that.
    """
    if not s.endswith('%'):
        return _num(s,unit,allowRelative)
    percentage = _convnum(s[:-1],allowRelative=allowRelative)
    return _PCT(percentage)
2857
487dc2450eec
reprotlab: inline images horizontal positioning OK
rgbecker
parents:
2836
diff
changeset
|
84 |
|
487dc2450eec
reprotlab: inline images horizontal positioning OK
rgbecker
parents:
2836
diff
changeset
|
85 |
class _PCT: |
487dc2450eec
reprotlab: inline images horizontal positioning OK
rgbecker
parents:
2836
diff
changeset
|
86 |
def __init__(self,v): |
487dc2450eec
reprotlab: inline images horizontal positioning OK
rgbecker
parents:
2836
diff
changeset
|
87 |
self._value = v*0.01 |
487dc2450eec
reprotlab: inline images horizontal positioning OK
rgbecker
parents:
2836
diff
changeset
|
88 |
|
487dc2450eec
reprotlab: inline images horizontal positioning OK
rgbecker
parents:
2836
diff
changeset
|
89 |
def normalizedValue(self,normalizer): |
3434
3c14212cc997
platypus: preliminary working version of % height/width for <img> tag
rgbecker
parents:
3368
diff
changeset
|
90 |
normalizer = normalizer or getattr(self,'_normalizer') |
2857
487dc2450eec
reprotlab: inline images horizontal positioning OK
rgbecker
parents:
2836
diff
changeset
|
91 |
return normalizer*self._value |
487dc2450eec
reprotlab: inline images horizontal positioning OK
rgbecker
parents:
2836
diff
changeset
|
92 |
|
487dc2450eec
reprotlab: inline images horizontal positioning OK
rgbecker
parents:
2836
diff
changeset
|
93 |
def _valignpc(s): |
487dc2450eec
reprotlab: inline images horizontal positioning OK
rgbecker
parents:
2836
diff
changeset
|
94 |
s = s.lower() |
487dc2450eec
reprotlab: inline images horizontal positioning OK
rgbecker
parents:
2836
diff
changeset
|
95 |
if s in ('baseline','sub','super','top','text-top','middle','bottom','text-bottom'): |
487dc2450eec
reprotlab: inline images horizontal positioning OK
rgbecker
parents:
2836
diff
changeset
|
96 |
return s |
487dc2450eec
reprotlab: inline images horizontal positioning OK
rgbecker
parents:
2836
diff
changeset
|
97 |
if s.endswith('%'): |
487dc2450eec
reprotlab: inline images horizontal positioning OK
rgbecker
parents:
2836
diff
changeset
|
98 |
n = _convnum(s[:-1]) |
487dc2450eec
reprotlab: inline images horizontal positioning OK
rgbecker
parents:
2836
diff
changeset
|
99 |
if isinstance(n,tuple): |
487dc2450eec
reprotlab: inline images horizontal positioning OK
rgbecker
parents:
2836
diff
changeset
|
100 |
n = n[1] |
487dc2450eec
reprotlab: inline images horizontal positioning OK
rgbecker
parents:
2836
diff
changeset
|
101 |
return _PCT(n) |
487dc2450eec
reprotlab: inline images horizontal positioning OK
rgbecker
parents:
2836
diff
changeset
|
102 |
n = _num(s) |
487dc2450eec
reprotlab: inline images horizontal positioning OK
rgbecker
parents:
2836
diff
changeset
|
103 |
if isinstance(n,tuple): |
487dc2450eec
reprotlab: inline images horizontal positioning OK
rgbecker
parents:
2836
diff
changeset
|
104 |
n = n[1] |
487dc2450eec
reprotlab: inline images horizontal positioning OK
rgbecker
parents:
2836
diff
changeset
|
105 |
return n |
119 | 106 |
|
2836 | 107 |
def _autoLeading(x): |
108 |
x = x.lower() |
|
109 |
if x in ('','min','max','off'): |
|
110 |
return x |
|
111 |
raise ValueError('Invalid autoLeading=%r' % x ) |
|
112 |
||
119 | 113 |
def _align(s): |
3731 | 114 |
s = s.lower() |
1677
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
115 |
if s=='left': return TA_LEFT |
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
116 |
elif s=='right': return TA_RIGHT |
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
117 |
elif s=='justify': return TA_JUSTIFY |
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
118 |
elif s in ('centre','center'): return TA_CENTER |
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
119 |
else: raise ValueError |
119 | 120 |
|
121 |
#things which are valid para attributes
#each entry maps a tag attribute to (style attribute name, converter or None)
_paraAttrMap = {
    # font
    'font': ('fontName', None),
    'face': ('fontName', None),
    'fontsize': ('fontSize', _num),
    'size': ('fontSize', _num),
    # line spacing
    'leading': ('leading', _num),
    'autoleading': ('autoLeading', _autoLeading),
    # indents and alignment
    'lindent': ('leftIndent', _num),
    'rindent': ('rightIndent', _num),
    'findent': ('firstLineIndent', _num),
    'align': ('alignment', _align),
    # space before/after
    'spaceb': ('spaceBefore', _num),
    'spacea': ('spaceAfter', _num),
    # bullet
    'bfont': ('bulletFontName', None),
    'bfontsize': ('bulletFontSize', _num),
    'boffsety': ('bulletOffsetY', _num),
    'bindent': ('bulletIndent', _num),
    'bcolor': ('bulletColor', toColor),
    # colours
    'color': ('textColor', toColor),
    'backcolor': ('backColor', toColor),
    'bgcolor': ('backColor', toColor),
    'bg': ('backColor', toColor),
    'fg': ('textColor', toColor),
    }
|
119 | 144 |
|
250 | 145 |
#things which are valid bullet attributes
#each entry maps a tag attribute to (style attribute name, converter or None)
_bulletAttrMap = {
    # font
    'font': ('bulletFontName', None),
    'face': ('bulletFontName', None),
    'size': ('bulletFontSize', _num),
    'fontsize': ('bulletFontSize', _num),
    # position
    'offsety': ('bulletOffsetY', _num),
    'indent': ('bulletIndent', _num),
    # colour
    'color': ('bulletColor', toColor),
    'fg': ('bulletColor', toColor),
    }
|
250 | 155 |
|
119 | 156 |
#things which are valid font attributes
#each entry maps a tag attribute to (style attribute name, converter or None)
_fontAttrMap = {
    'size': ('fontSize', _num),
    'face': ('fontName', None),
    'name': ('fontName', None),
    'fg': ('textColor', toColor),
    'color': ('textColor', toColor),
    'backcolor': ('backColor', toColor),
    'bgcolor': ('backColor', toColor),
    }
3552
20ecbcc53c15
paraparser.py add support for <span style=stylename>
rgbecker
parents:
3440
diff
changeset
|
165 |
#things which are valid span attributes
#each entry maps a tag attribute to (style attribute name, converter or None)
_spanAttrMap = {
    'size': ('fontSize', _num),
    'face': ('fontName', None),
    'name': ('fontName', None),
    'fg': ('textColor', toColor),
    'color': ('textColor', toColor),
    'backcolor': ('backColor', toColor),
    'bgcolor': ('backColor', toColor),
    'style': ('style', None),
    }
2575 | 175 |
#things which are valid link attributes
#each entry maps a tag attribute to (style attribute name, converter or None)
_linkAttrMap = {
    # appearance
    'size': ('fontSize', _num),
    'face': ('fontName', None),
    'name': ('fontName', None),
    'fg': ('textColor', toColor),
    'color': ('textColor', toColor),
    'backcolor': ('backColor', toColor),
    'bgcolor': ('backColor', toColor),
    # alternative spellings of the link target
    'dest': ('link', None),
    'destination': ('link', None),
    'target': ('link', None),
    'href': ('link', None),
    }
2744 | 188 |
#things which are valid anchor attributes
#each entry maps a tag attribute to (attribute name, converter or None)
_anchorAttrMap = {
    'fontSize': ('fontSize', _num),
    'fontName': ('fontName', None),
    'name': ('name', None),
    'fg': ('textColor', toColor),
    'color': ('textColor', toColor),
    'backcolor': ('backColor', toColor),
    'bgcolor': ('backColor', toColor),
    'href': ('href', None),
    }
|
2857
487dc2450eec
reprotlab: inline images horizontal positioning OK
rgbecker
parents:
2836
diff
changeset
|
197 |
#things which are valid img attributes
#each entry maps a tag attribute to (attribute name, converter or None)
_imgAttrMap = {
    'src': ('src', None),
    # width/height may be lengths or percentages
    'width': ('width', _numpct),
    'height': ('height', _numpct),
    'valign': ('valign', _valignpc),
    }
3165 | 203 |
_indexAttrMap = { |
204 |
'name': ('name',None), |
|
205 |
'item': ('item',None), |
|
3187
2d5a6655556e
tableofcontents/paraparser: allow for format and offset parameters
rgbecker
parents:
3165
diff
changeset
|
206 |
'offset': ('offset',None), |
2d5a6655556e
tableofcontents/paraparser: allow for format and offset parameters
rgbecker
parents:
3165
diff
changeset
|
207 |
'format': ('format',None), |
3165 | 208 |
} |
119 | 209 |
|
210 |
def _addAttributeNames(m): |
|
3721 | 211 |
K = list(m.keys()) |
1677
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
212 |
for k in K: |
1944 | 213 |
n = m[k][0] |
3326 | 214 |
if n not in m: m[n] = m[k] |
3731 | 215 |
n = n.lower() |
3326 | 216 |
if n not in m: m[n] = m[k] |
119 | 217 |
|
218 |
# let each of these maps also be addressed by its target attribute names
for _attrMapToExtend in (
        _paraAttrMap,
        _fontAttrMap,
        _spanAttrMap,
        _bulletAttrMap,
        _anchorAttrMap,
        _linkAttrMap,
        ):
    _addAttributeNames(_attrMapToExtend)
del _attrMapToExtend    # do not leave the loop variable in the module namespace
|
119 | 224 |
|
225 |
def _applyAttributes(obj, attr): |
|
3723
99aa837b6703
second stage of port to Python 3.3; working hello world
rptlab
parents:
3721
diff
changeset
|
226 |
for k, v in attr.items(): |
3787
8f9be6d6f75c
convert paraparser to use pyRXP directly (or any TT producer)
robin
parents:
3731
diff
changeset
|
227 |
if isinstance(v,(list,tuple)) and v[0]=='relative': |
1677
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
228 |
if hasattr(obj, k): |
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
229 |
v = v[1]+getattr(obj,k) |
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
230 |
else: |
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
231 |
v = v[1] |
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
232 |
setattr(obj,k,v) |
102 | 233 |
|
1931
784fce255e2d
Added in more special entities as suggested by Christoph Zwerschke
rgbecker
parents:
1736
diff
changeset
|
234 |
#Named character entities intended to be supported from the special font |
2200
be0cfccc662a
Fixed up tabs and whitespace in all source files
andy_robinson
parents:
2053
diff
changeset
|
235 |
#with additions suggested by Christoph Zwerschke who also suggested the |
1931
784fce255e2d
Added in more special entities as suggested by Christoph Zwerschke
rgbecker
parents:
1736
diff
changeset
|
236 |
#numeric entity names that follow. |
96 | 237 |
greeks = { |
3957
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
238 |
'Aacute': u'\xc1', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
239 |
'aacute': u'\xe1', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
240 |
'Acirc': u'\xc2', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
241 |
'acirc': u'\xe2', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
242 |
'acute': u'\xb4', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
243 |
'AElig': u'\xc6', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
244 |
'aelig': u'\xe6', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
245 |
'Agrave': u'\xc0', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
246 |
'agrave': u'\xe0', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
247 |
'alefsym': u'\u2135', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
248 |
'Alpha': u'\u0391', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
249 |
'alpha': u'\u03b1', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
250 |
'and': u'\u2227', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
251 |
'ang': u'\u2220', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
252 |
'Aring': u'\xc5', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
253 |
'aring': u'\xe5', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
254 |
'asymp': u'\u2248', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
255 |
'Atilde': u'\xc3', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
256 |
'atilde': u'\xe3', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
257 |
'Auml': u'\xc4', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
258 |
'auml': u'\xe4', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
259 |
'bdquo': u'\u201e', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
260 |
'Beta': u'\u0392', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
261 |
'beta': u'\u03b2', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
262 |
'brvbar': u'\xa6', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
263 |
'bull': u'\u2022', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
264 |
'cap': u'\u2229', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
265 |
'Ccedil': u'\xc7', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
266 |
'ccedil': u'\xe7', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
267 |
'cedil': u'\xb8', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
268 |
'cent': u'\xa2', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
269 |
'Chi': u'\u03a7', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
270 |
'chi': u'\u03c7', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
271 |
'circ': u'\u02c6', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
272 |
'clubs': u'\u2663', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
273 |
'cong': u'\u2245', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
274 |
'copy': u'\xa9', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
275 |
'crarr': u'\u21b5', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
276 |
'cup': u'\u222a', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
277 |
'curren': u'\xa4', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
278 |
'dagger': u'\u2020', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
279 |
'Dagger': u'\u2021', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
280 |
'darr': u'\u2193', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
281 |
'dArr': u'\u21d3', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
282 |
'deg': u'\xb0', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
283 |
'delta': u'\u03b4', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
284 |
'Delta': u'\u2206', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
285 |
'diams': u'\u2666', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
286 |
'divide': u'\xf7', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
287 |
'Eacute': u'\xc9', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
288 |
'eacute': u'\xe9', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
289 |
'Ecirc': u'\xca', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
290 |
'ecirc': u'\xea', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
291 |
'Egrave': u'\xc8', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
292 |
'egrave': u'\xe8', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
293 |
'empty': u'\u2205', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
294 |
'emsp': u'\u2003', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
295 |
'ensp': u'\u2002', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
296 |
'Epsilon': u'\u0395', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
297 |
'epsilon': u'\u03b5', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
298 |
'epsiv': u'\u03b5', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
299 |
'equiv': u'\u2261', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
300 |
'Eta': u'\u0397', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
301 |
'eta': u'\u03b7', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
302 |
'ETH': u'\xd0', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
303 |
'eth': u'\xf0', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
304 |
'Euml': u'\xcb', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
305 |
'euml': u'\xeb', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
306 |
'euro': u'\u20ac', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
307 |
'exist': u'\u2203', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
308 |
'fnof': u'\u0192', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
309 |
'forall': u'\u2200', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
310 |
'frac12': u'\xbd', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
311 |
'frac14': u'\xbc', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
312 |
'frac34': u'\xbe', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
313 |
'frasl': u'\u2044', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
314 |
'Gamma': u'\u0393', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
315 |
'gamma': u'\u03b3', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
316 |
'ge': u'\u2265', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
317 |
'harr': u'\u2194', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
318 |
'hArr': u'\u21d4', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
319 |
'hearts': u'\u2665', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
320 |
'hellip': u'\u2026', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
321 |
'Iacute': u'\xcd', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
322 |
'iacute': u'\xed', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
323 |
'Icirc': u'\xce', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
324 |
'icirc': u'\xee', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
325 |
'iexcl': u'\xa1', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
326 |
'Igrave': u'\xcc', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
327 |
'igrave': u'\xec', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
328 |
'image': u'\u2111', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
329 |
'infin': u'\u221e', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
330 |
'int': u'\u222b', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
331 |
'Iota': u'\u0399', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
332 |
'iota': u'\u03b9', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
333 |
'iquest': u'\xbf', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
334 |
'isin': u'\u2208', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
335 |
'Iuml': u'\xcf', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
336 |
'iuml': u'\xef', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
337 |
'Kappa': u'\u039a', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
338 |
'kappa': u'\u03ba', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
339 |
'Lambda': u'\u039b', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
340 |
'lambda': u'\u03bb', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
341 |
'lang': u'\u2329', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
342 |
'laquo': u'\xab', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
343 |
'larr': u'\u2190', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
344 |
'lArr': u'\u21d0', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
345 |
'lceil': u'\uf8ee', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
346 |
'ldquo': u'\u201c', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
347 |
'le': u'\u2264', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
348 |
'lfloor': u'\uf8f0', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
349 |
'lowast': u'\u2217', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
350 |
'loz': u'\u25ca', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
351 |
'lrm': u'\u200e', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
352 |
'lsaquo': u'\u2039', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
353 |
'lsquo': u'\u2018', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
354 |
'macr': u'\xaf', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
355 |
'mdash': u'\u2014', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
356 |
'micro': u'\xb5', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
357 |
'middot': u'\xb7', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
358 |
'minus': u'\u2212', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
359 |
'mu': u'\xb5', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
360 |
'Mu': u'\u039c', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
361 |
'nabla': u'\u2207', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
362 |
'nbsp': u'\xa0', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
363 |
'ndash': u'\u2013', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
364 |
'ne': u'\u2260', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
365 |
'ni': u'\u220b', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
366 |
'notin': u'\u2209', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
367 |
'not': u'\xac', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
368 |
'nsub': u'\u2284', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
369 |
'Ntilde': u'\xd1', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
370 |
'ntilde': u'\xf1', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
371 |
'Nu': u'\u039d', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
372 |
'nu': u'\u03bd', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
373 |
'Oacute': u'\xd3', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
374 |
'oacute': u'\xf3', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
375 |
'Ocirc': u'\xd4', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
376 |
'ocirc': u'\xf4', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
377 |
'OElig': u'\u0152', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
378 |
'oelig': u'\u0153', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
379 |
'Ograve': u'\xd2', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
380 |
'ograve': u'\xf2', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
381 |
'oline': u'\uf8e5', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
382 |
'omega': u'\u03c9', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
383 |
'Omega': u'\u2126', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
384 |
'Omicron': u'\u039f', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
385 |
'omicron': u'\u03bf', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
386 |
'oplus': u'\u2295', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
387 |
'ordf': u'\xaa', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
388 |
'ordm': u'\xba', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
389 |
'or': u'\u2228', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
390 |
'Oslash': u'\xd8', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
391 |
'oslash': u'\xf8', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
392 |
'Otilde': u'\xd5', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
393 |
'otilde': u'\xf5', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
394 |
'otimes': u'\u2297', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
395 |
'Ouml': u'\xd6', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
396 |
'ouml': u'\xf6', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
397 |
'para': u'\xb6', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
398 |
'part': u'\u2202', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
399 |
'permil': u'\u2030', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
400 |
'perp': u'\u22a5', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
401 |
'phis': u'\u03c6', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
402 |
'Phi': u'\u03a6', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
403 |
'phi': u'\u03d5', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
404 |
'piv': u'\u03d6', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
405 |
'Pi': u'\u03a0', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
406 |
'pi': u'\u03c0', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
407 |
'plusmn': u'\xb1', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
408 |
'pound': u'\xa3', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
409 |
'prime': u'\u2032', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
410 |
'Prime': u'\u2033', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
411 |
'prod': u'\u220f', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
412 |
'prop': u'\u221d', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
413 |
'Psi': u'\u03a8', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
414 |
'psi': u'\u03c8', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
415 |
'radic': u'\u221a', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
416 |
'rang': u'\u232a', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
417 |
'raquo': u'\xbb', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
418 |
'rarr': u'\u2192', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
419 |
'rArr': u'\u21d2', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
420 |
'rceil': u'\uf8f9', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
421 |
'rdquo': u'\u201d', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
422 |
'real': u'\u211c', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
423 |
'reg': u'\xae', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
424 |
'rfloor': u'\uf8fb', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
425 |
'Rho': u'\u03a1', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
426 |
'rho': u'\u03c1', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
427 |
'rlm': u'\u200f', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
428 |
'rsaquo': u'\u203a', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
429 |
'rsquo': u'\u2019', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
430 |
'sbquo': u'\u201a', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
431 |
'Scaron': u'\u0160', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
432 |
'scaron': u'\u0161', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
433 |
'sdot': u'\u22c5', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
434 |
'sect': u'\xa7', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
435 |
'shy': u'\xad', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
436 |
'sigmaf': u'\u03c2', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
437 |
'sigmav': u'\u03c2', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
438 |
'Sigma': u'\u03a3', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
439 |
'sigma': u'\u03c3', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
440 |
'sim': u'\u223c', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
441 |
'spades': u'\u2660', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
442 |
'sube': u'\u2286', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
443 |
'sub': u'\u2282', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
444 |
'sum': u'\u2211', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
445 |
'sup1': u'\xb9', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
446 |
'sup2': u'\xb2', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
447 |
'sup3': u'\xb3', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
448 |
'supe': u'\u2287', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
449 |
'sup': u'\u2283', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
450 |
'szlig': u'\xdf', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
451 |
'Tau': u'\u03a4', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
452 |
'tau': u'\u03c4', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
453 |
'there4': u'\u2234', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
454 |
'thetasym': u'\u03d1', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
455 |
'thetav': u'\u03d1', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
456 |
'Theta': u'\u0398', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
457 |
'theta': u'\u03b8', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
458 |
'thinsp': u'\u2009', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
459 |
'THORN': u'\xde', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
460 |
'thorn': u'\xfe', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
461 |
'tilde': u'\u02dc', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
462 |
'times': u'\xd7', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
463 |
'trade': u'\uf8ea', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
464 |
'Uacute': u'\xda', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
465 |
'uacute': u'\xfa', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
466 |
'uarr': u'\u2191', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
467 |
'uArr': u'\u21d1', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
468 |
'Ucirc': u'\xdb', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
469 |
'ucirc': u'\xfb', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
470 |
'Ugrave': u'\xd9', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
471 |
'ugrave': u'\xf9', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
472 |
'uml': u'\xa8', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
473 |
'upsih': u'\u03d2', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
474 |
'Upsilon': u'\u03a5', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
475 |
'upsilon': u'\u03c5', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
476 |
'Uuml': u'\xdc', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
477 |
'uuml': u'\xfc', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
478 |
'weierp': u'\u2118', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
479 |
'Xi': u'\u039e', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
480 |
'xi': u'\u03be', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
481 |
'Yacute': u'\xdd', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
482 |
'yacute': u'\xfd', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
483 |
'yen': u'\xa5', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
484 |
'yuml': u'\xff', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
485 |
'Yuml': u'\u0178', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
486 |
'Zeta': u'\u0396', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
487 |
'zeta': u'\u03b6', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
488 |
'zwj': u'\u200d', |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
489 |
'zwnj': u'\u200c', |
1931
784fce255e2d
Added in more special entities as suggested by Christoph Zwerschke
rgbecker
parents:
1736
diff
changeset
|
490 |
} |
96 | 491 |
|
4004 | 492 |
# Build the entity-name -> character table.
# Start from name2codepoint, converting each code point to its character
# with uniChr.
known_entities = {name: uniChr(codepoint) for name, codepoint in name2codepoint.items()}

# Add the names from the local greeks table, but only where the name is not
# already defined above (existing entries take precedence).
known_entities.update(
    {name: char for name, char in greeks.items() if name not in known_entities}
)

# Register every entity a second time under the "other" string type for its
# key: bytes keys on Python 3, unicode keys on Python 2 -- presumably so that
# lookups succeed whichever type the caller supplies.  The alias dict is
# built completely before update() runs, so known_entities is never mutated
# while it is being iterated.
_entityKeyAlias = asBytes if isPy3 else asUnicode
known_entities.update(
    {_entityKeyAlias(name): char for name, char in known_entities.items()}
)
del _entityKeyAlias
|
3957
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
501 |
|
96 | 502 |
#------------------------------------------------------------------------ |
518 | 503 |
class ParaFrag(ABag):
    """Intermediate representation of a string segment.

    ParaFrag instances hold the pieces of text, together with their
    formatting state, as they are being parsed by the ParaParser.

    Attributes named by the original documentation:
        fontname, fontSize, rise, textColor, cbDefn
    """
96 | 508 |
|
2584
0fed2bd8ef90
reportlab: fixed <greek> added <unichar [name=..|code=../> to paragraph
rgbecker
parents:
2575
diff
changeset
|
509 |
_greek2Utf8=None
def _greekConvert(data):
    """Translate each character of data through the Symbol decoding table.

    The table is built on first use from the 'symbol' codec data in
    reportlab.pdfbase.rl_codecs and cached in the module global _greek2Utf8.
    Codes with no mapping become '\\0'.  A character that is not a key of the
    table raises KeyError.
    """
    global _greek2Utf8
    if not _greek2Utf8:
        import codecs
        from reportlab.pdfbase.rl_codecs import RL_Codecs
        # decoding map: nothing for the control codes, identity for 32..255,
        # then the symbol codec's own entries on top
        decoding = dict.fromkeys(range(0,32))
        decoding.update(codecs.make_identity_dict(range(32,256)))
        decoding.update(RL_Codecs._RL_Codecs__rl_codecs_data['symbol'][0])
        table = _greek2Utf8 = {}
        for code, target in decoding.items():
            if target:
                glyph = chr(target) if isPy3 else unichr(target).encode('utf8')
            else:
                glyph = '\0'
            table[chr(code)] = glyph
    return ''.join([_greek2Utf8[ch] for ch in data])
0fed2bd8ef90
reportlab: fixed <greek> added <unichar [name=..|code=../> to paragraph
rgbecker
parents:
2575
diff
changeset
|
531 |
|
96 | 532 |
#------------------------------------------------------------------ |
267
52a348f6c4c3
noted replication of XML markup comment between paraparser.py and paragraph.py
aaron_watters
parents:
266
diff
changeset
|
533 |
# !!! NOTE !!! THIS TEXT IS NOW REPLICATED IN PARAGRAPH.PY !!! |
2584
0fed2bd8ef90
reportlab: fixed <greek> added <unichar [name=..|code=../> to paragraph
rgbecker
parents:
2575
diff
changeset
|
534 |
# The ParaFormatter will be able to format the following |
96 | 535 |
# tags: |
1677
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
536 |
# < b > < /b > - bold |
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
537 |
# < i > < /i > - italics |
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
538 |
# < u > < /u > - underline |
2644 | 539 |
# < strike > < /strike > - strike through |
1677
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
540 |
# < super > < /super > - superscript |
1736 | 541 |
# < sup > < /sup > - superscript |
1677
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
542 |
# < sub > < /sub > - subscript |
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
543 |
# <font name=fontfamily/fontname color=colorname size=float> |
3552
20ecbcc53c15
paraparser.py add support for <span style=stylename>
rgbecker
parents:
3440
diff
changeset
|
544 |
# <span name=fontfamily/fontname color=colorname backcolor=colorname size=float style=stylename> |
1677
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
545 |
# < bullet > </bullet> - bullet text (at head of para only) |
3165 | 546 |
# <onDraw name=callable label="a label"/> |
547 |
# <index [name="callablecanvasattribute"] label="a label"/> |
|
2670 | 548 |
# <link>link text</link> |
549 |
# attributes of links |
|
550 |
# size/fontSize=num |
|
551 |
# name/face/fontName=name |
|
552 |
# fg/textColor/color=color |
|
553 |
# backcolor/backColor/bgcolor=color |
|
554 |
# dest/destination/target/href/link=target |
|
2745 | 555 |
# <a>anchor text</a> |
2744 | 556 |
# attributes of anchors |
557 |
# fontSize=num |
|
558 |
# fontName=name |
|
559 |
# fg/textColor/color=color |
|
560 |
# backcolor/backColor/bgcolor=color |
|
561 |
# href=href |
|
562 |
# <a name="anchorpoint"/> |
|
2584
0fed2bd8ef90
reportlab: fixed <greek> added <unichar [name=..|code=../> to paragraph
rgbecker
parents:
2575
diff
changeset
|
563 |
# <unichar name="unicode character name"/> |
0fed2bd8ef90
reportlab: fixed <greek> added <unichar [name=..|code=../> to paragraph
rgbecker
parents:
2575
diff
changeset
|
564 |
# <unichar value="unicode code point"/> |
2857
487dc2450eec
reprotlab: inline images horizontal positioning OK
rgbecker
parents:
2836
diff
changeset
|
565 |
# <img src="path" width="1in" height="1in" valign="bottom"/> |
3440
739ddbe7feab
paaraparser/paragraph.py: add info re percentage in <img> for idea contributed by Roberto Alsina <ralsina@netmanagers.com.ar>
rgbecker
parents:
3434
diff
changeset
|
566 |
# width="w%" --> fontSize*w/100 idea from Roberto Alsina |
739ddbe7feab
paaraparser/paragraph.py: add info re percentage in <img> for idea contributed by Roberto Alsina <ralsina@netmanagers.com.ar>
rgbecker
parents:
3434
diff
changeset
|
567 |
# height="h%" --> linewidth*h/100 <ralsina@netmanagers.com.ar> |
2584
0fed2bd8ef90
reportlab: fixed <greek> added <unichar [name=..|code=../> to paragraph
rgbecker
parents:
2575
diff
changeset
|
568 |
# <greek> - </greek> |
1683 | 569 |
# |
1677
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
570 |
# The whole may be surrounded by <para> </para> tags |
119 | 571 |
# |
96 | 572 |
# It will also be able to handle any MathML specified Greek characters. |
573 |
#------------------------------------------------------------------ |
|
3954
44dbe56eb858
first attempt at HTMLParser-based paraparser
Andy Robinson <andy@reportlab.com>
parents:
3937
diff
changeset
|
574 |
class ParaParser(HTMLParser): |
96 | 575 |
|
1677
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
576 |
#---------------------------------------------------------- |
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
577 |
# First we will define all of the xml tag handler functions. |
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
578 |
# |
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
579 |
# start_<tag>(attributes) |
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
580 |
# end_<tag>() |
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
581 |
# |
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
582 |
# While parsing the xml ParaFormatter will call these |
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
583 |
# functions to handle the string formatting tags. |
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
584 |
# At the start of each tag the corresponding field will |
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
585 |
# be set to 1 and at the end tag the corresponding field will |
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
586 |
# be set to 0. Then when handle_data is called the options |
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
587 |
# for that data will be apparent from the current settings. |
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
588 |
#---------------------------------------------------------- |
96 | 589 |
|
1940 | 590 |
def __getattr__( self, attrName ): |
591 |
"""This way we can handle <TAG> the same way as <tag> (ignoring case).""" |
|
2369 | 592 |
if attrName!=attrName.lower() and attrName!="caseSensitive" and not self.caseSensitive and \ |
593 |
(attrName.startswith("start_") or attrName.startswith("end_")): |
|
594 |
return getattr(self,attrName.lower()) |
|
3721 | 595 |
raise AttributeError(attrName) |
1940 | 596 |
|
1677
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
597 |
#### bold |
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
598 |
def start_b(self, attributes):
    """Opening <b>: push a 'b' entry with bold switched on (attributes are ignored)."""
    self._push('b', bold=1)
96 | 600 |
|
1677
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
601 |
def end_b(self):
    """Closing </b>: pop the 'b' entry."""
    self._pop('b')
96 | 603 |
|
1940 | 604 |
def start_strong(self, attributes):
    """Opening <strong>: push a 'strong' entry with bold switched on (attributes are ignored)."""
    self._push('strong', bold=1)
1940 | 606 |
|
607 |
def end_strong(self):
    """Closing </strong>: pop the 'strong' entry."""
    self._pop('strong')
1940 | 609 |
|
1677
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
610 |
#### italics |
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
611 |
def start_i(self, attributes):
    """Opening <i>: push an 'i' entry with italic switched on (attributes are ignored)."""
    self._push('i', italic=1)
96 | 613 |
|
1677
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
614 |
def end_i(self):
    """Closing </i>: pop the 'i' entry."""
    self._pop('i')
96 | 616 |
|
1940 | 617 |
def start_em(self, attributes):
    """Opening <em>: push an 'em' entry with italic switched on (attributes are ignored)."""
    self._push('em', italic=1)
1940 | 619 |
|
620 |
def end_em(self):
    """Closing </em>: pop the 'em' entry."""
    self._pop('em')
1940 | 622 |
|
1677
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
623 |
#### underline |
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
624 |
def start_u(self, attributes):
    """Opening <u>: push a 'u' entry with underline switched on (attributes are ignored)."""
    self._push('u', underline=1)
96 | 626 |
|
1677
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
627 |
def end_u(self):
    """Closing </u>: pop the 'u' entry."""
    self._pop('u')
96 | 629 |
|
2644 | 630 |
#### strike |
631 |
def start_strike(self, attributes):
    """Opening <strike>: push a 'strike' entry with strike switched on (attributes are ignored)."""
    self._push('strike', strike=1)
2644 | 633 |
|
634 |
def end_strike(self):
    """Closing </strike>: pop the 'strike' entry."""
    self._pop('strike')
2644 | 636 |
|
2575 | 637 |
#### link |
638 |
def start_link(self, attributes):
    """Opening <link>: push a 'link' entry carrying the attributes accepted by _linkAttrMap."""
    linkAttrs = self.getAttributes(attributes, _linkAttrMap)
    self._push('link', **linkAttrs)
2575 | 640 |
|
641 |
def end_link(self):
    """Closing </link>: pop the 'link' entry.

    Raises ValueError when the popped entry's link attribute is None.
    """
    closed = self._pop('link')
    if closed.link is None:
        raise ValueError('<link> has no target or href')
2575 | 644 |
|
2744 | 645 |
#### anchor |
646 |
def start_a(self, attributes):
    """Opening <a>: handle both the anchor and the hyperlink form.

    With a name attribute this is the <a name="..."/> anchor variant: a blank
    name, or any attribute besides name, is reported through _syntax_error,
    and the entry is marked with _selfClosingTag='anchor'.  Without a name,
    the (stripped) href is stored under 'link' and 'href' is removed.
    """
    A = self.getAttributes(attributes, _anchorAttrMap)
    anchorName = A.get('name', None)
    if anchorName is None:
        # hyperlink form: convert href to our link form
        A['link'] = A.get('href', '').strip()
        A.pop('href', None)
    else:
        if not anchorName.strip():
            self._syntax_error('<a name="..."/> anchor variant requires non-blank name')
        if len(A) > 1:
            self._syntax_error('<a name="..."/> anchor variant only allows name attribute')
            A = dict(name=A['name'])
        A['_selfClosingTag'] = 'anchor'
    self._push('a', **A)
2744 | 662 |
|
663 |
def end_a(self):
    """Closing </a>: finish either an anchor or a hyperlink.

    If the top stack entry carries _selfClosingTag it must be a named
    'anchor'; a cbDefn ABag of kind 'anchor' is attached to it, an empty
    data item is emitted through handle_data and the entry is popped.
    Otherwise the 'a' entry is popped as a hyperlink.

    Raises ValueError for an inconsistent self-closing entry, or when the
    popped hyperlink entry's link attribute is None.
    """
    frag = self._stack[-1]
    sct = getattr(frag, '_selfClosingTag', '')
    if sct:
        if not (sct == 'anchor' and frag.name):
            raise ValueError('Parser failure in <a/>')
        defn = frag.cbDefn = ABag()
        defn.label = defn.kind = 'anchor'
        defn.name = frag.name
        # name and marker now live on cbDefn; remove them from the fragment
        del frag.name, frag._selfClosingTag
        self.handle_data('')
        self._pop('a')
    elif self._pop('a').link is None:
        # message used to say '<link>', which pointed at the wrong tag
        raise ValueError('<a> has no href')
2742 | 678 |
|
2857
487dc2450eec
reprotlab: inline images horizontal positioning OK
rgbecker
parents:
2836
diff
changeset
|
679 |
def start_img(self, attributes):
    """Opening <img>: push an 'img' entry marked as self closing.

    A missing or empty src attribute is reported through _syntax_error,
    but the entry is still pushed.
    """
    imgAttrs = self.getAttributes(attributes, _imgAttrMap)
    src = imgAttrs.get('src')
    if not src:
        self._syntax_error('<img> needs src attribute')
    imgAttrs['_selfClosingTag'] = 'img'
    self._push('img', **imgAttrs)
2857
487dc2450eec
reprotlab: inline images horizontal positioning OK
rgbecker
parents:
2836
diff
changeset
|
685 |
|
487dc2450eec
reprotlab: inline images horizontal positioning OK
rgbecker
parents:
2836
diff
changeset
|
686 |
def end_img(self):
    """Closing </img>: attach the image definition to the top entry and pop it.

    Builds a cbDefn ABag of kind 'img' holding src, an ImageReader for it,
    width/height (defaulting to the image's own size) and valign
    (default 'bottom'), then emits an empty data item and pops 'img'.

    Raises ValueError if the top stack entry has no _selfClosingTag marker.
    """
    top = self._stack[-1]
    if not getattr(top, '_selfClosingTag', ''):
        raise ValueError('Parser failure in <img/>')
    info = ABag()
    top.cbDefn = info
    info.kind = 'img'
    info.src = getattr(top, 'src', None)
    info.image = ImageReader(info.src)
    naturalSize = info.image.getSize()
    info.width = getattr(top, 'width', naturalSize[0])
    info.height = getattr(top, 'height', naturalSize[1])
    info.valign = getattr(top, 'valign', 'bottom')
    del top._selfClosingTag
    self.handle_data('')
    self._pop('img')
2857
487dc2450eec
reprotlab: inline images horizontal positioning OK
rgbecker
parents:
2836
diff
changeset
|
701 |
|
1677
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
702 |
#### super script |
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
703 |
def start_super(self, attributes):
    """Opening <super>: push a 'super' entry with super switched on (attributes are ignored)."""
    self._push('super', super=1)
96 | 705 |
|
1677
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
706 |
def end_super(self):
    """Closing </super>: pop the 'super' entry."""
    self._pop('super')
96 | 708 |
|
4097
1c2ebf285cb7
paraparser.py: force assertion of tag rather than value checking in end_<tag>
robin
parents:
4077
diff
changeset
|
709 |
def start_sup(self, attributes):
    """Opening <sup>: push a 'sup' entry with super switched on (attributes are ignored)."""
    self._push('sup', super=1)
1c2ebf285cb7
paraparser.py: force assertion of tag rather than value checking in end_<tag>
robin
parents:
4077
diff
changeset
|
711 |
|
1c2ebf285cb7
paraparser.py: force assertion of tag rather than value checking in end_<tag>
robin
parents:
4077
diff
changeset
|
712 |
def end_sup(self):
    """Closing </sup>: pop the 'sup' entry."""
    self._pop('sup')
1736 | 714 |
|
1677
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
715 |
#### sub script |
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
716 |
def start_sub(self, attributes):
    """Opening <sub>: push a 'sub' entry with sub switched on (attributes are ignored)."""
    self._push('sub', sub=1)
96 | 718 |
|
1677
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
719 |
def end_sub(self):
    """Closing </sub>: pop the 'sub' entry."""
    self._pop('sub')
96 | 721 |
|
1677
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
722 |
#### greek script |
2376 | 723 |
#### add symbol encoding |
724 |
def handle_charref(self, name):
    """Handle a numeric character reference such as &#945; or &#x3b1;.

    name is the text between '&#' and ';'.  A leading 'x' or 'X' selects
    hexadecimal, anything else is read as decimal.  The resulting character
    is passed to handle_data.  References that are not numbers, or that do
    not denote a valid code point, are passed to unknown_charref instead.
    """
    try:
        # name[:1] (not name[0]) so an empty reference is rejected cleanly
        if name[:1] in ('x', 'X'):
            n = int(name[1:], 16)
        else:
            n = int(name)
        # inside the try: an out-of-range code point raises here
        text = uniChr(n)
    except (ValueError, OverflowError):
        self.unknown_charref(name)
        return
    self.handle_data(text)
134 | 734 |
|
2376 | 735 |
def syntax_error(self, lineno, message):
    """Forward message to _syntax_error; lineno is accepted but not used."""
    self._syntax_error(message)
|
134 | 737 |
|
1677
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
738 |
def _syntax_error(self, message):
    """Append message to self.errors.

    Messages of the form 'attribute ... value not quoted' are dropped.
    """
    unquotedValue = message.startswith("attribute ") and message.endswith(" value not quoted")
    if not unquotedValue:
        self.errors.append(message)
134 | 741 |
|
2584
0fed2bd8ef90
reportlab: fixed <greek> added <unichar [name=..|code=../> to paragraph
rgbecker
parents:
2575
diff
changeset
|
742 |
def start_greek(self, attr):
    """Opening <greek>: push a 'greek' entry with greek switched on (attr is ignored)."""
    self._push('greek', greek=1)
96 | 744 |
|
1677
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
745 |
def end_greek(self):
    """Closing </greek>: pop the 'greek' entry."""
    self._pop('greek')
96 | 747 |
|
2584
0fed2bd8ef90
reportlab: fixed <greek> added <unichar [name=..|code=../> to paragraph
rgbecker
parents:
2575
diff
changeset
|
748 |
def start_unichar(self, attr):
    """Opening <unichar name="..."/> or <unichar code="..."/>.

    name is looked up with unicodedata.lookup; code must be an integer
    literal (decimal, or prefixed 0x / 0o / 0b).  The resulting character is
    emitted through handle_data, then a self-closing 'unichar' entry is
    pushed.  Problems are reported through _syntax_error and '\\0' is emitted
    in place of the character; with neither attribute nothing is emitted.
    """
    if 'name' in attr:
        if 'code' in attr:
            self._syntax_error('<unichar/> invalid with both name and code attributes')
        try:
            v = unicodedata.lookup(attr['name'])
        except KeyError:
            self._syntax_error('<unichar/> invalid name attribute\n"%s"' % ascii(attr['name']))
            v = '\0'
    elif 'code' in attr:
        try:
            # SECURITY: this used to be int(eval(attr['code'])), which executed
            # arbitrary Python taken straight from paragraph markup.  The
            # attribute is now parsed strictly as an integer literal (base 0
            # follows Python literal prefixes), so expressions are rejected.
            v = int(attr['code'], 0)
            v = chr(v) if isPy3 else unichr(v)
        except (TypeError, ValueError, OverflowError):
            self._syntax_error('<unichar/> invalid code attribute %s' % ascii(attr['code']))
            v = '\0'
    else:
        v = None
        if attr:
            self._syntax_error('<unichar/> invalid attribute %s' % list(attr.keys())[0])

    if v is not None:
        self.handle_data(v)
    self._push('unichar', _selfClosingTag='unichar')
2584
0fed2bd8ef90
reportlab: fixed <greek> added <unichar [name=..|code=../> to paragraph
rgbecker
parents:
2575
diff
changeset
|
772 |
|
0fed2bd8ef90
reportlab: fixed <greek> added <unichar [name=..|code=../> to paragraph
rgbecker
parents:
2575
diff
changeset
|
773 |
def end_unichar(self):
    """Closing </unichar>: pop the 'unichar' entry."""
    self._pop('unichar')
2584
0fed2bd8ef90
reportlab: fixed <greek> added <unichar [name=..|code=../> to paragraph
rgbecker
parents:
2575
diff
changeset
|
775 |
|
1677
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
776 |
def start_font(self, attr):
    """Opening <font>: push a 'font' entry built from the tag's attributes.

    A fontName attribute is split with ps2tt into fontName, bold and italic
    before the push, mirroring start_span.
    """
    # The ps2tt-adjusted dict used to be thrown away: the push re-read the
    # attributes with a second getAttributes call, so the bold/italic split
    # never reached the pushed entry.  Read the attributes once (with the
    # font map the push already used) and push the adjusted dict.
    A = self.getAttributes(attr, _fontAttrMap)
    if 'fontName' in A:
        A['fontName'], A['bold'], A['italic'] = ps2tt(A['fontName'])
    self._push('font', **A)
96 | 781 |
|
1677
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
782 |
def end_font(self):
    """Closing </font>: pop the 'font' entry."""
    self._pop('font')
96 | 784 |
|
3552
20ecbcc53c15
paraparser.py add support for <span style=stylename>
rgbecker
parents:
3440
diff
changeset
|
785 |
def start_span(self, attr):
    """Opening <span>: push a 'span' entry built from the tag's attributes.

    A style attribute is resolved with findSpanStyle; that style's fontName,
    fontSize, textColor and backColor (where it has them) act as defaults
    which the tag's explicit attributes override.  A resulting fontName is
    split with ps2tt into fontName, bold and italic.
    """
    params = self.getAttributes(attr, _spanAttrMap)
    if 'style' in params:
        style = self.findSpanStyle(params.pop('style'))
        inherited = {}
        for key in ('fontName', 'fontSize', 'textColor', 'backColor'):
            # self doubles as the "attribute absent" sentinel
            value = getattr(style, key, self)
            if value is not self:
                inherited[key] = value
        inherited.update(params)
        params = inherited
    if 'fontName' in params:
        params['fontName'], params['bold'], params['italic'] = ps2tt(params['fontName'])
    self._push('span', **params)
3552
20ecbcc53c15
paraparser.py add support for <span style=stylename>
rgbecker
parents:
3440
diff
changeset
|
799 |
|
4097
1c2ebf285cb7
paraparser.py: force assertion of tag rather than value checking in end_<tag>
robin
parents:
4077
diff
changeset
|
800 |
def end_span(self):
    '''Handle </span>: pop the frame tagged 'span' off the fragment stack.'''
    self._pop('span')
3552
20ecbcc53c15
paraparser.py add support for <span style=stylename>
rgbecker
parents:
3440
diff
changeset
|
802 |
|
2663 | 803 |
def start_br(self, attr):
    '''Handle <br>: push a frame flagged as a self-closing line break.

    attr is part of the handler signature but is not consulted here.
    '''
    brFrame = dict(_selfClosingTag='br', lineBreak=True, text='')
    self._push('br', **brFrame)
3954
44dbe56eb858
first attempt at HTMLParser-based paraparser
Andy Robinson <andy@reportlab.com>
parents:
3937
diff
changeset
|
805 |
|
2663 | 806 |
def end_br(self):
    '''Handle the end of <br/>: emit the line-break fragment and pop its frame.

    Raises ValueError when the frame on top of the stack is not the one
    pushed for a <br> (wrong _selfClosingTag or lineBreak not set).
    '''
    top = self._stack[-1]
    if top._selfClosingTag != 'br' or not top.lineBreak:
        raise ValueError('Parser failure in <br/>')
    # remove the marker before handle_data is asked to emit the fragment
    del top._selfClosingTag
    self.handle_data('')
    self._pop('br')
2663 | 814 |
|
1677
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
815 |
def _initial_frag(self,attr,attrMap,bullet=0):
    '''Build the root fragment for a <para> or <bullet> element.

    When attr is not empty, the current style is deep-copied, the attributes
    translated through attrMap are applied to the copy, and the copy becomes
    self._style.  The returned ParaFrag has its markup flags cleared and takes
    font name (split by ps2tt into name/bold/italic), size and colour from the
    bullet* fields of the style when bullet is true, otherwise from the
    ordinary text fields.
    '''
    paraStyle = self._style
    if attr != {}:
        # apply the overrides to a copy rather than to the style object itself
        paraStyle = copy.deepcopy(paraStyle)
        _applyAttributes(paraStyle, self.getAttributes(attr, attrMap))
        self._style = paraStyle

    # initialize semantic values
    frag = ParaFrag()
    for flag in ('sub', 'super', 'rise', 'underline', 'strike', 'greek'):
        setattr(frag, flag, 0)
    frag.link = None

    if bullet:
        frag.fontName, frag.bold, frag.italic = ps2tt(paraStyle.bulletFontName)
        frag.fontSize = paraStyle.bulletFontSize
        # use the text colour when bulletColor is missing or falsy
        frag.textColor = getattr(paraStyle, 'bulletColor', None) or paraStyle.textColor
        return frag

    frag.fontName, frag.bold, frag.italic = ps2tt(paraStyle.fontName)
    frag.fontSize = paraStyle.fontSize
    frag.textColor = paraStyle.textColor
    return frag
250 | 840 |
|
1677
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
841 |
def start_para(self,attr):
    '''Handle <para>: replace the stack with one holding only the root fragment.'''
    root = self._initial_frag(attr, _paraAttrMap)
    root.__tag__ = 'para'
    self._stack = [root]
119 | 845 |
|
1677
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
846 |
def end_para(self):
    '''Handle </para>: pop the frame tagged 'para' off the fragment stack.'''
    self._pop('para')
119 | 848 |
|
1677
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
849 |
def start_bullet(self,attr):
    '''Handle <bullet>: start the bullet fragment list and push its frame.

    A syntax error is reported when a bullet fragment list already exists.
    The pushed fragment is marked with isBullet so that handle_data can
    route its text to bFragList.
    '''
    alreadySeen = hasattr(self, 'bFragList')
    if alreadySeen:
        self._syntax_error('only one <bullet> tag allowed')
    self.bFragList = []
    bulletFrag = self._initial_frag(attr, _bulletAttrMap, 1)
    bulletFrag.__tag__ = 'bullet'
    bulletFrag.isBullet = 1
    self._stack.append(bulletFrag)
250 | 857 |
|
1677
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
858 |
def end_bullet(self):
    '''Handle </bullet>: pop the frame tagged 'bullet' off the fragment stack.'''
    self._pop('bullet')
250 | 860 |
|
1677
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
861 |
#--------------------------------------------------------------- |
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
862 |
def start_seqdefault(self, attr):
    '''Handle <seqdefault>: make attr['id'] the sequencer's default counter.

    A missing id is passed on as None.
    '''
    self._seq.setDefaultCounter(attr.get('id'))
266 | 868 |
|
1677
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
869 |
def end_seqdefault(self):
    '''Nothing to do when <seqdefault> closes.'''
    return None
1683 | 871 |
|
1677
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
872 |
def start_seqreset(self, attr):
    '''Handle <seqreset>: reset a sequence counter to a base value.

    attr['id'] names the counter (None when absent).  attr['base'] is
    converted with int(); when it is missing or cannot be converted the
    base falls back to 0.
    '''
    counter = attr.get('id')
    try:
        base = int(attr['base'])
    except (KeyError, TypeError, ValueError):
        # absent or non-numeric base: fall back to 0 without hiding
        # unrelated exceptions such as KeyboardInterrupt
        base = 0
    self._seq.reset(counter, base)
266 | 882 |
|
1677
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
883 |
def end_seqreset(self):
    '''Nothing to do when <seqreset> closes.'''
    return None
744
2abd99baf95b
Accepts seqdefault/seqDefault and seqreset/seqReset
andy_robinson
parents:
677
diff
changeset
|
885 |
|
2368 | 886 |
def start_seqchain(self, attr):
    '''Handle <seqchain>: chain each counter named in attr['order'] to the next.

    order is a whitespace separated list of counter names; every adjacent
    pair (parent, child) is handed to the sequencer's chain().  A missing or
    single-name order chains nothing.
    '''
    names = attr.get('order', '').split()
    sequencer = self._seq
    for parent, child in zip(names, names[1:]):
        sequencer.chain(parent, child)
|
895 |
end_seqchain = end_seqreset |
|
896 |
||
897 |
def start_seqformat(self, attr):
    '''Handle <seqformat>: set the format of a sequence counter.

    attr['id'] names the counter (None when absent) and attr['value'] is the
    format, defaulting to '1'.
    '''
    counter = attr.get('id')
    fmtValue = attr.get('value', '1')
    self._seq.setFormat(counter, fmtValue)
|
907 |
end_seqformat = end_seqreset |
|
908 |
||
1677
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
909 |
# AR hacking in aliases to allow the proper casing for RML. |
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
910 |
# the above ones should be deprecated over time. 2001-03-22 |
2368 | 911 |
# Mixed-case names for the sequence tag handlers; each one is bound to the
# very same function object as its all-lowercase spelling.
start_seqDefault = start_seqdefault
end_seqDefault = end_seqdefault
start_seqReset = start_seqreset
end_seqReset = end_seqreset
start_seqChain = start_seqchain
end_seqChain = end_seqchain
start_seqFormat = start_seqformat
end_seqFormat = end_seqformat
|
1683 | 919 |
|
1677
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
920 |
def start_seq(self, attr):
    '''Handle <seq>: emit the formatted value of a sequence counter as text.

    Attributes, in order of precedence:
      template  a %-format string applied to the sequencer; its result is
                emitted and nothing else is looked at.
      id        name of the counter; None (the sequencer's default) if absent.
      inc       absent or empty: emit the next formatted value.
                'no' (any case): emit the current value, leave the counter alone.
                an integer: emit the current value, then move the counter
                on by that amount.

    Raises ValueError (from int()) when inc is neither 'no' nor an integer.
    '''
    if 'template' in attr:
        # the sequencer itself is the mapping operand of the % template
        self.handle_data(attr['template'] % self._seq)
        return

    counter = attr['id'] if 'id' in attr else None
    increment = attr.get('inc', None)
    if not increment:
        output = self._seq.nextf(counter)
    elif increment.lower() == 'no':
        output = self._seq.thisf(counter)
    else:
        # accepts an integer, thus 0 and 1 increment by the right amounts
        incr = int(increment)
        output = self._seq.thisf(counter)
        # advance the same counter that was just formatted; reading the
        # value without naming the counter would use a different counter
        # whenever id is given
        self._seq.reset(counter, self._seq._this(counter) + incr)
    self.handle_data(output)
1683 | 944 |
|
1677
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
945 |
def end_seq(self):
    '''Nothing to do when the tag closes.'''
    return None
266 | 947 |
|
4059 | 948 |
def start_ondraw(self,attr):
    '''Handle <onDraw>: emit an empty fragment carrying a callback definition.

    The definition records attr['name'] (a syntax error is reported when it
    is absent), attr['label'] when given, and kind 'onDraw'.
    '''
    defn = ABag()
    if 'name' not in attr:
        self._syntax_error('<onDraw> needs at least a name attribute')
    else:
        defn.name = attr['name']

    if 'label' in attr:
        defn.label = attr['label']
    defn.kind = 'onDraw'

    # push a frame holding the definition, emit it with no text, and pop it
    # again straight away
    self._push('ondraw', cbDefn=defn)
    self.handle_data('')
    self._pop('ondraw')
4059 | 958 |
start_onDraw=start_ondraw |
959 |
end_onDraw=end_ondraw=end_seq |
|
3111
86a3158c50bd
reportlab: improved support for onDraw and SimpleIndex
rgbecker
parents:
3032
diff
changeset
|
960 |
|
3165 | 961 |
def start_index(self,attr):
    '''Handle <index>: emit an empty fragment carrying an index callback.

    The raw attributes are first translated through _indexAttrMap.  Used
    afterwards:
      item    the index entry (a syntax error is reported when absent)
      name    index name, DEFAULT_INDEX_NAME when absent
      format  optional; one of '123', 'I', 'i', 'ABC', 'abc'
      offset  optional; converted with int()

    Raises ValueError when format is not one of the accepted values or when
    offset cannot be converted to an int.
    '''
    attr = self.getAttributes(attr, _indexAttrMap)
    defn = ABag()
    if 'item' in attr:
        label = attr['item']
    else:
        # NOTE(review): if _syntax_error returns rather than raises, label is
        # never bound and the encode_label call below fails -- confirm.
        self._syntax_error('<index> needs at least an item attribute')
    name = attr['name'] if 'name' in attr else DEFAULT_INDEX_NAME

    fmt = attr.get('format', None)
    if fmt is not None and fmt not in ('123','I','i','ABC','abc'):
        # the message must show the rejected format value itself
        raise ValueError('index tag format is %r not valid 123 I i ABC or abc' % (fmt,))

    offset = attr.get('offset', None)
    if offset is not None:
        try:
            offset = int(offset)
        except (TypeError, ValueError):
            raise ValueError('index tag offset is %r not an int' % (offset,))

    defn.label = encode_label((label, fmt, offset))
    defn.name = name
    defn.kind = 'index'
    # push a frame holding the definition, emit it with no text, and pop it
    # again straight away
    self._push('index', cbDefn=defn)
    self.handle_data('')
    self._pop('index',)
3165 | 987 |
end_index=end_seq |
2663 | 988 |
|
3826
02f216b6e38e
paraparser.py: support old behaviours eg ignore unknown tags etc etc
robin
parents:
3812
diff
changeset
|
989 |
def start_unknown(self,attr):
    '''Start handler that does nothing; attr is not consulted.'''
    return None
02f216b6e38e
paraparser.py: support old behaviours eg ignore unknown tags etc etc
robin
parents:
3812
diff
changeset
|
991 |
end_unknown=end_seq |
02f216b6e38e
paraparser.py: support old behaviours eg ignore unknown tags etc etc
robin
parents:
3812
diff
changeset
|
992 |
|
1677
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
993 |
#--------------------------------------------------------------- |
4097
1c2ebf285cb7
paraparser.py: force assertion of tag rather than value checking in end_<tag>
robin
parents:
4077
diff
changeset
|
994 |
def _push(self,tag,**attr):
    '''Push a new frame for tag onto the fragment stack.

    The new frame is a shallow copy of the current top of the stack,
    labelled with tag (checked later by _pop) and then modified with the
    keyword attributes via _applyAttributes.
    '''
    child = copy.copy(self._stack[-1])
    child.__tag__ = tag
    _applyAttributes(child, attr)
    self._stack.append(child)
96 | 999 |
|
4097
1c2ebf285cb7
paraparser.py: force assertion of tag rather than value checking in end_<tag>
robin
parents:
4077
diff
changeset
|
1000 |
def _pop(self,tag):
    '''Pop the top frame off the stack and return it.

    Raises ValueError when the popped frame was not pushed for tag; the
    frame has already been removed from the stack at that point.
    '''
    top = self._stack.pop()
    if tag != top.__tag__:
        raise ValueError('Parse error: saw </%s> instead of expected </%s>' % (tag,top.__tag__))
    return top
96 | 1004 |
|
1677
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
1005 |
def getAttributes(self,attr,attrMap):
    '''Translate raw tag attributes into internal attribute values.

    attr     mapping of attribute name -> raw value as found in the markup.
    attrMap  mapping of accepted attribute name -> sequence whose first
             item is the internal name and whose second item is a converter
             callable, or None to keep the raw value.

    Names are lower-cased before lookup unless self.caseSensitive is true.
    Unknown names and values the converter rejects are reported through
    self._syntax_error and left out of the result.

    Returns a new dict of internal name -> converted value.
    '''
    A = {}
    for k, v in attr.items():
        if not self.caseSensitive:
            k = k.lower()
        # direct membership test on the mapping; no key list is built
        if k not in attrMap:
            self._syntax_error('invalid attribute name %s'%k)
            continue
        j = attrMap[k]
        func = j[1]
        try:
            A[j[0]] = v if func is None else func(v)
        except Exception:
            # any failure of the converter counts as an invalid value
            self._syntax_error('%s: invalid value %s'%(k,v))
    return A
119 | 1020 |
|
1677
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
1021 |
#---------------------------------------------------------------- |
96 | 1022 |
|
3826
02f216b6e38e
paraparser.py: support old behaviours eg ignore unknown tags etc etc
robin
parents:
3812
diff
changeset
|
1023 |
def __init__(self,verbose=0, caseSensitive=0, ignoreUnknownTags=1):
    '''Initialise the underlying HTMLParser and store the parser options.

    verbose, caseSensitive and ignoreUnknownTags are kept unchanged as
    attributes of the same names.
    '''
    # convert_charrefs is passed (switched off) only on Python 3.4 and later
    if sys.version_info>=(3,4):
        HTMLParser.__init__(self, convert_charrefs=False)
    else:
        HTMLParser.__init__(self)
    self.verbose = verbose
    # HTMLParser is case insensitive anyway, but the rml interface still needs
    # this; all start/end_ methods should have a lower case version for
    # HTMLParser
    self.caseSensitive = caseSensitive
    self.ignoreUnknownTags = ignoreUnknownTags
266 | 1031 |
|
1677
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
1032 |
def _iReset(self):
    '''Clear the collected fragments: empty fragList and drop bFragList.'''
    if hasattr(self, 'bFragList'):
        del self.bFragList
    self.fragList = []
250 | 1035 |
|
1677
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
1036 |
def _reset(self, style):
    '''Reset the parser: HTMLParser state, error list, style and fragments.'''
    HTMLParser.reset(self)
    self._style = style
    # start again with no recorded errors
    self.errors = []
    self._iReset()
96 | 1044 |
|
1677
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
1045 |
#---------------------------------------------------------------- |
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
1046 |
def handle_data(self,data):
    """Creates an intermediate representation of string segments.

    A shallow copy of the frame on top of the stack is turned into one
    fragment holding data as its text and appended to bFragList (when the
    frame is marked isBullet) or to fragList.  Frames carrying a
    _selfClosingTag marker produce no fragment at all.
    """

    #The old parser would only 'see' a string after all entities had
    #been processed.  Thus, 'Hello &trade; World' would emerge as one
    #fragment.  HTMLParser processes these separately.  We want to ensure
    #that successive calls like this are concatenated, to prevent too many
    #fragments being created.

    # work on a copy so the frame on the stack is left untouched
    frag = copy.copy(self._stack[-1])
    if hasattr(frag,'cbDefn'):
        # callback frames must be empty; the fragment is still emitted below
        kind = frag.cbDefn.kind
        if data: self._syntax_error('Only empty <%s> tag allowed' % kind)
    elif hasattr(frag,'_selfClosingTag'):
        # still inside a self-closing tag: report any content, emit nothing
        if data!='': self._syntax_error('No content allowed in %s tag' % frag._selfClosingTag)
        return
    else:
        # if sub and super are both on they will cancel each other out
        if frag.sub == 1 and frag.super == 1:
            frag.sub = 0
            frag.super = 0

        # sub/superscript: shift the baseline by a fraction of the font size,
        # then shrink the font by sizeDelta but never below 3
        if frag.sub:
            frag.rise = -frag.fontSize*subFraction
            frag.fontSize = max(frag.fontSize-sizeDelta,3)
        elif frag.super:
            frag.rise = frag.fontSize*superFraction
            frag.fontSize = max(frag.fontSize-sizeDelta,3)

        # greek: switch to the symbol font and convert the text to match
        if frag.greek:
            frag.fontName = 'symbol'
            data = _greekConvert(data)

    # bold, italic, and underline
    # combine family name and bold/italic flags back into one font name
    frag.fontName = tt2ps(frag.fontName,frag.bold,frag.italic)

    #save our data
    frag.text = data

    if hasattr(frag,'isBullet'):
        # bullet text is collected separately; the marker is removed from
        # the emitted copy only
        delattr(frag,'isBullet')
        self.bFragList.append(frag)
    else:
        self.fragList.append(frag)
96 | 1090 |
|
1677
1450177dd19e
Exterminated all tab characters and added a test to make sure
andy_robinson
parents:
1160
diff
changeset
|
1091 |
def handle_cdata(self,data):
    "CDATA sections get no special treatment: pass them on as ordinary text"
    process = self.handle_data
    process(data)
211 | 1093 |
|
2376 | 1094 |
def _setup_for_parse(self,style):
    "Prepare for one paragraph: grab the sequencer, then reset the parser with style"
    sequencer = reportlab.lib.sequencer.getSequencer()
    self._seq = sequencer
    # reinitialise the parser; done after _seq is set, as in the original ordering
    self._reset(style)
|
1097 |
||
1098 |
def _complete_parse(self):
    """Reset after parsing, to be ready for next paragraph.

    Removes the per-parse attributes _seq and _style and returns a
    (style, fragList, bFragList) tuple.  If any errors were recorded both
    lists are None.  bFragList is also None when no bullet fragments exist
    (attribute missing or list empty).
    """
    del self._seq
    style = self._style
    del self._style

    if self.errors:
        # parse failed: hand back no fragments (note _iReset is not called here)
        return style, None, None

    fragList = self.fragList
    # a missing or empty bullet list is reported as None
    bFragList = getattr(self,'bFragList',None) or None
    self._iReset()
    return style, fragList, bFragList
96 | 1111 |
|
3787
8f9be6d6f75c
convert paraparser to use pyRXP directly (or any TT producer)
robin
parents:
3731
diff
changeset
|
1112 |
def _tt_handle(self,tt):
    """Walk one node of a pre-parsed tuple tree (e.g. from pyRXP).

    tt[0] is the tag name, tt[1] the attributes (may be empty/None) and
    tt[2] the children.  The matching start_<tag>/end_<tag> methods are
    called around the children.  An unknown tag raises ValueError unless
    ignoreUnknownTags is set, in which case start_unknown/end_unknown are
    used instead.
    """
    tagName = tt[0]
    try:
        # both handlers must exist; if either is missing the tag is unknown
        opener = getattr(self,'start_'+tagName)
        closer = getattr(self,'end_'+tagName)
    except AttributeError:
        if self.ignoreUnknownTags:
            opener, closer = self.start_unknown, self.end_unknown
        else:
            raise ValueError('Invalid tag "%s"' % tagName)

    # open the tag, always handing the handler a mapping
    opener(tt[1] or {})

    # children are either nested nodes (list/tuple) or text
    children = tt[2]
    if children:
        handlers = self._tt_handlers    # (text handler, node handler)
        for child in children:
            if isinstance(child,(list,tuple)):
                handlers[1](child)
            else:
                handlers[0](child)

    # close the tag
    closer()
1140 |
||
3787
8f9be6d6f75c
convert paraparser to use pyRXP directly (or any TT producer)
robin
parents:
3731
diff
changeset
|
1141 |
def _tt_start(self,tt):
    "Install the (text, node) handler pair, then process the tuple tree from its root"
    textHandler = self.handle_data
    nodeHandler = self._tt_handle
    # index 0 serves text children, index 1 serves nested list/tuple nodes
    self._tt_handlers = textHandler,nodeHandler
    self._tt_handle(tt)
8f9be6d6f75c
convert paraparser to use pyRXP directly (or any TT producer)
robin
parents:
3731
diff
changeset
|
1144 |
|
2376 | 1145 |
def tt_parse(self,tt,style):
    '''Parse a paragraph supplied in tuple tree form.

    Sets the parser up for style, processes the tree tt and returns
    whatever _complete_parse() returns.
    '''
    self._setup_for_parse(style)
    self._tt_start(tt)
    outcome = self._complete_parse()
    return outcome
1150 |
||
3552
20ecbcc53c15
paraparser.py add support for <span style=stylename>
rgbecker
parents:
3440
diff
changeset
|
1151 |
def findSpanStyle(self,style):
    """Always raises ValueError: this parser has no implementation.

    NOTE(review): presumably meant to be overridden to resolve the style name
    from <span style=...> -- confirm against the callers.
    """
    message = 'findSpanStyle not implemented in this parser'
    raise ValueError(message)
20ecbcc53c15
paraparser.py add support for <span style=stylename>
rgbecker
parents:
3440
diff
changeset
|
1153 |
|
3957
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
1154 |
#HTMLParser interface |
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
1155 |
def parse(self, text, style):
    """Parse marked-up paragraph text via the HTMLParser interface.

    text is converted with asUnicode and wrapped in <para>...</para> unless
    it already begins with a para tag (as judged by _re_para).  Returns the
    result of _complete_parse().

    Any Exception raised while feeding the text is passed to
    annotateException together with the offending paragraph text.
    NOTE(review): annotateException presumably re-raises with the extra
    message attached -- confirm in reportlab.lib.utils.
    """
    self._setup_for_parse(style)
    text = asUnicode(text)
    if not(len(text)>=6 and text[0]=='<' and _re_para.match(text)):
        text = u"<para>"+text+u"</para>"
    try:
        self.feed(text)
    except Exception:
        # was a bare except; KeyboardInterrupt/SystemExit now propagate untouched
        annotateException('\nparagraph text %s caused exception' % ascii(text))
    return self._complete_parse()
44dbe56eb858
first attempt at HTMLParser-based paraparser
Andy Robinson <andy@reportlab.com>
parents:
3937
diff
changeset
|
1166 |
|
44dbe56eb858
first attempt at HTMLParser-based paraparser
Andy Robinson <andy@reportlab.com>
parents:
3937
diff
changeset
|
1167 |
def handle_starttag(self, tag, attrs): |
44dbe56eb858
first attempt at HTMLParser-based paraparser
Andy Robinson <andy@reportlab.com>
parents:
3937
diff
changeset
|
1168 |
"Called by HTMLParser when a tag starts" |
44dbe56eb858
first attempt at HTMLParser-based paraparser
Andy Robinson <andy@reportlab.com>
parents:
3937
diff
changeset
|
1169 |
|
44dbe56eb858
first attempt at HTMLParser-based paraparser
Andy Robinson <andy@reportlab.com>
parents:
3937
diff
changeset
|
1170 |
#tuple tree parser used to expect a dict. HTML parser |
44dbe56eb858
first attempt at HTMLParser-based paraparser
Andy Robinson <andy@reportlab.com>
parents:
3937
diff
changeset
|
1171 |
#gives list of two-element tuples |
44dbe56eb858
first attempt at HTMLParser-based paraparser
Andy Robinson <andy@reportlab.com>
parents:
3937
diff
changeset
|
1172 |
if isinstance(attrs, list): |
44dbe56eb858
first attempt at HTMLParser-based paraparser
Andy Robinson <andy@reportlab.com>
parents:
3937
diff
changeset
|
1173 |
d = {} |
44dbe56eb858
first attempt at HTMLParser-based paraparser
Andy Robinson <andy@reportlab.com>
parents:
3937
diff
changeset
|
1174 |
for (k, v) in attrs: |
44dbe56eb858
first attempt at HTMLParser-based paraparser
Andy Robinson <andy@reportlab.com>
parents:
3937
diff
changeset
|
1175 |
d[k] = v |
44dbe56eb858
first attempt at HTMLParser-based paraparser
Andy Robinson <andy@reportlab.com>
parents:
3937
diff
changeset
|
1176 |
attrs = d |
3957
c7cedb1dde29
paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents:
3956
diff
changeset
|
1177 |
if not self.caseSensitive: tag = tag.lower() |
3954
44dbe56eb858
first attempt at HTMLParser-based paraparser
Andy Robinson <andy@reportlab.com>
parents:
3937
diff
changeset
|
1178 |
try: |
44dbe56eb858
first attempt at HTMLParser-based paraparser
Andy Robinson <andy@reportlab.com>
parents:
3937
diff
changeset
|
1179 |
start = getattr(self,'start_'+tag) |
44dbe56eb858
first attempt at HTMLParser-based paraparser
Andy Robinson <andy@reportlab.com>
parents:
3937
diff
changeset
|
1180 |
except AttributeError: |
44dbe56eb858
first attempt at HTMLParser-based paraparser
Andy Robinson <andy@reportlab.com>
parents:
3937
diff
changeset
|
1181 |
if not self.ignoreUnknownTags: |
44dbe56eb858
first attempt at HTMLParser-based paraparser
Andy Robinson <andy@reportlab.com>
parents:
3937
diff
changeset
|
1182 |
raise ValueError('Invalid tag "%s"' % tag) |
44dbe56eb858
first attempt at HTMLParser-based paraparser
Andy Robinson <andy@reportlab.com>
parents:
3937
diff
changeset
|
1183 |
start = self.start_unknown |
44dbe56eb858
first attempt at HTMLParser-based paraparser
Andy Robinson <andy@reportlab.co& |