author | robin |
Thu, 24 Oct 2019 16:07:15 +0100 | |
changeset 4551 | d357e2acc856 |
parent 4413 | dec33c7e7922 |
child 4567 | 5612ef8ef0a8 |
permissions | -rw-r--r-- |
3779 | 1 |
#this is the interface module that imports all from the C extension _rl_accel
_c_funcs = {}   #name --> implementation obtained from the C extension
_py_funcs = {}  #name --> python implementation (None is stored as a placeholder here)
### NOTE! FP_STR SHOULD PROBABLY ALWAYS DO A PYTHON STR() CONVERSION ON ARGS
### IN CASE THEY ARE "LAZY OBJECTS". ACCELERATOR DOESN'T DO THIS (YET)
__all__ = [
        'fp_str',
        'unicode2T1',
        'instanceStringWidthT1',
        'instanceStringWidthTTF',
        'asciiBase85Encode',
        'asciiBase85Decode',
        'escapePDF',
        'sameFrag',
        'calcChecksum',
        'add32',
        'hex32',
        ]

#reportlab._rl_testing, when present and true, asks for python versions of everything
import reportlab
testing = getattr(reportlab,'_rl_testing',False)
del reportlab

for fn in __all__:
    #each import statement is executed in its own throwaway namespace
    scope = {}
    try:
        exec('from reportlab.lib._rl_accel import %s as f' % fn,scope)
    except ImportError:
        #no accelerated version available; mark the name as needing python code
        _py_funcs[fn] = None
    else:
        _c_funcs[fn] = scope['f']
        if testing: _py_funcs[fn] = None
del scope
3779 | 32 |
|
33 |
if _py_funcs: |
|
4071
8a945e72d376
rl_accel.py: make asciiBase85Decode always return bytes
robin
parents:
4026
diff
changeset
|
34 |
from reportlab.lib.utils import isBytes, isUnicode, isSeq, isPy3, rawBytes, asNative, asUnicode, asBytes |
3779 | 35 |
from math import log |
3862 | 36 |
from struct import unpack |
3779 | 37 |
|
38 |
if 'fp_str' in _py_funcs: |
|
39 |
_log_10 = lambda x,log=log,_log_e_10=log(10.0): log(x)/_log_e_10 |
|
40 |
_fp_fmts = "%.0f", "%.1f", "%.2f", "%.3f", "%.4f", "%.5f", "%.6f" |
|
3725 | 41 |
def fp_str(*a): |
3779 | 42 |
'''convert separate arguments (or single sequence arg) into space separated numeric strings''' |
43 |
if len(a)==1 and isSeq(a[0]): a = a[0] |
|
44 |
s = [] |
|
45 |
A = s.append |
|
46 |
for i in a: |
|
47 |
sa =abs(i) |
|
48 |
if sa<=1e-7: A('0') |
|
49 |
else: |
|
50 |
l = sa<=1 and 6 or min(max(0,(6-int(_log_10(sa)))),6) |
|
51 |
n = _fp_fmts[l]%i |
|
52 |
if l: |
|
53 |
j = len(n) |
|
54 |
while j: |
|
55 |
j -= 1 |
|
56 |
if n[j]!='0': |
|
57 |
if n[j]!='.': j += 1 |
|
58 |
break |
|
59 |
n = n[:j] |
|
60 |
A((n[0]!='0' or len(n)==1) and n or n[1:]) |
|
61 |
return ' '.join(s) |
|
62 |
||
63 |
#hack test for comma users
if ',' in fp_str(0.25):
    #the formatted output contains a comma, so wrap fp_str and turn every
    #comma back into a point. The unwrapped function is kept as _FP_STR and
    #the wrapper takes over the name fp_str, which is the name that gets
    #stored in _py_funcs. (Previously this read the undefined name _fp_str
    #and so would have raised NameError had the branch ever been taken.)
    _FP_STR = fp_str
    def fp_str(*a):
        '''convert separate arguments (or single sequence arg) into space separated numeric strings,
        always using '.' for the decimal point'''
        return _FP_STR(*a).replace(',','.')
|
68 |
_py_funcs['fp_str'] = fp_str |
|
69 |
||
70 |
if 'unicode2T1' in _py_funcs: |
|
71 |
def unicode2T1(utext,fonts):
    '''return a list of (font,string) pairs representing the unicode text

    The first font in fonts is tried first. A run it cannot encode is handed
    to the remaining fonts; when there are none left the run is replaced by
    the first font's _notdefChar (one per character) in its _notdefFont.'''
    font, others = fonts[0], fonts[1:]
    enc = font.encName
    if 'UCS-2' in enc:
        enc = 'UTF16'
    pairs = []
    while utext:
        try:
            #anything that is not unicode is passed through untouched
            encoded = utext.encode(enc) if isUnicode(utext) else utext
        except UnicodeEncodeError as err:
            #args[2:4] hold the start and end of the run that failed to encode
            start, stop = err.args[2:4]
            if start:
                pairs.append((font,utext[:start].encode(enc)))
            if others:
                pairs.extend(unicode2T1(utext[start:stop],others))
            else:
                pairs.append((font._notdefFont,font._notdefChar*(stop-start)))
            #carry on with whatever follows the failed run
            utext = utext[stop:]
        else:
            pairs.append((font,encoded))
            break
    return pairs
|
96 |
_py_funcs['unicode2T1'] = unicode2T1 |
|
97 |
||
98 |
if 'instanceStringWidthT1' in _py_funcs: |
|
3862 | 99 |
if isPy3:
    def instanceStringWidthT1(self, text, size, encoding='utf8'):
        """This is the "purist" approach to width

        Non-unicode text is decoded with encoding; the text is split between
        this font and its substitutionFonts by unicode2T1 and the widths of
        all the pieces are summed and scaled by 0.001*size."""
        if not isUnicode(text): text = text.decode(encoding)
        total = 0
        for font, chunk in unicode2T1(text,[self]+self.substitutionFonts):
            #each item of chunk is used directly as an index into font.widths
            total += sum(map(font.widths.__getitem__,chunk))
        return total*0.001*size
else:
    def instanceStringWidthT1(self, text, size, encoding='utf8'):
        """This is the "purist" approach to width

        Non-unicode text is decoded with encoding; the text is split between
        this font and its substitutionFonts by unicode2T1 and the widths of
        all the pieces are summed and scaled by 0.001*size."""
        if not isUnicode(text): text = text.decode(encoding)
        total = 0
        for font, chunk in unicode2T1(text,[self]+self.substitutionFonts):
            #items of chunk are converted with ord() before indexing font.widths
            codes = [ord(ch) for ch in chunk]
            total += sum(map(font.widths.__getitem__,codes))
        return total*0.001*size
|
3779 | 109 |
_py_funcs['instanceStringWidthT1'] = instanceStringWidthT1 |
3725 | 110 |
|
3779 | 111 |
if 'instanceStringWidthTTF' in _py_funcs: |
112 |
def instanceStringWidthTTF(self, text, size, encoding='utf-8'):
    """Calculate text width

    Non-unicode text is decoded first (utf-8 when encoding is empty). Each
    character's width is looked up by code point in self.face.charWidths,
    falling back to self.face.defaultWidth, and the sum is scaled by 0.001*size."""
    if not isUnicode(text):
        text = text.decode(encoding or 'utf-8')
    face = self.face
    widthOf = face.charWidths.get
    fallback = face.defaultWidth
    total = sum(widthOf(ord(ch),fallback) for ch in text)
    return 0.001*size*total
|
119 |
_py_funcs['instanceStringWidthTTF'] = instanceStringWidthTTF |
|
120 |
||
121 |
if 'hex32' in _py_funcs: |
|
122 |
def hex32(i):
    "return int(i), reduced to its low 32 bits, as '0X' followed by 8 upper case hex digits"
    low32 = int(i) & 0xFFFFFFFF
    return '0X' + format(low32,'08X')
|
124 |
_py_funcs['hex32'] = hex32 |
|
125 |
||
126 |
if 'add32' in _py_funcs: |
|
127 |
def add32(x, y):
    "Calculate (x + y) modulo 2**32"
    total = x + y
    #masking keeps only the low 32 bits of the sum
    return total & 0xFFFFFFFF
|
130 |
_py_funcs['add32'] = add32 |
|
131 |
||
132 |
if 'calcChecksum' in _py_funcs: |
|
133 |
def calcChecksum(data):
    """Calculates TTF-style checksums

    The data is passed through rawBytes, padded with zero bytes to a multiple
    of four, read as big endian 32 bit words and summed; the low 32 bits of
    the sum are returned."""
    data = rawBytes(data)
    shortfall = -len(data) & 3      #bytes missing from the final word
    if shortfall:
        data = data + shortfall*b"\0"
    words = unpack(">%dl" % (len(data)//4), data)
    return sum(words) & 0xFFFFFFFF
138 |
_py_funcs['calcChecksum'] = calcChecksum |
|
139 |
||
140 |
if 'escapePDF' in _py_funcs: |
|
141 |
_ESCAPEDICT={} |
|
142 |
for c in range(256): |
|
143 |
if c<32 or c>=127: |
|
144 |
_ESCAPEDICT[c]= '\\%03o' % c |
|
145 |
elif c in (ord('\\'),ord('('),ord(')')): |
|
146 |
_ESCAPEDICT[c] = '\\'+chr(c) |
|
147 |
else: |
|
148 |
_ESCAPEDICT[c] = chr(c) |
|
149 |
del c |
|
150 |
#Michael Hudson donated this |
|
151 |
def escapePDF(s): |
|
152 |
r = [] |
|
153 |
for c in s: |
|
154 |
if not type(c) is int: |
|
155 |
c = ord(c) |
|
156 |
r.append(_ESCAPEDICT[c]) |
|
157 |
return ''.join(r) |
|
158 |
_py_funcs['escapePDF'] = escapePDF |
|
159 |
||
160 |
if 'asciiBase85Encode' in _py_funcs: |
|
161 |
def asciiBase85Encode(input):
    """Encodes input using ASCII-Base85 coding.

    This is a compact encoding used for binary data within
    a PDF file.  Four bytes of binary data become five bytes of
    ASCII.  This is the default method used for encoding images.

    A group of four zero bytes is written as the single character 'z'.
    A final group of 1-3 bytes is padded with zeros, encoded, and cut to
    one character more than the number of bytes it held. The returned
    string always ends with the '~>' terminator."""
    #items need ord() unless we are on python 3 with non-unicode input
    needOrd = not isPy3 or isUnicode(input)
    # special rules apply if not a multiple of four bytes.
    nGroups, nLeft = divmod(len(input), 4)
    split = 4*nGroups
    head, tail = input[:split], input[split:]

    pieces = []
    emit = pieces.append
    for pos in range(0, split, 4):
        b1, b2, b3, b4 = head[pos], head[pos+1], head[pos+2], head[pos+3]
        if needOrd:
            b1, b2, b3, b4 = ord(b1), ord(b2), ord(b3), ord(b4)

        if b1<128:
            num = (((((b1<<8)|b2)<<8)|b3)<<8)|b4
        else:
            num = 16777216 * b1 + 65536 * b2 + 256 * b3 + b4

        if num == 0:
            #special case
            emit('z')
            continue

        #solve for five base-85 numbers
        rest, c5 = divmod(num, 85)
        rest, c4 = divmod(rest, 85)
        rest, c3 = divmod(rest, 85)
        c1, c2 = divmod(rest, 85)
        assert ((85**4) * c1) + ((85**3) * c2) + ((85**2) * c3) + (85*c4) + c5 == num, 'dodgy code!'
        emit(chr(c1+33) + chr(c2+33) + chr(c3+33) + chr(c4+33) + chr(c5+33))

    # the final partial group is handled separately; the loop above is the
    # time-critical part whereas this happens only once at the end.
    if nLeft > 0:
        #pad to a full group using the same type as the input items
        tail += (4-len(tail))*('\0' if needOrd else b'\000')
        b1, b2, b3, b4 = tail[0], tail[1], tail[2], tail[3]
        if needOrd:
            b1, b2, b3, b4 = ord(b1), ord(b2), ord(b3), ord(b4)

        num = 16777216 * b1 + 65536 * b2 + 256 * b3 + b4

        #solve for c1..c5
        rest, c5 = divmod(num, 85)
        rest, c4 = divmod(rest, 85)
        rest, c3 = divmod(rest, 85)
        c1, c2 = divmod(rest, 85)

        lastword = ''.join(map(chr, (c1+33, c2+33, c3+33, c4+33, c5+33)))
        #write out most of the bytes.
        emit(lastword[:nLeft+1])

    #terminator code for ascii 85
    emit('~>')
    return ''.join(pieces)
|
241 |
_py_funcs['asciiBase85Encode'] = asciiBase85Encode |
|
242 |
||
243 |
if 'asciiBase85Decode' in _py_funcs: |
|
244 |
def asciiBase85Decode(input):
    """Decodes input using ASCII-Base85 coding.

    This is not normally used - Acrobat Reader decodes for you
    - but a round trip is essential for testing.

    Whitespace is ignored and the stream must end with '~>'. The
    decoded data is passed through asBytes(...,enc='latin1')."""
    #strip all whitespace
    text = ''.join(asNative(input).split())
    #check end
    assert text[-2:] == '~>', 'Invalid terminator for Ascii Base 85 Stream'
    #chop off terminator, then expand any 'z' into the five characters it abbreviates
    text = text[:-2].replace('z','!!!!!')
    # special rules apply if not a multiple of five bytes.
    nGroups, nLeft = divmod(len(text), 5)
    split = 5*nGroups
    head, tail = text[:split], text[split:]

    pieces = []
    emit = pieces.append
    for pos in range(0, split, 5):
        c1, c2, c3, c4, c5 = [ord(ch)-33 for ch in head[pos:pos+5]]
        num = (((c1*85+c2)*85+c3)*85+c4)*85+c5

        rest, b4 = divmod(num,256)
        rest, b3 = divmod(rest,256)
        b1, b2 = divmod(rest,256)

        assert num == 16777216 * b1 + 65536 * b2 + 256 * b3 + b4, 'dodgy code!'
        emit(chr(b1) + chr(b2) + chr(b3) + chr(b4))

    #decode however many bytes we have as usual
    if nLeft > 0:
        #pad the short group with '!' (digit value 0) to five characters
        tail = tail.ljust(5,'!')
        c1, c2, c3, c4, c5 = [ord(ch)-33 for ch in tail]
        #the encoding loses data by rounding the number to x bytes, so a
        #correction chosen by the number of characters present is added
        num = (((85*c1+c2)*85+c3)*85+c4)*85 + (c5
                    +(0,0,0xFFFFFF,0xFFFF,0xFF)[nLeft])
        rest, b4 = divmod(num,256)
        rest, b3 = divmod(rest,256)
        b1, b2 = divmod(rest,256)
        assert num == 16777216 * b1 + 65536 * b2 + 256 * b3 + b4, 'dodgy code!'

        #n characters carry n-1 bytes; a lone character carries none
        emit(''.join([chr(b) for b in (b1, b2, b3)[:nLeft-1]]))

    return asBytes(''.join(pieces),enc='latin1')
3779 | 318 |
_py_funcs['asciiBase85Decode'] = asciiBase85Decode |
319 |
||
320 |
if 'sameFrag' in _py_funcs: |
|
321 |
def sameFrag(f,g):
    '''returns 1 if two ParaFrags map out the same

    A frag having a cbDefn or lineBreak attribute never matches; otherwise
    the listed attributes (missing ones count as None) must all be equal.'''
    if any(hasattr(frag,name) for name in ('cbDefn','lineBreak') for frag in (f,g)):
        return 0
    compared = ('fontName', 'fontSize', 'textColor', 'rise', 'us_lines', 'link', "backColor", "nobr")
    if any(getattr(f,name,None)!=getattr(g,name,None) for name in compared):
        return 0
    return 1
|
328 |
_py_funcs['sameFrag'] = sameFrag |
|
329 |
||
330 |
#publish one implementation per name in this module's namespace; an entry in
#_c_funcs wins over the one in _py_funcs
exports = globals()
for fn in __all__:
    impl = _py_funcs[fn] if fn not in _c_funcs else _c_funcs[fn]
    if not impl:
        raise RuntimeError('function %s is not properly defined' % fn)
    exports[fn] = impl
del fn, impl, exports
|
3725 | 337 |
|
338 |
if __name__=='__main__':
    #time the same statements against this module and against the C extension
    import sys, os, subprocess
    commands = (
        #"unicode2T1('abcde fghi . jkl ; mno',fonts)",
        #"unicode2T1(u'abcde fghi . jkl ; mno',fonts)",
        "instanceStringWidthT1(font,'abcde fghi . jkl ; mno',10)",
        "instanceStringWidthT1(font,u'abcde fghi . jkl ; mno',10)",
        )
    for modname in ('reportlab.lib.rl_accel','reportlab.lib._rl_accel'):
        #timeit setup code; only the module imported from changes
        setup = ';'.join([
            "from reportlab.pdfbase.pdfmetrics import getFont",
            "from %s import unicode2T1,instanceStringWidthT1" % modname,
            "fonts=[getFont('Helvetica')]+getFont('Helvetica').substitutionFonts",
            "font=fonts[0]",
            ])
        for cmd in commands:
            print('%s %s' % (modname,cmd))
            subprocess.check_call([sys.executable,'-mtimeit','-s',setup,cmd])