author | robin |
Fri, 07 Feb 2014 16:51:11 +0000 | |
branch | py33 |
changeset 4026 | 6c8ac18e0c9c |
parent 4002 | 4bf4b598196e |
child 4071 | 8a945e72d376 |
permissions | -rw-r--r-- |
3779 | 1 |
# Interface module: publishes every accelerated function, taking it from the
# C extension _rl_accel when that can be imported.
_c_funcs = {}    # name -> implementation imported from the C extension
_py_funcs = {}   # name -> None placeholder; marks names needing a Python version

### NOTE! FP_STR SHOULD PROBABLY ALWAYS DO A PYTHON STR() CONVERSION ON ARGS
### IN CASE THEY ARE "LAZY OBJECTS". ACCELERATOR DOESN'T DO THIS (YET)
__all__ = [
    'fp_str',
    'unicode2T1',
    'instanceStringWidthT1',
    'instanceStringWidthTTF',
    'asciiBase85Encode',
    'asciiBase85Decode',
    'escapePDF',
    'sameFrag',
    'calcChecksum',
    'add32',
    'hex32',
    ]

# a truthy _rl_testing attribute on the main script requests that the Python
# versions are registered even when the C versions import successfully
import __main__
testing = getattr(__main__, '_rl_testing', False)
del __main__

for fn in __all__:
    try:
        # the names come from the fixed list above, never from outside input
        exec('from reportlab.lib._rl_accel import %s as f' % fn)
    except ImportError:
        _py_funcs[fn] = None
    else:
        _c_funcs[fn] = f
        if testing:
            _py_funcs[fn] = None
|
30 |
||
31 |
if _py_funcs: |
|
4002
4bf4b598196e
_rl_accel.c & rl_accel.py: fix aascii85 encode decode to do the 'right' thing
robin
parents:
3879
diff
changeset
|
32 |
from reportlab.lib.utils import isBytes, isUnicode, isSeq, isPy3, rawBytes, asNative |
3779 | 33 |
from math import log |
3862 | 34 |
from struct import unpack |
3779 | 35 |
|
36 |
if 'fp_str' in _py_funcs:
    # base-10 logarithm; log(10.0) is evaluated once and bound as a default
    _log_10 = lambda x,log=log,_log_e_10=log(10.0): log(x)/_log_e_10
    # _fp_fmts[k] formats a number with exactly k decimal places
    _fp_fmts = "%.0f", "%.1f", "%.2f", "%.3f", "%.4f", "%.5f", "%.6f"

    def fp_str(*a):
        '''convert separate arguments (or single sequence arg) into space separated numeric strings

        Values whose magnitude is at most 1e-7 are written as '0'.  Other
        values get 6 decimal places when their magnitude is at most 1 and
        progressively fewer (down to none) as the magnitude grows.  Trailing
        zeros, a trailing decimal point and a leading '0' are then removed,
        so 0.25 is written as '.25'.
        '''
        if len(a)==1 and isSeq(a[0]):
            a = a[0]
        s = []
        A = s.append
        for i in a:
            sa = abs(i)
            if sa<=1e-7:
                A('0')
                continue
            # number of decimal places to request for this magnitude
            l = 6 if sa<=1 else min(max(0,(6-int(_log_10(sa)))),6)
            n = _fp_fmts[l]%i
            if l:
                # drop trailing zeros and, if nothing follows it, the point
                n = n.rstrip('0')
                if n.endswith('.'):
                    n = n[:-1]
            # drop a leading zero unless it is the whole number
            A(n if (n[0]!='0' or len(n)==1) else n[1:])
        return ' '.join(s)

    #hack test for comma users
    if ',' in fp_str(0.25):
        # formatting produced a decimal comma: wrap the function so callers
        # (and the registry entry below) always see a decimal point
        _FP_STR = fp_str
        def fp_str(*a):
            '''fp_str variant that replaces every ',' in the result with '.' '''
            return _FP_STR(*a).replace(',','.')
    _py_funcs['fp_str'] = fp_str
|
67 |
||
68 |
if 'unicode2T1' in _py_funcs:
    def unicode2T1(utext,fonts):
        '''return a list of (font,string) pairs representing the unicode text

        The first font in fonts is tried first.  Any span it cannot encode is
        passed to the remaining fonts, or, when none remain, replaced by the
        font's _notdefChar repeated once per character and paired with its
        _notdefFont.
        '''
        font = fonts[0]
        fallbacks = fonts[1:]
        encName = font.encName
        enc = 'UTF16' if 'UCS-2' in encName else encName
        pairs = []
        while utext:
            try:
                # text that is not unicode is passed through unchanged
                encoded = utext.encode(enc) if isUnicode(utext) else utext
            except UnicodeEncodeError as err:
                # args[2:4] are the start and end of the unencodable span
                start, end = err.args[2:4]
                if start:
                    pairs.append((font,utext[:start].encode(enc)))
                if fallbacks:
                    pairs.extend(unicode2T1(utext[start:end],fallbacks))
                else:
                    pairs.append((font._notdefFont,font._notdefChar*(end-start)))
                utext = utext[end:]
            else:
                pairs.append((font,encoded))
                break
        return pairs
    _py_funcs['unicode2T1'] = unicode2T1
|
95 |
||
96 |
if 'instanceStringWidthT1' in _py_funcs:
    if isPy3:
        def instanceStringWidthT1(self, text, size, encoding='utf8'):
            """This is the "purist" approach to width

            text that is not unicode is decoded with encoding, split into
            per-font runs by unicode2T1 (this font, then its
            substitutionFonts) and the widths of each run are looked up in
            that font's widths table.  The total is scaled by 0.001*size.
            """
            if not isUnicode(text):
                text = text.decode(encoding)
            total = 0
            for f, run in unicode2T1(text,[self]+self.substitutionFonts):
                lookup = f.widths.__getitem__
                # each item of run is used directly as the table index
                total += sum(lookup(code) for code in run)
            return total*0.001*size
    else:
        def instanceStringWidthT1(self, text, size, encoding='utf8'):
            """This is the "purist" approach to width

            Same as the isPy3 variant, except that each item of a run is
            converted with ord() before being used as the table index.
            """
            if not isUnicode(text):
                text = text.decode(encoding)
            total = 0
            for f, run in unicode2T1(text,[self]+self.substitutionFonts):
                lookup = f.widths.__getitem__
                total += sum(lookup(ord(ch)) for ch in run)
            return total*0.001*size
    _py_funcs['instanceStringWidthT1'] = instanceStringWidthT1
3725 | 108 |
|
3779 | 109 |
if 'instanceStringWidthTTF' in _py_funcs:
    def instanceStringWidthTTF(self, text, size, encoding='utf-8'):
        """Calculate text width

        text that is not unicode is decoded first ('utf-8' when encoding is
        empty or None).  Each character's width comes from
        self.face.charWidths, keyed by code point, falling back to
        self.face.defaultWidth.  The sum is scaled by 0.001*size.
        """
        if not isUnicode(text):
            text = text.decode(encoding or 'utf-8')
        widthOf = self.face.charWidths.get
        fallback = self.face.defaultWidth
        total = sum([widthOf(ord(ch),fallback) for ch in text])
        return 0.001*size*total
    _py_funcs['instanceStringWidthTTF'] = instanceStringWidthTTF
|
118 |
||
119 |
if 'hex32' in _py_funcs:
    def hex32(i):
        """Return the low 32 bits of int(i) as '0X' plus 8 upper case hex digits"""
        low32 = int(i) & 0xFFFFFFFF
        return '0X%8.8X' % low32
    _py_funcs['hex32'] = hex32
|
123 |
||
124 |
if 'add32' in _py_funcs:
    def add32(x, y):
        "Calculate (x + y) modulo 2**32"
        total = x + y
        # keep only the low 32 bits of the sum
        return total & 0xFFFFFFFF
    _py_funcs['add32'] = add32
|
129 |
||
130 |
if 'calcChecksum' in _py_funcs:
    def calcChecksum(data):
        """Calculates TTF-style checksums

        data is converted with rawBytes, padded with zero bytes to a multiple
        of four, read as big-endian 32 bit words and summed; the low 32 bits
        of the sum are returned.
        """
        raw = rawBytes(data)
        excess = len(raw) & 3
        if excess:
            raw = raw + (4-excess)*b"\0"
        nWords = len(raw) >> 2
        words = unpack(">%dl" % nWords, raw)
        return sum(words) & 0xFFFFFFFF
    _py_funcs['calcChecksum'] = calcChecksum
|
137 |
||
138 |
if 'escapePDF' in _py_funcs:
    # byte value -> escaped text: codes 32..126 map to themselves (with a
    # backslash in front of backslash and parentheses), all other codes to a
    # backslash followed by three octal digits
    _ESCAPEDICT = {}
    for c in range(256):
        if 32 <= c < 127:
            ch = chr(c)
            _ESCAPEDICT[c] = '\\'+ch if ch in ('\\','(',')') else ch
        else:
            _ESCAPEDICT[c] = '\\%03o' % c
    del c, ch

    #Michael Hudson donated this
    def escapePDF(s):
        """Return s with every item replaced by its _ESCAPEDICT entry

        Items that are not of type int are converted with ord() first.
        """
        table = _ESCAPEDICT
        return ''.join([table[c if type(c) is int else ord(c)] for c in s])
    _py_funcs['escapePDF'] = escapePDF
|
157 |
||
158 |
if 'asciiBase85Encode' in _py_funcs:
    def asciiBase85Encode(input):
        """Encodes input using ASCII-Base85 coding.

        This is a compact encoding used for binary data within
        a PDF file.  Four bytes of binary data become five bytes of
        ASCII.  This is the default method used for encoding images.

        A group of four zero bytes is written as the single character 'z'
        and the result always ends with the '~>' terminator.
        """
        # items need ord() unless they are already ints (bytes under Python 3)
        needsOrd = not isPy3 or isUnicode(input)
        nWords, nTail = divmod(len(input), 4)
        split = 4 * nWords

        pieces = []
        emit = pieces.append

        # the complete four byte groups
        for start in range(0, split, 4):
            b1, b2, b3, b4 = input[start:start+4]
            if needsOrd:
                b1, b2, b3, b4 = ord(b1), ord(b2), ord(b3), ord(b4)
            num = ((b1*256 + b2)*256 + b3)*256 + b4
            if num == 0:
                #special case
                emit('z')
                continue
            #solve for five base-85 numbers
            rest, c5 = divmod(num, 85)
            rest, c4 = divmod(rest, 85)
            rest, c3 = divmod(rest, 85)
            c1, c2 = divmod(rest, 85)
            emit(chr(c1+33) + chr(c2+33) + chr(c3+33) + chr(c4+33) + chr(c5+33))

        # special rules apply if not a multiple of four bytes: the leftover
        # bytes are zero padded to a full group, encoded as usual (no 'z'
        # shortcut) and only the first nTail+1 characters are written
        if nTail > 0:
            tail = input[split:]
            tail += (4-len(tail))*('\0' if needsOrd else b'\000')
            b1, b2, b3, b4 = tail
            if needsOrd:
                b1, b2, b3, b4 = ord(b1), ord(b2), ord(b3), ord(b4)
            num = 16777216 * b1 + 65536 * b2 + 256 * b3 + b4
            rest, c5 = divmod(num, 85)
            rest, c4 = divmod(rest, 85)
            rest, c3 = divmod(rest, 85)
            c1, c2 = divmod(rest, 85)
            lastword = ''.join(map(chr, (c1+33, c2+33, c3+33, c4+33, c5+33)))
            emit(lastword[:nTail+1])

        #terminator code for ascii 85
        emit('~>')
        return ''.join(pieces)
    _py_funcs['asciiBase85Encode'] = asciiBase85Encode
|
240 |
||
241 |
if 'asciiBase85Decode' in _py_funcs:
    def asciiBase85Decode(input):
        """Decodes input using ASCII-Base85 coding.

        This is not normally used - Acrobat Reader decodes for you
        - but a round trip is essential for testing.

        Whitespace is ignored and the data must end with '~>'.  The decoded
        text is returned latin1 encoded when input was unicode, otherwise as
        the joined native string.
        """
        #strip all whitespace
        text = ''.join(asNative(input).split())
        #check end
        assert text[-2:] == '~>', 'Invalid terminator for Ascii Base 85 Stream'
        #chop off terminator, then expand every 'z' to the group it stands for
        text = text[:-2].replace('z','!!!!!')

        nGroups, nTail = divmod(len(text), 5)
        split = 5 * nGroups
        chunks = []
        emit = chunks.append

        # the complete five character groups
        for start in range(0, split, 5):
            num = 0
            for ch in text[start:start+5]:
                num = num*85 + (ord(ch) - 33)
            rest, b4 = divmod(num,256)
            rest, b3 = divmod(rest,256)
            b1, b2 = divmod(rest,256)
            emit(chr(b1) + chr(b2) + chr(b3) + chr(b4))

        # special rules apply if not a multiple of five characters
        if nTail > 0:
            tail = text[split:].ljust(5,'!')
            c1, c2, c3, c4, c5 = [ord(ch) - 33 for ch in tail]
            # the encoder dropped the low characters of this group, so the
            # padded value comes out low; the table adds back the largest
            # amount the discarded bytes can hold before splitting into bytes
            num = (((85*c1+c2)*85+c3)*85+c4)*85 + (c5
                        +(0,0,0xFFFFFF,0xFFFF,0xFF)[nTail])
            rest, b4 = divmod(num,256)
            rest, b3 = divmod(rest,256)
            b1, b2 = divmod(rest,256)
            # nTail characters carry nTail-1 bytes (none for a lone character)
            emit(''.join(chr(b) for b in (b1, b2, b3)[:nTail-1]))

        r = ''.join(chunks)
        return r.encode('latin1') if isUnicode(input) else r
    _py_funcs['asciiBase85Decode'] = asciiBase85Decode
317 |
||
318 |
if 'sameFrag' in _py_funcs:
    def sameFrag(f,g):
        '''returns 1 if two ParaFrags map out the same

        Returns 0 as soon as either fragment has a cbDefn or lineBreak
        attribute, or when any of the compared attributes differs (a missing
        attribute compares as None).
        '''
        for marker in ('cbDefn', 'lineBreak'):
            for frag in (f, g):
                if hasattr(frag,marker):
                    return 0
        for name in ('fontName', 'fontSize', 'textColor', 'rise', 'underline', 'strike', 'link', "backColor"):
            if getattr(f,name,None)!=getattr(g,name,None):
                return 0
        return 1
    _py_funcs['sameFrag'] = sameFrag
|
327 |
||
328 |
# publish every name in __all__ at module level, preferring the C version
G = globals()
for fn in __all__:
    try:
        f = _c_funcs[fn]
    except KeyError:
        f = _py_funcs[fn]
    if not f:
        # an entry exists but nothing usable was ever stored under it
        raise RuntimeError('function %s is not properly defined' % fn)
    G[fn] = f
del fn, f, G
|
3725 | 335 |
|
336 |
if __name__=='__main__':
    # Timing comparison: runs timeit in a child interpreter for the same call
    # imported first from this module and then from the C extension.
    import subprocess
    import sys
    for modname in 'reportlab.lib.rl_accel','reportlab.lib._rl_accel':
        for cmd in (
            #"unicode2T1('abcde fghi . jkl ; mno',fonts)",
            #"unicode2T1(u'abcde fghi . jkl ; mno',fonts)",
            # instanceStringWidthT1 is the name listed in __all__; this module
            # defines no _instanceStringWidthU
            "instanceStringWidthT1(font,'abcde fghi . jkl ; mno',10)",
            "instanceStringWidthT1(font,u'abcde fghi . jkl ; mno',10)",
            ):
            print('%s %s' % (modname,cmd))
            s=';'.join((
                "from reportlab.pdfbase.pdfmetrics import getFont",
                "from %s import unicode2T1,instanceStringWidthT1" % modname,
                "fonts=[getFont('Helvetica')]+getFont('Helvetica').substitutionFonts",
                "font=fonts[0]",
                ))
            # an argument list avoids shell quoting of the embedded quotes;
            # the return code is ignored so one failure does not stop the rest
            subprocess.call([sys.executable,'-m','timeit','-s',s,cmd])