4 __version__=''' $Id$ ''' |
4 __version__=''' $Id$ ''' |
5 __doc__='' |
5 __doc__='' |
6 # pdfutils.py - everything to do with images, streams, |
6 # pdfutils.py - everything to do with images, streams, |
7 # compression, and some constants |
7 # compression, and some constants |
8 |
8 |
|
9 import sys |
9 import os |
10 import os |
|
11 import binascii |
10 from reportlab import rl_config |
12 from reportlab import rl_config |
11 from reportlab.lib.utils import getStringIO, ImageReader |
13 from reportlab.lib.utils import getBytesIO, ImageReader, isStrType, isUnicodeType, isPython3 |
12 |
14 |
13 LINEEND = '\015\012' |
15 LINEEND = '\015\012' |
14 |
16 |
15 def _chunker(src,dst=[],chunkSize=60): |
17 def _chunker(src,dst=[],chunkSize=60): |
16 for i in range(0,len(src),chunkSize): |
18 for i in range(0,len(src),chunkSize): |
78 |
80 |
79 cachedname = os.path.splitext(filename)[0] + (rl_config.useA85 and '.a85' or '.bin') |
81 cachedname = os.path.splitext(filename)[0] + (rl_config.useA85 and '.a85' or '.bin') |
80 if filename==cachedname: |
82 if filename==cachedname: |
81 if cachedImageExists(filename): |
83 if cachedImageExists(filename): |
82 from reportlab.lib.utils import open_for_read |
84 from reportlab.lib.utils import open_for_read |
83 if returnInMemory: return [_f for _f in open_for_read(cachedname).read().split(LINEEND) if _f] |
85 if returnInMemory: return filter(None,open_for_read(cachedname).read().split(LINEEND)) |
84 else: |
86 else: |
85 raise IOError('No such cached image %s' % filename) |
87 raise IOError('No such cached image %s' % filename) |
86 else: |
88 else: |
87 if rl_config.useA85: |
89 if rl_config.useA85: |
88 code = makeA85Image(filename,IMG) |
90 code = makeA85Image(filename,IMG) |
106 to save huge amounts of time when repeatedly building image |
108 to save huge amounts of time when repeatedly building image |
107 documents.""" |
109 documents.""" |
108 |
110 |
109 import types, glob |
111 import types, glob |
110 |
112 |
111 if type(spec) is bytes: |
113 if type(spec) is types.StringType: |
112 filelist = glob.glob(spec) |
114 filelist = glob.glob(spec) |
113 else: #list or tuple OK |
115 else: #list or tuple OK |
114 filelist = spec |
116 filelist = spec |
115 |
117 |
116 for filename in filelist: |
118 for filename in filelist: |
152 try: |
154 try: |
153 from reportlab.lib._rl_accel import escapePDF, _instanceEscapePDF |
155 from reportlab.lib._rl_accel import escapePDF, _instanceEscapePDF |
154 _escape = escapePDF |
156 _escape = escapePDF |
155 except ImportError: |
157 except ImportError: |
156 _instanceEscapePDF=None |
158 _instanceEscapePDF=None |
157 if rl_config.sys_version>='2.1': |
159 _ESCAPEDICT={} |
158 _ESCAPEDICT={} |
160 for c in range(256): |
159 for c in range(0,256): |
161 if c<32 or c>=127: |
160 if c<32 or c>=127: |
162 _ESCAPEDICT[c]= '\\%03o' % c |
161 _ESCAPEDICT[chr(c)]= '\\%03o' % c |
163 elif c in (ord('\\'),ord('('),ord(')')): |
162 elif c in (ord('\\'),ord('('),ord(')')): |
164 _ESCAPEDICT[c] = '\\'+chr(c) |
163 _ESCAPEDICT[chr(c)] = '\\'+chr(c) |
165 else: |
164 else: |
166 _ESCAPEDICT[c] = chr(c) |
165 _ESCAPEDICT[chr(c)] = chr(c) |
167 del c |
166 del c |
168 #Michael Hudson donated this |
167 #Michael Hudson donated this |
169 def _escape(s): |
168 def _escape(s): |
170 r = [] |
169 return ''.join(map(lambda c, d=_ESCAPEDICT: d[c],s)) |
171 for c in s: |
170 else: |
172 if not type(c) is int: |
171 def _escape(s): |
173 c = ord(c) |
172 """Escapes some PDF symbols (in fact, parenthesis). |
174 r.append(_ESCAPEDICT[c]) |
173 PDF escapes are almost like Python ones, but brackets |
175 return ''.join(r) |
174 need slashes before them too. Uses Python's repr function |
|
175 and chops off the quotes first.""" |
|
176 return repr(s)[1:-1].replace('(','\(').replace(')','\)') |
|
177 |
176 |
178 def _normalizeLineEnds(text,desired=LINEEND,unlikely='\x00\x01\x02\x03'): |
177 def _normalizeLineEnds(text,desired=LINEEND,unlikely='\x00\x01\x02\x03'): |
179 """Normalizes different line end character(s). |
178 """Normalizes different line end character(s). |
180 |
179 |
181 Ensures all instances of CR, LF and CRLF end up as |
180 Ensures all instances of CR, LF and CRLF end up as |
191 """Encodes input using ASCII-Hex coding. |
190 """Encodes input using ASCII-Hex coding. |
192 |
191 |
193 This is a verbose encoding used for binary data within |
192 This is a verbose encoding used for binary data within |
194 a PDF file. One byte binary becomes two bytes of ASCII. |
193 a PDF file. One byte binary becomes two bytes of ASCII. |
195 Helper function used by images.""" |
194 Helper function used by images.""" |
196 output = getStringIO() |
195 if isUnicodeType(input): |
197 for char in input: |
196 input = input.encode('utf-8') |
198 output.write('%02x' % ord(char)) |
197 output = getBytesIO() |
199 output.write('>') |
198 output.write(binascii.b2a_hex(input)) |
|
199 output.write(b'>') |
200 return output.getvalue() |
200 return output.getvalue() |
201 |
201 |
202 |
202 |
203 def _AsciiHexDecode(input): |
203 def _AsciiHexDecode(input): |
204 """Decodes input using ASCII-Hex coding. |
204 """Decodes input using ASCII-Hex coding. |
205 |
205 |
206 Not used except to provide a test of the inverse function.""" |
206 Not used except to provide a test of the inverse function.""" |
207 |
207 |
208 #strip out all whitespace |
208 #strip out all whitespace |
|
209 if not isStrType(input): |
|
210 input = input.decode('utf-8') |
209 stripped = ''.join(input.split()) |
211 stripped = ''.join(input.split()) |
210 assert stripped[-1] == '>', 'Invalid terminator for Ascii Hex Stream' |
212 assert stripped[-1] == '>', 'Invalid terminator for Ascii Hex Stream' |
211 stripped = stripped[:-1] #chop off terminator |
213 stripped = stripped[:-1] #chop off terminator |
212 assert len(stripped) % 2 == 0, 'Ascii Hex stream has odd number of bytes' |
214 assert len(stripped) % 2 == 0, 'Ascii Hex stream has odd number of bytes' |
213 |
215 |
222 ASCII. This is the default method used for encoding images.""" |
224 ASCII. This is the default method used for encoding images.""" |
223 # special rules apply if not a multiple of four bytes. |
225 # special rules apply if not a multiple of four bytes. |
224 whole_word_count, remainder_size = divmod(len(input), 4) |
226 whole_word_count, remainder_size = divmod(len(input), 4) |
225 cut = 4 * whole_word_count |
227 cut = 4 * whole_word_count |
226 body, lastbit = input[0:cut], input[cut:] |
228 body, lastbit = input[0:cut], input[cut:] |
|
229 if isPython3 and isStrType(lastbit): |
|
230 lastbit = lastbit.encode('utf-8') |
227 |
231 |
228 out = [].append |
232 out = [].append |
229 for i in range(whole_word_count): |
233 for i in range(whole_word_count): |
230 offset = i*4 |
234 offset = i*4 |
231 b1 = ord(body[offset]) |
235 b1 = body[offset] |
232 b2 = ord(body[offset+1]) |
236 b2 = body[offset+1] |
233 b3 = ord(body[offset+2]) |
237 b3 = body[offset+2] |
234 b4 = ord(body[offset+3]) |
238 b4 = body[offset+3] |
|
239 if isStrType(b1): b1 = ord(b1) |
|
240 if isStrType(b2): b2 = ord(b2) |
|
241 if isStrType(b3): b3 = ord(b3) |
|
242 if isStrType(b4): b4 = ord(b4) |
235 |
243 |
236 if b1<128: |
244 if b1<128: |
237 num = (((((b1<<8)|b2)<<8)|b3)<<8)|b4 |
245 num = (((((b1<<8)|b2)<<8)|b3)<<8)|b4 |
238 else: |
246 else: |
239 num = 16777216 * b1 + 65536 * b2 + 256 * b3 + b4 |
247 num = 16777216 * b1 + 65536 * b2 + 256 * b3 + b4 |
259 # happens only once at the end. |
267 # happens only once at the end. |
260 |
268 |
261 #encode however many bytes we have as usual |
269 #encode however many bytes we have as usual |
262 if remainder_size > 0: |
270 if remainder_size > 0: |
263 while len(lastbit) < 4: |
271 while len(lastbit) < 4: |
264 lastbit = lastbit + '\000' |
272 lastbit = lastbit + b'\000' |
265 b1 = ord(lastbit[0]) |
273 b1 = lastbit[0] |
266 b2 = ord(lastbit[1]) |
274 b2 = lastbit[1] |
267 b3 = ord(lastbit[2]) |
275 b3 = lastbit[2] |
268 b4 = ord(lastbit[3]) |
276 b4 = lastbit[3] |
|
277 if isStrType(b1): b1 = ord(b1) |
|
278 if isStrType(b2): b2 = ord(b2) |
|
279 if isStrType(b3): b3 = ord(b3) |
|
280 if isStrType(b4): b4 = ord(b4) |
269 |
281 |
270 num = 16777216 * b1 + 65536 * b2 + 256 * b3 + b4 |
282 num = 16777216 * b1 + 65536 * b2 + 256 * b3 + b4 |
271 |
283 |
272 #solve for c1..c5 |
284 #solve for c1..c5 |
273 temp, c5 = divmod(num, 85) |
285 temp, c5 = divmod(num, 85) |