src/reportlab/pdfbase/pdfutils.py
branch py33
changeset 3723 99aa837b6703
parent 3721 0c93dd8ff567
child 3731 b233dd0577ff
equal deleted inserted replaced
3722:29c11b905751 3723:99aa837b6703
     4 __version__=''' $Id$ '''
     4 __version__=''' $Id$ '''
     5 __doc__=''
     5 __doc__=''
     6 # pdfutils.py - everything to do with images, streams,
     6 # pdfutils.py - everything to do with images, streams,
     7 # compression, and some constants
     7 # compression, and some constants
     8 
     8 
       
     9 import sys
     9 import os
    10 import os
       
    11 import binascii
    10 from reportlab import rl_config
    12 from reportlab import rl_config
    11 from reportlab.lib.utils import getStringIO, ImageReader
    13 from reportlab.lib.utils import getBytesIO, ImageReader, isStrType, isUnicodeType, isPython3
    12 
    14 
    13 LINEEND = '\015\012'
    15 LINEEND = '\015\012'
    14 
    16 
    15 def _chunker(src,dst=[],chunkSize=60):
    17 def _chunker(src,dst=[],chunkSize=60):
    16     for i in range(0,len(src),chunkSize):
    18     for i in range(0,len(src),chunkSize):
    78 
    80 
    79     cachedname = os.path.splitext(filename)[0] + (rl_config.useA85 and '.a85' or '.bin')
    81     cachedname = os.path.splitext(filename)[0] + (rl_config.useA85 and '.a85' or '.bin')
    80     if filename==cachedname:
    82     if filename==cachedname:
    81         if cachedImageExists(filename):
    83         if cachedImageExists(filename):
    82             from reportlab.lib.utils import open_for_read
    84             from reportlab.lib.utils import open_for_read
    83             if returnInMemory: return [_f for _f in open_for_read(cachedname).read().split(LINEEND) if _f]
    85             if returnInMemory: return filter(None,open_for_read(cachedname).read().split(LINEEND))
    84         else:
    86         else:
    85             raise IOError('No such cached image %s' % filename)
    87             raise IOError('No such cached image %s' % filename)
    86     else:
    88     else:
    87         if rl_config.useA85:
    89         if rl_config.useA85:
    88             code = makeA85Image(filename,IMG)
    90             code = makeA85Image(filename,IMG)
   106     to save huge amounts of time when repeatedly building image
   108     to save huge amounts of time when repeatedly building image
   107     documents."""
   109     documents."""
   108 
   110 
   109     import types, glob
   111     import types, glob
   110 
   112 
   111     if type(spec) is bytes:
   113     if type(spec) is types.StringType:
   112         filelist = glob.glob(spec)
   114         filelist = glob.glob(spec)
   113     else:  #list or tuple OK
   115     else:  #list or tuple OK
   114         filelist = spec
   116         filelist = spec
   115 
   117 
   116     for filename in filelist:
   118     for filename in filelist:
   152     try:
   154     try:
   153         from reportlab.lib._rl_accel import escapePDF, _instanceEscapePDF
   155         from reportlab.lib._rl_accel import escapePDF, _instanceEscapePDF
   154         _escape = escapePDF
   156         _escape = escapePDF
   155     except ImportError:
   157     except ImportError:
   156         _instanceEscapePDF=None
   158         _instanceEscapePDF=None
   157         if rl_config.sys_version>='2.1':
   159         _ESCAPEDICT={}
   158             _ESCAPEDICT={}
   160         for c in range(256):
   159             for c in range(0,256):
   161             if c<32 or c>=127:
   160                 if c<32 or c>=127:
   162                 _ESCAPEDICT[c]= '\\%03o' % c
   161                     _ESCAPEDICT[chr(c)]= '\\%03o' % c
   163             elif c in (ord('\\'),ord('('),ord(')')):
   162                 elif c in (ord('\\'),ord('('),ord(')')):
   164                 _ESCAPEDICT[c] = '\\'+chr(c)
   163                     _ESCAPEDICT[chr(c)] = '\\'+chr(c)
   165             else:
   164                 else:
   166                 _ESCAPEDICT[c] = chr(c)
   165                     _ESCAPEDICT[chr(c)] = chr(c)
   167         del c
   166             del c
   168         #Michael Hudson donated this
   167             #Michael Hudson donated this
   169         def _escape(s):
   168             def _escape(s):
   170             r = []
   169                 return ''.join(map(lambda c, d=_ESCAPEDICT: d[c],s))
   171             for c in s:
   170         else:
   172                 if not type(c) is int:
   171             def _escape(s):
   173                     c = ord(c)
   172                 """Escapes some PDF symbols (in fact, parenthesis).
   174                 r.append(_ESCAPEDICT[c])
   173                 PDF escapes are almost like Python ones, but brackets
   175             return ''.join(r)
   174                 need slashes before them too. Uses Python's repr function
       
   175                 and chops off the quotes first."""
       
   176                 return repr(s)[1:-1].replace('(','\(').replace(')','\)')
       
   177 
   176 
   178 def _normalizeLineEnds(text,desired=LINEEND,unlikely='\x00\x01\x02\x03'):
   177 def _normalizeLineEnds(text,desired=LINEEND,unlikely='\x00\x01\x02\x03'):
   179     """Normalizes different line end character(s).
   178     """Normalizes different line end character(s).
   180 
   179 
   181     Ensures all instances of CR, LF and CRLF end up as
   180     Ensures all instances of CR, LF and CRLF end up as
   191     """Encodes input using ASCII-Hex coding.
   190     """Encodes input using ASCII-Hex coding.
   192 
   191 
   193     This is a verbose encoding used for binary data within
   192     This is a verbose encoding used for binary data within
   194     a PDF file.  One byte binary becomes two bytes of ASCII.
   193     a PDF file.  One byte binary becomes two bytes of ASCII.
   195     Helper function used by images."""
   194     Helper function used by images."""
   196     output = getStringIO()
   195     if isUnicodeType(input):
   197     for char in input:
   196         input = input.encode('utf-8')
   198         output.write('%02x' % ord(char))
   197     output = getBytesIO()
   199     output.write('>')
   198     output.write(binascii.b2a_hex(input))
       
   199     output.write(b'>')
   200     return output.getvalue()
   200     return output.getvalue()
   201 
   201 
   202 
   202 
   203 def _AsciiHexDecode(input):
   203 def _AsciiHexDecode(input):
   204     """Decodes input using ASCII-Hex coding.
   204     """Decodes input using ASCII-Hex coding.
   205 
   205 
   206     Not used except to provide a test of the inverse function."""
   206     Not used except to provide a test of the inverse function."""
   207 
   207 
   208     #strip out all whitespace
   208     #strip out all whitespace
       
   209     if not isStrType(input):
       
   210         input = input.decode('utf-8')
   209     stripped = ''.join(input.split())
   211     stripped = ''.join(input.split())
   210     assert stripped[-1] == '>', 'Invalid terminator for Ascii Hex Stream'
   212     assert stripped[-1] == '>', 'Invalid terminator for Ascii Hex Stream'
   211     stripped = stripped[:-1]  #chop off terminator
   213     stripped = stripped[:-1]  #chop off terminator
   212     assert len(stripped) % 2 == 0, 'Ascii Hex stream has odd number of bytes'
   214     assert len(stripped) % 2 == 0, 'Ascii Hex stream has odd number of bytes'
   213 
   215 
   222         ASCII.  This is the default method used for encoding images."""
   224         ASCII.  This is the default method used for encoding images."""
   223         # special rules apply if not a multiple of four bytes.
   225         # special rules apply if not a multiple of four bytes.
   224         whole_word_count, remainder_size = divmod(len(input), 4)
   226         whole_word_count, remainder_size = divmod(len(input), 4)
   225         cut = 4 * whole_word_count
   227         cut = 4 * whole_word_count
   226         body, lastbit = input[0:cut], input[cut:]
   228         body, lastbit = input[0:cut], input[cut:]
       
   229         if isPython3 and isStrType(lastbit):
       
   230             lastbit = lastbit.encode('utf-8')
   227 
   231 
   228         out = [].append
   232         out = [].append
   229         for i in range(whole_word_count):
   233         for i in range(whole_word_count):
   230             offset = i*4
   234             offset = i*4
   231             b1 = ord(body[offset])
   235             b1 = body[offset]
   232             b2 = ord(body[offset+1])
   236             b2 = body[offset+1]
   233             b3 = ord(body[offset+2])
   237             b3 = body[offset+2]
   234             b4 = ord(body[offset+3])
   238             b4 = body[offset+3]
       
   239             if isStrType(b1): b1 = ord(b1)
       
   240             if isStrType(b2): b2 = ord(b2)
       
   241             if isStrType(b3): b3 = ord(b3)
       
   242             if isStrType(b4): b4 = ord(b4)
   235 
   243 
   236             if b1<128:
   244             if b1<128:
   237                 num = (((((b1<<8)|b2)<<8)|b3)<<8)|b4
   245                 num = (((((b1<<8)|b2)<<8)|b3)<<8)|b4
   238             else:
   246             else:
   239                 num = 16777216 * b1 + 65536 * b2 + 256 * b3 + b4
   247                 num = 16777216 * b1 + 65536 * b2 + 256 * b3 + b4
   259         # happens only once at the end.
   267         # happens only once at the end.
   260 
   268 
   261         #encode however many bytes we have as usual
   269         #encode however many bytes we have as usual
   262         if remainder_size > 0:
   270         if remainder_size > 0:
   263             while len(lastbit) < 4:
   271             while len(lastbit) < 4:
   264                 lastbit = lastbit + '\000'
   272                 lastbit = lastbit + b'\000'
   265             b1 = ord(lastbit[0])
   273             b1 = lastbit[0]
   266             b2 = ord(lastbit[1])
   274             b2 = lastbit[1]
   267             b3 = ord(lastbit[2])
   275             b3 = lastbit[2]
   268             b4 = ord(lastbit[3])
   276             b4 = lastbit[3]
       
   277             if isStrType(b1): b1 = ord(b1)
       
   278             if isStrType(b2): b2 = ord(b2)
       
   279             if isStrType(b3): b3 = ord(b3)
       
   280             if isStrType(b4): b4 = ord(b4)
   269 
   281 
   270             num = 16777216 * b1 + 65536 * b2 + 256 * b3 + b4
   282             num = 16777216 * b1 + 65536 * b2 + 256 * b3 + b4
   271 
   283 
   272             #solve for c1..c5
   284             #solve for c1..c5
   273             temp, c5 = divmod(num, 85)
   285             temp, c5 = divmod(num, 85)