src/reportlab/pdfbase/pdfdoc.py
branchpy33
changeset 3723 99aa837b6703
parent 3721 0c93dd8ff567
child 3731 b233dd0577ff
equal deleted inserted replaced
3722:29c11b905751 3723:99aa837b6703
    16 """
    16 """
    17 import string, types, binascii, codecs
    17 import string, types, binascii, codecs
    18 from reportlab.pdfbase import pdfutils
    18 from reportlab.pdfbase import pdfutils
    19 from reportlab.pdfbase.pdfutils import LINEEND # this constant needed in both
    19 from reportlab.pdfbase.pdfutils import LINEEND # this constant needed in both
    20 from reportlab import rl_config
    20 from reportlab import rl_config
    21 from reportlab.lib.utils import import_zlib, open_for_read, fp_str, _digester, makeFileName
    21 from reportlab.lib.utils import import_zlib, open_for_read, fp_str, makeFileName, isSeqType, isBytesType, isUnicodeType, _digester
    22 from reportlab.pdfbase import pdfmetrics
    22 from reportlab.pdfbase import pdfmetrics
    23 try:
    23 from hashlib import md5
    24     from hashlib import md5
       
    25 except ImportError:
       
    26     from md5 import md5
       
    27 
    24 
    28 from sys import platform
    25 from sys import platform
    29 try:
    26 from sys import version_info
    30     from sys import version_info
       
    31 except: # pre-2.0
       
    32     # may be inaccurate but will at least
       
    33     #work in anything which seeks to format
       
    34     # version_info into a string
       
    35     version_info = (1,5,2,'unknown',0)
       
    36 
    27 
    37 if platform[:4] == 'java' and version_info[:2] == (2, 1):
    28 if platform[:4] == 'java' and version_info[:2] == (2, 1):
    38     # workaround for list()-bug in Jython 2.1 (should be fixed in 2.2)
    29     # workaround for list()-bug in Jython 2.1 (should be fixed in 2.2)
    39     def list(sequence):
    30     def list(sequence):
    40         def f(x):
    31         def f(x):
    85 PDF_VERSION_DEFAULT = (1, 3)
    76 PDF_VERSION_DEFAULT = (1, 3)
    86 PDF_SUPPORT_VERSION = dict(     #map keyword to min version that supports it
    77 PDF_SUPPORT_VERSION = dict(     #map keyword to min version that supports it
    87     transparency = (1, 4),
    78     transparency = (1, 4),
    88     )
    79     )
    89 
    80 
    90 from types import InstanceType
    81 def format(element, document, toplevel=0):
    91 def format(element, document, toplevel=0, InstanceType=InstanceType):
       
    92     """Indirection step for formatting.
    82     """Indirection step for formatting.
    93        Ensures that document parameters alter behaviour
    83        Ensures that document parameters alter behaviour
    94        of formatting for all elements.
    84        of formatting for all elements.
    95     """
    85     """
    96     if hasattr(element,'__PDFObject__'):
    86     if hasattr(element,'__PDFObject__'):
   162             self.invariant = invariant
   152             self.invariant = invariant
   163         self.setCompression(compression)
   153         self.setCompression(compression)
   164         self._pdfVersion = pdfVersion
   154         self._pdfVersion = pdfVersion
   165         # signature for creating PDF ID
   155         # signature for creating PDF ID
   166         sig = self.signature = md5()
   156         sig = self.signature = md5()
   167         sig.update("a reportlab document")
   157         sig.update(b"a reportlab document")
   168         if not self.invariant:
   158         if not self.invariant:
   169             cat = _getTimeStamp()
   159             cat = _getTimeStamp()
   170         else:
   160         else:
   171             cat = 946684800.0
   161             cat = 946684800.0
   172         sig.update(repr(cat)) # initialize with timestamp digest
   162         cat = repr(cat)
       
   163         import sys
       
   164         if sys.version_info[0] == 3:
       
   165             cat = bytes(repr(cat), 'utf-8')
       
   166         sig.update(cat) # initialize with timestamp digest
   173         # mapping of internal identifier ("Page001") to PDF objectnumber and generation number (34, 0)
   167         # mapping of internal identifier ("Page001") to PDF objectnumber and generation number (34, 0)
   174         self.idToObjectNumberAndVersion = {}
   168         self.idToObjectNumberAndVersion = {}
   175         # mapping of internal identifier ("Page001") to PDF object (PDFPage instance)
   169         # mapping of internal identifier ("Page001") to PDF object (PDFPage instance)
   176         self.idToObject = {}
   170         self.idToObject = {}
   177         # internal id to file location
   171         # internal id to file location
   231             filename = makeFileName(getattr(filename,'name',''))
   225             filename = makeFileName(getattr(filename,'name',''))
   232         else :
   226         else :
   233             myfile = 1
   227             myfile = 1
   234             filename = makeFileName(filename)
   228             filename = makeFileName(filename)
   235             f = open(filename, "wb")
   229             f = open(filename, "wb")
   236         f.write(self.GetPDFData(canvas))
   230         data = self.GetPDFData(canvas)
       
   231         if isUnicodeType(data):
       
   232             data = data.encode('utf8')
       
   233         f.write(data)
   237         if myfile:
   234         if myfile:
   238             f.close()
   235             f.close()
   239             import os
   236             import os
   240             if os.name=='mac':
   237             if os.name=='mac':
   241                 from reportlab.lib.utils import markfilename
   238                 from reportlab.lib.utils import markfilename
   242                 markfilename(filename) # do platform specific file junk
   239                 markfilename(filename) # do platform specific file junk
   243         if getattr(canvas,'_verbosity',None): print('saved', filename)
   240         if getattr(canvas,'_verbosity',None): print('saved %s' % (filename,))
   244 
   241 
   245     def GetPDFData(self, canvas):
   242     def GetPDFData(self, canvas):
   246         # realize delayed fonts
   243         # realize delayed fonts
   247         for fnt in self.delayedFonts:
   244         for fnt in self.delayedFonts:
   248             fnt.addObjects(self)
   245             fnt.addObjects(self)
   266 
   263 
   267     def inForm(self):
   264     def inForm(self):
   268         """specify that we are in a form xobject (disable page features, etc)"""
   265         """specify that we are in a form xobject (disable page features, etc)"""
   269         # don't need this check anymore since going in a form pushes old context at canvas level.
   266         # don't need this check anymore since going in a form pushes old context at canvas level.
   270         #if self.inObject not in ["form", None]:
   267         #if self.inObject not in ["form", None]:
   271         #    raise ValueError, "can't go in form already in object %s" % self.inObject
   268         #    raise ValueError("can't go in form already in object %s" % self.inObject)
   272         self.inObject = "form"
   269         self.inObject = "form"
   273         # don't need to do anything else, I think...
   270         # don't need to do anything else, I think...
   274 
   271 
   275     def getInternalFontName(self, psfontname):
   272     def getInternalFontName(self, psfontname):
   276         fm = self.fontMapping
   273         fm = self.fontMapping
   507             reference = PDFObjectReference(internalname)
   504             reference = PDFObjectReference(internalname)
   508             D[internalname] = reference
   505             D[internalname] = reference
   509         #print "xobjDict D", D
   506         #print "xobjDict D", D
   510         return PDFDictionary(D)
   507         return PDFDictionary(D)
   511 
   508 
   512     def Reference(self, object, name=None, InstanceType=InstanceType):
   509     def Reference(self, object, name=None):
   513         ### note references may "grow" during the final formatting pass: don't use d.keys()!
   510         ### note references may "grow" during the final formatting pass: don't use d.keys()!
   514         # don't make references to other references, or non instances, unless they are named!
   511         # don't make references to other references, or non instances, unless they are named!
   515         #print"object type is ", type(object)
   512         #print"object type is ", type(object)
   516         iob = hasattr(object,'__PDFObject__')
   513         iob = hasattr(object,'__PDFObject__')
   517         idToObject = self.idToObject
   514         idToObject = self.idToObject
   551 class PDFText:
   548 class PDFText:
   552     __PDFObject__ = True
   549     __PDFObject__ = True
   553     def __init__(self, t):
   550     def __init__(self, t):
   554         self.t = t
   551         self.t = t
   555     def format(self, document):
   552     def format(self, document):
   556         result = binascii.hexlify(document.encrypt.encode(self.t))
   553         t = self.t
   557         return "<%s>" % result
   554         if isUnicodeType(t):
       
   555             t = t.encode('utf-8')
       
   556         result = binascii.hexlify(document.encrypt.encode(t))
       
   557         return b"<" + result + b">"
   558     def __str__(self):
   558     def __str__(self):
   559         dummydoc = DummyDoc()
   559         dummydoc = DummyDoc()
   560         return self.format(dummydoc)
   560         return self.format(dummydoc)
   561 
   561 
   562 def PDFnumber(n):
   562 def PDFnumber(n):
   602             self.escape = escape
   602             self.escape = escape
   603             self.enc = enc
   603             self.enc = enc
   604     def format(self, document):
   604     def format(self, document):
   605         s = self.s
   605         s = self.s
   606         enc = getattr(self,'enc','auto')
   606         enc = getattr(self,'enc','auto')
   607         if type(s) is str:
   607         if (isBytesType(s)):
   608             if enc is 'auto':
   608             if enc is 'auto':
   609                 try:
   609                 try:
   610                     u = s.decode(s.startswith(codecs.BOM_UTF16_BE) and 'utf16' or 'utf8')
   610                     u = s.decode(s.startswith(codecs.BOM_UTF16_BE) and 'utf16' or 'utf8')
   611                     if _checkPdfdoc(u):
   611                     if _checkPdfdoc(u):
   612                         s = u.encode('pdfdoc')
   612                         s = u.encode('pdfdoc')
   614                         s = codecs.BOM_UTF16_BE+u.encode('utf_16_be')
   614                         s = codecs.BOM_UTF16_BE+u.encode('utf_16_be')
   615                 except:
   615                 except:
   616                     try:
   616                     try:
   617                         s.decode('pdfdoc')
   617                         s.decode('pdfdoc')
   618                     except:
   618                     except:
   619                         import sys
   619                         sys.stderr.write('Error in %s' % (repr(s),))
   620                         print('Error in',repr(s), file=sys.stderr)
       
   621                         raise
   620                         raise
   622         elif type(s) is str:
   621         elif isUnicodeType(s):
   623             if enc is 'auto':
   622             if enc is 'auto':
   624                 if _checkPdfdoc(s):
   623                 if _checkPdfdoc(s):
   625                     s = s.encode('pdfdoc')
   624                     s = s.encode('pdfdoc')
   626                 else:
   625                 else:
   627                     s = codecs.BOM_UTF16_BE+s.encode('utf_16_be')
   626                     s = codecs.BOM_UTF16_BE+s.encode('utf_16_be')
   643                 es = es.replace('\\012','\n')
   642                 es = es.replace('\\012','\n')
   644             if escape&4 and _isbalanced(s):
   643             if escape&4 and _isbalanced(s):
   645                 es = es.replace('\\(','(').replace('\\)',')')
   644                 es = es.replace('\\(','(').replace('\\)',')')
   646             return es
   645             return es
   647         else:
   646         else:
   648             return '(%s)' % s
   647             return b'(' + s + b')'
   649     def __str__(self):
   648     def __str__(self):
   650         return "(%s)" % pdfutils._escape(self.s)
   649         return "(%s)" % pdfutils._escape(self.s)
   651 
   650 
   652 def PDFName(data,lo=chr(0x21),hi=chr(0x7e)):
   651 def PDFName(data,lo=chr(0x21),hi=chr(0x7e)):
   653     # might need to change this to class for encryption
   652     # might need to change this to class for encryption
   754     pdfname = "FlateDecode"
   753     pdfname = "FlateDecode"
   755     def encode(self, text):
   754     def encode(self, text):
   756         from reportlab.lib.utils import import_zlib
   755         from reportlab.lib.utils import import_zlib
   757         zlib = import_zlib()
   756         zlib = import_zlib()
   758         if not zlib: raise ImportError("cannot z-compress zlib unavailable")
   757         if not zlib: raise ImportError("cannot z-compress zlib unavailable")
       
   758         if isUnicodeType(text):
       
   759             text = text.encode('utf8')
   759         return zlib.compress(text)
   760         return zlib.compress(text)
   760     def decode(self, encoded):
   761     def decode(self, encoded):
   761         from reportlab.lib.utils import import_zlib
   762         from reportlab.lib.utils import import_zlib
   762         zlib = import_zlib()
   763         zlib = import_zlib()
   763         if not zlib: raise ImportError("cannot z-decompress zlib unavailable")
   764         if not zlib: raise ImportError("cannot z-decompress zlib unavailable")
   839 
   840 
   840 def teststream(content=None):
   841 def teststream(content=None):
   841     #content = "" # test
   842     #content = "" # test
   842     if content is None:
   843     if content is None:
   843         content = teststreamcontent
   844         content = teststreamcontent
   844     content = string.strip(content)
   845     content = content.strip()
   845     content = string.replace(content, "\n", LINEEND) + LINEEND
   846     content = string.replace(content, "\n", LINEEND) + LINEEND
   846     S = PDFStream(content = content,
   847     S = PDFStream(content = content,
   847                     filters=rl_config.useA85 and [PDFBase85Encode,PDFZCompress] or [PDFZCompress])
   848                     filters=rl_config.useA85 and [PDFBase85Encode,PDFZCompress] or [PDFZCompress])
   848     # nothing else needed...
   849     # nothing else needed...
   849     S.__Comment__ = "test stream"
   850     S.__Comment__ = "test stream"
   938         result = self.offset
   939         result = self.offset
   939         self.offset = result+len(s)
   940         self.offset = result+len(s)
   940         self.write(s)
   941         self.write(s)
   941         return result
   942         return result
   942     def format(self, document):
   943     def format(self, document):
   943         strings = list(map(str, self.strings)) # final conversion, in case of lazy objects
   944         strings = map(str, self.strings) # final conversion, in case of lazy objects
   944         return string.join(strings, "")
   945         return "".join(strings)
   945 
   946 
   946 XREFFMT = '%0.10d %0.5d n'
   947 XREFFMT = '%0.10d %0.5d n'
   947 
   948 
   948 class PDFCrossReferenceSubsection:
   949 class PDFCrossReferenceSubsection:
   949     __PDFObject__ = True
   950     __PDFObject__ = True
   985             reflineend = " "+LINEEND # as per spec
   986             reflineend = " "+LINEEND # as per spec
   986         elif LINEEND=="\r\n":
   987         elif LINEEND=="\r\n":
   987             reflineend = LINEEND
   988             reflineend = LINEEND
   988         else:
   989         else:
   989             raise ValueError("bad end of line! %s" % repr(LINEEND))
   990             raise ValueError("bad end of line! %s" % repr(LINEEND))
   990         return string.join(entries, LINEEND)
   991         return LINEEND.join(entries)
   991 
   992 
   992 class PDFCrossReferenceTable:
   993 class PDFCrossReferenceTable:
   993     __PDFObject__ = True
   994     __PDFObject__ = True
   994 
   995 
   995     def __init__(self):
   996     def __init__(self):
  1003             raise ValueError("no crossref sections")
  1004             raise ValueError("no crossref sections")
  1004         L = ["xref"+LINEEND]
  1005         L = ["xref"+LINEEND]
  1005         for s in self.sections:
  1006         for s in self.sections:
  1006             fs = format(s, document)
  1007             fs = format(s, document)
  1007             L.append(fs)
  1008             L.append(fs)
  1008         return string.join(L, "")
  1009         return "".join(L)
  1009 
  1010 
  1010 TRAILERFMT = ("trailer%(LINEEND)s"
  1011 TRAILERFMT = ("trailer%(LINEEND)s"
  1011               "%(dict)s%(LINEEND)s"
  1012               "%(dict)s%(LINEEND)s"
  1012               "startxref%(LINEEND)s"
  1013               "startxref%(LINEEND)s"
  1013               "%(startxref)s%(LINEEND)s"
  1014               "%(startxref)s%(LINEEND)s"
  1043     __RefOnly__ = 1
  1044     __RefOnly__ = 1
  1044     # to override, set as attributes
  1045     # to override, set as attributes
  1045     __Defaults__ = {"Type": PDFName("Catalog"),
  1046     __Defaults__ = {"Type": PDFName("Catalog"),
  1046                 "PageMode": PDFName("UseNone"),
  1047                 "PageMode": PDFName("UseNone"),
  1047                 }
  1048                 }
  1048     __NoDefault__ = string.split("""
  1049     __NoDefault__ = """
  1049         Dests Outlines Pages Threads AcroForm Names OpenAction PageMode URI
  1050         Dests Outlines Pages Threads AcroForm Names OpenAction PageMode URI
  1050         ViewerPreferences PageLabels PageLayout JavaScript StructTreeRoot SpiderInfo"""
  1051         ViewerPreferences PageLabels PageLayout JavaScript StructTreeRoot SpiderInfo""".split()
  1051                                  )
       
  1052     __Refs__ = __NoDefault__ # make these all into references, if present
  1052     __Refs__ = __NoDefault__ # make these all into references, if present
  1053 
  1053 
  1054     def format(self, document):
  1054     def format(self, document):
  1055         self.check_format(document)
  1055         self.check_format(document)
  1056         defaults = self.__Defaults__
  1056         defaults = self.__Defaults__
  1057         Refs = self.__Refs__
  1057         Refs = self.__Refs__
  1058         D = {}
  1058         D = {}
  1059         for k in list(defaults.keys()):
  1059         for k in defaults.keys():
  1060             default = defaults[k]
  1060             default = defaults[k]
  1061             v = None
  1061             v = None
  1062             if hasattr(self, k) and getattr(self,k) is not None:
  1062             if hasattr(self, k) and getattr(self,k) is not None:
  1063                 v = getattr(self, k)
  1063                 v = getattr(self, k)
  1064             elif default is not None:
  1064             elif default is not None:
  1101     __Comment__ = "page tree"
  1101     __Comment__ = "page tree"
  1102     __RefOnly__ = 1
  1102     __RefOnly__ = 1
  1103     # note: could implement page attribute inheritance...
  1103     # note: could implement page attribute inheritance...
  1104     __Defaults__ = {"Type": PDFName("Pages"),
  1104     __Defaults__ = {"Type": PDFName("Pages"),
  1105                     }
  1105                     }
  1106     __NoDefault__ = string.split("Kids Count Parent")
  1106     __NoDefault__ = "Kids Count Parent".split()
  1107     __Refs__ = ["Parent"]
  1107     __Refs__ = ["Parent"]
  1108     def __init__(self):
  1108     def __init__(self):
  1109         self.pages = []
  1109         self.pages = []
  1110     def __getitem__(self, item):
  1110     def __getitem__(self, item):
  1111         return self.pages[item]
  1111         return self.pages[item]
  1127     Override_default_compilation = 0
  1127     Override_default_compilation = 0
  1128     __RefOnly__ = 1
  1128     __RefOnly__ = 1
  1129     __Defaults__ = {"Type": PDFName("Page"),
  1129     __Defaults__ = {"Type": PDFName("Page"),
  1130                    # "Parent": PDFObjectReference(Pages),  # no! use document.Pages
  1130                    # "Parent": PDFObjectReference(Pages),  # no! use document.Pages
  1131                     }
  1131                     }
  1132     __NoDefault__ = string.split(""" Parent
  1132     __NoDefault__ = """Parent
  1133         MediaBox Resources Contents CropBox Rotate Thumb Annots B Dur Hid Trans AA
  1133         MediaBox Resources Contents CropBox Rotate Thumb Annots B Dur Hid Trans AA
  1134         PieceInfo LastModified SeparationInfo ArtBox TrimBox BleedBox ID PZ
  1134         PieceInfo LastModified SeparationInfo ArtBox TrimBox BleedBox ID PZ
  1135         Trans
  1135         Trans""".split()
  1136     """)
  1136     __Refs__ = """Contents Parent ID""".split()
  1137     __Refs__ = string.split("""
       
  1138         Contents Parent ID
       
  1139     """)
       
  1140     pagewidth = 595
  1137     pagewidth = 595
  1141     pageheight = 842
  1138     pageheight = 842
  1142     stream = None
  1139     stream = None
  1143     hasImages = 0
  1140     hasImages = 0
  1144     compression = 0
  1141     compression = 0
  1156     def setCompression(self, onoff):
  1153     def setCompression(self, onoff):
  1157         self.compression = onoff
  1154         self.compression = onoff
  1158     def setStream(self, code):
  1155     def setStream(self, code):
  1159         if self.Override_default_compilation:
  1156         if self.Override_default_compilation:
  1160             raise ValueError("overridden! must set stream explicitly")
  1157             raise ValueError("overridden! must set stream explicitly")
  1161         from types import ListType
  1158         if isSeqType(code):
  1162         if type(code) is ListType:
  1159             code = LINEEND.join(code)+LINEEND
  1163             code = string.join(code, LINEEND)+LINEEND
       
  1164         self.stream = code
  1160         self.stream = code
  1165 
  1161 
  1166     def setPageTransition(self, tranDict):
  1162     def setPageTransition(self, tranDict):
  1167         self.Trans = PDFDictionary(tranDict)
  1163         self.Trans = PDFDictionary(tranDict)
  1168 
  1164 
  1173         self.MediaBox = self.MediaBox or PDFArray(self.Rotate in (90,270) and [0,0,self.pageheight,self.pagewidth] or [0, 0, self.pagewidth, self.pageheight])
  1169         self.MediaBox = self.MediaBox or PDFArray(self.Rotate in (90,270) and [0,0,self.pageheight,self.pagewidth] or [0, 0, self.pagewidth, self.pageheight])
  1174         if not self.Annots:
  1170         if not self.Annots:
  1175             self.Annots = None
  1171             self.Annots = None
  1176         else:
  1172         else:
  1177             #print self.Annots
  1173             #print self.Annots
  1178             #raise ValueError, "annotations not reimplemented yet"
  1174             #raise ValueError("annotations not reimplemented yet")
  1179             if not hasattr(self.Annots,'__PDFObject__'):
  1175             if not hasattr(self.Annots,'__PDFObject__'):
  1180                 self.Annots = PDFArray(self.Annots)
  1176                 self.Annots = PDFArray(self.Annots)
  1181         if not self.Contents:
  1177         if not self.Contents:
  1182             stream = self.stream
  1178             stream = self.stream
  1183             if not stream:
  1179             if not stream:
  1339 >>"""
  1335 >>"""
  1340 
  1336 
  1341 class PDFOutlines0:
  1337 class PDFOutlines0:
  1342     __PDFObject__ = True
  1338     __PDFObject__ = True
  1343     __Comment__ = "TEST OUTLINE!"
  1339     __Comment__ = "TEST OUTLINE!"
  1344     text = string.replace(DUMMYOUTLINE, "\n", LINEEND)
  1340     text = DUMMYOUTLINE.replace("\n", LINEEND)
  1345     __RefOnly__ = 1
  1341     __RefOnly__ = 1
  1346     def format(self, document):
  1342     def format(self, document):
  1347         return self.text
  1343         return self.text
  1348 
  1344 
  1349 class OutlineEntryObject:
  1345 class OutlineEntryObject:
  1394         self.buildtree = []
  1390         self.buildtree = []
  1395         self.closedict = {} # dictionary of "closed" destinations in the outline
  1391         self.closedict = {} # dictionary of "closed" destinations in the outline
  1396 
  1392 
  1397     def addOutlineEntry(self, destinationname, level=0, title=None, closed=None):
  1393     def addOutlineEntry(self, destinationname, level=0, title=None, closed=None):
  1398         """destinationname of None means "close the tree" """
  1394         """destinationname of None means "close the tree" """
  1399         from types import IntType, TupleType
       
  1400         if destinationname is None and level!=0:
  1395         if destinationname is None and level!=0:
  1401             raise ValueError("close tree must have level of 0")
  1396             raise ValueError("close tree must have level of 0")
  1402         if type(level) is not IntType: raise ValueError("level must be integer, got %s" % type(level))
  1397         if not isinstance(level,int): raise ValueError("level must be integer, got %s" % type(level))
  1403         if level<0: raise ValueError("negative levels not allowed")
  1398         if level<0: raise ValueError("negative levels not allowed")
  1404         if title is None: title = destinationname
  1399         if title is None: title = destinationname
  1405         currentlevel = self.currentlevel
  1400         currentlevel = self.currentlevel
  1406         stack = self.levelstack
  1401         stack = self.levelstack
  1407         tree = self.buildtree
  1402         tree = self.buildtree
  1415             # pop off levels to match
  1410             # pop off levels to match
  1416             current = stack[-1]
  1411             current = stack[-1]
  1417             del stack[-1]
  1412             del stack[-1]
  1418             previous = stack[-1]
  1413             previous = stack[-1]
  1419             lastinprevious = previous[-1]
  1414             lastinprevious = previous[-1]
  1420             if type(lastinprevious) is TupleType:
  1415             if isinstance(lastinprevious,tuple):
  1421                 (name, sectionlist) = lastinprevious
  1416                 (name, sectionlist) = lastinprevious
  1422                 raise ValueError("cannot reset existing sections: " + repr(lastinprevious))
  1417                 raise ValueError("cannot reset existing sections: " + repr(lastinprevious))
  1423             else:
  1418             else:
  1424                 name = lastinprevious
  1419                 name = lastinprevious
  1425                 sectionlist = current
  1420                 sectionlist = current
  1455         desttree = self.translateNames(canvas, nametree)
  1450         desttree = self.translateNames(canvas, nametree)
  1456         self.setDestinations(desttree)
  1451         self.setDestinations(desttree)
  1457 
  1452 
  1458     def translateNames(self, canvas, object):
  1453     def translateNames(self, canvas, object):
  1459         "recursively translate tree of names into tree of destinations"
  1454         "recursively translate tree of names into tree of destinations"
  1460         from types import StringType, ListType, TupleType
       
  1461         Ot = type(object)
       
  1462         destinationnamestotitles = self.destinationnamestotitles
  1455         destinationnamestotitles = self.destinationnamestotitles
  1463         destinationstotitles = self.destinationstotitles
  1456         destinationstotitles = self.destinationstotitles
  1464         closedict = self.closedict
  1457         closedict = self.closedict
  1465         if Ot is StringType:
  1458         if isStrType(object):
  1466             destination = canvas._bookmarkReference(object)
  1459             destination = canvas._bookmarkReference(object)
  1467             title = object
  1460             title = object
  1468             if object in destinationnamestotitles:
  1461             if object in destinationnamestotitles:
  1469                 title = destinationnamestotitles[object]
  1462                 title = destinationnamestotitles[object]
  1470             else:
  1463             else:
  1471                 destinationnamestotitles[title] = title
  1464                 destinationnamestotitles[title] = title
  1472             destinationstotitles[destination] = title
  1465             destinationstotitles[destination] = title
  1473             if object in closedict:
  1466             if object in closedict:
  1474                 closedict[destination] = 1 # mark destination closed
  1467                 closedict[destination] = 1 # mark destination closed
  1475             return {object: canvas._bookmarkReference(object)} # name-->ref
  1468             return {object: canvas._bookmarkReference(object)} # name-->ref
  1476         if Ot is ListType or Ot is TupleType:
  1469         if isSeqType(object):
  1477             L = []
  1470             L = []
  1478             for o in object:
  1471             for o in object:
  1479                 L.append(self.translateNames(canvas, o))
  1472                 L.append(self.translateNames(canvas, o))
  1480             if Ot is TupleType:
  1473             if isinstance(object,tuple):
  1481                 return tuple(L)
  1474                 return tuple(L)
  1482             return L
  1475             return L
  1483         # bug contributed by Benjamin Dumke <reportlab@benjamin-dumke.de>
  1476         # bug contributed by Benjamin Dumke <reportlab@benjamin-dumke.de>
  1484         raise TypeError("in outline, destination name must be string: got a %s"%Ot)
  1477         raise TypeError("in outline, destination name must be string: got a %s"%Ot)
  1485 
  1478 
  1502         self.count = count(self.mydestinations, self.closedict)
  1495         self.count = count(self.mydestinations, self.closedict)
  1503         (self.first, self.last) = self.maketree(document, self.mydestinations, toplevel=1)
  1496         (self.first, self.last) = self.maketree(document, self.mydestinations, toplevel=1)
  1504         self.ready = 1
  1497         self.ready = 1
  1505 
  1498 
  1506     def maketree(self, document, destinationtree, Parent=None, toplevel=0):
  1499     def maketree(self, document, destinationtree, Parent=None, toplevel=0):
  1507         from types import ListType, TupleType, DictType
       
  1508         tdestinationtree = type(destinationtree)
       
  1509         if toplevel:
  1500         if toplevel:
  1510             levelname = "Outline"
  1501             levelname = "Outline"
  1511             Parent = document.Reference(document.Outlines)
  1502             Parent = document.Reference(document.Outlines)
  1512         else:
  1503         else:
  1513             self.count = self.count+1
  1504             self.count = self.count+1
  1514             levelname = "Outline.%s" % self.count
  1505             levelname = "Outline.%s" % self.count
  1515             if Parent is None:
  1506             if Parent is None:
  1516                 raise ValueError("non-top level outline elt parent must be specified")
  1507                 raise ValueError("non-top level outline elt parent must be specified")
  1517         if tdestinationtree is not ListType and tdestinationtree is not TupleType:
  1508         if not isSeqType(destinationtree):
  1518             raise ValueError("destinationtree must be list or tuple, got %s")
  1509             raise ValueError("destinationtree must be list or tuple, got %s")
  1519         nelts = len(destinationtree)
  1510         nelts = len(destinationtree)
  1520         lastindex = nelts-1
  1511         lastindex = nelts-1
  1521         lastelt = firstref = lastref = None
  1512         lastelt = firstref = lastref = None
  1522         destinationnamestotitles = self.destinationnamestotitles
  1513         destinationnamestotitles = self.destinationnamestotitles
  1534                 firstref = eltref
  1525                 firstref = eltref
  1535             lastref = eltref
  1526             lastref = eltref
  1536             lastelt = eltobj # advance eltobj
  1527             lastelt = eltobj # advance eltobj
  1537             lastref = eltref
  1528             lastref = eltref
  1538             elt = destinationtree[index]
  1529             elt = destinationtree[index]
  1539             te = type(elt)
  1530             if isinstance(elt,dict):
  1540             if te is DictType:
       
  1541                 # simple leaf {name: dest}
  1531                 # simple leaf {name: dest}
  1542                 leafdict = elt
  1532                 leafdict = elt
  1543             elif te is TupleType:
  1533             elif isinstance(elt,tuple):
  1544                 # leaf with subsections: ({name: ref}, subsections) XXXX should clean up (see count(...))
  1534                 # leaf with subsections: ({name: ref}, subsections) XXXX should clean up (see count(...))
  1545                 try:
  1535                 try:
  1546                     (leafdict, subsections) = elt
  1536                     (leafdict, subsections) = elt
  1547                 except:
  1537                 except:
  1548                     raise ValueError("destination tree elt tuple should have two elts, got %s" % len(elt))
  1538                     raise ValueError("destination tree elt tuple should have two elts, got %s" % len(elt))
  1554                 [(Title, Dest)] = list(leafdict.items())
  1544                 [(Title, Dest)] = list(leafdict.items())
  1555             except:
  1545             except:
  1556                 raise ValueError("bad outline leaf dictionary, should have one entry "+utf8str(elt))
  1546                 raise ValueError("bad outline leaf dictionary, should have one entry "+utf8str(elt))
  1557             eltobj.Title = destinationnamestotitles[Title]
  1547             eltobj.Title = destinationnamestotitles[Title]
  1558             eltobj.Dest = Dest
  1548             eltobj.Dest = Dest
  1559             if te is TupleType and Dest in closedict:
  1549             if isinstance(elt,tuple) and Dest in closedict:
  1560                 # closed subsection, count should be negative
  1550                 # closed subsection, count should be negative
  1561                 eltobj.Count = -eltobj.Count
  1551                 eltobj.Count = -eltobj.Count
  1562         return (firstref, lastref)
  1552         return (firstref, lastref)
  1563 
  1553 
  1564 def count(tree, closedict=None):
  1554 def count(tree, closedict=None):
  1565     """utility for outline: recursively count leaves in a tuple/list tree"""
  1555     """utility for outline: recursively count leaves in a tuple/list tree"""
  1566     from operator import add
  1556     from operator import add
  1567     from types import TupleType, ListType
  1557     if isinstance(tree,tuple):
  1568     tt = type(tree)
       
  1569     if tt is TupleType:
       
  1570         # leaf with subsections XXXX should clean up this structural usage
  1558         # leaf with subsections XXXX should clean up this structural usage
  1571         (leafdict, subsections) = tree
  1559         (leafdict, subsections) = tree
  1572         [(Title, Dest)] = list(leafdict.items())
  1560         [(Title, Dest)] = list(leafdict.items())
  1573         if closedict and Dest in closedict:
  1561         if closedict and Dest in closedict:
  1574             return 1 # closed tree element
  1562             return 1 # closed tree element
  1575     if tt is TupleType or tt is ListType:
  1563     if isSeqType(tree):
  1576         #return reduce(add, map(count, tree))
  1564         #return reduce(add, map(count, tree))
  1577         counts = []
  1565         counts = []
  1578         for e in tree:
  1566         for e in tree:
  1579             counts.append(count(e, closedict))
  1567             counts.append(count(e, closedict))
  1580         return sum(counts)  #used to be: return reduce(add, counts)
  1568         return sum(counts)  #used to be: return reduce(add, counts)
  1615         return PD.format(document)
  1603         return PD.format(document)
  1616 
  1604 
  def copy(self):
      """Return a shallow copy of this object (useful in pagecatchering).

      A new instance is created by calling self.__klass__() with no
      arguments, then every entry of self.__dict__ is set on it with
      setattr; the attribute values themselves are shared, not copied.
      """
      duplicate = self.__klass__()
      own_attributes = self.__dict__
      for attr_name in own_attributes:
          setattr(duplicate, attr_name, own_attributes[attr_name])
      return duplicate
  1623 # skipping thumbnails, etc
  1611 # skipping thumbnails, etc
  1624 
  1612 
  1625 class Annotation:
  1613 class Annotation:
  1630     permitted = required+(
  1618     permitted = required+(
  1631       "Border", "C", "T", "M", "F", "H", "BS", "AA", "AS", "Popup", "P", "AP")
  1619       "Border", "C", "T", "M", "F", "H", "BS", "AA", "AS", "Popup", "P", "AP")
  def cvtdict(self, d, escape=1):
      """Transform dict args from python form to pdf string rep as needed.

      d is modified in place and returned.  d["Rect"] is wrapped in a
      PDFArray unless it is already a string (bytes or unicode), and
      d["Contents"] is wrapped in a PDFString, with escape passed through.
      Raises KeyError if d has no "Rect" or no "Contents" entry.
      """
      Rect = d["Rect"]
      # isStrType is not among the names imported from reportlab.lib.utils
      # at the top of this module, so test with the two predicates that are.
      if not (isBytesType(Rect) or isUnicodeType(Rect)):
          d["Rect"] = PDFArray(Rect)
      d["Contents"] = PDFString(d["Contents"],escape)
      return d
  1639     def AnnotationDict(self, **kw):
  1627     def AnnotationDict(self, **kw):
  1640         if 'escape' in kw:
  1628         if 'escape' in kw:
  1649         for name in self.required:
  1637         for name in self.required:
  1650             if name not in d:
  1638             if name not in d:
  1651                 raise ValueError("keyword argument %s missing" % name)
  1639                 raise ValueError("keyword argument %s missing" % name)
  1652         d = self.cvtdict(d,escape=escape)
  1640         d = self.cvtdict(d,escape=escape)
  1653         permitted = self.permitted
  1641         permitted = self.permitted
  1654         for name in list(d.keys()):
  1642         for name in d.keys():
  1655             if name not in permitted:
  1643             if name not in permitted:
  1656                 raise ValueError("bad annotation dictionary name %s" % name)
  1644                 raise ValueError("bad annotation dictionary name %s" % name)
  1657         return PDFDictionary(d)
  1645         return PDFDictionary(d)
  1658     def Dict(self):
  1646     def Dict(self):
  1659         raise ValueError("DictString undefined for virtual superclass Annotation, must overload")
  1647         raise ValueError("DictString undefined for virtual superclass Annotation, must overload")
  1745 class PDFDate:
  1733 class PDFDate:
  1746     __PDFObject__ = True
  1734     __PDFObject__ = True
  1747     # gmt offset now supported properly
  1735     # gmt offset now supported properly
  1748     def __init__(self, invariant=rl_config.invariant, dateFormatter=None):
  1736     def __init__(self, invariant=rl_config.invariant, dateFormatter=None):
  1749         if invariant:
  1737         if invariant:
  1750             now = (2000,0o1,0o1,00,00,00,0)
  1738             now = (2000,1,1,0,0,0,0)
  1751             self.dhh = 0
  1739             self.dhh = 0
  1752             self.dmm = 0
  1740             self.dmm = 0
  1753         else:
  1741         else:
  1754             import time
  1742             import time
  1755             now = tuple(time.localtime(_getTimeStamp())[:6])
  1743             now = tuple(time.localtime(_getTimeStamp())[:6])
  1893         self.ProcSet = []
  1881         self.ProcSet = []
  1894         self.Properties = {}
  1882         self.Properties = {}
  1895         self.Shading = {}
  1883         self.Shading = {}
  1896         # ?by default define the basicprocs
  1884         # ?by default define the basicprocs
  1897         self.basicProcs()
  1885         self.basicProcs()
  1898     stdprocs = list(map(PDFName, string.split("PDF Text ImageB ImageC ImageI")))
  1886     stdprocs = [PDFName(s) for s in "PDF Text ImageB ImageC ImageI".split()]
  1899     dict_attributes = ("ColorSpace", "XObject", "ExtGState", "Font", "Pattern", "Properties", "Shading")
  1887     dict_attributes = ("ColorSpace", "XObject", "ExtGState", "Font", "Pattern", "Properties", "Shading")
  1900 
  1888 
  1901     def allProcs(self):
  1889     def allProcs(self):
  1902         # define all standard procsets
  1890         # define all standard procsets
  1903         self.ProcSet = self.stdprocs
  1891         self.ProcSet = self.stdprocs
  1916         for c,s in shadingUsed.items():
  1904         for c,s in shadingUsed.items():
  1917             self.Shading[s] = PDFObjectReference(c)
  1905             self.Shading[s] = PDFObjectReference(c)
  1918 
  1906 
  1919     def format(self, document):
  1907     def format(self, document):
  1920         D = {}
  1908         D = {}
  1921         from types import ListType, DictType
       
  1922         for dname in self.dict_attributes:
  1909         for dname in self.dict_attributes:
  1923             v = getattr(self, dname)
  1910             v = getattr(self, dname)
  1924             if type(v) is DictType:
  1911             if isinstance(v,dict):
  1925                 if v:
  1912                 if v:
  1926                     dv = PDFDictionary(v)
  1913                     dv = PDFDictionary(v)
  1927                     D[dname] = dv
  1914                     D[dname] = dv
  1928             else:
  1915             else:
  1929                 D[dname] = v
  1916                 D[dname] = v
  1930         v = self.ProcSet
  1917         v = self.ProcSet
  1931         dname = "ProcSet"
  1918         dname = "ProcSet"
  1932         if type(v) is ListType:
  1919         if isSeqType(v):
  1933             if v:
  1920             if v:
  1934                 dv = PDFArray(v)
  1921                 dv = PDFArray(v)
  1935                 D[dname] = dv
  1922                 D[dname] = dv
  1936         else:
  1923         else:
  1937             D[dname] = v
  1924             D[dname] = v
  1947 class PDFType1Font:
  1934 class PDFType1Font:
  1948     """no init: set attributes explicitly"""
  1935     """no init: set attributes explicitly"""
  1949     __PDFObject__ = True
  1936     __PDFObject__ = True
  1950     __RefOnly__ = 1
  1937     __RefOnly__ = 1
  1951     # note! /Name appears to be an undocumented attribute....
  1938     # note! /Name appears to be an undocumented attribute....
  1952     name_attributes = string.split("Type Subtype BaseFont Name")
  1939     name_attributes = "Type Subtype BaseFont Name".split()
  1953     Type = "Font"
  1940     Type = "Font"
  1954     Subtype = "Type1"
  1941     Subtype = "Type1"
  1955     # these attributes are assumed to already be of the right type
  1942     # these attributes are assumed to already be of the right type
  1956     local_attributes = string.split("FirstChar LastChar Widths Encoding ToUnicode FontDescriptor")
  1943     local_attributes = "FirstChar LastChar Widths Encoding ToUnicode FontDescriptor".split()
  1957     def format(self, document):
  1944     def format(self, document):
  1958         D = {}
  1945         D = {}
  1959         for name in self.name_attributes:
  1946         for name in self.name_attributes:
  1960             if hasattr(self, name):
  1947             if hasattr(self, name):
  1961                 value = getattr(self, name)
  1948                 value = getattr(self, name)
  2021     def __init__(self, lowerx, lowery, upperx, uppery):
  2008     def __init__(self, lowerx, lowery, upperx, uppery):
  2022         #not done
  2009         #not done
  2023         self.lowerx = lowerx; self.lowery=lowery; self.upperx=upperx; self.uppery=uppery
  2010         self.lowerx = lowerx; self.lowery=lowery; self.upperx=upperx; self.uppery=uppery
  2024 
  2011 
  def setStreamList(self, data):
      """Store data as self.stream.

      When isSeqType(data) is true the items are first joined with
      LINEEND; anything else is stored unchanged.
      """
      self.stream = LINEEND.join(data) if isSeqType(data) else data
  2029 
  2016 
  2030     def BBoxList(self):
  2017     def BBoxList(self):
  2031         "get the declared bounding box for the form as a list"
  2018         "get the declared bounding box for the form as a list"
  2032         if self.BBox:
  2019         if self.BBox:
  2118         elif hasattr(source,'jpeg_fh'):
  2105         elif hasattr(source,'jpeg_fh'):
  2119             self.loadImageFromSRC(source)   #it is already a PIL Image
  2106             self.loadImageFromSRC(source)   #it is already a PIL Image
  2120         else:
  2107         else:
  2121             # it is a filename
  2108             # it is a filename
  2122             import os
  2109             import os
  2123             ext = string.lower(os.path.splitext(source)[1])
  2110             ext = os.path.splitext(source)[1].lower()
  2124             src = open_for_read(source)
  2111             src = open_for_read(source)
  2125             if not(ext in ('.jpg', '.jpeg') and self.loadImageFromJPEG(src)):
  2112             if not(ext in ('.jpg', '.jpeg') and self.loadImageFromJPEG(src)):
  2126                 if rl_config.useA85:
  2113                 if rl_config.useA85:
  2127                     self.loadImageFromA85(src)
  2114                     self.loadImageFromA85(src)
  2128                 else:
  2115                 else:
  2129                     self.loadImageFromRaw(src)
  2116                     self.loadImageFromRaw(src)
  2130 
  2117 
  def loadImageFromA85(self,source):
      """Load the image attributes from the output of pdfutils.makeA85Image.

      Sets width, height, colorSpace, bitsPerComponent, _filters and
      streamContent on self.  Raises KeyError when the colour space token
      is not one of /RGB, /G or /CMYK.
      """
      collected = []  # passed to makeA85Image as IMG and inspected below
      stripped = [line.strip() for line in pdfutils.makeA85Image(source,IMG=collected)]
      header = stripped[1].split()
      # convert both dimensions before assigning either attribute
      width, height = int(header[1]), int(header[3])
      self.width = width
      self.height = height
      spaces = {'/RGB':'DeviceRGB', '/G':'DeviceGray', '/CMYK':'DeviceCMYK'}
      self.colorSpace = spaces[header[7]]
      self.bitsPerComponent = 8
      self._filters = ('ASCII85Decode','FlateDecode') #'A85','Fl'
      if collected:
          self._checkTransparency(collected[0])
      elif self.mask=='auto':
          # nothing came back in IMG, so an 'auto' mask becomes no mask
          self.mask = None
      # the stream is everything between the first three lines and the last one
      self.streamContent = ''.join(stripped[3:-1])
  2142 
  2129 
  2143     def loadImageFromJPEG(self,imageFile):
  2130     def loadImageFromJPEG(self,imageFile):
  2144         try:
  2131         try:
  2145             try:
  2132             try:
  2146                 info = pdfutils.readJPEGInfo(imageFile)
  2133                 info = pdfutils.readJPEGInfo(imageFile)
  2168 
  2155 
  def loadImageFromRaw(self,source):
      """Load the image attributes from the output of pdfutils.makeRawImage.

      Sets width, height, colorSpace, bitsPerComponent, _filters and
      streamContent on self.  Raises KeyError when the colour space token
      is not one of /RGB, /G or /CMYK.
      """
      IMG=[]
      imagedata = pdfutils.makeRawImage(source,IMG=IMG)
      # str.split method rather than string.split: the function forms of the
      # string module do not exist in Python 3, and loadImageFromA85 already
      # splits its header line this way.
      words = imagedata[1].split()
      self.width = int(words[1])
      self.height = int(words[3])
      self.colorSpace = {'/RGB':'DeviceRGB', '/G':'DeviceGray', '/CMYK':'DeviceCMYK'}[words[7]]
      self.bitsPerComponent = 8
      self._filters = 'FlateDecode', #'Fl'
      if IMG: self._checkTransparency(IMG[0])
      elif self.mask=='auto': self.mask = None  # nothing came back in IMG
      # the stream is everything between the first three items and the last one
      self.streamContent = ''.join(imagedata[3:-1])
  2180 
  2168 
  2181     def _checkTransparency(self,im):
  2169     def _checkTransparency(self,im):
  2182         if self.mask=='auto':
  2170         if self.mask=='auto':
  2183             if im._dataA:
  2171             if im._dataA:
  2184                 self.mask = None
  2172                 self.mask = None
  2226         dict["ColorSpace"] = PDFName(self.colorSpace)
  2214         dict["ColorSpace"] = PDFName(self.colorSpace)
  2227         if self.colorSpace=='DeviceCMYK' and getattr(self,'_dotrans',0):
  2215         if self.colorSpace=='DeviceCMYK' and getattr(self,'_dotrans',0):
  2228             dict["Decode"] = PDFArray([1,0,1,0,1,0,1,0])
  2216             dict["Decode"] = PDFArray([1,0,1,0,1,0,1,0])
  2229         elif getattr(self,'_decode',None):
  2217         elif getattr(self,'_decode',None):
  2230             dict["Decode"] = PDFArray(self._decode)
  2218             dict["Decode"] = PDFArray(self._decode)
  2231         dict["Filter"] = PDFArray(list(map(PDFName,self._filters)))
  2219         dict["Filter"] = PDFArray(map(PDFName,self._filters))
  2232         dict["Length"] = len(self.streamContent)
  2220         dict["Length"] = len(self.streamContent)
  2233         if self.mask: dict["Mask"] = PDFArray(self.mask)
  2221         if self.mask: dict["Mask"] = PDFArray(self.mask)
  2234         if getattr(self,'smask',None): dict["SMask"] = self.smask
  2222         if getattr(self,'smask',None): dict["SMask"] = self.smask
  2235         return S.format(document)
  2223         return S.format(document)
  2236 
  2224 
  2295         d.update(kw)
  2283         d.update(kw)
  2296         for name in self.required:
  2284         for name in self.required:
  2297             if name not in d:
  2285             if name not in d:
  2298                 raise ValueError("keyword argument %s missing" % name)
  2286                 raise ValueError("keyword argument %s missing" % name)
  2299         permitted = self.permitted
  2287         permitted = self.permitted
  2300         for name in list(d.keys()):
  2288         for name in d.keys():
  2301             if name not in permitted:
  2289             if name not in permitted:
  2302                 raise ValueError("bad annotation dictionary name %s" % name)
  2290                 raise ValueError("bad annotation dictionary name %s" % name)
  2303         return PDFDictionary(d)
  2291         return PDFDictionary(d)
  2304 
  2292 
  2305     def Dict(self, document):
  2293     def Dict(self, document):
  2356         d.update(kw)
  2344         d.update(kw)
  2357         for name in self.required:
  2345         for name in self.required:
  2358             if name not in d:
  2346             if name not in d:
  2359                 raise ValueError("keyword argument %s missing" % name)
  2347                 raise ValueError("keyword argument %s missing" % name)
  2360         permitted = self.permitted
  2348         permitted = self.permitted
  2361         for name in list(d.keys()):
  2349         for name in d.keys():
  2362             if name not in permitted:
  2350             if name not in permitted:
  2363                 raise ValueError("bad annotation dictionary name %s" % name)
  2351                 raise ValueError("bad annotation dictionary name %s" % name)
  2364         return PDFDictionary(d)
  2352         return PDFDictionary(d)
  2365 
  2353 
  2366     def Dict(self, document):
  2354     def Dict(self, document):