reportlab/pdfbase/pdfdoc.py
changeset 484 cc20a344973c
parent 483 c4c3969d1e30
child 494 54257447cfe9
equal deleted inserted replaced
483:c4c3969d1e30 484:cc20a344973c
    29 # OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
    29 # OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
    30 # PERFORMANCE OF THIS SOFTWARE. 
    30 # PERFORMANCE OF THIS SOFTWARE. 
    31 #
    31 #
    32 ###############################################################################
    32 ###############################################################################
    33 #	$Log: pdfdoc.py,v $
    33 #	$Log: pdfdoc.py,v $
       
    34 #	Revision 1.29  2000/10/19 19:15:43  rgbecker
       
    35 #	Aaron's latest update from the newslist
       
    36 #
    34 #	Revision 1.28  2000/10/18 16:37:22  aaron_watters
    37 #	Revision 1.28  2000/10/18 16:37:22  aaron_watters
    35 #	undid last checkin and added an option for a default outline (different fix)
    38 #	undid last checkin and added an option for a default outline (different fix)
    36 #
    39 #	
    37 #	Revision 1.27  2000/10/18 16:26:17  aaron_watters
    40 #	Revision 1.27  2000/10/18 16:26:17  aaron_watters
    38 #	moved the outline preprocessing step into the format method (fixes testing error)
    41 #	moved the outline preprocessing step into the format method (fixes testing error)
    39 #	
    42 #	
    40 #	Revision 1.26  2000/10/18 05:03:21  aaron_watters
    43 #	Revision 1.26  2000/10/18 05:03:21  aaron_watters
    41 #	complete revision of pdfdoc.  Not finished (compression missing, testing needed)
    44 #	complete revision of pdfdoc.  Not finished (compression missing, testing needed)
   103 #	License text fixes
   106 #	License text fixes
   104 #	
   107 #	
   105 #	Revision 1.2  2000/02/15 15:47:09  rgbecker
   108 #	Revision 1.2  2000/02/15 15:47:09  rgbecker
   106 #	Added license, __version__ and Logi comment
   109 #	Added license, __version__ and Logi comment
   107 #	
   110 #	
   108 __version__=''' $Id: pdfdoc.py,v 1.28 2000/10/18 16:37:22 aaron_watters Exp $ '''
   111 __version__=''' $Id: pdfdoc.py,v 1.29 2000/10/19 19:15:43 rgbecker Exp $ '''
   109 __doc__=""" 
   112 __doc__=""" 
   110 PDFgen is a library to generate PDF files containing text and graphics.  It is the 
   113 PDFgen is a library to generate PDF files containing text and graphics.  It is the 
   111 foundation for a complete reporting solution in Python.  
   114 foundation for a complete reporting solution in Python.  
   112 
   115 
   113 The module pdfdoc.py handles the 'outer structure' of PDF documents, ensuring that
   116 The module pdfdoc.py handles the 'outer structure' of PDF documents, ensuring that
   210     inObject = None
   213     inObject = None
   211     # set this to define filters 
   214     # set this to define filters 
   212     defaultStreamFilters = None
   215     defaultStreamFilters = None
   213     pageCounter = 1
   216     pageCounter = 1
   214     def __init__(self, encoding=DEFAULT_ENCODING, dummyoutline=0):
   217     def __init__(self, encoding=DEFAULT_ENCODING, dummyoutline=0):
       
   218         #self.defaultStreamFilters = [PDFBase85Encode, PDFZCompress] # for testing!
       
   219         #self.defaultStreamFilters = [PDFZCompress] # for testing!
   215         self.encoding = encoding
   220         self.encoding = encoding
   216         # mapping of internal identifier ("Page001") to PDF objectnumber and generation number (34, 0)
   221         # mapping of internal identifier ("Page001") to PDF objectnumber and generation number (34, 0)
   217         self.idToObjectNumberAndVersion = {}
   222         self.idToObjectNumberAndVersion = {}
   218         # mapping of internal identifier ("Page001") to PDF object (PDFPage instance)
   223         # mapping of internal identifier ("Page001") to PDF object (PDFPage instance)
   219         self.idToObject = {}
   224         self.idToObject = {}
   443 def PDFString(str):
   448 def PDFString(str):
   444     # might need to change this to class for encryption
   449     # might need to change this to class for encryption
   445     return "(%s)" % pdfutils._escape(str)
   450     return "(%s)" % pdfutils._escape(str)
   446     
   451     
   447 def PDFName(data):
   452 def PDFName(data):
       
   453     # might need to change this to class for encryption
   448     # first convert the name
   454     # first convert the name
   449     ldata = list(data)
   455     ldata = list(data)
   450     index = 0
   456     index = 0
   451     for thischar in data:
   457     for thischar in data:
   452         if 0x21<=ord(thischar)<=0x7e and thischar not in "%()<>{}[]#":
   458         if 0x21<=ord(thischar)<=0x7e and thischar not in "%()<>{}[]#":
   494             for i in range(6, len(Lj), 6):
   500             for i in range(6, len(Lj), 6):
   495                 Lj.insert(i,LINEEND)
   501                 Lj.insert(i,LINEEND)
   496             Lj = string.join(L, " ")
   502             Lj = string.join(L, " ")
   497         return "<< %s >>" % Lj
   503         return "<< %s >>" % Lj
   498 
   504 
       
   505 # stream filters are objects (with encode and decode methods) to support round trip and
       
   506 # possibly in the future also support parameters
       
   507 class PDFStreamFilterZCompress:
       
   508     pdfname = "FlateDecode"
       
   509     def encode(self, text):
       
   510         try:
       
   511             from zlib import compress
       
   512         except:
       
   513             raise ImportError, "cannot z-compress zlib unavailable"
       
   514         return compress(text)
       
   515     def decode(self, encoded):
       
   516         try:
       
   517             from zlib import decompress
       
   518         except:
       
   519             raise ImportError, "cannot z-compress zlib unavailable"
       
   520         return decompress(encoded)
       
   521 
       
   522 # need only one of these, unless we implement parameters later
       
   523 PDFZCompress = PDFStreamFilterZCompress()    
       
   524 
       
   525 class PDFStreamFilterBase85Encode:
       
   526     pdfname = "ASCII85Decode"
       
   527     def encode(self, text):
       
   528         from pdfutils import _AsciiBase85Encode, _wrap
       
   529         return _wrap(_AsciiBase85Encode(text))
       
   530     def decode(self, text):
       
   531         from pdfutils import _AsciiBase85Decode
       
   532         return _AsciiBase85Decode(text)
       
   533     
       
   534 # need only one of these too
       
   535 PDFBase85Encode = PDFStreamFilterBase85Encode()
       
   536 
   499 STREAMFMT = ("%(dictionary)s%(LINEEND)s" # dictionary
   537 STREAMFMT = ("%(dictionary)s%(LINEEND)s" # dictionary
   500              "stream" # stream keyword
   538              "stream" # stream keyword
   501              "%(LINEEND)s" # a line end (could be just a \n)
   539              "%(LINEEND)s" # a line end (could be just a \n)
   502              "%(content)s" # the content, with no lineend
   540              "%(content)s" # the content, with no lineend
   503              "endstream%(LINEEND)s" # the endstream keyword
   541              "endstream%(LINEEND)s" # the endstream keyword
   519         if self.content is None:
   557         if self.content is None:
   520             raise ValueError, "stream content not set"
   558             raise ValueError, "stream content not set"
   521         if filters is None:
   559         if filters is None:
   522             filters = document.defaultStreamFilters
   560             filters = document.defaultStreamFilters
   523         if filters is not None:
   561         if filters is not None:
   524             raise "oops", "filters for streams not yet implemented"
   562             # apply filters in reverse order listed
       
   563             rf = list(filters)
       
   564             rf.reverse()
       
   565             fnames = []
       
   566             for f in rf:
       
   567                 #print "*****************content:"; print repr(content[:200])
       
   568                 #print "*****************filter", f.pdfname
       
   569                 content = f.encode(content)
       
   570                 fnames.insert(0, PDFName(f.pdfname))
       
   571             #print "*****************finally:"; print content[:200]
       
   572             #print "****** FILTERS", fnames
       
   573             #stop
       
   574             dictionary["Filter"] = PDFArray(fnames)
   525         fc = format(content, document)
   575         fc = format(content, document)
   526         #print "type(content)", type(content)
   576         #print "type(content)", type(content)
   527         if fc!=content: burp
   577         #if fc!=content: burp
   528         # set dictionary length parameter
   578         # set dictionary length parameter
   529         dictionary["Length"] = len(content)
   579         dictionary["Length"] = len(content)
   530         fd = format(dictionary, document)
   580         fd = format(dictionary, document)
   531         sdict = LINEENDDICT.copy()
   581         sdict = LINEENDDICT.copy()
   532         sdict["dictionary"] = fd
   582         sdict["dictionary"] = fd
   539         content = teststreamcontent
   589         content = teststreamcontent
   540     content = string.strip(content)
   590     content = string.strip(content)
   541     content = string.replace(content, "\n", LINEEND) + LINEEND
   591     content = string.replace(content, "\n", LINEEND) + LINEEND
   542     S = PDFStream()
   592     S = PDFStream()
   543     S.content = content
   593     S.content = content
       
   594     S.filters = [PDFBase85Encode, PDFZCompress]
   544     # nothing else needed...
   595     # nothing else needed...
   545     S.__Comment__ = "test stream"
   596     S.__Comment__ = "test stream"
   546     return S
   597     return S
   547 
   598 
   548 teststreamcontent = """
   599 teststreamcontent = """
   846             stream = self.stream
   897             stream = self.stream
   847             if not stream:
   898             if not stream:
   848                 self.Contents = teststream()
   899                 self.Contents = teststream()
   849             else:
   900             else:
   850                 S = PDFStream()
   901                 S = PDFStream()
       
   902                 if self.compression:
       
   903                     S.filters = [PDFZCompress, PDFBase85Encode]
   851                 S.content = stream
   904                 S.content = stream
   852                 # need to add filter stuff (?)
       
   853                 S.__Comment__ = "page stream"
   905                 S.__Comment__ = "page stream"
   854                 self.Contents = S
   906                 self.Contents = S
   855         if not self.Resources:
   907         if not self.Resources:
   856             resources = PDFResourceDictionary()
   908             resources = PDFResourceDictionary()
   857             # fonts!
   909             # fonts!
  1500     local_attributes = ["Differences"]
  1552     local_attributes = ["Differences"]
  1501 
  1553 
  1502 # skipping CMaps
  1554 # skipping CMaps
  1503 
  1555 
  1504 class PDFFormXObject:
  1556 class PDFFormXObject:
  1505 	# like page requires .info set by some higher level (doc)
  1557     # like page requires .info set by some higher level (doc)
  1506 	# XXXX any resource used in a form must be propagated up to the page that (recursively) uses
  1558     # XXXX any resource used in a form must be propagated up to the page that (recursively) uses
  1507 	#   the form!! (not implemented yet).
  1559     #   the form!! (not implemented yet).
  1508 	XObjects = Annots = BBox = Matrix = Contents = stream = Resources = None
  1560     XObjects = Annots = BBox = Matrix = Contents = stream = Resources = None
  1509 	hasImages = 1 # probably should change
  1561     hasImages = 1 # probably should change
  1510 	compression = 0
  1562     compression = 0
  1511 	def __init__(self, lowerx, lowery, upperx, uppery):
  1563     def __init__(self, lowerx, lowery, upperx, uppery):
  1512 		#not done
  1564         #not done
  1513 		self.lowerx = lowerx; self.lowery=lowery; self.upperx=upperx; self.uppery=uppery
  1565         self.lowerx = lowerx; self.lowery=lowery; self.upperx=upperx; self.uppery=uppery
  1514 		
  1566         
  1515 	def setStreamList(self, data):
  1567     def setStreamList(self, data):
  1516 		if type(data) is types.ListType:
  1568         if type(data) is types.ListType:
  1517 			data = string.join(data, LINEEND)
  1569             data = string.join(data, LINEEND)
  1518 		self.stream = data
  1570         self.stream = data
  1519 		
  1571         
  1520 	def format(self, document):
  1572     def format(self, document):
  1521 		self.BBox = self.BBox or PDFArray([self.lowerx, self.lowery, self.upperx, self.uppery])
  1573         self.BBox = self.BBox or PDFArray([self.lowerx, self.lowery, self.upperx, self.uppery])
  1522 		self.Matrix = self.Matrix or PDFArray([1, 0, 0, 1, 0, 0])
  1574         self.Matrix = self.Matrix or PDFArray([1, 0, 0, 1, 0, 0])
  1523 		if not self.Annots:
  1575         if not self.Annots:
  1524 			self.Annots = None
  1576             self.Annots = None
  1525 		else:
  1577         else:
  1526 			raise ValueError, "annotations not reimplemented yet"
  1578             raise ValueError, "annotations not reimplemented yet"
  1527 		if not self.Contents:
  1579         if not self.Contents:
  1528 			stream = self.stream
  1580             stream = self.stream
  1529 			if not stream:
  1581             if not stream:
  1530 				self.Contents = teststream()
  1582                 self.Contents = teststream()
  1531 			else:
  1583             else:
  1532 				S = PDFStream()
  1584                 S = PDFStream()
  1533 				S.content = stream
  1585                 S.content = stream
  1534 				# need to add filter stuff (?)
  1586                 # need to add filter stuff (?)
  1535 				S.__Comment__ = "xobject form stream"
  1587                 S.__Comment__ = "xobject form stream"
  1536 				self.Contents = S
  1588                 self.Contents = S
  1537 		if not self.Resources:
  1589         if not self.Resources:
  1538 			resources = PDFResourceDictionary()
  1590             resources = PDFResourceDictionary()
  1539 			# fonts!
  1591             # fonts!
  1540 			resources.basicFonts()
  1592             resources.basicFonts()
  1541 			if self.hasImages:
  1593             if self.hasImages:
  1542 				resources.allProcs()
  1594                 resources.allProcs()
  1543 			else:
  1595             else:
  1544 				resources.basicProcs()
  1596                 resources.basicProcs()
  1545 		sdict = self.Contents.dictionary
  1597         if self.compression:
  1546 		sdict["Type"] = PDFName("XObject")
  1598             self.Contents.filters = [PDFBase85Encode, PDFZCompress]
  1547 		sdict["Subtype"] = PDFName("Form")
  1599         sdict = self.Contents.dictionary
  1548 		sdict["FormType"] = 1
  1600         sdict["Type"] = PDFName("XObject")
  1549 		sdict["BBox"] = self.BBox
  1601         sdict["Subtype"] = PDFName("Form")
  1550 		sdict["Matrix"] = self.Matrix
  1602         sdict["FormType"] = 1
  1551 		sdict["Resources"] = resources
  1603         sdict["BBox"] = self.BBox
  1552 		return self.Contents.format(document)
  1604         sdict["Matrix"] = self.Matrix
       
  1605         sdict["Resources"] = resources
       
  1606         return self.Contents.format(document)
  1553 
  1607 
  1554 if __name__=="__main__":
  1608 if __name__=="__main__":
  1555     # first test
  1609     # first test
  1556     print "line end is", repr(LINEEND)
  1610     print "line end is", repr(LINEEND)
  1557     print "PDFName", PDFName("test")
  1611     print "PDFName", PDFName("test")