author rgbecker
Wed, 25 Oct 2000 08:57:46 +0000
changeset 494 54257447cfe9
parent 484 cc20a344973c
child 500 58d712fef651
permissions -rwxr-xr-x
Changed to indirect copyright

#copyright ReportLab Inc. 2000
#see license.txt for license details
#$Header: /tmp/reportlab/reportlab/pdfbase/,v 1.30 2000/10/25 08:57:45 rgbecker Exp $
__version__=''' $Id:,v 1.30 2000/10/25 08:57:45 rgbecker Exp $ '''
__doc__="""
PDFgen is a library to generate PDF files containing text and graphics.  It is the
foundation for a complete reporting solution in Python.

The module pdfdoc.py handles the 'outer structure' of PDF documents, ensuring that
all objects are properly cross-referenced and indexed to the nearest byte.  The
'inner structure' - the page descriptions - is presumed to be generated before
each page is saved.
pdfgen.py calls this and provides a 'canvas' object to handle page marking operators.
piddlePDF calls pdfgen and offers a high-level interface.

2000-10-13 gmcm Packagize
"""
"""extremely anally  retentive structured version of pdfdoc"""

DEFAULT_ENCODING = 'WinAnsiEncoding' #hack here for a system wide change
ALLOWED_ENCODINGS = ('WinAnsiEncoding', 'MacRomanEncoding')

class PDFError(Exception):
    """Raised for PDF-level problems (e.g. an unknown font name).

    A real exception class rather than a string exception, so that both
    ``raise PDFError, "msg"`` and ``except PDFError:`` keep working on
    interpreters that no longer support string exceptions.
    """

# the 14 standard PDF fonts that every viewer must supply
StandardEnglishFonts = [
    'Courier', 'Courier-Bold', 'Courier-Oblique', 'Courier-BoldOblique',
    'Helvetica', 'Helvetica-Bold', 'Helvetica-Oblique',
    'Helvetica-BoldOblique',
    'Times-Roman', 'Times-Bold', 'Times-Italic', 'Times-BoldItalic',
    'Symbol', 'ZapfDingbats']

# set this flag to get more vertical whitespace (and larger files)
LongFormat = 1

# XXXX stream filters need to be added

# __InternalName__ is a special attribute that can only be set by the Document arbitrator
__InternalName__ = "__InternalName__"

# __RefOnly__ marks reference only elements that must be formatted on top level
__RefOnly__ = "__RefOnly__"

# __Comment__ provides a (one line) comment to inline with an object ref, if present
#   if it is more than one line then percentize it...
__Comment__ = "__Comment__"
DoComments = 1

# name for standard font dictionary
BasicFonts = "BasicFonts"

# name for the pages object
Pages = "Pages"

### generic utilities

import string, types
from reportlab.pdfbase import pdfutils
from reportlab.pdfbase.pdfutils import LINEEND   # this constant needed in both

# for % substitutions: the base mapping copied by the *FMT templates below
# ("PERCENT" lets a template emit a literal percent sign, e.g. in "%%EOF")
LINEENDDICT = {"LINEEND": LINEEND, "PERCENT": "%"}

def markfilename(filename):
    """Tag *filename* as a PDF file on classic Mac OS; do nothing elsewhere.

    filename -- path of a file that has already been written.
    Returns None.  Failures while tagging are ignored (best effort).
    """
    # with the Mac, we need to tag the file in a special
    # way so the system knows it is a PDF file.
    # This supplied by Joe Strout
    import os
    if os.name == 'mac':
        import macfs
        try:
            macfs.FSSpec(filename).SetCreatorType('CARO', 'PDF ')
        except Exception:
            # tagging is cosmetic; never fail a save because of it
            pass

def format(element, document, toplevel=0):
    """Indirection step for formatting.
       Ensures that document parameters alter behaviour
       of formatting for all elements.

       element  -- a class instance with a format(document) method, or any
                   other value (which is rendered with str()).
       document -- the document the element is formatted for.
       toplevel -- true when the element is being written as an indirect
                   object body rather than as a component of another object.

       Returns the formatted string.  Raises AttributeError if an instance
       has no format operation.
    """
    from types import InstanceType
    if type(element) is InstanceType:
        if not toplevel and hasattr(element, __RefOnly__):
            # the object cannot be a component at non top level.
            # make a reference to it and return it's format
            R = document.Reference(element)
            return R.format(document)
        else:
            try:
                fmt = element.format
            except AttributeError:
                raise AttributeError("%s has no format operation" % element)
            f = fmt(document)
            if DoComments and hasattr(element, __Comment__):
                # prefix the output with a one line PDF comment
                f = "%s%s%s%s" % ("% ", element.__Comment__, LINEEND, f)
            return f
    else:
        return str(element)

def indent(s, IND=LINEEND+" "):
    """Return *s* with every LINEEND replaced by *IND* (by default a line end plus one space)."""
    indented = string.replace(s, LINEEND, IND)
    return indented

### the global document structure manager

class PDFDocument:
    objectcounter = 0
    inObject = None
    # set this to define filters 
    defaultStreamFilters = None
    pageCounter = 1
    def __init__(self, encoding=DEFAULT_ENCODING, dummyoutline=0):
        #self.defaultStreamFilters = [PDFBase85Encode, PDFZCompress] # for testing!
        #self.defaultStreamFilters = [PDFZCompress] # for testing!
        self.encoding = encoding
        # mapping of internal identifier ("Page001") to PDF objectnumber and generation number (34, 0)
        self.idToObjectNumberAndVersion = {}
        # mapping of internal identifier ("Page001") to PDF object (PDFPage instance)
        self.idToObject = {}
        # internal id to file location
        self.idToOffset = {}
        # number to id
        self.numberToId = {}
        cat = self.Catalog = self._catalog = PDFCatalog()
        pages = self.Pages = PDFPages()
        cat.Pages = pages
        if dummyoutline:
            outlines = PDFOutlines0()
            outlines = PDFOutlines()
        self.Outlines = self.outline = outlines
        cat.Outlines = outlines = self.Info = PDFInfo()
        # make std fonts (this could be made optional
        self.fontMapping = {}
        MakeStandardEnglishFontObjects(self, encoding)

    def SaveToFile(self, filename, canvas):
        # prepare outline
        outline = self.outline
        outline.prepare(self, canvas)
        from types import StringType
        if type(filename) is StringType:
            myfile = 1
            f = open(filename, "wb")
            myfile = 0
            f = filename # IT BETTER BE A FILE-LIKE OBJECT!
        txt = self.format()
        if myfile:
            markfilename(filename) # do platform specific file junk
    def inPage(self):
        """specify the current object as a page (enables reference binding and other page features)"""
        if self.inObject is not None:
            if self.inObject=="page": return
            raise ValueError, "can't go in page already in object %s" % self.inObject
        self.inObject = "page"

    def inForm(self):
        """specify that we are in a form xobject (disable page features, etc)"""
        if self.inObject not in ["form", None]:
            raise ValueError, "can't go in form already in object %s" % self.inObject
        self.inObject = "form"
        # don't need to do anything else, I think...        

    def getInternalFontName(self, psfontname):
        fm = self.fontMapping
        if fm.has_key(psfontname):
            return fm[psfontname]
            raise PDFError, "Font %s not available in document" % repr(psfontname)

    def thisPageName(self):
        return "Page"+repr(self.pageCounter)

    def thisPageRef(self):
        return PDFObjectReference(self.thisPageName())

    def addPage(self, page):
        name = self.thisPageName()
        self.Reference(page, name)
        self.pageCounter = self.pageCounter+1
        self.inObject = None

    def formName(self, externalname):
        return "FormXob.%s" % externalname
    def addForm(self, name, form):
        """add a Form XObject."""
        # XXX should check that name is a legal PDF name
        if self.inObject != "form":
        self.Reference(form, self.formName(name))
        self.inObject = None

    def annotationName(self, externalname):
        return "Annot.%s"%externalname
    def addAnnotation(self, name, annotation):
        self.Reference(annotation, self.annotationName(name))
    def refAnnotation(self, name):
        internalname = self.annotationName(name)
        return PDFObjectReference(internalname)
    def setTitle(self, title):
        "embeds in PDF file" = title
    def setAuthor(self, author):
        "embedded in PDF file" = author
    def setSubject(self, subject):
        "embeds in PDF file" = subject

    def getAvailableFonts(self):
        fontnames = self.fontMapping.keys()
        return fontnames
    def format(self):
        # register the Catalog/INfo and then format the objects one by one until exhausted
        # (possible infinite loop if there is a bug that continually makes new objects/refs...)
        cat = self.Catalog
        info = self.Info
        # make std fonts (this could be made optional
        counter = 0 # start at first object (object 1 after preincrement)
        ids = [] # the collection of object ids in object number order
        numbertoid = self.numberToId
        idToNV = self.idToObjectNumberAndVersion
        idToOb = self.idToObject
        idToOf = self.idToOffset
        ### note that new entries may be "appended" DURING FORMATTING
        done = None
        File = PDFFile() # output collector
        while done is None:
            counter = counter+1 # do next object...
            if numbertoid.has_key(counter):
                id = numbertoid[counter]
                obj = idToOb[id]
                IO = PDFIndirectObject(id, obj)
                IOf = IO.format(self)
                # add a comment to the PDF output
                if DoComments:
                    File.add("%% %s %s %s" % (repr(id), repr(repr(obj)[:50]), LINEEND))
                offset = File.add(IOf)
                idToOf[id] = offset
                done = 1
        # sanity checks (must happen AFTER formatting)
        lno = len(numbertoid)
        if counter-1!=lno:
            raise ValueError, "counter %s doesn't match number to id dictionary %s" %(counter, lno)
        # now add the xref
        xref = PDFCrossReferenceTable()
        xref.addsection(0, ids)
        xreff = xref.format(self)
        xrefoffset = File.add(xreff)
        # now add the trailer
        trailer = PDFTrailer(
            startxref = xrefoffset,
            Size = lno,
            Root = self.Reference(cat),
            Info = self.Reference(info)
        trailerf = trailer.format(self)
        # return string format for pdf file
        return File.format(self)
    def hasForm(self, name):
        """test for existence of named form"""
        internalname = self.formName(name)
            test = self.idToObject[internalname]          
            return 0
            return internalname

    def xobjDict(self, formnames):
        """construct an xobject dict (for inclusion in a resource dict, usually)
           from a list of form names (images not yet supported)"""
        D = {}
        for name in formnames:
            internalname = self.formName(name)
            reference = PDFObjectReference(internalname)
            D[internalname] = reference
        #print "xobjDict D", D
        return PDFDictionary(D)
    def Reference(self, object, name=None):
        ### note references may "grow" during the final formatting pass: don't use d.keys()!
        # don't make references to other references, or non instances
        from types import InstanceType
        #print"object type is ", type(object)
        tob = type(object)
        if (tob is not InstanceType) or (tob is InstanceType and object.__class__ is PDFObjectReference):
            return object
        idToObject = self.idToObject
        if hasattr(object, __InternalName__):
            # already registered
            intname = object.__InternalName__
            if name is not None and name!=intname:
                raise ValueError, "attempt to reregister object %s with new name %s" % (
                    repr(intname), repr(name))
            if not idToObject.has_key(intname):
                raise ValueError, "object named but not registered"
            return PDFObjectReference(intname)
        # otherwise register the new object
        objectcounter = self.objectcounter = self.objectcounter+1
        if name is None:
            name = "R"+repr(objectcounter)
        if idToObject.has_key(name):
            raise ValueError, "redefining named object: "+repr(name)
        object.__InternalName__ = name
        self.idToObjectNumberAndVersion[name] = (objectcounter, 0)
        self.numberToId[objectcounter] = name
        idToObject[name] = object
        return PDFObjectReference(name)

### chapter 4 Objects

# literal spellings of the PDF boolean and null objects
PDFtrue = "true"
PDFfalse = "false"
PDFnull = "null"

def PDFnumber(n):
    """Return *n* unchanged (numbers need no conversion here)."""
    return n

def PDFString(str):
    """Return *str* escaped by pdfutils._escape and wrapped in parentheses."""
    # might need to change this to class for encryption
    escaped = pdfutils._escape(str)
    return "(%s)" % (escaped,)
def PDFName(data):
    """Return *data* formatted as a PDF name object ("/Name").

    Characters outside the printable range 0x21..0x7e, and the characters
    %()<>{}[]# are written as "#xx" with exactly two hex digits.
    """
    # might need to change this to class for encryption
    pieces = []
    for thischar in data:
        code = ord(thischar)
        if 0x21 <= code <= 0x7e and thischar not in "%()<>{}[]#":
            pieces.append(thischar) # no problemo
        else:
            # always two digits: "#9" would not be a valid escape for a tab
            pieces.append("#%02x" % code)
    return "/%s" % "".join(pieces)
class PDFDictionary:
    """A PDF dictionary object: maps name strings to values."""

    multiline = LongFormat  # one entry per line when true
    def __init__(self, dict=None):
        """dict should be namestring to value eg "a": 122 NOT pdfname to value NOT "/a":122"""
        if dict is None:
            self.dict = {}
        else:
            # copy so later changes don't affect the caller's mapping
            self.dict = dict.copy()
    def __setitem__(self, name, value):
        self.dict[name] = value
    def Reference(self, name, document):
        """Replace the value stored under *name* by a document reference to it."""
        ob = self.dict[name]
        self.dict[name] = document.Reference(ob)
    def format(self, document):
        """Return the dictionary formatted as "<< /key value ... >>"."""
        dict = self.dict
        keys = dict.keys()
        L = []
        a = L.append
        for k in keys:
            v = dict[k]
            fv = format(v, document)
            fk = format(PDFName(k), document)
            a(fk)
            a(" "+fv)
        #L = map(str, L)
        if self.multiline:
            Lj = string.join(L, LINEEND)
            Lj = indent(Lj)
        else:
            Lj = L
            # break up every 6 elements anyway
            for i in range(6, len(Lj), 6):
                Lj.insert(i, LINEEND)
            Lj = string.join(L, " ")
        return "<< %s >>" % Lj

# stream filters are objects to support round trip and
# possibly in the future also support parameters
class PDFStreamFilterZCompress:
    """Stream filter for the PDF "FlateDecode" filter, implemented with zlib."""
    pdfname = "FlateDecode"
    def encode(self, text):
        """Return *text* zlib-compressed; raise ImportError if zlib is missing."""
        try:
            from zlib import compress
        except ImportError:
            raise ImportError("cannot z-compress zlib unavailable")
        return compress(text)
    def decode(self, encoded):
        """Return *encoded* zlib-decompressed; raise ImportError if zlib is missing."""
        try:
            from zlib import decompress
        except ImportError:
            raise ImportError("cannot z-decompress zlib unavailable")
        return decompress(encoded)

# need only one of these, unless we implement parameters later
PDFZCompress = PDFStreamFilterZCompress()    

class PDFStreamFilterBase85Encode:
    """Stream filter for the PDF "ASCII85Decode" filter, delegating to pdfutils."""
    pdfname = "ASCII85Decode"
    def encode(self, text):
        """Return *text* ASCII base 85 encoded and passed through pdfutils._wrap."""
        # use the module imported at the top of the file rather than an
        # implicit relative "from pdfutils import ..."
        return pdfutils._wrap(pdfutils._AsciiBase85Encode(text))
    def decode(self, text):
        """Return the data decoded from ASCII base 85 *text*."""
        return pdfutils._AsciiBase85Decode(text)
# need only one of these too
PDFBase85Encode = PDFStreamFilterBase85Encode()

# template for a stream object; filled from a dict with the keys
# "dictionary", "content" and "LINEEND"
STREAMFMT = ("%(dictionary)s%(LINEEND)s" # dictionary
             "stream" # stream keyword
             "%(LINEEND)s" # a line end (could be just a \n)
             "%(content)s" # the content, with no lineend
             "endstream%(LINEEND)s" # the endstream keyword
             )
class PDFStream:
    '''set dictionary elements explicitly stream.dictionary[name]=value'''
    ### compression stuff not implemented yet
    __RefOnly__ = 1 # must be at top level
    def __init__(self, dictionary=None, content=None):
        """dictionary -- PDFDictionary of stream attributes (a new one if None).
           content    -- the stream data; must be set before format() is called."""
        if dictionary is None:
            dictionary = PDFDictionary()
        self.dictionary = dictionary
        self.content = content
        self.filters = None
    def format(self, document):
        """Return the stream object text; raise ValueError if no content was set.

        Uses self.filters, or the document's defaultStreamFilters when that
        is None, and records "Filter" and "Length" in the stream dictionary.
        """
        dictionary = self.dictionary
        content = self.content
        filters = self.filters
        if content is None:
            raise ValueError("stream content not set")
        if filters is None:
            filters = document.defaultStreamFilters
        if filters is not None:
            # apply filters in reverse order listed
            rf = list(filters) # copy: don't reverse the caller's list
            rf.reverse()
            fnames = []
            for f in rf:
                #print "*****************content:"; print repr(content[:200])
                #print "*****************filter", f.pdfname
                content = f.encode(content)
                # names end up in the order the filters were listed
                fnames.insert(0, PDFName(f.pdfname))
            #print "*****************finally:"; print content[:200]
            #print "****** FILTERS", fnames
            dictionary["Filter"] = PDFArray(fnames)
        fc = format(content, document)
        #print "type(content)", type(content)
        #if fc!=content: burp
        # set dictionary length parameter
        dictionary["Length"] = len(content)
        fd = format(dictionary, document)
        sdict = LINEENDDICT.copy()
        sdict["dictionary"] = fd
        sdict["content"] = fc
        return STREAMFMT % sdict

def teststream(content=None):
    """Return a PDFStream for testing, using teststreamcontent when *content* is None.

    The content is stripped, its newlines are converted to LINEEND and a
    final LINEEND is added.
    """
    #content = "" # test
    text = content
    if text is None:
        text = teststreamcontent
    text = string.replace(string.strip(text), "\n", LINEEND) + LINEEND
    stream = PDFStream()
    stream.content = text
    stream.filters = [PDFBase85Encode, PDFZCompress]
    # nothing else needed...
    stream.__Comment__ = "test stream"
    return stream

# page marking operators used as the content of teststream()
teststreamcontent = """
1 0 0 1 0 0 cm BT /F9 12 Tf 14.4 TL ET
1.00 0.00 1.00 rg
n 72.00 72.00 432.00 648.00 re B*
"""
class PDFArray:
    """A PDF array object holding a sequence of values."""
    multiline = LongFormat  # one element per line when true
    def __init__(self, sequence):
        self.sequence = list(sequence)
    def References(self, document):
        """make all objects in sequence references"""
        self.sequence = map(document.Reference, self.sequence)
    def format(self, document):
        """Return the array formatted as "[ elt elt ... ]"."""
        #ssequence = map(str, self.sequence)
        sequence = self.sequence
        fsequence = []
        for elt in sequence:
            felt = format(elt, document)
            fsequence.append(felt)
        if self.multiline:
            Lj = string.join(fsequence, LINEEND)
            Lj = indent(Lj)
        else:
            # break up every 10 elements anyway
            Lj = fsequence
            breakline = LINEEND+" "
            for i in range(10, len(Lj), 10):
                Lj.insert(i, breakline)
            Lj = string.join(Lj)
        return "[ %s ]" % Lj

# template for "n v obj ... endobj"; filled from a dict with the keys
# "n", "v", "content" and "LINEEND"
INDIRECTOBFMT = ("%(n)s %(v)s obj%(LINEEND)s"
                 "%(content)s" "%(LINEEND)s"
                 "endobj" "%(LINEEND)s")

class PDFIndirectObject:
    """Wraps an object's content as a numbered indirect object."""
    __RefOnly__ = 1
    def __init__(self, name, content):
        """name    -- internal name the content is registered under in the document.
           content -- the object to write between "obj" and "endobj"."""
        self.name = name
        self.content = content
    def format(self, document):
        """Return the "n v obj ... endobj" text for the wrapped content."""
        name = self.name
        (n, v) = document.idToObjectNumberAndVersion[name]
        content = self.content
        fcontent = format(content, document, toplevel=1) # yes this is at top level
        sdict = LINEENDDICT.copy()
        sdict["n"] = n
        sdict["v"] = v
        sdict["content"] = fcontent
        return INDIRECTOBFMT % sdict

class PDFObjectReference:
    """A reference ("n v R") to an object registered in a document by internal name."""
    def __init__(self, name):
        self.name = name
    def format(self, document):
        """Return "n v R" using the number and version the document holds for the name.

        Raises KeyError if the name is not registered in *document*.
        """
        name = self.name
        (n, v) = document.idToObjectNumberAndVersion[name]
        return "%s %s R" % (n,v)

### chapter 5

# version line followed by a comment line of four bytes >= 128; the bytes
# are written as octal escapes so this source file stays pure ASCII
PDFHeader = ("%PDF-1.3"+LINEEND+"%\355\354\266\276  "+LINEEND)

class PDFFile:
    ### just accumulates strings: keeps track of current offset
    def __init__(self):
        self.strings = []
        self.offset = 0
        # every file starts with the header, so offsets include its length
        self.add(PDFHeader)
    def add(self, s):
        """should be constructed as late as possible, return position where placed"""
        result = self.offset
        self.offset = result+len(s)
        self.strings.append(s)
        return result
    def format(self, document):
        """Return everything added so far as one string (*document* is unused)."""
        return string.join(self.strings, "")

# one in-use xref entry: 10 digit offset, 5 digit generation number, "n"
XREFFMT = '%0.10d %0.5d n'    

class PDFCrossReferenceSubsection:
    """One contiguous run of cross-reference entries."""
    def __init__(self, firstentrynumber, idsequence):
        """firstentrynumber -- object number of the first entry in the run.
           idsequence       -- internal ids of the objects in the run."""
        self.firstentrynumber = firstentrynumber
        self.idsequence = idsequence
    def format(self, document):
        """id sequence should represent contiguous object nums else error. free numbers not supported (yet)"""
        firstentrynumber = self.firstentrynumber
        idsequence = self.idsequence
        entries = list(idsequence)
        nentries = len(idsequence)
        # special case: object number 0 is always free
        taken = {}
        if firstentrynumber==0:
            taken[0] = "standard free entry"
            nentries = nentries+1
            entries.insert(0, "0000000000 65535 f")
        idToNV = document.idToObjectNumberAndVersion
        idToOffset = document.idToOffset
        lastentrynumber = firstentrynumber+nentries-1
        for ident in idsequence:
            (num, version) = idToNV[ident]
            if taken.has_key(num):
                # taken is keyed by object number, not by id
                raise ValueError("object number collision %s %s %s" % (num, repr(ident), repr(taken[num])))
            if num>lastentrynumber or num<firstentrynumber:
                raise ValueError("object number %s not in range %s..%s" % (num, firstentrynumber, lastentrynumber))
            # compute position in list
            rnum = num-firstentrynumber
            taken[num] = ident
            offset = idToOffset[ident]
            entries[rnum] = XREFFMT % (offset, version)
        # each entry must be exactly 20 bytes long including its line end
        if LINEEND=="\n" or LINEEND=="\r":
            reflineend = " "+LINEEND # as per spec
        elif LINEEND=="\r\n":
            reflineend = LINEEND
        else:
            raise ValueError("bad end of line! %s" % repr(LINEEND))
        # the initial line, then the entries, each terminated by a line end
        firstline = "%s %s" % (firstentrynumber, nentries)
        return firstline + LINEEND + string.join(entries, reflineend) + reflineend

class PDFCrossReferenceTable:
    """The "xref" table: a list of cross-reference subsections."""

    def __init__(self):
        self.sections = []
    def addsection(self, firstentry, ids):
        """Add a subsection starting at object number *firstentry* for the given ids."""
        section = PDFCrossReferenceSubsection(firstentry, ids)
        self.sections.append(section)
    def format(self, document):
        """Return the table text; raise ValueError if no section was added."""
        sections = self.sections
        if not sections:
            raise ValueError("no crossref sections")
        L = ["xref"+LINEEND]
        for s in sections:
            fs = format(s, document)
            L.append(fs)
        return string.join(L, "")

# template for the file trailer; filled from a dict with the keys
# "dict", "startxref", "LINEEND" and "PERCENT"
TRAILERFMT = ("trailer%(LINEEND)s"
              "%(dict)s%(LINEEND)s"
              "startxref%(LINEEND)s"
              "%(startxref)s%(LINEEND)s"
              "%(PERCENT)s%(PERCENT)sEOF%(LINEEND)s")

class PDFTrailer:
    """The file trailer: trailer dictionary plus the startxref offset."""

    def __init__(self, startxref, Size=None, Prev=None, Root=None, Info=None, ID=None, Encrypt=None):
        """startxref -- offset of the cross-reference table in the file.
           The other arguments become trailer dictionary entries when not
           None; Size and Root are required (ValueError otherwise)."""
        self.startxref = startxref
        if Size is None or Root is None:
            raise ValueError("Size and Root keys required")
        dict = self.dict = PDFDictionary()
        # PDF names are case sensitive: the file identifier key is "ID"
        for (n,v) in [("Size", Size), ("Prev", Prev), ("Root", Root),
                      ("Info", Info), ("ID", ID), ("Encrypt", Encrypt)]:
            if v is not None:
                dict[n] = v
    def format(self, document):
        """Return the trailer text."""
        fdict = format(self.dict, document)
        D = LINEENDDICT.copy()
        D["dict"] = fdict
        D["startxref"] = self.startxref
        return TRAILERFMT % D

#### XXXX skipping incremental update,
#### encryption

#### chapter 6, doc structure

class PDFCatalog:
    """The document catalog; also the base class for other attribute-driven dictionaries.

    Entries are taken from instance/class attributes named in __Defaults__
    and __NoDefault__; those named in __Refs__ are written as references.
    """
    __Comment__ = "Document Root"
    __RefOnly__ = 1
    # to override, set as attributes
    __Defaults__ = {"Type": PDFName("Catalog"),
                "PageMode": PDFName("UseNone"),
                }
    __NoDefault__ = string.split("""
        Dests Outlines Pages Threads AcroForm Names OpenActions PageMode URI
        ViewerPreferences PageLabels PageLayout JavaScript StructTreeRoot SpiderInfo"""
                                 )
    __Refs__ = __NoDefault__ # make these all into references, if present
    def format(self, document):
        """Return the object formatted as a PDF dictionary."""
        # give subclasses the chance to compute attributes first
        self.check_format(document)
        defaults = self.__Defaults__
        Refs = self.__Refs__
        D = {}
        for k in defaults.keys():
            default = defaults[k]
            v = None
            if hasattr(self, k) and getattr(self,k) is not None:
                v = getattr(self, k)
            elif default is not None:
                v = default
            if v is not None:
                D[k] = v
        for k in self.__NoDefault__:
            if hasattr(self, k):
                v = getattr(self,k)
                if v is not None:
                    D[k] = v
        # force objects to be references where required
        for k in Refs:
            if D.has_key(k):
                #print"k is", k, "value", D[k]
                D[k] = document.Reference(D[k])
        dict = PDFDictionary(D)
        return format(dict, document)

    def showOutline(self):
        """Set PageMode to UseOutlines."""
        self.PageMode = PDFName("UseOutlines")

    def showFullScreen(self):
        """Set PageMode to FullScreen."""
        self.PageMode = PDFName("FullScreen")

    def check_format(self, document):
        """for use in subclasses"""
        pass

# not yet implementing
#  ViewerPreferences, PageLabelDictionaries,

class PDFPages(PDFCatalog):
    """The page tree node: collects the pages of the document."""
    __Comment__ = "page tree"
    __RefOnly__ = 1
    # note: could implement page attribute inheritance...
    __Defaults__ = {"Type": PDFName("Pages"),
                    }
    __NoDefault__ = string.split("Kids Count Parent")
    __Refs__ = ["Parent"]
    def __init__(self):
        self.pages = []
    def __getitem__(self, item):
        return self.pages[item]
    def addPage(self, page):
        """Append *page* to the page list."""
        self.pages.append(page)
    def check_format(self, document):
        """Compute the Kids array and Count from the collected pages."""
        # convert all pages to page references
        pages = self.pages
        kids = PDFArray(pages)
        # make sure all pages are references
        kids.References(document)
        self.Kids = kids
        self.Count = len(pages)

class PDFPage(PDFCatalog):
    """A page dictionary, completed with defaults in check_format()."""
    __Comment__ = "Page dictionary"
    # all PDF attributes can be set explicitly
    # if this flag is set, the "usual" behavior will be suppressed
    Override_default_compilation = 0
    __RefOnly__ = 1
    __Defaults__ = {"Type": PDFName("Page"),
                   # "Parent": PDFObjectReference(Pages),  # no! use document.Pages
                    }
    __NoDefault__ = string.split(""" Parent
        MediaBox Resources Contents CropBox Rotate Thumb Annots B Dur Hid Trans AA
        PieceInfo LastModified SeparationInfo ArtBox TrimBox BleedBox ID PZ
        """)
    __Refs__ = string.split("""
        Contents Parent ID
        """)
    pagewidth = 595
    pageheight = 842
    stream = None
    hasImages = 0
    compression = 0
    XObjects = None
    # transitionstring?
    # xobjects?
    # annotations
    def __init__(self):
        # set all nodefaults to None
        for name in self.__NoDefault__:
            setattr(self, name, None)
    def setCompression(self, onoff):
        """Turn compression of the page stream on or off."""
        self.compression = onoff
    def setStream(self, code):
        """Set the page content: a string, or a list of lines joined with LINEEND.

        Raises ValueError if default compilation is overridden.
        """
        if self.Override_default_compilation:
            raise ValueError("overridden! must set stream explicitly")
        from types import ListType
        if type(code) is ListType:
            code = string.join(code, LINEEND)+LINEEND
        self.stream = code
    def check_format(self, document):
        """Fill in MediaBox, Annots, Contents, Resources and Parent where unset."""
        # set up parameters unless usual behaviour is suppressed
        if self.Override_default_compilation:
            return
        self.MediaBox = self.MediaBox or PDFArray([0, 0, self.pagewidth, self.pageheight])
        if not self.Annots:
            self.Annots = None
        else:
            #print self.Annots
            #raise ValueError, "annotations not reimplemented yet"
            if type(self.Annots) is not types.InstanceType:
                self.Annots = PDFArray(self.Annots)
        if not self.Contents:
            stream = self.stream
            if not stream:
                self.Contents = teststream()
            else:
                S = PDFStream()
                if self.compression:
                    S.filters = [PDFZCompress, PDFBase85Encode]
                S.content = stream
                S.__Comment__ = "page stream"
                self.Contents = S
        if not self.Resources:
            resources = PDFResourceDictionary()
            # fonts!
            # NOTE(review): basicFonts/basicProcs are assumed to exist on
            # PDFResourceDictionary, which is defined outside this section - confirm
            resources.basicFonts()
            if self.hasImages:
                resources.allProcs()
            else:
                resources.basicProcs()
            if self.XObjects:
                #print "XObjects", self.XObjects.dict
                resources.XObject = self.XObjects
            self.Resources = resources
        if not self.Parent:
            pages = document.Pages
            self.Parent = document.Reference(pages)

def testpage(document):
    """Return a test PDFPage for *document* with test content and all procsets enabled."""
    P = PDFPage()
    P.Contents = teststream()
    pages = document.Pages
    P.Parent = document.Reference(pages)
    P.MediaBox = PDFArray([0, 0, 595, 841])
    resources = PDFResourceDictionary()
    resources.allProcs() # enable all procsets
    P.Resources = resources
    return P



# an empty outline dictionary, used by the test outline below
DUMMYOUTLINE = """
<<
  /Count
      0
  /Type
      /Outlines
>>"""

class PDFOutlines0:
    """Fixed, empty outline used for testing: formats as the DUMMYOUTLINE text."""
    __Comment__ = "TEST OUTLINE!"
    text = string.replace(DUMMYOUTLINE, "\n", LINEEND)
    __RefOnly__ = 1
    def format(self, document):
        return self.text

class OutlineEntryObject:
    "an entry in an outline"
    Title = Dest = Parent = Prev = Next = First = Last = Count = None
    def format(self, document):
        """Return the entry as a formatted PDF dictionary.

        Title, Parent and Dest are always written; the linking entries and
        Count only when they are not None.
        """
        entries = {
            "Title": PDFString(self.Title),
            "Parent": self.Parent,
            "Dest": self.Dest,
        }
        for key in ("Prev", "Next", "First", "Last", "Count"):
            value = getattr(self, key)
            if value is not None:
                entries[key] = value
        return PDFDictionary(entries).format(document)

class PDFOutlines:
    """takes a recursive list of outline destinations
       like
           out = PDFOutline1()
           out.setNames(canvas, # requires canvas for name resolution
             "chapter1dest",
             ("chapter2dest",
              ["chapter2section1dest",
               "chapter2section2dest",
               "chapter2conclusiondest"]
             ), # end of chapter2 description
             "chapter3dest",
             ("chapter4dest", ["c4s1", "c4s2"])
             )
       Higher layers may build this structure incrementally. KISS at base level.
    """
    # first attempt, many possible features missing.
    #no init for now
    mydestinations = ready = None
    counter = 0
    currentlevel = -1 # ie, no levels yet
    def __init__(self):
        self.destinationnamestotitles = {}
        self.destinationstotitles = {}
        self.levelstack = []
        self.buildtree = []
        self.closedict = {} # dictionary of "closed" destinations in the outline

    def addOutlineEntry(self, destinationname, level=0, title=None, closed=None):
        """destinationname of None means "close the tree" """
        from types import IntType, TupleType
        if destinationname is None and level!=0:
            raise ValueError("close tree must have level of 0")
        if type(level) is not IntType: raise ValueError("level must be integer, got %s" % type(level))
        if level<0: raise ValueError("negative levels not allowed")
        if title is None: title = destinationname
        currentlevel = self.currentlevel
        stack = self.levelstack
        # adjust currentlevel and stack to match level
        if level>currentlevel:
            if level>currentlevel+1:
                raise ValueError("can't jump from outline level %s to level %s, need intermediates" %(currentlevel, level))
            level = currentlevel = currentlevel+1
            stack.append([]) # start collecting entries for the new level
        while level<currentlevel:
            # pop off levels to match
            current = stack[-1]
            del stack[-1]
            previous = stack[-1]
            lastinprevious = previous[-1]
            if type(lastinprevious) is TupleType:
                raise ValueError("cannot reset existing sections: " + repr(lastinprevious))
            else:
                # attach the finished level to its parent entry
                name = lastinprevious
                sectionlist = current
                previous[-1] = (name, sectionlist)
            currentlevel = currentlevel-1
        if destinationname is None: return
        stack[-1].append(destinationname)
        self.destinationnamestotitles[destinationname] = title
        if closed: self.closedict[destinationname] = 1
        self.currentlevel = level

    def setDestinations(self, destinationtree):
        """Set the (already translated) tree of destinations."""
        self.mydestinations = destinationtree

    def format(self, document):
        """Return the outline root dictionary; requires prepare() to have run."""
        D = {}
        D["Type"] = PDFName("Outlines")
        c = self.count
        D["Count"] = c
        if c!=0:
            D["First"] = self.first
            D["Last"] = self.last
        PD = PDFDictionary(D)
        return PD.format(document)

    def setNames(self, canvas, *nametree):
        """Translate the names given as arguments and use them as the destinations."""
        desttree = self.translateNames(canvas, nametree)
        self.setDestinations(desttree)

    def setNameList(self, canvas, nametree):
        "Explicit list so I don't need to do apply(...) in the caller"
        desttree = self.translateNames(canvas, nametree)
        self.setDestinations(desttree)

    def translateNames(self, canvas, object):
        "recursively translate tree of names into tree of destinations"
        from types import StringType, ListType, TupleType
        Ot = type(object)
        destinationnamestotitles = self.destinationnamestotitles
        destinationstotitles = self.destinationstotitles
        closedict = self.closedict
        if Ot is StringType:
            destination = canvas._bookmarkReference(object)
            title = object
            if destinationnamestotitles.has_key(object):
                title = destinationnamestotitles[object]
            else:
                destinationnamestotitles[title] = title
            destinationstotitles[destination] = title
            if closedict.has_key(object):
                closedict[destination] = 1 # mark destination closed
            return {object: canvas._bookmarkReference(object)} # name-->ref
        if Ot is ListType or Ot is TupleType:
            L = []
            for o in object:
                L.append(self.translateNames(canvas, o))
            if Ot is TupleType:
                return tuple(L)
            return L
        # a real exception instead of a string exception
        raise TypeError("in outline, destination name must be string: got a %s" % Ot)

    def prepare(self, document, canvas):
        """prepare all data structures required for save operation (create related objects)"""
        if self.mydestinations is None:
            if self.levelstack:
                self.addOutlineEntry(None) # close the tree
                destnames = self.levelstack[0]
                #from pprint import pprint; pprint(destnames); stop
                self.mydestinations = self.translateNames(canvas, destnames)
            else:
                # no outline entries at all
                self.first = self.last = None
                self.count = 0
                self.ready = 1
                return
        #self.first = document.objectReference("Outline.First")
        #self.last = document.objectReference("Outline.Last")
        # XXXX this needs to be generalized for closed entries!
        self.count = count(self.mydestinations, self.closedict)
        (self.first, self.last) = self.maketree(document, self.mydestinations, toplevel=1)
        self.ready = 1

    def maketree(self, document, destinationtree, Parent=None, toplevel=0):
        """Create and register the entry objects for one level of the tree.

        Returns (reference to first entry, reference to last entry).
        """
        from types import ListType, TupleType, DictType
        tdestinationtree = type(destinationtree)
        if toplevel:
            levelname = "Outline"
            Parent = document.Reference(document.Outlines)
        else:
            self.count = self.count+1
            levelname = "Outline.%s" % self.count
            if Parent is None:
                raise ValueError("non-top level outline elt parent must be specified")
        if tdestinationtree is not ListType and tdestinationtree is not TupleType:
            raise ValueError("destinationtree must be list or tuple, got %s" % tdestinationtree)
        nelts = len(destinationtree)
        lastelt = firstref = lastref = None
        destinationnamestotitles = self.destinationnamestotitles
        closedict = self.closedict
        for index in range(nelts):
            eltobj = OutlineEntryObject()
            eltobj.Parent = Parent
            eltname = "%s.%s" % (levelname, index)
            eltref = document.Reference(eltobj, eltname)
            #document.add(eltname, eltobj)
            if lastelt is not None:
                lastelt.Next = eltref
                eltobj.Prev = lastref
            if firstref is None:
                firstref = eltref
            lastref = eltref
            lastelt = eltobj # advance eltobj
            elt = destinationtree[index]
            te = type(elt)
            if te is DictType:
                # simple leaf {name: dest}
                leafdict = elt
            elif te is TupleType:
                # leaf with subsections: ({name: ref}, subsections) XXXX should clean up (see count(...))
                try:
                    (leafdict, subsections) = elt
                except ValueError:
                    raise ValueError("destination tree elt tuple should have two elts, got %s" % len(elt))
                eltobj.Count = count(subsections, closedict)
                (eltobj.First, eltobj.Last) = self.maketree(document, subsections, eltref)
            else:
                raise ValueError("destination tree elt should be dict or tuple, got %s" % te)
            try:
                [(Title, Dest)] = leafdict.items()
            except (AttributeError, TypeError, ValueError):
                raise ValueError("bad outline leaf dictionary, should have one entry "+str(elt))
            eltobj.Title = destinationnamestotitles[Title]
            eltobj.Dest = Dest
            if te is TupleType and closedict.has_key(Dest):
                # closed subsection, count should be negative
                eltobj.Count = -eltobj.Count
        return (firstref, lastref)
def count(tree, closedict=None):
    """Utility for outlines: recursively count the entries of a destination tree.

    tree is one of
      - a leaf (anything that is not a tuple or list, normally a one entry
        {title: destination} dictionary), which counts as 1;
      - a (leafdict, subsections) tuple, which counts as 1 for the heading
        plus the count of its subsections, or as just 1 when the heading's
        destination is a key of closedict;
      - a list of such elements, whose counts are added up.

    closedict, if given, maps the destinations of closed outline entries.

    An empty list counts as 0 (it used to raise TypeError from reduce()).
    Raises ValueError if a tuple does not have exactly two elements or its
    leaf dictionary does not have exactly one entry.
    """
    is_tuple = isinstance(tree, tuple)
    if is_tuple:
        # leaf with subsections XXXX should clean up this structural usage
        (leafdict, subsections) = tree
        [(title, dest)] = leafdict.items()
        if closedict and dest in closedict:
            return 1 # closed tree element
    if is_tuple or isinstance(tree, list):
        # for an open tuple this visits the leaf dictionary (1) and then the subsections
        total = 0
        for element in tree:
            total = total + count(element, closedict)
        return total
    return 1

#### dummy info
# Hand written document information dictionary, used verbatim by PDFInfo0.
DUMMYINFO = """
<</Title (testing)
/Author (arw)
/CreationDate (D:20001012220652)
/Producer (ReportLab http://www.reportlab.com)
/Subject (this file generated by an alpha test module)
>>
"""
class PDFInfo0:
    """Test info structure: formats to the fixed DUMMYINFO text."""
    __RefOnly__ = 1
    __Comment__ = "TEST INFO STRUCTURE"
    # DUMMYINFO with every newline replaced by the PDF line ending
    text = string.join(string.split(DUMMYINFO, "\n"), LINEEND)
    def format(self, document):
        # the document is not consulted, the text is already final
        return self.text

class PDFInfo:
    """PDF documents can have basic information embedded, viewable from
    File | Document Info in Acrobat Reader.  If this is wrong, you get
    Postscript errors while printing, even though it does not print.

    Set the title, author and subject attributes before the document is
    formatted; format() supplies the creation date and the producer.
    """
    def __init__(self):
        self.title = "untitled"
        self.author = "anonymous"
        self.subject = "unspecified"
    def format(self, document):
        """Return this information formatted as a PDF dictionary for document."""
        D = {}
        D["Title"] = PDFString(self.title)
        D["Author"] = PDFString(self.author)
        # PDFDate() with no arguments uses its default date
        D["CreationDate"] = PDFDate()
        D["Producer"] = PDFString("ReportLab")
        D["Subject"] = PDFString(self.subject)
        PD = PDFDictionary(D)
        return PD.format(document)

# skipping thumbnails, etc

class Annotation:
    """superclass for all annotations.

    Subclasses overload Dict() to return the PDFDictionary for the
    annotation, normally by calling AnnotationDict with their entries as
    keyword arguments.
    """
    # entries every annotation dictionary starts with
    defaults = [("Type", PDFName("Annot"),)]
    # entries that must be present once defaults and keywords are merged
    required = ("Type", "Rect", "Contents", "Subtype")
    # every entry name an annotation of this class may carry
    permitted = required+(
      "Border", "C", "T", "M", "F", "H", "BS", "AA", "AS", "Popup", "P")
    def cvtdict(self, d):
        """transform dict args from python form to pdf string rep as needed

        d is modified in place and also returned: a Rect that is not
        already a string becomes a PDFArray, Contents becomes a PDFString.
        """
        Rect = d["Rect"]
        if not isinstance(Rect, str):
            d["Rect"] = PDFArray(Rect)
        d["Contents"] = PDFString(d["Contents"])
        return d
    def AnnotationDict(self, **kw):
        """Build the PDFDictionary for an annotation from keyword arguments.

        The class defaults are applied first and then overridden by kw.
        Raises ValueError if a required entry is missing or if an entry
        is not in the permitted list.
        """
        d = {}
        for (name,val) in self.defaults:
            d[name] = val
        # without this the caller's entries were never seen and the
        # required check below could not pass
        d.update(kw)
        for name in self.required:
            if name not in d:
                raise ValueError("keyword argument %s missing" % name)
        d = self.cvtdict(d)
        permitted = self.permitted
        for name in d.keys():
            if name not in permitted:
                raise ValueError("bad annotation dictionary name %s" % name)
        return PDFDictionary(d)
    def Dict(self):
        """Return the annotation as a PDFDictionary; must be overloaded."""
        raise ValueError("DictString undefined for virtual superclass Annotation, must overload")
        # but usually
        #return self.AnnotationDict(Rect=(a,b,c,d)) or whatever
    def format(self, document):
        """Format the dictionary produced by Dict() for document."""
        D = self.Dict()
        return D.format(document)

class TextAnnotation(Annotation):
    """Annotation whose Subtype is /Text.

    Rect and Contents are required; any further keyword arguments are
    passed on as extra dictionary entries (and are checked against the
    permitted list when the dictionary is built).
    """
    permitted = Annotation.permitted + (
        "Open", "Name", "AP")
    def __init__(self, Rect, Contents, **kw):
        self.Rect = Rect
        self.Contents = Contents
        self.otherkw = kw
    def Dict(self):
        """Return the PDFDictionary describing this annotation."""
        d = {}
        # extra entries first, so that the explicit ones below win
        d.update(self.otherkw)
        d["Rect"] = self.Rect
        d["Contents"] = self.Contents
        d["Subtype"] = "/Text"
        return self.AnnotationDict(**d)
class LinkAnnotation(Annotation):
    """Annotation whose Subtype is /Link, pointing at Destination.

    Rect, Contents and Destination are required; Border defaults to
    "[0 0 1]".  Any further keyword arguments are passed on as extra
    dictionary entries (and are checked against the permitted list when
    the dictionary is built).
    """
    permitted = Annotation.permitted + (
        "Dest", "A", "PA")
    def __init__(self, Rect, Contents, Destination, Border="[0 0 1]", **kw):
        self.Border = Border
        self.Rect = Rect
        self.Contents = Contents
        self.Destination = Destination
        self.otherkw = kw
    def dummyDictString(self): # old, testing
        return """
          << /Type /Annot /Subtype /Link /Rect [71 717 190 734] /Border [16 16 1]
             /Dest [23 0 R /Fit] >>
          """
    def Dict(self):
        """Return the PDFDictionary describing this annotation."""
        d = {}
        # extra entries first, so that the explicit ones below win
        d.update(self.otherkw)
        d["Border"] = self.Border
        d["Rect"] = self.Rect
        d["Contents"] = self.Contents
        d["Subtype"] = "/Link"
        d["Dest"] = self.Destination
        return self.AnnotationDict(**d)

# skipping names tree

# skipping actions

# skipping names trees

# skipping to chapter 7

class PDFRectangle:
    """Four numbers (llx, lly, urx, ury) that format as a PDF array."""
    def __init__(self, llx, lly, urx, ury):
        self.llx = llx
        self.lly = lly
        # NB the third value (urx) is kept under the attribute name "ulx"
        self.ulx = urx
        self.ury = ury
    def format(self, document):
        corners = [self.llx, self.lly, self.ulx, self.ury]
        rect = PDFArray(corners)
        return format(rect, document)

# year, month, day, hour, minute, second; all zero padded
DATEFMT = '%04d%02d%02d%02d%02d%02d'
import time
# local time at which this module was imported, used as the PDFDate default
(nowyyyy, nowmm, nowdd, nowhh, nowm, nows) = tuple(time.localtime(time.time())[:6])

class PDFDate:
    """A date and time that formats as a PDF string of digits (see DATEFMT).

    Every field defaults to the local time at which this module was
    imported, not to the time of the call.
    """
    # gmt offset not yet supported
    def __init__(self, yyyy=nowyyyy, mm=nowmm, dd=nowdd, hh=nowhh, m=nowm, s=nows):
        self.yyyy = yyyy
        self.mm = mm
        self.dd = dd
        self.hh = hh
        self.m = m
        self.s = s
    def format(self, doc):
        """Return the date formatted for doc as a PDFString."""
        S = PDFString(DATEFMT % (self.yyyy, self.mm, self.dd, self.hh, self.m, self.s))
        return format(S, doc)

class Destination:
    """not a pdfobject!  This is a placeholder that delegates to a pdf
       destination object only after one has been defined by the format
       defining methods below (xyz, fit, fitb, ...).  EG a Destination can
       refer to Appendix A before it has been defined, but only if Appendix A
       is explicitly noted as a destination and resolved before the document
       is generated...
       For example the following sequence causes resolution before doc generation.
          d = Destination("appendix-a")
          d.fit() # or other format defining method call
          d.setPage(p)
       format() raises ValueError while either the format or the page is
       still missing.
    """
    # fmt defaults to None so that an unresolved destination is reported
    # by format() as a ValueError and not as an AttributeError
    representation = page = fmt = None
    def __init__(self,name):
        self.name = name
    def format(self, document):
        """Bind the page to the chosen format object and format that."""
        f = self.fmt
        if f is None: raise ValueError("format not resolved %s" % self.name)
        p = self.page
        if p is None: raise ValueError("Page reference unbound %s" % self.name)
        f.page = p
        return f.format(document)
    # the format objects are created with page None, format() fills it in
    def xyz(self, left, top, zoom):  # see pdfspec mar 11 99 pp184+
        self.fmt = PDFDestinationXYZ(None, left, top, zoom)
    def fit(self):
        self.fmt = PDFDestinationFit(None)
    def fitb(self):
        self.fmt = PDFDestinationFitB(None)
    def fith(self, top):
        self.fmt = PDFDestinationFitH(None,top)
    def fitv(self, left):
        self.fmt = PDFDestinationFitV(None, left)
    def fitbh(self, top):
        self.fmt = PDFDestinationFitBH(None, top)
    def fitbv(self, left):
        self.fmt = PDFDestinationFitBV(None, left)
    def fitr(self, left, bottom, right, top):
        self.fmt = PDFDestinationFitR(None, left, bottom, right, top)
    def setPage(self, page):
        # only remembered here: the format object may not yet be defined
        self.page = page
class PDFDestinationXYZ:
    """Destination that formats as the array [page /XYZ left top zoom]."""
    typename = "XYZ"
    def __init__(self, page, left, top, zoom):
        self.page = page
        # left must be stored too, format() reads it
        self.left = left
        self.top = top
        self.zoom = zoom
    def format(self, document):
        pageref = document.Reference(self.page)
        A = PDFArray( [ pageref, PDFName(self.typename), self.left, self.top, self.zoom ] )
        return format(A, document)
class PDFDestinationFit:
    """Destination that formats as the array [page /Fit]."""
    typename = "Fit"
    def __init__(self, page):
        self.page = page
    def format(self, document):
        pageref = document.Reference(self.page)
        A = PDFArray( [ pageref, PDFName(self.typename) ] )
        return format(A, document)

class PDFDestinationFitB(PDFDestinationFit):
    """As PDFDestinationFit, but with the type name /FitB."""
    typename = "FitB"
class PDFDestinationFitH:
    """Destination that formats as the array [page /FitH top]."""
    typename = "FitH"
    def __init__(self, page, top):
        self.page = page
        self.top = top
    def format(self, document):
        pageref = document.Reference(self.page)
        A = PDFArray( [ pageref, PDFName(self.typename), self.top ] )
        return format(A, document)

class PDFDestinationFitBH(PDFDestinationFitH):
    """As PDFDestinationFitH, but with the type name /FitBH."""
    typename = "FitBH"
class PDFDestinationFitV:
    """Destination that formats as the array [page /FitV left]."""
    typename = "FitV"
    def __init__(self, page, left):
        self.page = page
        self.left = left
    def format(self, document):
        pageref = document.Reference(self.page)
        A = PDFArray( [ pageref, PDFName(self.typename), self.left ] )
        return format(A, document)

class PDFDestinationFitBV(PDFDestinationFitV):
    """As PDFDestinationFitV, but with the type name /FitBV."""
    typename = "FitBV"

# Destination.fitbv looks the class up as PDFDestinationFitBV; the name it
# used to be defined under is kept so that existing references still work
PDFDestinationBV = PDFDestinationFitBV

class PDFDestinationFitR:
    """Destination that formats as the array [page /FitR left bottom right top]."""
    typename = "FitR"
    def __init__(self, page, left, bottom, right, top):
        self.page = page
        self.left = left
        self.bottom = bottom
        self.right = right
        self.top = top
    def format(self, document):
        pageref = document.Reference(self.page)
        A = PDFArray( [ pageref, PDFName(self.typename), self.left, self.bottom, self.right, self.top] )
        return format(A, document)

# named destinations need nothing

# skipping filespecs

class PDFResourceDictionary:
    """each element *could* be reset to a reference if desired

    The attributes named in dict_attributes start out as empty
    dictionaries and ProcSet as an empty list.  format() leaves out the
    ones that are still empty, wraps the non-empty ones as PDFDictionary
    or PDFArray, and passes any other value (for example the reference
    set by basicFonts) through unchanged.
    """
    def __init__(self):
        self.ColorSpace = {}
        self.XObject = {}
        self.ExtGState = {}
        self.Font = {}
        self.Pattern = {}
        self.ProcSet = []
        self.Properties = {}
        self.Shading = {}
        # ?by default define the basicprocs
    stdprocs = map(PDFName, string.split("PDF Text ImageB ImageC ImageI"))
    dict_attributes = ("ColorSpace", "XObject", "ExtGState", "Font", "Pattern", "Properties", "Shading")
    def allProcs(self):
        # define all standard procsets (a copy, so the class level list is never shared)
        self.ProcSet = self.stdprocs[:]
    def basicProcs(self):
        self.ProcSet = self.stdprocs[:2] # just PDF and Text
    def basicFonts(self):
        self.Font = PDFObjectReference(BasicFonts)
    def format(self, document):
        """Return the resources formatted for document as a PDF dictionary."""
        D = {}
        from types import ListType, DictType
        for dname in self.dict_attributes:
            v = getattr(self, dname)
            if type(v) is DictType:
                # plain python dictionary: emit only if it has entries
                if v:
                    dv = PDFDictionary(v)
                    D[dname] = dv
            else:
                # already a pdf object (eg a reference): emit as it is
                D[dname] = v
        v = self.ProcSet
        dname = "ProcSet"
        if type(v) is ListType:
            if v:
                dv = PDFArray(v)
                D[dname] = dv
        else:
            D[dname] = v
        DD = PDFDictionary(D)
        return format(DD, document)

class PDFType1Font:
    """Font dictionary assembled from whichever of the listed attributes
    have been set on the object; there is no __init__, set them explicitly."""
    __RefOnly__ = 1
    Type = "Font"
    Subtype = "Type1"
    # attributes whose values are wrapped as PDFName when formatting
    # note! /Name appears to be an undocumented attribute....
    name_attributes = ["Type", "Subtype", "BaseFont", "ToUnicode", "Name"]
    # these attributes are assumed to already be of the right type
    local_attributes = ["FirstChar", "LastChar", "Widths", "Encoding", "FontDescriptor"]
    def format(self, document):
        entries = {}
        for attr in self.name_attributes:
            if hasattr(self, attr):
                entries[attr] = PDFName(getattr(self, attr))
        # a name appearing in both lists ends up with its unwrapped value
        for attr in self.local_attributes:
            if hasattr(self, attr):
                entries[attr] = getattr(self, attr)
        return PDFDictionary(entries).format(document)

def MakeStandardEnglishFontObjects(document, encoding=DEFAULT_ENCODING):
    """Make the standard fonts and the standard font dictionary.

    One PDFType1Font named F1, F2, ... is registered with document for
    every name in StandardEnglishFonts, using the requested encoding, and
    document.fontMapping records the external name -> "/Fn" mapping.

    Returns the document reference to the dictionary of all these fonts,
    which is registered under the name BasicFonts.
    Raises ValueError if encoding is not one of ALLOWED_ENCODINGS.
    """
    if encoding not in ALLOWED_ENCODINGS:
        raise ValueError("bad encoding %s" % repr(encoding))
    D = {}
    fontnumber = 1
    fontmapping = document.fontMapping
    for name in StandardEnglishFonts:
        F = PDFType1Font()
        F.BaseFont = name
        # honour the encoding that was asked for (and validated above)
        F.Encoding = PDFName(encoding)
        F.__Comment__ = "Standard English Font %s" % repr(name)
        fname = "F"+repr(fontnumber)
        F.Name = fname
        R = document.Reference(F, fname)
        D[fname] = R
        fontmapping[name] = "/"+fname # record the external to internal name map (NOT REALLY A PDFNAME: PAGE DESC)
        fontnumber = fontnumber+1
    DD = PDFDictionary(D)
    DD.__Comment__ = "The standard fonts dictionary"
    DDR = document.Reference(DD, BasicFonts)
    return DDR

class PDFTrueTypeFont(PDFType1Font):
    """Font dictionary whose Subtype is TrueType; everything else is inherited from PDFType1Font."""
    Subtype = "TrueType"
    #local_attributes = string.split("FirstChar LastChar Widths Encoding FontDescriptor") #same

class PDFMMType1Font(PDFType1Font):
    """Font dictionary whose Subtype is MMType1; everything else is inherited from PDFType1Font."""
    Subtype = "MMType1"

class PDFType3Font(PDFType1Font):
    """Font dictionary whose Subtype is Type3, with its own pass-through attributes."""
    Subtype = "Type3"
    # these attributes are assumed to already be of the right type
    local_attributes = [
        "FirstChar", "LastChar", "Widths", "CharProcs",
        "FontBBox", "FontMatrix", "Resources", "Encoding"]

class PDFType0Font(PDFType1Font):
    """Font dictionary whose Subtype is Type0, with its own pass-through attributes."""
    Subtype = "Type0"
    # these attributes are assumed to already be of the right type
    local_attributes = ["DescendantFonts", "Encoding"]

class PDFCIDFontType0(PDFType1Font):
    """Font dictionary whose Subtype is CIDFontType0, with its own pass-through attributes."""
    Subtype = "CIDFontType0"
    # these attributes are assumed to already be of the right type
    local_attributes = [
        "CIDSystemInfo", "FontDescriptor", "DW", "W", "DW2", "W2",
        "Registry", "Ordering", "Supplement"]

class PDFCIDFontType2(PDFType1Font):
    """Font dictionary whose Subtype is CIDFontType2, with its own pass-through attributes.

    This class used to be declared under the name PDFCIDFontType0 as well,
    which replaced the CIDFontType0 class defined just before it.
    """
    Subtype = "CIDFontType2"
    # these attributes are assumed to already be of the right type
    local_attributes = string.split(
        "BaseFont CIDToGIDMap CIDSystemInfo FontDescriptor DW W DW2 W2")

class PDFEncoding(PDFType1Font):
    """Encoding dictionary; reuses the attribute driven format() of PDFType1Font."""
    Type = "Encoding"
    # only these are wrapped as names (the inherited Subtype is not listed, so it is not emitted)
    name_attributes = ["Type", "BaseEncoding"]
    # these attributes are assumed to already be of the right type
    local_attributes = ["Differences"]

# skipping CMaps

class PDFFormXObject:
    """A form XObject: a content stream with its own bounding box, matrix
    and resources, formatted as a stream of Type XObject, Subtype Form."""
    # like page requires .info set by some higher level (doc)
    # XXXX any resource used in a form must be propagated up to the page that (recursively) uses
    #   the form!! (not implemented yet).
    XObjects = Annots = BBox = Matrix = Contents = stream = Resources = None
    hasImages = 1 # probably should change
    compression = 0
    def __init__(self, lowerx, lowery, upperx, uppery):
        #not done
        self.lowerx = lowerx
        self.lowery = lowery
        self.upperx = upperx
        self.uppery = uppery
    def setStreamList(self, data):
        """Set the content stream from a string, or from a list of lines joined with LINEEND."""
        if isinstance(data, list):
            data = LINEEND.join(data)
        self.stream = data
    def format(self, document):
        """Fill in whatever has not been set explicitly and format the stream for document.

        Raises ValueError if annotations have been set (not reimplemented yet).
        """
        self.BBox = self.BBox or PDFArray([self.lowerx, self.lowery, self.upperx, self.uppery])
        self.Matrix = self.Matrix or PDFArray([1, 0, 0, 1, 0, 0])
        if not self.Annots:
            self.Annots = None
        else:
            raise ValueError("annotations not reimplemented yet")
        if not self.Contents:
            stream = self.stream
            if not stream:
                # nothing to draw has been supplied: use the test stream
                self.Contents = teststream()
            else:
                S = PDFStream()
                S.content = stream
                # need to add filter stuff (?)
                S.__Comment__ = "xobject form stream"
                self.Contents = S
        # start from the resources that were set explicitly, if any, so
        # that the name is always bound when it is stored below
        resources = self.Resources
        if not resources:
            resources = PDFResourceDictionary()
            # fonts!
            resources.basicFonts()
            if self.hasImages:
                resources.allProcs()
            else:
                resources.basicProcs()
        if self.compression:
            self.Contents.filters = [PDFBase85Encode, PDFZCompress]
        sdict = self.Contents.dictionary
        sdict["Type"] = PDFName("XObject")
        sdict["Subtype"] = PDFName("Form")
        sdict["FormType"] = 1
        sdict["BBox"] = self.BBox
        sdict["Matrix"] = self.Matrix
        sdict["Resources"] = resources
        return self.Contents.format(document)

if __name__=="__main__":
    # first test: format a document with the dummy outline and save it as test.pdf
    print("line end is %s" % repr(LINEEND))
    print("PDFName %s" % PDFName("test"))
    D = PDFDocument(dummyoutline=1)
    print("PDFDict %s" % PDFDictionary({"this":1}).format(D))
    txt = D.format()
    fn = "test.pdf"
    f = open(fn, "wb")
    try:
        # actually save the document before reporting that it was written
        f.write(txt)
    finally:
        f.close()
    print("wrote %s" % fn)