src/reportlab/pdfbase/pdfmetrics.py
branch py33
changeset 3723 99aa837b6703
parent 3721 0c93dd8ff567
child 3725 ca840494f9dd
equal deleted inserted replaced
3722:29c11b905751 3723:99aa837b6703
    17 a registry of Font, TypeFace and Encoding objects.  Ideally these
    17 a registry of Font, TypeFace and Encoding objects.  Ideally these
    18 would be pre-loaded, but due to a nasty circularity problem we
    18 would be pre-loaded, but due to a nasty circularity problem we
    19 trap attempts to access them and do it on first access.
    19 trap attempts to access them and do it on first access.
    20 """
    20 """
    21 import string, os, sys
    21 import string, os, sys
    22 from types import StringType, ListType, TupleType
       
    23 from reportlab.pdfbase import _fontdata
    22 from reportlab.pdfbase import _fontdata
    24 from reportlab.lib.logger import warnOnce
    23 from reportlab.lib.logger import warnOnce
    25 from reportlab.lib.utils import rl_isfile, rl_glob, rl_isdir, open_and_read, open_and_readlines, findInPaths
    24 from reportlab.lib.utils import rl_isfile, rl_glob, rl_isdir, open_and_read, open_and_readlines, findInPaths, isSeqType, isStrType, isUnicodeType, isPython3
    26 from reportlab.rl_config import defaultEncoding, T1SearchPath
    25 from reportlab.rl_config import defaultEncoding, T1SearchPath
    27 from . import rl_codecs
    26 from . import rl_codecs
    28 _notdefChar = chr(110)
    27 _notdefChar = b'n'
    29 
    28 
    30 rl_codecs.RL_Codecs.register()
    29 rl_codecs.RL_Codecs.register()
    31 standardFonts = _fontdata.standardFonts
    30 standardFonts = _fontdata.standardFonts
    32 standardEncodings = _fontdata.standardEncodings
    31 standardEncodings = _fontdata.standardEncodings
    33 
    32 
    35 _encodings = {}
    34 _encodings = {}
    36 _fonts = {}
    35 _fonts = {}
    37 
    36 
    38 def _py_unicode2T1(utext,fonts):
    37 def _py_unicode2T1(utext,fonts):
    39     '''return a list of (font,string) pairs representing the unicode text'''
    38     '''return a list of (font,string) pairs representing the unicode text'''
    40     #print 'unicode2t1(%s, %s): %s' % (utext, fonts, type(utext))
       
    41     #if type(utext)
       
    42     R = []
    39     R = []
    43     font, fonts = fonts[0], fonts[1:]
    40     font, fonts = fonts[0], fonts[1:]
    44     enc = font.encName
    41     enc = font.encName
    45     if 'UCS-2' in enc:
    42     if 'UCS-2' in enc:
    46         enc = 'UTF16'
    43         enc = 'UTF16'
    47     while utext:
    44     while utext:
    48         try:
    45         try:
    49             R.append((font,utext.encode(enc)))
    46             if isUnicodeType(utext):
       
    47                 s = utext.encode(enc)
       
    48             else:
       
    49                 s = utext
       
    50             R.append((font,s))
    50             break
    51             break
    51         except UnicodeEncodeError as e:
    52         except UnicodeEncodeError as e:
    52             i0, il = e.args[2:4]
    53             i0, il = e.args[2:4]
    53             if i0:
    54             if i0:
    54                 R.append((font,utext[:i0].encode(enc)))
    55                 R.append((font,utext[:i0].encode(enc)))
    78     order."""
    79     order."""
    79 
    80 
    80     lines = open_and_readlines(afmFileName, 'r')
    81     lines = open_and_readlines(afmFileName, 'r')
    81     if len(lines)<=1:
    82     if len(lines)<=1:
    82         #likely to be a MAC file
    83         #likely to be a MAC file
    83         if lines: lines = string.split(lines[0],'\r')
    84         if lines: lines = lines[0].split('\r')
    84         if len(lines)<=1:
    85         if len(lines)<=1:
    85             raise ValueError('AFM file %s hasn\'t enough data' % afmFileName)
    86             raise ValueError('AFM file %s hasn\'t enough data' % afmFileName)
    86     topLevel = {}
    87     topLevel = {}
    87     glyphLevel = []
    88     glyphLevel = []
    88 
    89 
    89     lines = [l for l in map(string.strip, lines) if not l.lower().startswith('comment')]
    90     lines = [l.strip() for l in lines]
       
    91     lines = [l for l in lines if not l.lower().startswith('comment')]
    90     #pass 1 - get the widths
    92     #pass 1 - get the widths
    91     inMetrics = 0  # os 'TOP', or 'CHARMETRICS'
    93     inMetrics = 0  # os 'TOP', or 'CHARMETRICS'
    92     for line in lines:
    94     for line in lines:
    93         if line[0:16] == 'StartCharMetrics':
    95         if line[0:16] == 'StartCharMetrics':
    94             inMetrics = 1
    96             inMetrics = 1
    95         elif line[0:14] == 'EndCharMetrics':
    97         elif line[0:14] == 'EndCharMetrics':
    96             inMetrics = 0
    98             inMetrics = 0
    97         elif inMetrics:
    99         elif inMetrics:
    98             chunks = string.split(line, ';')
   100             chunks = line.split(';')
    99             chunks = list(map(string.strip, chunks))
   101             chunks = [chunk.strip() for chunk in chunks]
   100             cidChunk, widthChunk, nameChunk = chunks[0:3]
   102             cidChunk, widthChunk, nameChunk = chunks[0:3]
   101 
   103 
   102             # character ID
   104             # character ID
   103             l, r = string.split(cidChunk)
   105             l, r = cidChunk.split()
   104             assert l == 'C', 'bad line in font file %s' % line
   106             assert l == 'C', 'bad line in font file %s' % line
   105             cid = string.atoi(r)
   107             cid = int(r)
   106 
   108 
   107             # width
   109             # width
   108             l, r = string.split(widthChunk)
   110             l, r = widthChunk.split()
   109             assert l == 'WX', 'bad line in font file %s' % line
   111             assert l == 'WX', 'bad line in font file %s' % line
   110             width = string.atoi(r)
   112             width = int(r)
   111 
   113 
   112             # name
   114             # name
   113             l, r = string.split(nameChunk)
   115             l, r = nameChunk.split()
   114             assert l == 'N', 'bad line in font file %s' % line
   116             assert l == 'N', 'bad line in font file %s' % line
   115             name = r
   117             name = r
   116 
   118 
   117             glyphLevel.append((cid, width, name))
   119             glyphLevel.append((cid, width, name))
   118 
   120 
   124         if line[0:16] == 'StartCharMetrics':
   126         if line[0:16] == 'StartCharMetrics':
   125             inHeader = 0
   127             inHeader = 0
   126         elif inHeader:
   128         elif inHeader:
   127             if line[0:7] == 'Comment': pass
   129             if line[0:7] == 'Comment': pass
   128             try:
   130             try:
   129                 left, right = string.split(line,' ',1)
   131                 left, right = line.split(' ',1)
   130             except:
   132             except:
   131                 raise ValueError("Header information error in afm %s: line='%s'" % (afmFileName, line))
   133                 raise ValueError("Header information error in afm %s: line='%s'" % (afmFileName, line))
   132             try:
   134             try:
   133                 right = string.atoi(right)
   135                 right = int(right)
   134             except:
   136             except:
   135                 pass
   137                 pass
   136             topLevel[left] = right
   138             topLevel[left] = right
   137 
   139 
   138 
   140 
   176     def getFontFiles(self):
   178     def getFontFiles(self):
   177         "Info function, return list of the font files this depends on."
   179         "Info function, return list of the font files this depends on."
   178         return []
   180         return []
   179 
   181 
   180     def findT1File(self, ext='.pfb'):
   182     def findT1File(self, ext='.pfb'):
   181         possible_exts = (string.lower(ext), string.upper(ext))
   183         possible_exts = (ext.lower(), ext.upper())
   182         if hasattr(self,'pfbFileName'):
   184         if hasattr(self,'pfbFileName'):
   183             r_basename = os.path.splitext(self.pfbFileName)[0]
   185             r_basename = os.path.splitext(self.pfbFileName)[0]
   184             for e in possible_exts:
   186             for e in possible_exts:
   185                 if rl_isfile(r_basename + e):
   187                 if rl_isfile(r_basename + e):
   186                     return r_basename + e
   188                     return r_basename + e
   187         try:
   189         try:
   188             r = _fontdata.findT1File(self.name)
   190             r = _fontdata.findT1File(self.name)
   189         except:
   191         except:
   190             afm = bruteForceSearchForAFM(self.name)
   192             afm = bruteForceSearchForAFM(self.name)
   191             if afm:
   193             if afm:
   192                 if string.lower(ext) == '.pfb':
   194                 if ext.lower() == '.pfb':
   193                     for e in possible_exts:
   195                     for e in possible_exts:
   194                         pfb = os.path.splitext(afm)[0] + e
   196                         pfb = os.path.splitext(afm)[0] + e
   195                         if rl_isfile(pfb):
   197                         if rl_isfile(pfb):
   196                             r = pfb
   198                             r = pfb
   197                         else:
   199                         else:
   198                             r = None
   200                             r = None
   199                 elif string.lower(ext) == '.afm':
   201                 elif ext.lower() == '.afm':
   200                     r = afm
   202                     r = afm
   201             else:
   203             else:
   202                 r = None
   204                 r = None
   203         if r is None:
   205         if r is None:
   204             warnOnce("Can't find %s for face '%s'" % (ext, self.name))
   206             warnOnce("Can't find %s for face '%s'" % (ext, self.name))
   249             self.vector = _fontdata.encodings[name]
   251             self.vector = _fontdata.encodings[name]
   250         elif base == None:
   252         elif base == None:
   251             # assume based on the usual one
   253             # assume based on the usual one
   252             self.baseEncodingName = defaultEncoding
   254             self.baseEncodingName = defaultEncoding
   253             self.vector = _fontdata.encodings[defaultEncoding]
   255             self.vector = _fontdata.encodings[defaultEncoding]
   254         elif type(base) is StringType:
   256         elif isStrType(base):
   255             baseEnc = getEncoding(base)
   257             baseEnc = getEncoding(base)
   256             self.baseEncodingName = baseEnc.name
   258             self.baseEncodingName = baseEnc.name
   257             self.vector = baseEnc.vector[:]
   259             self.vector = baseEnc.vector[:]
   258         elif type(base) in (ListType, TupleType):
   260         elif isSeqType(base):
   259             self.baseEncodingName = defaultEncoding
   261             self.baseEncodingName = defaultEncoding
   260             self.vector = base[:]
   262             self.vector = base[:]
   261         elif isinstance(base, Encoding):
   263         elif isinstance(base, Encoding):
   262             # accept a vector
   264             # accept a vector
   263             self.baseEncodingName = base.name
   265             self.baseEncodingName = base.name
   405 
   407 
   406     def _py_stringWidth(self, text, size, encoding='utf8'):
   408     def _py_stringWidth(self, text, size, encoding='utf8'):
   407         """This is the "purist" approach to width.  The practical approach
   409         """This is the "purist" approach to width.  The practical approach
   408         is to use the stringWidth function, which may be swapped in for one
   410         is to use the stringWidth function, which may be swapped in for one
   409         written in C."""
   411         written in C."""
   410         if not isinstance(text,str): text = text.decode(encoding)
   412         if not isUnicodeType(text): text = text.decode(encoding)
   411         return sum([sum(map(f.widths.__getitem__,list(map(ord,t)))) for f, t in unicode2T1(text,[self]+self.substitutionFonts)])*0.001*size
   413         return sum([sum(map(f.widths.__getitem__,list(map(ord,t)))) for f, t in unicode2T1(text,[self]+self.substitutionFonts)])*0.001*size
   412     stringWidth = _py_stringWidth
   414     stringWidth = _py_stringWidth
   413 
   415 
   414     def _formatWidths(self):
   416     def _formatWidths(self):
   415         "returns a pretty block in PDF Array format to aid inspection"
   417         "returns a pretty block in PDF Array format to aid inspection"
   416         text = '['
   418         text = b'['
   417         for i in range(256):
   419         for i in range(256):
   418             text = text + ' ' + str(self.widths[i])
   420             text = text + b' ' + bytes(str(self.widths[i]),'utf8')
   419             if i == 255:
   421             if i == 255:
   420                 text = text + ' ]'
   422                 text = text + b' ]'
   421             if i % 16 == 15:
   423             if i % 16 == 15:
   422                 text = text + '\n'
   424                 text = text + b'\n'
   423         return text
   425         return text
   424 
   426 
   425     def addObjects(self, doc):
   427     def addObjects(self, doc):
   426         """Makes and returns one or more PDF objects to be added
   428         """Makes and returns one or more PDF objects to be added
   427         to the document.  The caller supplies the internal name
   429         to the document.  The caller supplies the internal name
   455 
   457 
   456 PFB_MARKER=chr(0x80)
   458 PFB_MARKER=chr(0x80)
   457 PFB_ASCII=chr(1)
   459 PFB_ASCII=chr(1)
   458 PFB_BINARY=chr(2)
   460 PFB_BINARY=chr(2)
   459 PFB_EOF=chr(3)
   461 PFB_EOF=chr(3)
   460 def _pfbSegLen(p,d):
   462 
   461     '''compute a pfb style length from the first 4 bytes of string d'''
   463 if isPython3:
   462     return ((((ord(d[p+3])<<8)|ord(d[p+2])<<8)|ord(d[p+1]))<<8)|ord(d[p])
   464     def _pfbCheck(p,d,m,fn):
   463 
   465         if chr(d[p])!=PFB_MARKER or chr(d[p+1])!=m:
   464 def _pfbCheck(p,d,m,fn):
   466             raise ValueError('Bad pfb file\'%s\' expected chr(%d)chr(%d) at char %d, got chr(%d)chr(%d)' % (fn,ord(PFB_MARKER),ord(m),p,d[p],d[p+1]))
   465     if d[p]!=PFB_MARKER or d[p+1]!=m:
   467         if m==PFB_EOF: return
   466         raise ValueError('Bad pfb file\'%s\' expected chr(%d)chr(%d) at char %d, got chr(%d)chr(%d)' % (fn,ord(PFB_MARKER),ord(m),p,ord(d[p]),ord(d[p+1])))
   468         p = p + 2
   467     if m==PFB_EOF: return
   469         l = (((((d[p+3])<<8)|(d[p+2])<<8)|(d[p+1]))<<8)|(d[p])
   468     p = p + 2
   470         p = p + 4
   469     l = _pfbSegLen(p,d)
   471         if p+l>len(d):
   470     p = p + 4
   472             raise ValueError('Bad pfb file\'%s\' needed %d+%d bytes have only %d!' % (fn,p,l,len(d)))
   471     if p+l>len(d):
   473         return p, p+l
   472         raise ValueError('Bad pfb file\'%s\' needed %d+%d bytes have only %d!' % (fn,p,l,len(d)))
   474 else:
   473     return p, p+l
   475     def _pfbSegLen(p,d):
       
   476         '''compute a pfb style length from the first 4 bytes of string d'''
       
   477         return ((((ord(d[p+3])<<8)|ord(d[p+2])<<8)|ord(d[p+1]))<<8)|ord(d[p])
       
   478 
       
   479     def _pfbCheck(p,d,m,fn):
       
   480         if d[p]!=PFB_MARKER or d[p+1]!=m:
       
   481             raise ValueError('Bad pfb file\'%s\' expected chr(%d)chr(%d) at char %d, got chr(%d)chr(%d)' % (fn,ord(PFB_MARKER),ord(m),p,ord(d[p]),ord(d[p+1])))
       
   482         if m==PFB_EOF: return
       
   483         p = p + 2
       
   484         l = _pfbSegLen(p,d)
       
   485         p = p + 4
       
   486         if p+l>len(d):
       
   487             raise ValueError('Bad pfb file\'%s\' needed %d+%d bytes have only %d!' % (fn,p,l,len(d)))
       
   488         return p, p+l
   474 
   489 
   475 class EmbeddedType1Face(TypeFace):
   490 class EmbeddedType1Face(TypeFace):
   476     """A Type 1 font other than one of the basic 14.
   491     """A Type 1 font other than one of the basic 14.
   477 
   492 
   478     Its glyph data will be embedded in the PDF file."""
   493     Its glyph data will be embedded in the PDF file."""
   524         self.italicAngle = topLevel.get('ItalicAngle', 0)
   539         self.italicAngle = topLevel.get('ItalicAngle', 0)
   525         self.stemV = topLevel.get('stemV', 0)
   540         self.stemV = topLevel.get('stemV', 0)
   526         self.xHeight = topLevel.get('XHeight', 1000)
   541         self.xHeight = topLevel.get('XHeight', 1000)
   527 
   542 
   528         strBbox = topLevel.get('FontBBox', [0,0,1000,1000])
   543         strBbox = topLevel.get('FontBBox', [0,0,1000,1000])
   529         tokens = string.split(strBbox)
   544         tokens = strBbox.split()
   530         self.bbox = []
   545         self.bbox = []
   531         for tok in tokens:
   546         for tok in tokens:
   532             self.bbox.append(string.atoi(tok))
   547             self.bbox.append(int(tok))
   533 
   548 
   534         glyphWidths = {}
   549         glyphWidths = {}
   535         for (cid, width, name) in glyphData:
   550         for (cid, width, name) in glyphData:
   536             glyphWidths[name] = width
   551             glyphWidths[name] = width
   537         self.glyphWidths = glyphWidths
   552         self.glyphWidths = glyphWidths
   664         else:
   679         else:
   665             raise
   680             raise
   666 
   681 
   667 def findFontAndRegister(fontName):
   682 def findFontAndRegister(fontName):
   668     '''search for and register a font given its name'''
   683     '''search for and register a font given its name'''
       
   684     assert type(fontName) is str
   669     #it might have a font-specific encoding e.g. Symbol
   685     #it might have a font-specific encoding e.g. Symbol
   670     # or Dingbats.  If not, take the default.
   686     # or Dingbats.  If not, take the default.
   671     face = getTypeFace(fontName)
   687     face = getTypeFace(fontName)
   672     if face.requiredEncoding:
   688     if face.requiredEncoding:
   673         font = Font(fontName, fontName, face.requiredEncoding)
   689         font = Font(fontName, fontName, face.requiredEncoding)
   782     rawdata = open('../../rlextra/rml2pdf/doc/rml_user_guide.prep').read()
   798     rawdata = open('../../rlextra/rml2pdf/doc/rml_user_guide.prep').read()
   783     print('rawdata length %d' % len(rawdata))
   799     print('rawdata length %d' % len(rawdata))
   784     print('test one huge string...')
   800     print('test one huge string...')
   785     test3widths([rawdata])
   801     test3widths([rawdata])
   786     print()
   802     print()
   787     words = string.split(rawdata)
   803     words = rawdata.split()
   788     print('test %d shorter strings (average length %0.2f chars)...' % (len(words), 1.0*len(rawdata)/len(words)))
   804     print('test %d shorter strings (average length %0.2f chars)...' % (len(words), 1.0*len(rawdata)/len(words)))
   789     test3widths(words)
   805     test3widths(words)
   790 
   806 
   791 
   807 
   792 def test():
   808 def test():