17 a registry of Font, TypeFace and Encoding objects. Ideally these |
17 a registry of Font, TypeFace and Encoding objects. Ideally these |
18 would be pre-loaded, but due to a nasty circularity problem we |
18 would be pre-loaded, but due to a nasty circularity problem we |
19 trap attempts to access them and do it on first access. |
19 trap attempts to access them and do it on first access. |
20 """ |
20 """ |
21 import string, os, sys |
21 import string, os, sys |
22 from types import StringType, ListType, TupleType |
|
23 from reportlab.pdfbase import _fontdata |
22 from reportlab.pdfbase import _fontdata |
24 from reportlab.lib.logger import warnOnce |
23 from reportlab.lib.logger import warnOnce |
25 from reportlab.lib.utils import rl_isfile, rl_glob, rl_isdir, open_and_read, open_and_readlines, findInPaths |
24 from reportlab.lib.utils import rl_isfile, rl_glob, rl_isdir, open_and_read, open_and_readlines, findInPaths, isSeqType, isStrType, isUnicodeType, isPython3 |
26 from reportlab.rl_config import defaultEncoding, T1SearchPath |
25 from reportlab.rl_config import defaultEncoding, T1SearchPath |
27 from . import rl_codecs |
26 from . import rl_codecs |
28 _notdefChar = chr(110) |
27 _notdefChar = b'n' |
29 |
28 |
30 rl_codecs.RL_Codecs.register() |
29 rl_codecs.RL_Codecs.register() |
31 standardFonts = _fontdata.standardFonts |
30 standardFonts = _fontdata.standardFonts |
32 standardEncodings = _fontdata.standardEncodings |
31 standardEncodings = _fontdata.standardEncodings |
33 |
32 |
35 _encodings = {} |
34 _encodings = {} |
36 _fonts = {} |
35 _fonts = {} |
37 |
36 |
38 def _py_unicode2T1(utext,fonts): |
37 def _py_unicode2T1(utext,fonts): |
39 '''return a list of (font,string) pairs representing the unicode text''' |
38 '''return a list of (font,string) pairs representing the unicode text''' |
40 #print 'unicode2t1(%s, %s): %s' % (utext, fonts, type(utext)) |
|
41 #if type(utext) |
|
42 R = [] |
39 R = [] |
43 font, fonts = fonts[0], fonts[1:] |
40 font, fonts = fonts[0], fonts[1:] |
44 enc = font.encName |
41 enc = font.encName |
45 if 'UCS-2' in enc: |
42 if 'UCS-2' in enc: |
46 enc = 'UTF16' |
43 enc = 'UTF16' |
47 while utext: |
44 while utext: |
48 try: |
45 try: |
49 R.append((font,utext.encode(enc))) |
46 if isUnicodeType(utext): |
|
47 s = utext.encode(enc) |
|
48 else: |
|
49 s = utext |
|
50 R.append((font,s)) |
50 break |
51 break |
51 except UnicodeEncodeError as e: |
52 except UnicodeEncodeError as e: |
52 i0, il = e.args[2:4] |
53 i0, il = e.args[2:4] |
53 if i0: |
54 if i0: |
54 R.append((font,utext[:i0].encode(enc))) |
55 R.append((font,utext[:i0].encode(enc))) |
78 order.""" |
79 order.""" |
79 |
80 |
80 lines = open_and_readlines(afmFileName, 'r') |
81 lines = open_and_readlines(afmFileName, 'r') |
81 if len(lines)<=1: |
82 if len(lines)<=1: |
82 #likely to be a MAC file |
83 #likely to be a MAC file |
83 if lines: lines = string.split(lines[0],'\r') |
84 if lines: lines = lines[0].split('\r') |
84 if len(lines)<=1: |
85 if len(lines)<=1: |
85 raise ValueError('AFM file %s hasn\'t enough data' % afmFileName) |
86 raise ValueError('AFM file %s hasn\'t enough data' % afmFileName) |
86 topLevel = {} |
87 topLevel = {} |
87 glyphLevel = [] |
88 glyphLevel = [] |
88 |
89 |
89 lines = [l for l in map(string.strip, lines) if not l.lower().startswith('comment')] |
90 lines = [l.strip() for l in lines] |
|
91 lines = [l for l in lines if not l.lower().startswith('comment')] |
90 #pass 1 - get the widths |
92 #pass 1 - get the widths |
91 inMetrics = 0 # os 'TOP', or 'CHARMETRICS' |
93 inMetrics = 0 # os 'TOP', or 'CHARMETRICS' |
92 for line in lines: |
94 for line in lines: |
93 if line[0:16] == 'StartCharMetrics': |
95 if line[0:16] == 'StartCharMetrics': |
94 inMetrics = 1 |
96 inMetrics = 1 |
95 elif line[0:14] == 'EndCharMetrics': |
97 elif line[0:14] == 'EndCharMetrics': |
96 inMetrics = 0 |
98 inMetrics = 0 |
97 elif inMetrics: |
99 elif inMetrics: |
98 chunks = string.split(line, ';') |
100 chunks = line.split(';') |
99 chunks = list(map(string.strip, chunks)) |
101 chunks = [chunk.strip() for chunk in chunks] |
100 cidChunk, widthChunk, nameChunk = chunks[0:3] |
102 cidChunk, widthChunk, nameChunk = chunks[0:3] |
101 |
103 |
102 # character ID |
104 # character ID |
103 l, r = string.split(cidChunk) |
105 l, r = cidChunk.split() |
104 assert l == 'C', 'bad line in font file %s' % line |
106 assert l == 'C', 'bad line in font file %s' % line |
105 cid = string.atoi(r) |
107 cid = int(r) |
106 |
108 |
107 # width |
109 # width |
108 l, r = string.split(widthChunk) |
110 l, r = widthChunk.split() |
109 assert l == 'WX', 'bad line in font file %s' % line |
111 assert l == 'WX', 'bad line in font file %s' % line |
110 width = string.atoi(r) |
112 width = int(r) |
111 |
113 |
112 # name |
114 # name |
113 l, r = string.split(nameChunk) |
115 l, r = nameChunk.split() |
114 assert l == 'N', 'bad line in font file %s' % line |
116 assert l == 'N', 'bad line in font file %s' % line |
115 name = r |
117 name = r |
116 |
118 |
117 glyphLevel.append((cid, width, name)) |
119 glyphLevel.append((cid, width, name)) |
118 |
120 |
124 if line[0:16] == 'StartCharMetrics': |
126 if line[0:16] == 'StartCharMetrics': |
125 inHeader = 0 |
127 inHeader = 0 |
126 elif inHeader: |
128 elif inHeader: |
127 if line[0:7] == 'Comment': pass |
129 if line[0:7] == 'Comment': pass |
128 try: |
130 try: |
129 left, right = string.split(line,' ',1) |
131 left, right = line.split(' ',1) |
130 except: |
132 except: |
131 raise ValueError("Header information error in afm %s: line='%s'" % (afmFileName, line)) |
133 raise ValueError("Header information error in afm %s: line='%s'" % (afmFileName, line)) |
132 try: |
134 try: |
133 right = string.atoi(right) |
135 right = int(right) |
134 except: |
136 except: |
135 pass |
137 pass |
136 topLevel[left] = right |
138 topLevel[left] = right |
137 |
139 |
138 |
140 |
176 def getFontFiles(self): |
178 def getFontFiles(self): |
177 "Info function, return list of the font files this depends on." |
179 "Info function, return list of the font files this depends on." |
178 return [] |
180 return [] |
179 |
181 |
180 def findT1File(self, ext='.pfb'): |
182 def findT1File(self, ext='.pfb'): |
181 possible_exts = (string.lower(ext), string.upper(ext)) |
183 possible_exts = (ext.lower(), ext.upper()) |
182 if hasattr(self,'pfbFileName'): |
184 if hasattr(self,'pfbFileName'): |
183 r_basename = os.path.splitext(self.pfbFileName)[0] |
185 r_basename = os.path.splitext(self.pfbFileName)[0] |
184 for e in possible_exts: |
186 for e in possible_exts: |
185 if rl_isfile(r_basename + e): |
187 if rl_isfile(r_basename + e): |
186 return r_basename + e |
188 return r_basename + e |
187 try: |
189 try: |
188 r = _fontdata.findT1File(self.name) |
190 r = _fontdata.findT1File(self.name) |
189 except: |
191 except: |
190 afm = bruteForceSearchForAFM(self.name) |
192 afm = bruteForceSearchForAFM(self.name) |
191 if afm: |
193 if afm: |
192 if string.lower(ext) == '.pfb': |
194 if ext.lower() == '.pfb': |
193 for e in possible_exts: |
195 for e in possible_exts: |
194 pfb = os.path.splitext(afm)[0] + e |
196 pfb = os.path.splitext(afm)[0] + e |
195 if rl_isfile(pfb): |
197 if rl_isfile(pfb): |
196 r = pfb |
198 r = pfb |
197 else: |
199 else: |
198 r = None |
200 r = None |
199 elif string.lower(ext) == '.afm': |
201 elif ext.lower() == '.afm': |
200 r = afm |
202 r = afm |
201 else: |
203 else: |
202 r = None |
204 r = None |
203 if r is None: |
205 if r is None: |
204 warnOnce("Can't find %s for face '%s'" % (ext, self.name)) |
206 warnOnce("Can't find %s for face '%s'" % (ext, self.name)) |
249 self.vector = _fontdata.encodings[name] |
251 self.vector = _fontdata.encodings[name] |
250 elif base == None: |
252 elif base == None: |
251 # assume based on the usual one |
253 # assume based on the usual one |
252 self.baseEncodingName = defaultEncoding |
254 self.baseEncodingName = defaultEncoding |
253 self.vector = _fontdata.encodings[defaultEncoding] |
255 self.vector = _fontdata.encodings[defaultEncoding] |
254 elif type(base) is StringType: |
256 elif isStrType(base): |
255 baseEnc = getEncoding(base) |
257 baseEnc = getEncoding(base) |
256 self.baseEncodingName = baseEnc.name |
258 self.baseEncodingName = baseEnc.name |
257 self.vector = baseEnc.vector[:] |
259 self.vector = baseEnc.vector[:] |
258 elif type(base) in (ListType, TupleType): |
260 elif isSeqType(base): |
259 self.baseEncodingName = defaultEncoding |
261 self.baseEncodingName = defaultEncoding |
260 self.vector = base[:] |
262 self.vector = base[:] |
261 elif isinstance(base, Encoding): |
263 elif isinstance(base, Encoding): |
262 # accept a vector |
264 # accept a vector |
263 self.baseEncodingName = base.name |
265 self.baseEncodingName = base.name |
405 |
407 |
406 def _py_stringWidth(self, text, size, encoding='utf8'): |
408 def _py_stringWidth(self, text, size, encoding='utf8'): |
407 """This is the "purist" approach to width. The practical approach |
409 """This is the "purist" approach to width. The practical approach |
408 is to use the stringWidth function, which may be swapped in for one |
410 is to use the stringWidth function, which may be swapped in for one |
409 written in C.""" |
411 written in C.""" |
410 if not isinstance(text,str): text = text.decode(encoding) |
412 if not isUnicodeType(text): text = text.decode(encoding) |
411 return sum([sum(map(f.widths.__getitem__,list(map(ord,t)))) for f, t in unicode2T1(text,[self]+self.substitutionFonts)])*0.001*size |
413 return sum([sum(map(f.widths.__getitem__,list(map(ord,t)))) for f, t in unicode2T1(text,[self]+self.substitutionFonts)])*0.001*size |
412 stringWidth = _py_stringWidth |
414 stringWidth = _py_stringWidth |
413 |
415 |
414 def _formatWidths(self): |
416 def _formatWidths(self): |
415 "returns a pretty block in PDF Array format to aid inspection" |
417 "returns a pretty block in PDF Array format to aid inspection" |
416 text = '[' |
418 text = b'[' |
417 for i in range(256): |
419 for i in range(256): |
418 text = text + ' ' + str(self.widths[i]) |
420 text = text + b' ' + bytes(str(self.widths[i]),'utf8') |
419 if i == 255: |
421 if i == 255: |
420 text = text + ' ]' |
422 text = text + b' ]' |
421 if i % 16 == 15: |
423 if i % 16 == 15: |
422 text = text + '\n' |
424 text = text + b'\n' |
423 return text |
425 return text |
424 |
426 |
425 def addObjects(self, doc): |
427 def addObjects(self, doc): |
426 """Makes and returns one or more PDF objects to be added |
428 """Makes and returns one or more PDF objects to be added |
427 to the document. The caller supplies the internal name |
429 to the document. The caller supplies the internal name |
455 |
457 |
456 PFB_MARKER=chr(0x80) |
458 PFB_MARKER=chr(0x80) |
457 PFB_ASCII=chr(1) |
459 PFB_ASCII=chr(1) |
458 PFB_BINARY=chr(2) |
460 PFB_BINARY=chr(2) |
459 PFB_EOF=chr(3) |
461 PFB_EOF=chr(3) |
460 def _pfbSegLen(p,d): |
462 |
461 '''compute a pfb style length from the first 4 bytes of string d''' |
463 if isPython3: |
462 return ((((ord(d[p+3])<<8)|ord(d[p+2])<<8)|ord(d[p+1]))<<8)|ord(d[p]) |
464 def _pfbCheck(p,d,m,fn): |
463 |
465 if chr(d[p])!=PFB_MARKER or chr(d[p+1])!=m: |
464 def _pfbCheck(p,d,m,fn): |
466 raise ValueError('Bad pfb file\'%s\' expected chr(%d)chr(%d) at char %d, got chr(%d)chr(%d)' % (fn,ord(PFB_MARKER),ord(m),p,d[p],d[p+1])) |
465 if d[p]!=PFB_MARKER or d[p+1]!=m: |
467 if m==PFB_EOF: return |
466 raise ValueError('Bad pfb file\'%s\' expected chr(%d)chr(%d) at char %d, got chr(%d)chr(%d)' % (fn,ord(PFB_MARKER),ord(m),p,ord(d[p]),ord(d[p+1]))) |
468 p = p + 2 |
467 if m==PFB_EOF: return |
469 l = (((((d[p+3])<<8)|(d[p+2])<<8)|(d[p+1]))<<8)|(d[p]) |
468 p = p + 2 |
470 p = p + 4 |
469 l = _pfbSegLen(p,d) |
471 if p+l>len(d): |
470 p = p + 4 |
472 raise ValueError('Bad pfb file\'%s\' needed %d+%d bytes have only %d!' % (fn,p,l,len(d))) |
471 if p+l>len(d): |
473 return p, p+l |
472 raise ValueError('Bad pfb file\'%s\' needed %d+%d bytes have only %d!' % (fn,p,l,len(d))) |
474 else: |
473 return p, p+l |
475 def _pfbSegLen(p,d): |
|
476 '''compute a pfb style length from the first 4 bytes of string d''' |
|
477 return ((((ord(d[p+3])<<8)|ord(d[p+2])<<8)|ord(d[p+1]))<<8)|ord(d[p]) |
|
478 |
|
479 def _pfbCheck(p,d,m,fn): |
|
480 if d[p]!=PFB_MARKER or d[p+1]!=m: |
|
481 raise ValueError('Bad pfb file\'%s\' expected chr(%d)chr(%d) at char %d, got chr(%d)chr(%d)' % (fn,ord(PFB_MARKER),ord(m),p,ord(d[p]),ord(d[p+1]))) |
|
482 if m==PFB_EOF: return |
|
483 p = p + 2 |
|
484 l = _pfbSegLen(p,d) |
|
485 p = p + 4 |
|
486 if p+l>len(d): |
|
487 raise ValueError('Bad pfb file\'%s\' needed %d+%d bytes have only %d!' % (fn,p,l,len(d))) |
|
488 return p, p+l |
474 |
489 |
475 class EmbeddedType1Face(TypeFace): |
490 class EmbeddedType1Face(TypeFace): |
476 """A Type 1 font other than one of the basic 14. |
491 """A Type 1 font other than one of the basic 14. |
477 |
492 |
478 Its glyph data will be embedded in the PDF file.""" |
493 Its glyph data will be embedded in the PDF file.""" |
524 self.italicAngle = topLevel.get('ItalicAngle', 0) |
539 self.italicAngle = topLevel.get('ItalicAngle', 0) |
525 self.stemV = topLevel.get('stemV', 0) |
540 self.stemV = topLevel.get('stemV', 0) |
526 self.xHeight = topLevel.get('XHeight', 1000) |
541 self.xHeight = topLevel.get('XHeight', 1000) |
527 |
542 |
528 strBbox = topLevel.get('FontBBox', [0,0,1000,1000]) |
543 strBbox = topLevel.get('FontBBox', [0,0,1000,1000]) |
529 tokens = string.split(strBbox) |
544 tokens = strBbox.split() |
530 self.bbox = [] |
545 self.bbox = [] |
531 for tok in tokens: |
546 for tok in tokens: |
532 self.bbox.append(string.atoi(tok)) |
547 self.bbox.append(int(tok)) |
533 |
548 |
534 glyphWidths = {} |
549 glyphWidths = {} |
535 for (cid, width, name) in glyphData: |
550 for (cid, width, name) in glyphData: |
536 glyphWidths[name] = width |
551 glyphWidths[name] = width |
537 self.glyphWidths = glyphWidths |
552 self.glyphWidths = glyphWidths |
664 else: |
679 else: |
665 raise |
680 raise |
666 |
681 |
667 def findFontAndRegister(fontName): |
682 def findFontAndRegister(fontName): |
668 '''search for and register a font given its name''' |
683 '''search for and register a font given its name''' |
|
684 assert type(fontName) is str |
669 #it might have a font-specific encoding e.g. Symbol |
685 #it might have a font-specific encoding e.g. Symbol |
670 # or Dingbats. If not, take the default. |
686 # or Dingbats. If not, take the default. |
671 face = getTypeFace(fontName) |
687 face = getTypeFace(fontName) |
672 if face.requiredEncoding: |
688 if face.requiredEncoding: |
673 font = Font(fontName, fontName, face.requiredEncoding) |
689 font = Font(fontName, fontName, face.requiredEncoding) |
782 rawdata = open('../../rlextra/rml2pdf/doc/rml_user_guide.prep').read() |
798 rawdata = open('../../rlextra/rml2pdf/doc/rml_user_guide.prep').read() |
783 print('rawdata length %d' % len(rawdata)) |
799 print('rawdata length %d' % len(rawdata)) |
784 print('test one huge string...') |
800 print('test one huge string...') |
785 test3widths([rawdata]) |
801 test3widths([rawdata]) |
786 print() |
802 print() |
787 words = string.split(rawdata) |
803 words = rawdata.split() |
788 print('test %d shorter strings (average length %0.2f chars)...' % (len(words), 1.0*len(rawdata)/len(words))) |
804 print('test %d shorter strings (average length %0.2f chars)...' % (len(words), 1.0*len(rawdata)/len(words))) |
789 test3widths(words) |
805 test3widths(words) |
790 |
806 |
791 |
807 |
792 def test(): |
808 def test(): |