author | rptlab |
Tue, 30 Apr 2013 14:28:14 +0100 | |
branch | py33 |
changeset 3723 | 99aa837b6703 |
parent 3721 | 0c93dd8ff567 |
child 3781 | df8b57380768 |
permissions | -rw-r--r-- |
3617 | 1 |
#Copyright ReportLab Europe Ltd. 2000-2012 |
1180 | 2 |
#see license.txt for license details |
2332 | 3 |
#history http://www.reportlab.co.uk/cgi-bin/viewcvs.cgi/public/reportlab/trunk/reportlab/pdfbase/cidfonts.py |
1180 | 4 |
#$Header $ |
2332 | 5 |
__version__=''' $Id$ ''' |
1180 | 6 |
__doc__="""CID (Asian multi-byte) font support. |
7 |
||
8 |
This defines classes to represent CID fonts. They know how to calculate |
|
9 |
their own width and how to write themselves into PDF files.""" |
|
10 |
||
11 |
import os |
|
1192
08043d817140
Added a trivial korean test, prior to testing on another machine
andy_robinson
parents:
1180
diff
changeset
|
12 |
from types import ListType, TupleType, DictType |
1180 | 13 |
from string import find, split, strip |
1265
f11478b89928
Initial (inaccurate metrics?) support for chinese simplified and traditional
andy_robinson
parents:
1264
diff
changeset
|
14 |
import marshal |
f11478b89928
Initial (inaccurate metrics?) support for chinese simplified and traditional
andy_robinson
parents:
1264
diff
changeset
|
15 |
import time |
2993 | 16 |
try: |
17 |
from hashlib import md5 |
|
18 |
except ImportError: |
|
19 |
from md5 import md5 |
|
1180 | 20 |
|
1361
35586d769319
Enhancements/fixes to Asian fonts; codecharts utility to
andy_robinson
parents:
1286
diff
changeset
|
21 |
import reportlab |
1180 | 22 |
from reportlab.pdfbase import pdfmetrics |
2629 | 23 |
from reportlab.pdfbase._cidfontdata import allowedTypeFaces, allowedEncodings, CIDFontInfo, \ |
24 |
defaultUnicodeEncodings, widthsByUnichar |
|
1180 | 25 |
from reportlab.pdfgen.canvas import Canvas |
26 |
from reportlab.pdfbase import pdfdoc |
|
2575 | 27 |
from reportlab.pdfbase.pdfutils import _escape |
1193 | 28 |
from reportlab.rl_config import CMapSearchPath |
1180 | 29 |
|
30 |
||
2633 | 31 |
# Quick hack for the 2.0 release: text is now always handled as unicode and
# the CMAP data is built in, so the code that loads CMap files is not needed.
DISABLE_CMAP = True
|
34 |
||
35 |
||
1219 | 36 |
def findCMapFile(name):
    """Return the full filename of the CMap file called *name*.

    The directories listed in ``CMapSearchPath`` are tried in order and the
    first candidate that exists as a regular file is returned.

    Raises IOError when none of the directories contains the file.
    """
    for folder in CMapSearchPath:
        candidate = os.sep.join((folder, name))
        if os.path.isfile(candidate):
            return candidate
    raise IOError('CMAP file for encodings "%s" not found!' % name)
1683 | 44 |
|
1192
08043d817140
Added a trivial korean test, prior to testing on another machine
andy_robinson
parents:
1180
diff
changeset
|
45 |
def structToPDF(structure):
    """Convert a deeply nested structure to pdfdoc dictionary/array objects.

    Dictionaries become ``pdfdoc.PDFDictionary`` and lists/tuples become
    ``pdfdoc.PDFArray``; the conversion recurses into their values and
    elements.  Any other object is returned unchanged.
    """
    # The builtin types are tested directly: the ``types.DictType`` style
    # aliases this function used to rely on do not exist on Python 3.
    if isinstance(structure, dict):
        converted = dict((key, structToPDF(value))
                         for key, value in structure.items())
        return pdfdoc.PDFDictionary(converted)
    if isinstance(structure, (list, tuple)):
        return pdfdoc.PDFArray([structToPDF(elem) for elem in structure])
    return structure
08043d817140
Added a trivial korean test, prior to testing on another machine
andy_robinson
parents:
1180
diff
changeset
|
59 |
|
1180 | 60 |
class CIDEncoding(pdfmetrics.Encoding):
    """Multi-byte encoding.  These are loaded from CMAP files.

    A CMAP file is like a mini-codec.  It defines the correspondence
    between code points in the (multi-byte) input data and Character
    IDs. """
    # aims to do similar things to Brian Hooper's CMap class,
    # but I could not get it working and had to rewrite.
    # also, we should really rearrange our current encoding
    # into a SingleByteEncoding since many of its methods
    # should not apply here.

    def __init__(self, name, useCache=1):
        """Create the encoding called *name*.

        Unless the module-level DISABLE_CMAP flag is set, the mapping is
        either loaded from a cached '<name>.fastmap' file in the
        'FastCMAPS' temp directory (when *useCache* is true and the cache
        file exists) or parsed from the CMap file and, when caching, saved
        there for the next run.
        """
        self.name = name
        self._mapFileHash = None
        self._codeSpaceRanges = []
        self._notDefRanges = []
        self._cmap = {}
        self.source = None
        if not DISABLE_CMAP:
            if useCache:
                from reportlab.lib.utils import get_rl_tempdir
                fontmapdir = get_rl_tempdir('FastCMAPS')
                if os.path.isfile(fontmapdir + os.sep + name + '.fastmap'):
                    self.fastLoad(fontmapdir)
                    self.source = fontmapdir + os.sep + name + '.fastmap'
                else:
                    self.parseCMAPFile(name)
                    self.source = 'CMAP: ' + name
                    self.fastSave(fontmapdir)
            else:
                self.parseCMAPFile(name)

    def _hash(self, text):
        """Return the MD5 digest of *text*."""
        if not isinstance(text, bytes):
            # md5 only accepts bytes on Python 3; the data was read in text
            # mode, so encode it before hashing.
            text = text.encode('utf8')
        hasher = md5()
        hasher.update(text)
        return hasher.digest()

    def parseCMAPFile(self, name):
        """This is a tricky one as CMAP files are Postscript
        ones.  Some refer to others with a 'usecmap'
        command.

        The file is located with findCMapFile, hashed, and its
        code space ranges, notdef ranges and cid ranges are added to
        this object.  A file referred to via 'usecmap' is parsed first so
        that this file may override its settings.
        """
        cmapfile = findCMapFile(name)
        # this will CRAWL with the unicode encodings...
        with open(cmapfile, 'r') as f:
            rawdata = f.read()

        self._mapFileHash = self._hash(rawdata)
        #if it contains the token 'usecmap', parse the other
        #cmap file first....
        usecmap_pos = rawdata.find('usecmap')
        if usecmap_pos > -1:
            #they tell us to look in another file
            #for the code space ranges.  The one
            # to use will be the previous word.
            otherCMAPName = rawdata[0:usecmap_pos].split()[-1]
            self.parseCMAPFile(otherCMAPName)
            # now continue parsing this, as it may
            # override some settings

        # Walk the token list with an index; re-slicing the list for every
        # token consumed made parsing quadratic in the file size.
        words = rawdata.split()
        pos = 0
        total = len(words)
        while pos < total:
            token = words[pos]
            pos += 1
            if token == 'begincodespacerange':
                while words[pos] != 'endcodespacerange':
                    strStart, strEnd = words[pos], words[pos + 1]
                    pos += 2
                    # values look like <8140>; strip the angle brackets
                    start = int(strStart[1:-1], 16)
                    end = int(strEnd[1:-1], 16)
                    self._codeSpaceRanges.append((start, end))
            elif token == 'beginnotdefrange':
                while words[pos] != 'endnotdefrange':
                    strStart, strEnd, strValue = words[pos:pos + 3]
                    pos += 3
                    start = int(strStart[1:-1], 16)
                    end = int(strEnd[1:-1], 16)
                    value = int(strValue)
                    self._notDefRanges.append((start, end, value))
            elif token == 'begincidrange':
                while words[pos] != 'endcidrange':
                    strStart, strEnd, strValue = words[pos:pos + 3]
                    pos += 3
                    start = int(strStart[1:-1], 16)
                    end = int(strEnd[1:-1], 16)
                    value = int(strValue)
                    # this means that 'start' corresponds to 'value',
                    # start+1 corresponds to value+1 and so on up
                    # to end
                    for offset in range(end - start + 1):
                        self._cmap[start + offset] = value + offset
            # any other token (including the 'end...' markers) is skipped

    def translate(self, text):
        """Convert a string into a list of CIDs.

        *text* may be a byte string or a text string; each byte/character
        code is looked up on its own and, when it does not fall in any
        code space range, combined with the following one as a two-byte
        code.  Codes inside a code space range but missing from the map
        yield the matching notdef value, or 0.
        """
        if isinstance(text, (bytes, bytearray)):
            # iterating bytes gives ints on Python 3 and 1-char strings on
            # Python 2; bytearray gives ints on both
            codes = bytearray(text)
        else:
            codes = [ord(ch) for ch in text]

        output = []
        cmap = self._cmap
        pending = None      # code of an unmatched previous byte, if any
        for code in codes:
            if pending is not None:
                num = pending * 256 + code
            else:
                num = code
            found = False
            # Range bounds are inclusive; the previous strict comparison
            # dropped codes equal to a bound.
            # NOTE(review): inclusive bounds assumed per Adobe CMap spec - confirm
            for low, high in self._codeSpaceRanges:
                if low <= num <= high:
                    try:
                        cid = cmap[num]
                    except KeyError:
                        #not defined.  Try to find the appropriate
                        # notdef character, or failing that return
                        # zero
                        cid = 0
                        for low2, high2, notdef in self._notDefRanges:
                            if low2 <= num <= high2:
                                cid = notdef
                                break
                    output.append(cid)
                    found = True
                    break
            if found:
                pending = None
            else:
                pending = code
        return output

    def fastSave(self, directory):
        """Write the parsed mapping to '<name>.fastmap' in *directory*."""
        with open(os.path.join(directory, self.name + '.fastmap'), 'wb') as f:
            marshal.dump(self._mapFileHash, f)
            marshal.dump(self._codeSpaceRanges, f)
            marshal.dump(self._notDefRanges, f)
            marshal.dump(self._cmap, f)

    def fastLoad(self, directory):
        """Read the mapping back from '<name>.fastmap' in *directory*."""
        # NOTE: marshal must not be used on untrusted data; the cache
        # directory has to be one only trusted code can write to.
        with open(os.path.join(directory, self.name + '.fastmap'), 'rb') as f:
            self._mapFileHash = marshal.load(f)
            self._codeSpaceRanges = marshal.load(f)
            self._notDefRanges = marshal.load(f)
            self._cmap = marshal.load(f)

    def getData(self):
        """Simple persistence helper.  Return a dict with all that matters."""
        return {
            'mapFileHash': self._mapFileHash,
            'codeSpaceRanges': self._codeSpaceRanges,
            'notDefRanges': self._notDefRanges,
            'cmap': self._cmap,
            }
|
1683 | 227 |
|
1180 | 228 |
class CIDTypeFace(pdfmetrics.TypeFace):
    """Multi-byte type face.

    Conceptually similar to a single byte typeface,
    but the glyphs are identified by a numeric Character
    ID (CID) and not a glyph name. """
    def __init__(self, name):
        """Initialised from the entry for *name* in CIDFontInfo.

        Raises KeyError when *name* has no entry there."""
        pdfmetrics.TypeFace.__init__(self, name)
        self._extractDictInfo(name)

    def _extractDictInfo(self, name):
        """Copy ascent, descent and width data for *name* from CIDFontInfo."""
        try:
            fontDict = CIDFontInfo[name]
        except KeyError:
            # the two sentences used to be glued together without a separator
            raise KeyError("Unable to find information on CID typeface '%s'. " % name +
                            "Only the following font names work: " + repr(allowedTypeFaces))
        descFont = fontDict['DescendantFonts'][0]
        self.ascent = descFont['FontDescriptor']['Ascent']
        self.descent = descFont['FontDescriptor']['Descent']
        self._defaultWidth = descFont['DW']
        self._explicitWidths = self._expandWidths(descFont['W'])

        # should really support self.glyphWidths, self.glyphNames
        # but not done yet.

    def _expandWidths(self, compactWidthArray):
        """Expands Adobe nested list structure to get a dictionary of widths.

        Here is an example of such a structure.::

            (
            # starting at character ID 1, next n  characters have the widths given.
            1,  (277,305,500,668,668,906,727,305,445,445,508,668,305,379,305,539),
            # all Characters from ID 17 to 26 are 668 em units wide
            17, 26, 668,
            27, (305, 305, 668, 668, 668, 566, 871, 727, 637, 652, 699, 574, 555,
                 676, 687, 242, 492, 664, 582, 789, 707, 734, 582, 734, 605, 605,
                 641, 668, 727, 945, 609, 609, 574, 445, 668, 445, 668, 668, 590,
                 555, 609, 547, 602, 574, 391, 609, 582, 234, 277, 539, 234, 895,
                 582, 605, 602, 602, 387, 508, 441, 582, 562, 781, 531, 570, 555,
                 449, 246, 449, 668),
            # these must be half width katakana and the like.
            231, 632, 500
            )

        Returns a dict mapping each character ID to its width.  The input
        is not modified.
        """
        data = compactWidthArray
        widths = {}
        pos = 0
        total = len(data)
        # Step through with an index rather than re-slicing the array for
        # every entry.  The builtin list/tuple are tested directly because
        # types.ListType/TupleType do not exist on Python 3.
        while pos < total:
            start = data[pos]
            following = data[pos + 1]
            if isinstance(following, (list, tuple)):
                # 'start (w1, w2, ...)': consecutive IDs with explicit widths
                for offset, width in enumerate(following):
                    widths[start + offset] = width
                pos += 2
            else:
                # 'start end width': every ID from start to end inclusive
                end, width = following, data[pos + 2]
                for idx in range(start, end + 1):
                    widths[idx] = width
                pos += 3
        return widths

    def getCharWidth(self, characterId):
        """Return the width for *characterId*, or the default width."""
        return self._explicitWidths.get(characterId, self._defaultWidth)
|
293 |
||
294 |
class CIDFont(pdfmetrics.Font):
    "Represents a built-in multi-byte font"
    _multiByte = 1

    def __init__(self, face, encoding):
        """Build the font from a face name and an encoding name.

        *face* must be in allowedTypeFaces and *encoding* in
        allowedEncodings; otherwise the assertions below fail.
        """
        # NOTE: these checks are skipped under ``python -O``; they are kept
        # as assertions so the exception type seen by callers is unchanged.
        assert face in allowedTypeFaces, "TypeFace '%s' not supported! Use any of these instead: %s" % (face, allowedTypeFaces)
        self.faceName = face
        #should cache in registry...
        self.face = CIDTypeFace(face)

        assert encoding in allowedEncodings, "Encoding '%s' not supported!  Use any of these instead: %s" % (encoding, allowedEncodings)
        self.encodingName = encoding
        self.encoding = CIDEncoding(encoding)

        #legacy hack doing quick cut and paste.
        self.fontName = self.faceName + '-' + self.encodingName
        self.name = self.fontName

        # need to know if it is vertical or horizontal
        self.isVertical = (self.encodingName[-1] == 'V')

        #no substitutes initially
        self.substitutionFonts = []

    def formatForPdf(self, text):
        """Return *text* escaped with pdfutils._escape."""
        return _escape(text)

    def stringWidth(self, text, size, encoding=None):
        """This presumes non-Unicode input.  UnicodeCIDFont wraps it for that context"""
        cidlist = self.encoding.translate(text)
        if self.isVertical:
            #this part is "not checked!" but seems to work.
            #assume each is 1000 ems high
            return len(cidlist) * size
        getCharWidth = self.face.getCharWidth
        w = sum(getCharWidth(cid) for cid in cidlist)
        return 0.001 * w * size

    def addObjects(self, doc):
        """Add this font to the PDF document *doc*.

        The CIDFontInfo entry for the face is given a '/F<n>' name and this
        font's encoding, converted with structToPDF, referenced in the
        document under that internal name, stored in the 'BasicFonts'
        dictionary and recorded in doc.fontMapping under self.name.
        """
        internalName = 'F' + repr(len(doc.fontMapping)+1)

        # Work on a copy: CIDFontInfo is shared module-level data and must
        # not pick up the per-document Name/Encoding entries.
        bigDict = dict(CIDFontInfo[self.face.name])
        bigDict['Name'] = '/' + internalName
        bigDict['Encoding'] = '/' + self.encodingName

        #convert to PDF dictionary/array objects
        cidObj = structToPDF(bigDict)

        # link into document, and add to font map
        r = doc.Reference(cidObj, internalName)
        fontDict = doc.idToObject['BasicFonts'].dict
        fontDict[internalName] = r
        doc.fontMapping[self.name] = '/' + internalName
1683 | 356 |
|
1180 | 357 |
|
2575 | 358 |
class UnicodeCIDFont(CIDFont):
    """Wraps up CIDFont to hide explicit encoding choice;
    encodes text for output as UTF16.

    lang should be one of 'jpn','chs','cht','kor' for now.
    if vertical is set, it will select a different widths array
    and possibly glyphs for some punctuation marks.

    halfWidth is only for Japanese.


    >>> dodgy = UnicodeCIDFont('nonexistent')
    Traceback (most recent call last):
    ...
    KeyError: "don't know anything about CID font nonexistent"
    >>> heisei = UnicodeCIDFont('HeiseiMin-W3')
    >>> heisei.name
    'HeiseiMin-W3'
    >>> heisei.language
    'jpn'
    >>> heisei.encoding.name
    'UniJIS-UCS2-H'
    >>> #This is how PDF data gets encoded.
    >>> print(heisei.formatForPdf('hello'))
    \\000h\\000e\\000l\\000l\\000o
    >>> tokyo = u'\u6771\u4AEC'
    >>> print(heisei.formatForPdf(tokyo))
    gqJ\\354

    """

    def __init__(self, face, isVertical=False, isHalfWidth=False):
        """Look up the language and default encoding for *face*.

        Raises KeyError when *face* is not in defaultUnicodeEncodings.
        """
        try:
            lang, defaultEncoding = defaultUnicodeEncodings[face]
        except KeyError:
            raise KeyError("don't know anything about CID font %s" % face)

        #we know the languages now.
        self.language = lang

        #rebuilt encoding string.  They follow rules which work
        #for the 7 fonts provided.
        enc = defaultEncoding[:-1]
        if isHalfWidth:
            enc = enc + 'HW-'
        if isVertical:
            enc = enc + 'V'
        else:
            enc = enc + 'H'

        #now we can do the more general case
        CIDFont.__init__(self, face, enc)
        #it's simpler for unicode, just use the face name
        self.name = self.fontName = face
        self.vertical = isVertical
        self.isHalfWidth = isHalfWidth

        self.unicodeWidths = widthsByUnichar[self.name]

    def formatForPdf(self, text):
        """Return *text* encoded as UTF16 (big endian, no BOM) and escaped.

        Byte strings are taken to be UTF8 and decoded first.
        """
        #these ones should be encoded asUTF16 minus the BOM
        from codecs import utf_16_be_encode
        # isinstance on bytes works on Python 2 (where str is bytes) and on
        # Python 3; an exact type(...) is/is not str test does not.
        if isinstance(text, (bytes, bytearray)):
            text = text.decode('utf8')
        utfText = utf_16_be_encode(text)[0]
        return _escape(utfText)

    def stringWidth(self, text, size, encoding=None):
        "Just ensure we do width test on characters, not bytes..."
        # only byte strings have to be decoded; calling .decode on a text
        # string raises AttributeError on Python 3
        if isinstance(text, (bytes, bytearray)):
            text = text.decode('utf8')

        widths = self.unicodeWidths
        # characters without a known width count as 1000 units
        return size * 0.001 * sum(widths.get(uch, 1000) for uch in text)
|
443 |
#return CIDFont.stringWidth(self, text, size, encoding) |
|
2678
38d18a697cd0
reportlab: Python2.5 changes + minor cosmetics and improvements
rgbecker
parents:
2633
diff
changeset
|
444 |
|
1265
f11478b89928
Initial (inaccurate metrics?) support for chinese simplified and traditional
andy_robinson
parents:
1264
diff
changeset
|
445 |
|
f11478b89928
Initial (inaccurate metrics?) support for chinese simplified and traditional
andy_robinson
parents:
1264
diff
changeset
|
446 |
def precalculate(cmapdir):
    """Crunch through all files in *cmapdir*, making 'fastmap' files.

    Every directory entry is treated as a CMap name.  Entries that already
    have a '<name>.fastmap' next to them, and the '.fastmap' files
    themselves, are skipped.  Entries that cannot be turned into a
    CIDEncoding are reported and skipped; the rest are saved with fastSave.

    NOTE(review): when DISABLE_CMAP is true CIDEncoding does not parse the
    CMap file, so the saved maps would be empty - confirm before relying on
    this helper.
    """
    import os
    for filename in os.listdir(cmapdir):
        if filename.endswith('.fastmap'):
            # our own output from an earlier run, not a CMap file
            continue
        # this used to test ``self.name``, which does not exist in a
        # module-level function
        if os.path.isfile(cmapdir + os.sep + filename + '.fastmap'):
            continue
        try:
            enc = CIDEncoding(filename)
        except Exception:
            # report the file name; ``enc`` is not bound when parsing fails
            print('cannot parse %s, skipping' % filename)
            continue
        enc.fastSave(cmapdir)
        print('saved %s.fastmap' % filename)
1683 | 460 |
|
1180 | 461 |
def test():
    """Write test_japanese.pdf and print a translation and a string width.

    Only works if you have the correct encodings on your box!
    """
    faceName = 'HeiseiMin-W3'
    encName = '90ms-RKSJ-H'

    canv = Canvas('test_japanese.pdf')
    canv.setFont('Helvetica', 30)
    canv.drawString(100,700, 'Japanese Font Support')

    # the two typefaces
    pdfmetrics.registerFont(CIDFont(faceName, encName))
    pdfmetrics.registerFont(CIDFont('HeiseiKakuGo-W5', encName))

    canv.setFont('HeiseiMin-W3-90ms-RKSJ-H', 16)
    # this says "This is HeiseiMincho" in shift-JIS.  Not all our readers
    # have a Japanese PC, so I escaped it. On a Japanese-capable
    # system, print the string to see Kanji
    message1 = '\202\261\202\352\202\315\225\275\220\254\226\276\222\251\202\305\202\267\201B'
    canv.drawString(100, 675, message1)
    canv.save()
    print('saved test_japanese.pdf')

    encoding = CIDEncoding(encName)
    print(message1, '->', encoding.translate(message1))

    font = CIDFont(faceName, encName)
    print('width = %0.2f' % font.stringWidth(message1, 10))
1180 | 495 |
|
496 |
||
497 |
#testing all encodings |
|
498 |
## import time |
|
499 |
## started = time.time() |
|
500 |
## import glob |
|
501 |
## for encName in _cidfontdata.allowedEncodings: |
|
502 |
## #encName = '90ms-RKSJ-H' |
|
503 |
## enc = CIDEncoding(encName) |
|
504 |
## print 'encoding %s:' % encName |
|
505 |
## print ' codeSpaceRanges = %s' % enc._codeSpaceRanges |
|
506 |
## print ' notDefRanges = %s' % enc._notDefRanges |
|
507 |
## print ' mapping size = %d' % len(enc._cmap) |
|
508 |
## finished = time.time() |
|
509 |
## print 'constructed all encodings in %0.2f seconds' % (finished - started) |
|
1683 | 510 |
|
1180 | 511 |
if __name__=='__main__':
    import doctest
    # A relative import ("from . import cidfonts") cannot work here: when
    # the file is run as a script it is not part of a package.  Import the
    # module by its absolute name so the doctests run against it.
    from reportlab.pdfbase import cidfonts
    doctest.testmod(cidfonts)
    #test()
|
1180 | 516 |
|
517 |
||
518 |
||
519 |