--- a/reportlab/graphics/renderPDF.py Mon Jun 14 16:29:04 2004 +0000
+++ b/reportlab/graphics/renderPDF.py Mon Jun 14 16:41:25 2004 +0000
@@ -9,7 +9,7 @@
Execute the script to see some test drawings.
changed
"""
-__version__=''' $Id: renderPDF.py,v 1.24 2003/11/20 17:09:42 rgbecker Exp $ '''
+__version__=''' $Id$ '''
from reportlab.graphics.shapes import *
from reportlab.pdfgen.canvas import Canvas
@@ -173,10 +173,10 @@
def drawString(self, stringObj):
if self._fill:
S = self._tracker.getState()
- text_anchor, x, y, text = S['textAnchor'], stringObj.x,stringObj.y,stringObj.text
+ text_anchor, x, y, text, enc = S['textAnchor'], stringObj.x,stringObj.y,stringObj.text, stringObj.encoding
if not text_anchor in ['start','inherited']:
font, font_size = S['fontName'], S['fontSize']
- textLen = stringWidth(text, font,font_size)
+ textLen = stringWidth(text, font, font_size, enc)
if text_anchor=='end':
x = x-textLen
elif text_anchor=='middle':
--- a/reportlab/graphics/shapes.py Mon Jun 14 16:29:04 2004 +0000
+++ b/reportlab/graphics/shapes.py Mon Jun 14 16:41:25 2004 +0000
@@ -5,7 +5,7 @@
"""
core of the graphics library - defines Drawing and Shapes
"""
-__version__=''' $Id: shapes.py,v 1.102 2004/05/26 09:37:06 jjlee Exp $ '''
+__version__=''' $Id$ '''
import string, os, sys
from math import pi, cos, sin, tan
@@ -998,7 +998,6 @@
return (self.cx - self.r, self.cy - self.r, self.cx + self.r, self.cy + self.r)
class Ellipse(SolidShape):
-
_attrMap = AttrMap(BASE=SolidShape,
cx = AttrMapValue(isNumber),
cy = AttrMapValue(isNumber),
@@ -1176,6 +1175,7 @@
fontSize = AttrMapValue(isNumber),
fillColor = AttrMapValue(isColorOrNone),
textAnchor = AttrMapValue(isTextAnchor),
+ encoding = AttrMapValue(isString),
)
def __init__(self, x, y, text, **kw):
@@ -1187,9 +1187,10 @@
self.fontSize = STATE_DEFAULTS['fontSize']
self.fillColor = STATE_DEFAULTS['fillColor']
+        self.encoding = 'cp1252' #default (matches only fonts we have!); set first so encoding=... in kw wins
         self.setProperties(kw)
def getEast(self):
- return self.x + stringWidth(self.text,self.fontName,self.fontSize)
+ return self.x + stringWidth(self.text,self.fontName,self.fontSize, self.encoding)
def copy(self):
new = String(self.x, self.y, self.text)
@@ -1198,7 +1199,7 @@
def getBounds(self):
# assumes constant drop of 0.2*size to baseline
- w = stringWidth(self.text,self.fontName,self.fontSize)
+ w = stringWidth(self.text,self.fontName,self.fontSize, self.encoding)
if self.textAnchor == 'start':
x = self.x
elif self.textAnchor == 'middle':
--- a/reportlab/lib/validators.py Mon Jun 14 16:29:04 2004 +0000
+++ b/reportlab/lib/validators.py Mon Jun 14 16:41:25 2004 +0000
@@ -2,7 +2,7 @@
#see license.txt for license details
#history http://cvs.sourceforge.net/cgi-bin/cvsweb.cgi/reportlab/lib/validators.py?cvsroot=reportlab
#$Header: /tmp/reportlab/reportlab/lib/validators.py,v 1.30 2003/12/10 14:40:13 rgbecker Exp $
-__version__=''' $Id: validators.py,v 1.30 2003/12/10 14:40:13 rgbecker Exp $ '''
+__version__=''' $Id$ '''
"""
This module contains some standard verifying functions which can be
used in an attribute map.
@@ -65,7 +65,7 @@
class _isString(Validator):
def test(self,x):
- return type(x) is StringType
+ return type(x) in (StringType, UnicodeType)
class _isNumber(Validator):
def test(self,x):
--- a/reportlab/pdfbase/pdfmetrics.py Mon Jun 14 16:29:04 2004 +0000
+++ b/reportlab/pdfbase/pdfmetrics.py Mon Jun 14 16:41:25 2004 +0000
@@ -2,7 +2,7 @@
#see license.txt for license details
#history http://cvs.sourceforge.net/cgi-bin/cvsweb.cgi/reportlab/pdfbase/pdfmetrics.py?cvsroot=reportlab
#$Header $
-__version__=''' $Id: pdfmetrics.py,v 1.69 2004/03/23 17:35:42 rgbecker Exp $ '''
+__version__=''' $Id$ '''
__doc__="""
This provides a database of font metric information and
efines Font, Encoding and TypeFace classes aimed at end users.
@@ -20,7 +20,7 @@
trap attempts to access them and do it on first access.
"""
import string, os
-from types import StringType, ListType, TupleType
+from types import StringType, ListType, TupleType, UnicodeType
from reportlab.pdfbase import _fontdata
from reportlab.lib.logger import warnOnce
from reportlab.lib.utils import rl_isfile, open_and_read, open_and_readlines
@@ -29,22 +29,43 @@
standardFonts = _fontdata.standardFonts
standardEncodings = _fontdata.standardEncodings
-_dummyEncoding=' _not an encoding_ '
-# conditional import - try both import techniques, and set a flag
-try:
- import _rl_accel
- try:
- _stringWidth = _rl_accel.stringWidth
- _rl_accel.defaultEncoding(_dummyEncoding)
- except:
- _stringWidth = None
-except ImportError:
- _stringWidth = None
+# AR 20040612 - disabling accelerated stringwidth until I have
+# a slow one which works right for Unicode. Then we can change
+# the accelerated one.
+##_dummyEncoding=' _not an encoding_ '
+## conditional import - try both import techniques, and set a flag
+##try:
+## import _rl_accel
+## try:
+## _stringWidth = _rl_accel.stringWidth
+## _rl_accel.defaultEncoding(_dummyEncoding)
+## except:
+## _stringWidth = None
+##except ImportError:
+## _stringWidth = None
+_stringWidth = None
+
_typefaces = {}
_encodings = {}
_fonts = {}
+
+def codecName(encName):
+    """Map a PDF/font encoding name (any case) to a Python codec name.
+    WinAnsi*, ZapfDingbatsEncoding, SymbolEncoding -> 'cp1252'; MacRoman* ->
+    'mac-roman'; any other name is returned lower-cased and otherwise unchanged."""
+    encName = encName.lower()
+    #encName is lower-case from here on, so compare against lower-case prefixes
+    if encName[0:7] == 'winansi':
+        return 'cp1252'
+    elif encName[0:8] == 'macroman':
+        return 'mac-roman'
+    elif encName in ('zapfdingbatsencoding', 'symbolencoding'):
+        return 'cp1252'
+    return encName
+
+
class FontError(Exception):
pass
class FontNotFoundError(Exception):
@@ -358,16 +379,19 @@
pass
self.widths = w
- if not _stringWidth:
- def stringWidth(self, text, size):
- """This is the "purist" approach to width. The practical one
- is to use the stringWidth one which may be optimized
- in C."""
- w = 0
- widths = self.widths
- for ch in text:
- w = w + widths[ord(ch)]
- return w * 0.001 * size
+ #if not _stringWidth:
+ def stringWidth(self, text, size, encoding='latin-1'):
+ """This is the "purist" approach to width. The practical approach
+ is to use the stringWidth function, which may be swapped in for one
+ written in C."""
+ if type(text) is UnicodeType:
+ text = text.encode(codecName(self.encoding.name))
+
+ w = 0
+ widths = self.widths
+ for ch in text:
+ w = w + widths[ord(ch)]
+ return w * 0.001 * size
def _formatWidths(self):
"returns a pretty block in PDF Array format to aid inspection"
@@ -652,9 +676,18 @@
reg.sort()
return reg
-def _slowStringWidth(text, fontName, fontSize):
+def _slowStringWidth(text, fontName, fontSize, encoding=None):
"""Define this anyway so it can be tested, but whether it is used or not depends on _rl_accel"""
font = getFont(fontName)
+ fontCodec = codecName(font.encoding.name)
+## if encoding:
+## print 'slowStringWidth(%s/%s, %s, %s)' % (encoding, fontCodec, repr(text), fontName)
+ if type(text) is StringType:
+ if encoding is not None:
+ if encoding <> fontCodec:
+ #convert
+ text = unicode(text, encoding).encode(fontCodec)
+
return font.stringWidth(text, fontSize)
#this is faster, but will need more special-casing for multi-byte fonts.
#wid = getFont(fontName).widths
@@ -721,13 +754,14 @@
def test3widths(texts):
# checks all 3 algorithms give same answer, note speed
+
import time
for fontName in standardFonts[0:1]:
- t0 = time.time()
- for text in texts:
- l1 = _stringWidth(text, fontName, 10)
- t1 = time.time()
- print 'fast stringWidth took %0.4f' % (t1 - t0)
+## t0 = time.time()
+## for text in texts:
+## l1 = stringWidth(text, fontName, 10)
+## t1 = time.time()
+## print 'fast stringWidth took %0.4f' % (t1 - t0)
t0 = time.time()
w = getFont(fontName).widths
--- a/reportlab/pdfbase/ttfonts.py Mon Jun 14 16:29:04 2004 +0000
+++ b/reportlab/pdfbase/ttfonts.py Mon Jun 14 16:41:25 2004 +0000
@@ -58,10 +58,10 @@
Canvas and TextObject have special support for dynamic fonts.
"""
-__version__ = '$Id: ttfonts.py,v 1.22 2004/04/05 14:17:29 rgbecker Exp $'
+__version__ = '$Id$'
import string
-from types import StringType
+from types import StringType, UnicodeType
from struct import pack, unpack
from cStringIO import StringIO
from reportlab.pdfbase import pdfmetrics, pdfdoc
@@ -953,11 +953,16 @@
self._dynamicFont = 1 # We want dynamic subsetting
self.state = {}
- def stringWidth(self, text, size):
+ def stringWidth(self, text, size, encoding='utf-8'):
"Calculate text width"
width = self.face.getCharWidth
w = 0
- for code in parse_utf8(text):
+        if type(text) is not UnicodeType:
+            #8-bit input: decode with the given encoding first, so that widths
+            #are summed per code point rather than per byte
+            text = unicode(text, encoding)
+        codes = map(ord, text)
+ for code in codes:
w = w + width(code)
return 0.001 * w * size
--- a/reportlab/pdfgen/canvas.py Mon Jun 14 16:29:04 2004 +0000
+++ b/reportlab/pdfgen/canvas.py Mon Jun 14 16:41:25 2004 +0000
@@ -1275,12 +1275,13 @@
def _convertText(self, text):
"Convert to correct encoding for current font"
- if type(text) is type(u''):
+ if type(text) is UnicodeType:
# If text is unicode always convert
uni = text
+ converted = uni.encode(self._fontencoding, self.encodingErrorMode)
elif self.encoding is None:
- # If no encoding specified, no conversion
- return text
+ # If no encoding specified, 8-bit no conversion
+ converted = text
else:
# Otherwise assume in specified encoding and decode
if self.encoding == 'WinAnsiEncoding':
@@ -1291,7 +1292,9 @@
docEnc = self.encoding
#uni = text.decode(docEnc) #hack #won't work in 2.1
uni = unicode(text, docEnc, getattr(self,'decodingErrorMode',self.encodingErrorMode)) #works in 2.1
- return uni.encode(self._fontencoding, self.encodingErrorMode)
+ converted = uni.encode(self._fontencoding, self.encodingErrorMode)
+## print ' ->', converted
+ return converted
def setFont(self, psfontname, size, leading = None):
--- a/reportlab/pdfgen/textobject.py Mon Jun 14 16:29:04 2004 +0000
+++ b/reportlab/pdfgen/textobject.py Mon Jun 14 16:41:25 2004 +0000
@@ -154,6 +154,13 @@
self._fontname = psfontname
self._fontsize = size
font = pdfmetrics.getFont(self._fontname)
+
+ #track codec name for auto-conversion
+ encName = font.encoding.name
+ if encName == 'WinAnsiEncoding':
+ encName = 'cp1252'
+ self._fontencoding = self._canvas._fontencoding = encName.lower() #python codec name
+
self._dynamicFont = getattr(font, '_dynamicFont', 0)
if self._dynamicFont:
self._curSubset = -1
@@ -177,7 +184,7 @@
encName = font.encoding.name
if encName == 'WinAnsiEncoding':
encName = 'cp1252'
- self._fontencoding = encName.lower() #python codec name
+ self._fontencoding = self._canvas._fontencoding = encName.lower() #python codec name
self._dynamicFont = getattr(font, '_dynamicFont', 0)
if self._dynamicFont:
@@ -297,8 +304,9 @@
def _formatText(self, text):
"Generates PDF text output operator(s)"
#convert to current doc encoding
+ #print '_formatText',repr(text),'as',self._canvas._fontencoding,'->',
text = self._canvas._convertText(text)
-
+ #print repr(text)
if self._dynamicFont:
#it's a truetype font and should be utf8. If an error is raised,
@@ -342,6 +350,8 @@
self._y0 = self._y
# Output the text followed by a PDF newline command
+## if type(text) == type(u''):
+## print "doing unicode textline on", text.encode('cp1252')
self._code.append('%s T*' % self._formatText(text))
def textLines(self, stuff, trim=1):
--- a/reportlab/platypus/flowables.py Mon Jun 14 16:29:04 2004 +0000
+++ b/reportlab/platypus/flowables.py Mon Jun 14 16:41:25 2004 +0000
@@ -2,7 +2,7 @@
#see license.txt for license details
#history http://cvs.sourceforge.net/cgi-bin/cvsweb.cgi/reportlab/platypus/flowables.py?cvsroot=reportlab
#$Header: /tmp/reportlab/reportlab/platypus/flowables.py,v 1.49 2004/04/05 18:07:42 rgbecker Exp $
-__version__=''' $Id: flowables.py,v 1.49 2004/04/05 18:07:42 rgbecker Exp $ '''
+__version__=''' $Id$ '''
__doc__="""
A flowable is a "floating element" in a document whose exact position is determined by the
other elements that precede it, such as a paragraph, a diagram interspersed between paragraphs,
@@ -76,6 +76,11 @@
self._traceInfo = None
self._showBoundary = None
+ #many flowables handle text and must be processed in the
+ #absence of a canvas. tagging them with their encoding
+ #helps us to get conversions right. Use Python codec names.
+ self.encoding = None
+
def _drawOn(self,canv):
'''ensure canv is set on and then draw'''
--- a/reportlab/platypus/paragraph.py Mon Jun 14 16:29:04 2004 +0000
+++ b/reportlab/platypus/paragraph.py Mon Jun 14 16:41:25 2004 +0000
@@ -2,7 +2,7 @@
#see license.txt for license details
#history http://cvs.sourceforge.net/cgi-bin/cvsweb.cgi/reportlab/platypus/paragraph.py?cvsroot=reportlab
#$Header: /tmp/reportlab/reportlab/platypus/paragraph.py,v 1.73 2004/02/05 18:31:35 rgbecker Exp $
-__version__=''' $Id: paragraph.py,v 1.73 2004/02/05 18:31:35 rgbecker Exp $ '''
+__version__=''' $Id$ '''
from string import split, strip, join, whitespace, find
from operator import truth
from types import StringType, ListType
@@ -374,8 +374,9 @@
It will also be able to handle any MathML specified Greek characters.
"""
- def __init__(self, text, style, bulletText = None, frags=None, caseSensitive=1):
+ def __init__(self, text, style, bulletText = None, frags=None, caseSensitive=1, encoding=None):
self.caseSensitive = caseSensitive
+ self.encoding = encoding
self._setup(text, style, bulletText, frags, cleanBlockQuotedText)
@@ -523,11 +524,12 @@
fontSize = f.fontSize
fontName = f.fontName
words = hasattr(f,'text') and split(f.text, ' ') or f.words
- spaceWidth = stringWidth(' ', fontName, fontSize)
+ spaceWidth = stringWidth(' ', fontName, fontSize, self.encoding)
cLine = []
currentWidth = - spaceWidth # hack to get around extra space for word 1
for word in words:
- wordWidth = stringWidth(word, fontName, fontSize)
+ #this underscores my feeling that Unicode throughout would be easier!
+ wordWidth = stringWidth(word, fontName, fontSize, self.encoding)
newWidth = currentWidth + spaceWidth + wordWidth
if newWidth<=maxWidth or len(cLine)==0:
# fit one more on this line
--- a/reportlab/platypus/paraparser.py Mon Jun 14 16:29:04 2004 +0000
+++ b/reportlab/platypus/paraparser.py Mon Jun 14 16:41:25 2004 +0000
@@ -2,10 +2,10 @@
#see license.txt for license details
#history http://cvs.sourceforge.net/cgi-bin/cvsweb.cgi/reportlab/platypus/paraparser.py?cvsroot=reportlab
#$Header: /tmp/reportlab/reportlab/platypus/paraparser.py,v 1.54 2004/01/20 22:50:32 andy_robinson Exp $
-__version__=''' $Id: paraparser.py,v 1.54 2004/01/20 22:50:32 andy_robinson Exp $ '''
+__version__=''' $Id$ '''
import string
import re
-from types import TupleType
+from types import TupleType, UnicodeType, StringType
import sys
import os
import copy
@@ -787,6 +787,18 @@
If errors occur None will be returned and the
self.errors holds a list of the error messages.
"""
+ # AR 20040612 - when we feed Unicode strings in, sgmlop
+ # tries to coerce to ASCII. Must intercept, coerce to
+ # any 8-bit encoding which defines most of 256 points,
+ # and revert at end. Yuk. Preliminary step prior to
+ # removal of parser altogether.
+ enc = 'cp1252' #our legacy default
+ if type(text) is UnicodeType:
+ UNI = 1
+ text = text.encode(enc)
+ else:
+ UNI = 0
+
self._seq = reportlab.lib.sequencer.getSequencer()
self._reset(style) # reinitialise the parser
@@ -806,6 +818,16 @@
self._iReset()
else:
fragList = bFragList = None
+
+ if UNI:
+ #reconvert to unicode
+ if fragList:
+ for frag in fragList:
+ frag.text = unicode(frag.text, enc)
+ if bFragList:
+ for frag in bFragList:
+ frag.text = unicode(frag.text, enc)
+
return style, fragList, bFragList
if __name__=='__main__':
--- a/reportlab/test/test_pdfbase_encodings.py Mon Jun 14 16:29:04 2004 +0000
+++ b/reportlab/test/test_pdfbase_encodings.py Mon Jun 14 16:41:25 2004 +0000
@@ -5,6 +5,12 @@
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.pdfbase import pdfutils
+
+from reportlab.platypus.paragraph import Paragraph
+from reportlab.lib.styles import ParagraphStyle
+from reportlab.graphics.shapes import Drawing, String, Ellipse
+
+
import re
import codecs
@@ -14,9 +20,10 @@
#test sentences
testCp1252 = 'copyright %s trademark %s registered %s ReportLab! Ol%s!' % (chr(169), chr(153),chr(174), chr(0xe9))
testUni = unicode(testCp1252, 'cp1252')
-testUTF8 = testUni.encode('utf_8')
+testUTF8 = testUni.encode('utf-8')
# expected result is octal-escaped text in the PDF
expectedCp1252 = pdfutils._escape(testCp1252)
+
def extractText(pdfOps):
@@ -43,6 +50,7 @@
if codeStr:
chrs.append(unichr(subset[int(codeStr[1:], 8)]))
return u''.join(chrs)
+
class TextEncodingTestCase(unittest.TestCase):
@@ -50,6 +58,49 @@
"""
+    def setUp(self):
+        "Register the Luxi TrueType font and build the two paragraph styles used by the tests."
+        self.luxi = TTFont("Luxi", "luxiserif.ttf")
+        pdfmetrics.registerFont(self.luxi)
+        self.styNormal = ParagraphStyle(name='Helvetica', fontName='Helvetica-Oblique')
+        self.styTrueType = ParagraphStyle(name='TrueType', fontName='Luxi') #same spelling as registered above
+
+
+ def testStringWidth(self):
+ msg = 'Hello World'
+ assert abs(pdfmetrics.stringWidth(msg, 'Courier', 10) - 66.0) < 0.01
+ assert abs(pdfmetrics.stringWidth(msg, 'Helvetica', 10) - 51.67) < 0.01
+ assert abs(pdfmetrics.stringWidth(msg, 'Times-Roman', 10) - 50.27) < 0.01
+ assert abs(pdfmetrics.stringWidth(msg, 'Luxi', 10) - 50.22) < 0.01
+
+ uniMsg1 = u"Hello World"
+ assert abs(pdfmetrics.stringWidth(uniMsg1, 'Courier', 10) - 66.0) < 0.01
+ assert abs(pdfmetrics.stringWidth(uniMsg1, 'Helvetica', 10) - 51.67) < 0.01
+ assert abs(pdfmetrics.stringWidth(uniMsg1, 'Times-Roman', 10) - 50.27) < 0.01
+ assert abs(pdfmetrics.stringWidth(uniMsg1, 'Luxi', 10) - 50.22) < 0.01
+
+
+        # Courier glyphs are all 600/1000 em wide, so measuring UTF-8 bytes as if they
+        # were single-byte characters gives too large a width (the extra bytes are counted)
+ assert len(testCp1252) == 52
+ assert abs(pdfmetrics.stringWidth(testCp1252, 'Courier', 10) - 312.0) < 0.01
+ # the test string has 5 more bytes and so "measures too long" if passed to
+ # a single-byte font which treats it as a single-byte string.
+ assert len(testUTF8) == 57
+ assert abs(pdfmetrics.stringWidth(testUTF8, 'Courier', 10) - 342.0) < 0.01
+
+ assert len(testUni) == 52
+ assert abs(pdfmetrics.stringWidth(testUni, 'Courier', 10) - 312.0) < 0.01
+
+
+ # now try a TrueType font. Should be able to accept Unicode or UTF8
+ #print 'utf8_luxi =', pdfmetrics.stringWidth(testUTF8, 'Luxi', 10)
+ #print 'unicluxi =', pdfmetrics.stringWidth(testUni, 'Luxi', 10)
+ #assert abs(pdfmetrics.stringWidth(testUTF8, 'Luxi', 10) - 224.44) < 0.01
+ assert abs(pdfmetrics.stringWidth(testUni, 'Luxi', 10) - 224.44) < 0.01
+
+
+
#AR 9/6/2004 - just adding this to illustrate behaviour I expect.
def testStraightThrough(self):
"""This assumes input encoding matches font. no conversion,
@@ -58,18 +109,9 @@
c.drawString(100,800, 'hello') # 0
self.assertEquals(c.encoding, None)
-
- #warmup - is my text extraction working?
self.assertEquals(extractText(c.getCurrentPageContent()), ['hello'])
c.drawString(100,700, testCp1252) # 1
- extracted = extractText(c.getCurrentPageContent())
- self.assertEquals(extracted[1], expectedCp1252)
-
- #now we register a unicode truetype font
- luxi = TTFont("Luxi", "luxiserif.ttf")
- pdfmetrics.registerFont(luxi)
- #pdfmetrics.registerFont(TTFont("Rina", "rina.ttf"))
c.setFont('Luxi', 12)
@@ -86,20 +128,72 @@
c.drawString(100, 600, testUTF8) # 2
# And Unicode strings should always be converted
- c.drawString(100, 500, testUni) # 3
+# c.drawString(100, 500, testUni) # 3
+
+ # now add a paragraph in Latin-1 in the latin-1 style
+ p = Paragraph(testCp1252, style=self.styNormal)
+ w, h = p.wrap(150, 100)
+ p.drawOn(c, 100, 400)
+ c.rect(100,400,w,h)
+
+ # now add a paragraph in UTF-8 in the UTF-8 style
+ p2 = Paragraph(testUTF8, style=self.styTrueType)
+ w, h = p2.wrap(150, 100)
+ p2.drawOn(c, 300, 400)
+ c.rect(300,400,w,h)
+
+ # now add a paragraph in Unicode in the latin-1 style
+ p3 = Paragraph(testUni, style=self.styNormal)
+ w, h = p3.wrap(150, 100)
+ p3.drawOn(c, 100, 300)
+ c.rect(100,300,w,h)
+
+
+ # now add a paragraph in Unicode in the UTF-8 style
+        p4 = Paragraph(testUni, style=self.styTrueType)
+        w, h = p4.wrap(150, 100)  #capture p4's own size; the box below must not reuse p3's
+        p4.drawOn(c, 300, 300)
+        c.rect(300,300,w,h)
+
+
+ # now a graphic
+ d1 = Drawing(400,50)
+ d1.add(Ellipse(200,25,200,12.5, fillColor=None))
+ d1.add(String(200,25,testCp1252, textAnchor='middle'))
+ d1.drawOn(c, 100, 150)
+
+ # now a graphic in utf8
+ d2 = Drawing(400,50)
+ d2.add(Ellipse(200,25,200,12.5, fillColor=None))
+ d2.add(String(200,25,testUTF8, fontName='Luxi', textAnchor='middle'))
+ d2.drawOn(c, 100, 100)
+
+ # now a graphic in Unicode with T1 font
+ d3 = Drawing(400,50)
+ d3.add(Ellipse(200,25,200,12.5, fillColor=None))
+ d3.add(String(200,25,testUni, textAnchor='middle'))
+ d3.drawOn(c, 100, 50)
+
+ # now a graphic in Unicode with TT font
+ d4 = Drawing(400,50)
+ d4.add(Ellipse(200,25,200,12.5, fillColor=None))
+ d4.add(String(200,25,testUni, fontName='Luxi', textAnchor='middle'))
+ d4.drawOn(c, 100, 0)
extracted = extractText(c.getCurrentPageContent())
self.assertEquals(extracted[1], expectedCp1252)
+## self.assertEquals(extracted[2], extracted[3])
+## self.assertEquals(subsetToUnicode(self.luxi, extracted[2]), testUni)
- self.assertEquals(extracted[2], extracted[3])
- self.assertEquals(subsetToUnicode(luxi, extracted[2]), testUni)
+
+
+
c.save()
def testCp1252Canvas(self):
-
"""Verify canvas declared as cp1252 autoconverts.
This assumes winansi (cp1252) input. It converts to the
@@ -109,32 +203,85 @@
c = Canvas(outputfile('test_pdfbase_encodings_cp1252.pdf'), encoding='cp1252')
- c.drawString(100,700, testCp1252)
- extracted = extractText(c.getCurrentPageContent())
- # Assuming default font's encoding is cp1252
- self.assertEquals(extracted[0], expectedCp1252)
+
+ #print 'test 1'
+ c.drawString(100,700, testCp1252) #0
# Set a font with UTF8 encoding
- luxi = TTFont("Luxi", "luxiserif.ttf")
- pdfmetrics.registerFont(luxi)
c.setFont('Luxi', 12)
+ #print 'test 2'
# This should convert on the fly from cp1252 to UTF8
- c.drawString(100,600, testCp1252)
+ c.drawString(100,600, testCp1252) #1
+
+ #print 'test 3'
# and this should convert from Unicode to UTF8
- c.drawString(100,500, testUni)
+ c.drawString(100,500, testUni) #2
+
+
+ # now add a paragraph in Latin-1 in the latin-1 style
+ #print
+ #print 'test 4: para cp1252, type 1 font:'
+ p = Paragraph(testCp1252, style=self.styNormal, encoding="cp1252")
+ p.wrap(150, 100)
+ p.drawOn(c, 100, 400) #3
+
+ # now add a paragraph in UTF-8 in the UTF-8 style
+ #print
+ #print 'test 5: para cp1252, truetype font:'
+ p2 = Paragraph(testCp1252, style=self.styTrueType, encoding="cp1252")
+ p2.wrap(150, 100)
+ p2.drawOn(c, 300, 400) #4
+
+ # now add a paragraph in Unicode in the latin-1 style
+ p3 = Paragraph(testUni, style=self.styNormal)
+ w, h = p3.wrap(150, 100)
+ p3.drawOn(c, 100, 300)
+ c.rect(100,300,w,h)
+
+ # now add a paragraph in Unicode in the UTF-8 style
+        p4 = Paragraph(testUni, style=self.styTrueType)
+        w, h = p4.wrap(150, 100)  #capture p4's own size; the box below must not reuse p3's
+        p4.drawOn(c, 300, 300)
+        c.rect(300,300,w,h)
+
+ # now a graphic
+ d1 = Drawing(400,50)
+ d1.add(Ellipse(200,25,200,12.5, fillColor=None))
+ d1.add(String(200,25,testCp1252, textAnchor='middle', encoding='cp1252'))
+ d1.drawOn(c, 100, 150)
+
+ # now a graphic in utf8 font
+ d2 = Drawing(400,50)
+ d2.add(Ellipse(200,25,200,12.5, fillColor=None))
+ d2.add(String(200,25,testCp1252, fontName='Luxi', textAnchor='middle', encoding='cp1252'))
+ d2.drawOn(c, 100, 100)
+
+ # now a graphic in Unicode with T1 font
+ d3 = Drawing(400,50)
+ d3.add(Ellipse(200,25,200,12.5, fillColor=None))
+ d3.add(String(200,25,testUni, textAnchor='middle'))
+ d3.drawOn(c, 100, 50)
+
+ # now a graphic in Unicode with TT font
+ d4 = Drawing(400,50)
+ d4.add(Ellipse(200,25,200,12.5, fillColor=None))
+ d4.add(String(200,25,testUni, fontName='Luxi', textAnchor='middle'))
+ d4.drawOn(c, 100, 0)
+
extracted = extractText(c.getCurrentPageContent())
+ self.assertEquals(extracted[0], expectedCp1252)
+
self.assertEquals(extracted[1], extracted[2])
- self.assertEquals(subsetToUnicode(luxi, extracted[1]), testUni)
+## self.assertEquals(subsetToUnicode(self.luxi, extracted[1]), testUni)
+##
+## self.assertEquals(subsetToUnicode(self.luxi, extracted[4]), testUni)
- #uncomment this to see some PDF for fun...
- #print c.getCurrentPageContent()
c.save()
def testUtf8Canvas(self):
-
"""Verify canvas declared as utf8 autoconverts.
This assumes utf8 input. It converts to the encoding of the
@@ -144,28 +291,75 @@
c = Canvas(outputfile('test_pdfbase_encodings_utf8.pdf'), encoding='utf-8')
c.drawString(100,700, testUTF8)
- extracted = extractText(c.getCurrentPageContent())
- # Input UTF8 should be encoded to font's cp1252
- self.assertEquals(extracted[0], expectedCp1252)
# Set a font with UTF8 encoding
- luxi = TTFont("Luxi", "luxiserif.ttf")
- pdfmetrics.registerFont(luxi)
c.setFont('Luxi', 12)
# This should pass the UTF8 through unchanged
c.drawString(100,600, testUTF8)
# and this should convert from Unicode to UTF8
c.drawString(100,500, testUni)
- extracted = extractText(c.getCurrentPageContent())
+
+
+        # now add a UTF-8 paragraph in the latin-1 style; box drawn where the paragraph is
+        p = Paragraph(testUTF8, style=self.styNormal, encoding="utf-8")
+        w, h = p.wrap(150, 100)
+        p.drawOn(c, 100, 400) #3
+        c.rect(100,400,w,h)
+
+        # now add a paragraph in UTF-8 in the UTF-8 style
+        p2 = Paragraph(testUTF8, style=self.styTrueType, encoding="utf-8")
+        w, h = p2.wrap(150, 100)
+        p2.drawOn(c, 300, 400) #4
+        c.rect(300,400,w,h)
+
+        # now add a paragraph in Unicode in the latin-1 style
+        p3 = Paragraph(testUni, style=self.styNormal)
+        w, h = p3.wrap(150, 100)
+        p3.drawOn(c, 100, 300)
+        c.rect(100,300,w,h)
+
+        # now add a paragraph in Unicode in the UTF-8 style
+        p4 = Paragraph(testUni, style=self.styTrueType)
+        w, h = p4.wrap(150, 100)
+        p4.drawOn(c, 300, 300)
+        c.rect(300,300,w,h)
+ # now a graphic
+ d1 = Drawing(400,50)
+ d1.add(Ellipse(200,25,200,12.5, fillColor=None))
+ d1.add(String(200,25,testUTF8, textAnchor='middle', encoding='utf-8'))
+ d1.drawOn(c, 100, 150)
+
+ # now a graphic in utf8
+ d2 = Drawing(400,50)
+ d2.add(Ellipse(200,25,200,12.5, fillColor=None))
+ d2.add(String(200,25,testUTF8, fontName='Luxi', textAnchor='middle', encoding='utf-8'))
+ d2.drawOn(c, 100, 100)
+
+ # now a graphic in Unicode with T1 font
+ d3 = Drawing(400,50)
+ d3.add(Ellipse(200,25,200,12.5, fillColor=None))
+ d3.add(String(200,25,testUni, textAnchor='middle'))
+ d3.drawOn(c, 100, 50)
+
+ # now a graphic in Unicode with TT font
+ d4 = Drawing(400,50)
+ d4.add(Ellipse(200,25,200,12.5, fillColor=None))
+ d4.add(String(200,25,testUni, fontName='Luxi', textAnchor='middle'))
+ d4.drawOn(c, 100, 0)
+
+ extracted = extractText(c.getCurrentPageContent())
+ self.assertEquals(extracted[0], expectedCp1252)
self.assertEquals(extracted[1], extracted[2])
- self.assertEquals(subsetToUnicode(luxi, extracted[1]), testUni)
+ self.assertEquals(subsetToUnicode(self.luxi, extracted[1]), testUni)
c.save()
+
+
class FontEncodingTestCase(unittest.TestCase):
"""Make documents with custom encodings of Type 1 built-in fonts.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reportlab/test/test_platypus_paraparser.py Mon Jun 14 16:41:25 2004 +0000
@@ -0,0 +1,91 @@
+#!/bin/env python
+#copyright ReportLab Inc. 2000
+#see license.txt for license details
+#history TBC
+#$Header$
+__version__=''' $Id$ '''
+__doc__="""Tests of intra-paragraph parsing behaviour in Platypus."""
+
+from types import TupleType, ListType, StringType, UnicodeType
+from pprint import pprint as pp
+
+from reportlab.test import unittest
+from reportlab.test.utils import makeSuiteForClasses, outputfile
+from reportlab.platypus import cleanBlockQuotedText
+from reportlab.platypus.paraparser import ParaParser, ParaFrag
+from reportlab.lib.colors import black
+
+class ParaParserTestCase(unittest.TestCase):
+ """Tests of data structures created by paragraph parser. Esp. ability
+ to accept unicode and preserve it"""
+
+ def setUp(self):
+ style=ParaFrag()
+ style.fontName='Times-Roman'
+ style.fontSize = 12
+ style.textColor = black
+ style.bulletFontName = black
+ style.bulletFontName='Times-Roman'
+ style.bulletFontSize=12
+ self.style = style
+
+ def testPlain(self):
+ txt = "Hello World"
+ stuff = ParaParser().parse(txt, self.style)
+ assert type(stuff) is TupleType
+ assert len(stuff) == 3
+ assert stuff[1][0].text == 'Hello World'
+
+ def testBold(self):
+ txt = "Hello <b>Bold</b> World"
+ fragList = ParaParser().parse(txt, self.style)[1]
+ self.assertEquals(map(lambda x:x.text, fragList), ['Hello ','Bold',' World'])
+ self.assertEquals(fragList[1].fontName, 'Times-Bold')
+
+ def testEntity(self):
+ "Numeric entities should be unescaped by parser"
+ txt = "Hello © copyright"
+ fragList = ParaParser().parse(txt, self.style)[1]
+ self.assertEquals(map(lambda x:x.text, fragList), ['Hello ','\xa9',' copyright'])
+
+ def testEscaped(self):
+ "Escaped high-bit stuff should go straight through"
+ txt = "Hello \xa9 copyright"
+ fragList = ParaParser().parse(txt, self.style)[1]
+ assert fragList[0].text == txt
+
+ def testPlainUnicode(self):
+ "See if simple unicode goes through"
+ txt = u"Hello World"
+ stuff = ParaParser().parse(txt, self.style)
+ assert type(stuff) is TupleType
+ assert len(stuff) == 3
+ assert stuff[1][0].text == u'Hello World'
+
+ def testBoldUnicode(self):
+ txt = u"Hello <b>Bold</b> World"
+ fragList = ParaParser().parse(txt, self.style)[1]
+ self.assertEquals(map(lambda x:x.text, fragList), [u'Hello ',u'Bold',u' World'])
+ self.assertEquals(fragList[1].fontName, 'Times-Bold')
+
+ def testEntityUnicode(self):
+ "Numeric entities should be unescaped by parser"
+ txt = u"Hello © copyright"
+ fragList = ParaParser().parse(txt, self.style)[1]
+ self.assertEquals(map(lambda x:x.text, fragList), [u'Hello ',u'\xa9',u' copyright'])
+
+ def testEscapedUnicode(self):
+ "Escaped high-bit stuff should go straight through"
+ txt = u"Hello \xa9 copyright"
+ fragList = ParaParser().parse(txt, self.style)[1]
+ assert fragList[0].text == txt
+
+
+
+def makeSuite():
+ return makeSuiteForClasses(ParaParserTestCase)
+
+
+#noruntests
+if __name__ == "__main__":
+ unittest.TextTestRunner().run(makeSuite())