paraparser.py: force assertion of tag rather than value checking in end_<tag>
author robin
Thu, 22 May 2014 16:45:43 +0100
changeset 4097 1c2ebf285cb7
parent 4096 7fb931c1d167
child 4098 7154a8213d29
paraparser.py: force assertion of tag rather than value checking in end_<tag>
src/reportlab/platypus/paraparser.py
tests/test_paragraphs.py
--- a/src/reportlab/platypus/paraparser.py	Thu May 22 16:15:59 2014 +0100
+++ b/src/reportlab/platypus/paraparser.py	Thu May 22 16:45:43 2014 +0100
@@ -596,52 +596,51 @@
 
     #### bold
     def start_b( self, attributes ):
-        self._push(bold=1)
+        self._push('b',bold=1)
 
     def end_b( self ):
-        self._pop(bold=1)
+        self._pop('b')
 
     def start_strong( self, attributes ):
-        self._push(bold=1)
+        self._push('strong',bold=1)
 
     def end_strong( self ):
-        self._pop(bold=1)
+        self._pop('strong')
 
     #### italics
     def start_i( self, attributes ):
-        self._push(italic=1)
+        self._push('i',italic=1)
 
     def end_i( self ):
-        self._pop(italic=1)
+        self._pop('i')
 
     def start_em( self, attributes ):
-        self._push(italic=1)
+        self._push('em', italic=1)
 
     def end_em( self ):
-        self._pop(italic=1)
+        self._pop('em')
 
     #### underline
     def start_u( self, attributes ):
-        self._push(underline=1)
+        self._push('u',underline=1)
 
     def end_u( self ):
-        self._pop(underline=1)
+        self._pop('u')
 
     #### strike
     def start_strike( self, attributes ):
-        self._push(strike=1)
+        self._push('strike',strike=1)
 
     def end_strike( self ):
-        self._pop(strike=1)
+        self._pop('strike')
 
     #### link
     def start_link(self, attributes):
-        self._push(**self.getAttributes(attributes,_linkAttrMap))
+        self._push('link',**self.getAttributes(attributes,_linkAttrMap))
 
     def end_link(self):
-        frag = self._stack[-1]
-        del self._stack[-1]
-        assert frag.link!=None
+        if self._pop('link').link is None:
+            raise ValueError('<link> has no target or href')
 
     #### anchor
     def start_a(self, attributes):
@@ -659,33 +658,35 @@
             href = A.get('href','').strip()
             A['link'] = href    #convert to our link form
             A.pop('href',None)
-        self._push(**A)
+        self._push('a',**A)
 
     def end_a(self):
         frag = self._stack[-1]
         sct = getattr(frag,'_selfClosingTag','')
         if sct:
-            assert sct=='anchor' and frag.name,'Parser failure in <a/>'
+            if not (sct=='anchor' and frag.name):
+                raise ValueError('Parser failure in <a/>')
             defn = frag.cbDefn = ABag()
             defn.label = defn.kind = 'anchor'
             defn.name = frag.name
             del frag.name, frag._selfClosingTag
             self.handle_data('')
-            self._pop()
+            self._pop('a')
         else:
-            del self._stack[-1]
-            assert frag.link!=None
+            if self._pop('a').link is None:
+                raise ValueError('<link> has no href')
 
     def start_img(self,attributes):
         A = self.getAttributes(attributes,_imgAttrMap)
         if not A.get('src'):
             self._syntax_error('<img> needs src attribute')
         A['_selfClosingTag'] = 'img'
-        self._push(**A)
+        self._push('img',**A)
 
     def end_img(self):
         frag = self._stack[-1]
-        assert getattr(frag,'_selfClosingTag',''),'Parser failure in <img/>'
+        if not getattr(frag,'_selfClosingTag',''):
+            raise ValueError('Parser failure in <img/>')
         defn = frag.cbDefn = ABag()
         defn.kind = 'img'
         defn.src = getattr(frag,'src',None)
@@ -696,24 +697,27 @@
         defn.valign = getattr(frag,'valign','bottom')
         del frag._selfClosingTag
         self.handle_data('')
-        self._pop()
+        self._pop('img')
 
     #### super script
     def start_super( self, attributes ):
-        self._push(super=1)
+        self._push('super',super=1)
 
     def end_super( self ):
-        self._pop(super=1)
+        self._pop('super')
 
-    start_sup = start_super
-    end_sup = end_super
+    def start_sup( self, attributes ):
+        self._push('sup',super=1)
+
+    def end_sup( self ):
+        self._pop('sup')
 
     #### sub script
     def start_sub( self, attributes ):
-        self._push(sub=1)
+        self._push('sub',sub=1)
 
     def end_sub( self ):
-        self._pop(sub=1)
+        self._pop('sub')
 
     #### greek script
     #### add symbol encoding
@@ -736,10 +740,10 @@
         self.errors.append(message)
 
     def start_greek(self, attr):
-        self._push(greek=1)
+        self._push('greek',greek=1)
 
     def end_greek(self):
-        self._pop(greek=1)
+        self._pop('greek')
 
     def start_unichar(self, attr):
         if 'name' in attr:
@@ -764,16 +768,16 @@
 
         if v is not None:
             self.handle_data(v)
-        self._push(_selfClosingTag='unichar')
+        self._push('unichar',_selfClosingTag='unichar')
 
     def end_unichar(self):
-        self._pop()
+        self._pop('unichar')
 
     def start_font(self,attr):
-        self._push(**self.getAttributes(attr,_fontAttrMap))
+        self._push('font',**self.getAttributes(attr,_fontAttrMap))
 
     def end_font(self):
-        self._pop()
+        self._pop('font')
 
     def start_span(self,attr):
         A = self.getAttributes(attr,_spanAttrMap)
@@ -786,20 +790,22 @@
                 D[k] = v
             D.update(A)
             A = D
-        self._push(**A)
+        self._push('span',**A)
 
-    end_span = end_font
+    def end_span(self):
+        self._pop('span')
 
     def start_br(self, attr):
-        self._push(_selfClosingTag='br',lineBreak=True,text='')
+        self._push('br',_selfClosingTag='br',lineBreak=True,text='')
         
     def end_br(self):
         #print('\nend_br called, %d frags in list' % len(self.fragList))
         frag = self._stack[-1]
-        assert frag._selfClosingTag=='br' and frag.lineBreak,'Parser failure in <br/>'
+        if not (frag._selfClosingTag=='br' and frag.lineBreak):
+                raise ValueError('Parser failure in <br/>')
         del frag._selfClosingTag
         self.handle_data('')
-        self._pop()
+        self._pop('br')
 
     def _initial_frag(self,attr,attrMap,bullet=0):
         style = self._style
@@ -828,10 +834,12 @@
         return frag
 
     def start_para(self,attr):
-        self._stack = [self._initial_frag(attr,_paraAttrMap)]
+        frag = self._initial_frag(attr,_paraAttrMap)
+        frag.__tag__ = 'para'
+        self._stack = [frag]
 
     def end_para(self):
-        self._pop()
+        self._pop('para')
 
     def start_bullet(self,attr):
         if hasattr(self,'bFragList'):
@@ -839,10 +847,11 @@
         self.bFragList = []
         frag = self._initial_frag(attr,_bulletAttrMap,1)
         frag.isBullet = 1
+        frag.__tag__ = 'bullet'
         self._stack.append(frag)
 
     def end_bullet(self):
-        self._pop()
+        self._pop('bullet')
 
     #---------------------------------------------------------------
     def start_seqdefault(self, attr):
@@ -938,9 +947,9 @@
 
         if 'label' in attr: defn.label = attr['label']
         defn.kind='onDraw'
-        self._push(cbDefn=defn)
+        self._push('ondraw',cbDefn=defn)
         self.handle_data('')
-        self._pop()
+        self._pop('ondraw')
     start_onDraw=start_ondraw 
     end_onDraw=end_ondraw=end_seq
 
@@ -967,9 +976,9 @@
         defn.label = encode_label((label,format,offset))
         defn.name = name
         defn.kind='index'
-        self._push(cbDefn=defn)
+        self._push('index',cbDefn=defn)
         self.handle_data('')
-        self._pop()
+        self._pop('index',)
     end_index=end_seq
 
     def start_unknown(self,attr):
@@ -977,17 +986,16 @@
     end_unknown=end_seq
 
     #---------------------------------------------------------------
-    def _push(self,**attr):
+    def _push(self,tag,**attr):
         frag = copy.copy(self._stack[-1])
+        frag.__tag__ = tag
         _applyAttributes(frag,attr)
         self._stack.append(frag)
 
-    def _pop(self,**kw):
-        frag = self._stack[-1]
-        del self._stack[-1]
-        for k, v in kw.items():
-            assert getattr(frag,k)==v
-        return frag
+    def _pop(self,tag):
+        frag = self._stack.pop()
+        if tag==frag.__tag__: return frag
+        raise ValueError('Parse error: saw </%s> instead of expected </%s>' % (tag,frag.__tag__))
 
     def getAttributes(self,attr,attrMap):
         A = {}
@@ -1148,7 +1156,7 @@
         try:
             self.feed(text)
         except:
-            annotateException('paragraph text %s caused exception' % ascii(text))
+            annotateException('\nparagraph text %s caused exception' % ascii(text))
         return self._complete_parse()
 
     def handle_starttag(self, tag, attrs):
--- a/tests/test_paragraphs.py	Thu May 22 16:15:59 2014 +0100
+++ b/tests/test_paragraphs.py	Thu May 22 16:45:43 2014 +0100
@@ -5,7 +5,7 @@
 from reportlab.lib.testutils import setOutDir,makeSuiteForClasses, outputfile, printLocation
 setOutDir(__name__)
 import unittest
-from reportlab.platypus import Paragraph, SimpleDocTemplate, XBox, Indenter, XPreformatted, PageBreak
+from reportlab.platypus import Paragraph, SimpleDocTemplate, XBox, Indenter, XPreformatted, PageBreak, Spacer
 from reportlab.lib.styles import ParagraphStyle
 from reportlab.lib.units import inch
 from reportlab.lib.abag import ABag
@@ -15,7 +15,7 @@
 from reportlab.rl_config import defaultPageSize, rtlSupport
 from reportlab.pdfbase import ttfonts
 from reportlab.pdfbase import pdfmetrics
-from reportlab.lib.fonts import addMapping
+from reportlab.lib.fonts import addMapping, tt2ps
 
 (PAGE_WIDTH, PAGE_HEIGHT) = defaultPageSize
 
@@ -30,7 +30,6 @@
     canvas.drawString(4 * inch, 0.75 * inch, "First Page")
     canvas.restoreState()
 
-
 def myLaterPages(canvas, doc):
     canvas.saveState()
     canvas.setStrokeColor(red)
@@ -40,7 +39,6 @@
     canvas.drawString(4 * inch, 0.75 * inch, "Page %d" % doc.page)
     canvas.restoreState()
 
-
 def getAFont():
     '''register a font that supports most Unicode characters'''
     I = []
@@ -56,7 +54,6 @@
                  (font_name, 0, 1, font_name + 'Italic'),
                  (font_name, 1, 1, font_name + 'BoldItalic'),
                  ])
-    I.reverse()
     for info in I:
         n = 0
         for font in info:
@@ -67,7 +64,7 @@
                 n += 1
             except:
                 pass
-        if n==4: return fontName
+        if n==4: return font[0]
     raise ValueError('could not find suitable font')
 
 class ParagraphTestCase(unittest.TestCase):
@@ -124,7 +121,6 @@
                                    spaceBefore=12,
                                    spaceAfter=12)
 
-
         SA(Paragraph("This is a normal paragraph. "+ randomText(), styNormal))
         SA(Paragraph("There follows a paragraph with only \"&lt;br/&gt;\"", styNormal))
         SA(Paragraph("<br/>", styNormal))
@@ -253,12 +249,13 @@
             fontName = getAFont()
 
             # create styles based on the registered font
-            from reportlab.lib.enums import TA_LEFT, TA_RIGHT
-            styLTR = ParagraphStyle('left', fontName = fontName)
-            styRTL = ParagraphStyle('right', parent = styLTR, alignment = TA_RIGHT,
+            stySTD = ParagraphStyle('STD', fontName = fontName)
+            styRJ = ParagraphStyle('RJ', parent=stySTD, alignment=TA_RIGHT)
+            styLTR = ParagraphStyle('LTR', parent=stySTD, wordWrap='LTR')
+            styRTL = ParagraphStyle('RTL', parent = stySTD, alignment = TA_RIGHT,
                                     wordWrap = 'RTL', spaceAfter = 12)
 
-            # strings for testing LTR.
+            # strings for testing Normal & LTR styles
             ltrStrings = [# English followed by Arabic.
                           b'English followed by \xd8\xb9\xd8\xb1\xd8\xa8\xd9\x8a.',
                           # English with Arabic in the middle
@@ -300,7 +297,7 @@
                               b' \xd8\xa5\xd9\x84\xd9\x89 \xd8\xa7\xd9\x84\xd9\x8a\xd8\xb3\xd8\xa7\xd8\xb1'
                               b' 456.78 \xd8\xa3\xd8\xb1\xd9\x82\xd8\xa7\xd9\x85'
                               b' \xd8\xb9\xd8\xb4\xd8\xb1\xd9\x8a\xd8\xa9 456.78.',
-                          # Long Arabic text with LTR script in the middle, splitting over multiple lines
+                          # Long Arabic text with LTR text in the middle, splitting over multiple lines
                           b'\xd9\x86\xd8\xb5 \xd8\xb9\xd8\xb1\xd8\xa8\xd9\x8a \xd8\xb7\xd9\x88\xd9\x8a\xd9\x84'
                               b' Long Arabic text \xd9\x85\xd8\xb9 with \xd9\x83\xd8\xaa\xd8\xa7\xd8\xa8\xd8\xa9'
                               b' \xd9\x85\xd9\x86 \xd8\xa7\xd9\x84\xd9\x8a\xd8\xb3\xd8\xa7\xd8\xb1'
@@ -317,11 +314,18 @@
             # create a store to be printed
             story = []
             
+            story.append(Paragraph("<b><i>Following pairs of left justified texts have style.wordWrap=None &amp; 'LTR'.</i></b><br/>",stySTD))
             # write every LTR string and its corresponding RTL string to be matched.
-            for i in range(0, n):
+            for i in xrange(n):
+                story.append(Paragraph(ltrStrings[i], stySTD))
                 story.append(Paragraph(ltrStrings[i], styLTR))
+
+            story.append(Paragraph("<br/><b><i>Following pairs of right justfied texts have style.wordWrap=None &amp; 'RTL'.</i></b><br/>",stySTD))
+            for i in xrange(n):
+                story.append(Paragraph(rtlStrings[i], styRJ))
                 story.append(Paragraph(rtlStrings[i], styRTL))
 
+            story.append(Paragraph("<b><i><br/>Following texts have style.wordWrap='RTL'</i></b>",stySTD))
             # a few additional scripts for testing.
             story.append(
                 Paragraph(b'\xd9\x87\xd8\xb0\xd9\x87 \xd9\x81\xd9\x82\xd8\xb1\xd8\xa9'
@@ -354,13 +358,6 @@
             except ImportError:
                 pass
 
-            import os
-            from reportlab.platypus import SimpleDocTemplate
-            from reportlab.platypus.paragraph import Paragraph
-            from reportlab.platypus.flowables import Spacer
-            from reportlab.lib.styles import ParagraphStyle
-            from reportlab.lib.enums import TA_JUSTIFY, TA_RIGHT
-
             font_name = getAFont()
             doc = SimpleDocTemplate(outputfile('test_rtl_bullets.pdf'),showBoundary=True)
             p_style = ParagraphStyle('default')
@@ -394,13 +391,12 @@
                 else:
                     list_style.leftIndent = indent_amount*list_lvl
 
-
             elements =[]
 
             TEXTS=[
                     b'\xd7\xa9\xd7\xa8 \xd7\x94\xd7\x91\xd7\x99\xd7\x98\xd7\x97\xd7\x95\xd7\x9f, \xd7\x94\xd7\x95\xd7\x90 \xd7\x94\xd7\xa9\xd7\xa8 \xd7\x94\xd7\x90\xd7\x97\xd7\xa8\xd7\x90\xd7\x99 \xd7\xa2\xd7\x9c \xd7\x9e\xd7\xa9\xd7\xa8\xd7\x93 \xd7\x96\xd7\x94. \xd7\xaa\xd7\xa4\xd7\xa7\xd7\x99\xd7\x93 \xd7\x96\xd7\x94 \xd7\xa0\xd7\x97\xd7\xa9\xd7\x91 \xd7\x9c\xd7\x90\xd7\x97\xd7\x93 \xd7\x94\xd7\xaa\xd7\xa4\xd7\xa7\xd7\x99\xd7\x93\xd7\x99\xd7\x9d \xd7\x94\xd7\x91\xd7\x9b\xd7\x99\xd7\xa8\xd7\x99\xd7\x9d \xd7\x91\xd7\x9e\xd7\x9e\xd7\xa9\xd7\x9c\xd7\x94. \xd7\x9c\xd7\xa9\xd7\xa8 \xd7\x94\xd7\x91\xd7\x99\xd7\x98\xd7\x97\xd7\x95\xd7\x9f \xd7\x9e\xd7\xaa\xd7\x9e\xd7\xa0\xd7\x94 \xd7\x9c\xd7\xa8\xd7\x95\xd7\x91 \xd7\x92\xd7\x9d \xd7\xa1\xd7\x92\xd7\x9f \xd7\xa9\xd7\xa8.',
-                    b'\xd7\xa9\xd7\xa8 \xd7\x94\xd7\x91\xd7\x99\xd7\x98\xd7\x97\xd7\x95\xd7\x9f, <b>\xd7\x94\xd7\x95\xd7\x90 \xd7\x94\xd7\xa9\xd7\xa8 \xd7\x94\xd7\x90\xd7\x97\xd7\xa8\xd7\x90\xd7\x99 \xd7\xa2\xd7\x9c \xd7\x9e\xd7\xa9\xd7\xa8\xd7\x93 \xd7\x96\xd7\x94.</b> \xd7\xaa\xd7\xa4\xd7\xa7\xd7\x99\xd7\x93 \xd7\x96\xd7\x94 <i>\xd7\xa0\xd7\x97\xd7\xa9\xd7\x91 \xd7\x9c\xd7\x90\xd7\x97\xd7\x93</i> \xd7\x94\xd7\xaa\xd7\xa4\xd7\xa7\xd7\x99\xd7\x93\xd7\x99\xd7\x9d <b><i>\xd7\x94\xd7\x91\xd7\x9b\xd7\x99\xd7\xa8\xd7\x99\xd7\x9d \xd7\x91\xd7\x9e\xd7\x9e\xd7\xa9\xd7\x9c\xd7\x94</b></i>. \xd7\x9c\xd7\xa9\xd7\xa8 \xd7\x94\xd7\x91\xd7\x99\xd7\x98\xd7\x97\xd7\x95\xd7\x9f \xd7\x9e\xd7\xaa\xd7\x9e\xd7\xa0\xd7\x94 \xd7\x9c\xd7\xa8\xd7\x95\xd7\x91 \xd7\x92\xd7\x9d \xd7\xa1\xd7\x92\xd7\x9f \xd7\xa9\xd7\xa8.',
-                    u'<bullet>\u2022</bullet>\u05e9\u05e8 \u05d4\u05d1\u05d9\u05d8\u05d7\u05d5\u05df, <b>\u05d4\u05d5\u05d0 \u05d4\u05e9\u05e8 \u05d4\u05d0\u05d7\u05e8\u05d0\u05d9 \u05e2\u05dc \u05de\u05e9\u05e8\u05d3 \u05d6\u05d4.</b> \u05ea\u05e4\u05e7\u05d9\u05d3 \u05d6\u05d4 <i>\u05e0\u05d7\u05e9\u05d1 \u05dc\u05d0\u05d7\u05d3</i> \u05d4\u05ea\u05e4\u05e7\u05d9\u05d3\u05d9\u05dd <b><i>\u05d4\u05d1\u05db\u05d9\u05e8\u05d9\u05dd \u05d1\u05de\u05de\u05e9\u05dc\u05d4</b></i>. \u05dc\u05e9\u05e8\u05d4\u05d1\u05d9\u05d8\u05d7\u05d5\u05df \u05de\u05ea\u05de\u05e0\u05d4 \u05dc\u05e8\u05d5\u05d1 \u05d2\u05dd \u05e1\u05d2\u05df \u05e9\u05e8.',
+                    b'\xd7\xa9\xd7\xa8 \xd7\x94\xd7\x91\xd7\x99\xd7\x98\xd7\x97\xd7\x95\xd7\x9f, <b>\xd7\x94\xd7\x95\xd7\x90 \xd7\x94\xd7\xa9\xd7\xa8 \xd7\x94\xd7\x90\xd7\x97\xd7\xa8\xd7\x90\xd7\x99 \xd7\xa2\xd7\x9c \xd7\x9e\xd7\xa9\xd7\xa8\xd7\x93 \xd7\x96\xd7\x94.</b> \xd7\xaa\xd7\xa4\xd7\xa7\xd7\x99\xd7\x93 \xd7\x96\xd7\x94 <i>\xd7\xa0\xd7\x97\xd7\xa9\xd7\x91 \xd7\x9c\xd7\x90\xd7\x97\xd7\x93</i> \xd7\x94\xd7\xaa\xd7\xa4\xd7\xa7\xd7\x99\xd7\x93\xd7\x99\xd7\x9d <b><i>\xd7\x94\xd7\x91\xd7\x9b\xd7\x99\xd7\xa8\xd7\x99\xd7\x9d \xd7\x91\xd7\x9e\xd7\x9e\xd7\xa9\xd7\x9c\xd7\x94</i></b>. \xd7\x9c\xd7\xa9\xd7\xa8 \xd7\x94\xd7\x91\xd7\x99\xd7\x98\xd7\x97\xd7\x95\xd7\x9f \xd7\x9e\xd7\xaa\xd7\x9e\xd7\xa0\xd7\x94 \xd7\x9c\xd7\xa8\xd7\x95\xd7\x91 \xd7\x92\xd7\x9d \xd7\xa1\xd7\x92\xd7\x9f \xd7\xa9\xd7\xa8.',
+                    u'<bullet>\u2022</bullet>\u05e9\u05e8 \u05d4\u05d1\u05d9\u05d8\u05d7\u05d5\u05df, <b>\u05d4\u05d5\u05d0 \u05d4\u05e9\u05e8 \u05d4\u05d0\u05d7\u05e8\u05d0\u05d9 \u05e2\u05dc \u05de\u05e9\u05e8\u05d3 \u05d6\u05d4.</b> \u05ea\u05e4\u05e7\u05d9\u05d3 \u05d6\u05d4 <i>\u05e0\u05d7\u05e9\u05d1 \u05dc\u05d0\u05d7\u05d3</i> \u05d4\u05ea\u05e4\u05e7\u05d9\u05d3\u05d9\u05dd <b><i>\u05d4\u05d1\u05db\u05d9\u05e8\u05d9\u05dd \u05d1\u05de\u05de\u05e9\u05dc\u05d4</i></b>. \u05dc\u05e9\u05e8\u05d4\u05d1\u05d9\u05d8\u05d7\u05d5\u05df \u05de\u05ea\u05de\u05e0\u05d4 \u05dc\u05e8\u05d5\u05d1 \u05d2\u05dd \u05e1\u05d2\u05df \u05e9\u05e8.',
                     ]
 
             # simple text in a paragraph
@@ -422,10 +418,17 @@
 
             doc.build(elements)
 
+        def testParsing(self):
+            fontName = getAFont()
+            fontNameBI = tt2ps(fontName,1,1)
+            stySTD = ParagraphStyle('STD',fontName=fontName)
+            styBI = ParagraphStyle('BI',fontName=fontNameBI)
+            self.assertRaises(ValueError,Paragraph,'aaaa <b><i>bibibi</b></i> ccccc',stySTD)
+            self.assertRaises(ValueError,Paragraph,'AAAA <b><i>BIBIBI</b></i> CCCCC',styBI)
+
 def makeSuite():
     return makeSuiteForClasses(ParagraphTestCase)
 
-
 #noruntests
 if __name__ == "__main__":
     unittest.TextTestRunner().run(makeSuite())