src/reportlab/platypus/paraparser.py
author robin <robin@reportlab.com>
Wed, 27 Sep 2017 11:04:12 +0100
changeset 4367 9960d82643bf
parent 4330 617ffa6bbdc8
child 4370 823a8c33ce43
permissions -rw-r--r--
remove ascii, cmp & xrange builtins abuse; version-->3.4.15
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
4330
617ffa6bbdc8 changes for release 3.4.0
robin <robin@reportlab.com>
parents: 4315
diff changeset
     1
#Copyright ReportLab Europe Ltd. 2000-2017
494
54257447cfe9 Changed to indirect copyright
rgbecker
parents: 433
diff changeset
     2
#see license.txt for license details
2332
2a7ab4405e18 Remove $Header:, fix CopyRight & history
rgbecker
parents: 2321
diff changeset
     3
#history http://www.reportlab.co.uk/cgi-bin/viewcvs.cgi/public/reportlab/trunk/reportlab/platypus/paraparser.py
4252
fe660f227cac changes for release 3.3.0
robin
parents: 4249
diff changeset
     4
__version__='3.3.0'
3032
22224b1b4d24 New docstrings mainly for module titles
damian
parents: 2964
diff changeset
     5
__doc__='''The parser used to process markup within paragraphs'''
96
2a9cca4c5cf0 Beginnings of a paragraph parser
rgbecker
parents:
diff changeset
     6
import string
119
b4dc589c8364 <para> tag added in layout.py paraparser.py
rgbecker
parents: 115
diff changeset
     7
import re
96
2a9cca4c5cf0 Beginnings of a paragraph parser
rgbecker
parents:
diff changeset
     8
import sys
2a9cca4c5cf0 Beginnings of a paragraph parser
rgbecker
parents:
diff changeset
     9
import os
2a9cca4c5cf0 Beginnings of a paragraph parser
rgbecker
parents:
diff changeset
    10
import copy
3187
2d5a6655556e tableofcontents/paraparser: allow for format and offset parameters
rgbecker
parents: 3165
diff changeset
    11
import base64
3954
44dbe56eb858 first attempt at HTMLParser-based paraparser
Andy Robinson <andy@reportlab.com>
parents: 3937
diff changeset
    12
from pprint import pprint as pp
4367
9960d82643bf remove ascii, cmp & xrange builtins abuse; version-->3.4.15
robin <robin@reportlab.com>
parents: 4330
diff changeset
    13
from reportlab import ascii
2693
3c61a57aecd1 missing import statement
andy
parents: 2670
diff changeset
    14
import unicodedata
279
e7d8b3631d5c Global sequencer put in the 'story builder'.
andy_robinson
parents: 267
diff changeset
    15
import reportlab.lib.sequencer
3954
44dbe56eb858 first attempt at HTMLParser-based paraparser
Andy Robinson <andy@reportlab.com>
parents: 3937
diff changeset
    16
518
5be3fcb26c78 Semantic Name changes
rgbecker
parents: 514
diff changeset
    17
from reportlab.lib.abag import ABag
4220
c0e82d246798 add justifyBreaks & justifyLastLine ParagraphStyle attributes; version-->3.2.7
robin
parents: 4181
diff changeset
    18
from reportlab.lib.utils import ImageReader, isPy3, annotateException, encode_label, asUnicode, asBytes, uniChr, isStr
248
c103b7a55e79 Color fixes; thanks to J Alet
rgbecker
parents: 238
diff changeset
    19
from reportlab.lib.colors import toColor, white, black, red, Color
96
2a9cca4c5cf0 Beginnings of a paragraph parser
rgbecker
parents:
diff changeset
    20
from reportlab.lib.fonts import tt2ps, ps2tt
119
b4dc589c8364 <para> tag added in layout.py paraparser.py
rgbecker
parents: 115
diff changeset
    21
from reportlab.lib.enums import TA_LEFT, TA_RIGHT, TA_CENTER, TA_JUSTIFY
1940
baa0abc136c4 Henning von Bargen's caseSensitive flag
rgbecker
parents: 1932
diff changeset
    22
from reportlab.lib.units import inch,mm,cm,pica
3955
3dbd805c708c paraparser.py: fix HTMLParser imports
robin
parents: 3954
diff changeset
    23
if isPy3:
3dbd805c708c paraparser.py: fix HTMLParser imports
robin
parents: 3954
diff changeset
    24
    from html.parser import HTMLParser
3dbd805c708c paraparser.py: fix HTMLParser imports
robin
parents: 3954
diff changeset
    25
    from html.entities import name2codepoint
3dbd805c708c paraparser.py: fix HTMLParser imports
robin
parents: 3954
diff changeset
    26
else:
3dbd805c708c paraparser.py: fix HTMLParser imports
robin
parents: 3954
diff changeset
    27
    from HTMLParser import HTMLParser
3dbd805c708c paraparser.py: fix HTMLParser imports
robin
parents: 3954
diff changeset
    28
    from htmlentitydefs import name2codepoint
3dbd805c708c paraparser.py: fix HTMLParser imports
robin
parents: 3954
diff changeset
    29
2410
f505ed647678 reportlab: add fix for <para/> and new test
rgbecker
parents: 2376
diff changeset
    30
# matches text that opens with a <para> tag: optional leading whitespace,
# '<', optional whitespace, 'para', then whitespace, '>' or '/>'
_re_para = re.compile(r'^\s*<\s*para(?:\s+|>|/>)')
96
2a9cca4c5cf0 Beginnings of a paragraph parser
rgbecker
parents:
diff changeset
    31
1677
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
    32
sizeDelta = 2       # amount to reduce font size by for super and sub script
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
    33
subFraction = 0.5   # fraction of font size that a sub script should be lowered
4249
8fc7d11bdee0 add support for rml <sup>/<sub> rise and size attributes; version --> 3.2.17
robin
parents: 4220
diff changeset
    34
supFraction = 0.5 # fraction of font size that a super script should be raised
96
2a9cca4c5cf0 Beginnings of a paragraph parser
rgbecker
parents:
diff changeset
    35
3165
cbda9e7d0ee3 reportlab: new index support
rgbecker
parents: 3137
diff changeset
    36
# NOTE(review): presumably the name used for index entries when the markup
# gives none; its use is not visible in this part of the file - confirm
DEFAULT_INDEX_NAME='_indexAdd'
cbda9e7d0ee3 reportlab: new index support
rgbecker
parents: 3137
diff changeset
    37
3434
3c14212cc997 platypus: preliminary working version of % height/width for <img> tag
rgbecker
parents: 3368
diff changeset
    38
def _convnum(s, unit=1, allowRelative=True):
3c14212cc997 platypus: preliminary working version of % height/width for <img> tag
rgbecker
parents: 3368
diff changeset
    39
    if s[0] in ('+','-') and allowRelative:
2857
487dc2450eec reprotlab: inline images horizontal positioning OK
rgbecker
parents: 2836
diff changeset
    40
        try:
487dc2450eec reprotlab: inline images horizontal positioning OK
rgbecker
parents: 2836
diff changeset
    41
            return ('relative',int(s)*unit)
487dc2450eec reprotlab: inline images horizontal positioning OK
rgbecker
parents: 2836
diff changeset
    42
        except ValueError:
487dc2450eec reprotlab: inline images horizontal positioning OK
rgbecker
parents: 2836
diff changeset
    43
            return ('relative',float(s)*unit)
487dc2450eec reprotlab: inline images horizontal positioning OK
rgbecker
parents: 2836
diff changeset
    44
    else:
487dc2450eec reprotlab: inline images horizontal positioning OK
rgbecker
parents: 2836
diff changeset
    45
        try:
487dc2450eec reprotlab: inline images horizontal positioning OK
rgbecker
parents: 2836
diff changeset
    46
            return int(s)*unit
487dc2450eec reprotlab: inline images horizontal positioning OK
rgbecker
parents: 2836
diff changeset
    47
        except ValueError:
487dc2450eec reprotlab: inline images horizontal positioning OK
rgbecker
parents: 2836
diff changeset
    48
            return float(s)*unit
487dc2450eec reprotlab: inline images horizontal positioning OK
rgbecker
parents: 2836
diff changeset
    49
3434
3c14212cc997 platypus: preliminary working version of % height/width for <img> tag
rgbecker
parents: 3368
diff changeset
    50
def _num(s, unit=1, allowRelative=True):
    """Turn a length such as '10cm' into a number of points.

    A recognised unit suffix ('cm', 'in', 'pt', 'i', 'mm', 'pica') is
    removed and replaces the unit argument; with no suffix the unit
    argument (points by default) is used as the multiplier.  The numeric
    part is converted by _convnum, so a signed value may come back as a
    ('relative', value) tuple when allowRelative is true.
    """
    # the suffixes are tried in this fixed order and each one that matches
    # is stripped, the last match deciding the unit
    for suffix, factor in (('cm',cm),('in',inch),('pt',1),('i',inch),('mm',mm),('pica',pica)):
        if s.endswith(suffix):
            unit = factor
            s = s[:-len(suffix)]
    return _convnum(s,unit,allowRelative)
3c14212cc997 platypus: preliminary working version of % height/width for <img> tag
rgbecker
parents: 3368
diff changeset
    74
4220
c0e82d246798 add justifyBreaks & justifyLastLine ParagraphStyle attributes; version-->3.2.7
robin
parents: 4181
diff changeset
    75
def _int(s):
c0e82d246798 add justifyBreaks & justifyLastLine ParagraphStyle attributes; version-->3.2.7
robin
parents: 4181
diff changeset
    76
    try:
c0e82d246798 add justifyBreaks & justifyLastLine ParagraphStyle attributes; version-->3.2.7
robin
parents: 4181
diff changeset
    77
        return int(s)
c0e82d246798 add justifyBreaks & justifyLastLine ParagraphStyle attributes; version-->3.2.7
robin
parents: 4181
diff changeset
    78
    except:
c0e82d246798 add justifyBreaks & justifyLastLine ParagraphStyle attributes; version-->3.2.7
robin
parents: 4181
diff changeset
    79
        raise ValueError('cannot convert %r to int' % s)
c0e82d246798 add justifyBreaks & justifyLastLine ParagraphStyle attributes; version-->3.2.7
robin
parents: 4181
diff changeset
    80
c0e82d246798 add justifyBreaks & justifyLastLine ParagraphStyle attributes; version-->3.2.7
robin
parents: 4181
diff changeset
    81
def _bool(s):
c0e82d246798 add justifyBreaks & justifyLastLine ParagraphStyle attributes; version-->3.2.7
robin
parents: 4181
diff changeset
    82
    s = s.lower()
c0e82d246798 add justifyBreaks & justifyLastLine ParagraphStyle attributes; version-->3.2.7
robin
parents: 4181
diff changeset
    83
    if s in ('true','1','yes'):
c0e82d246798 add justifyBreaks & justifyLastLine ParagraphStyle attributes; version-->3.2.7
robin
parents: 4181
diff changeset
    84
        return True
c0e82d246798 add justifyBreaks & justifyLastLine ParagraphStyle attributes; version-->3.2.7
robin
parents: 4181
diff changeset
    85
    if s in ('false','0','no'):
c0e82d246798 add justifyBreaks & justifyLastLine ParagraphStyle attributes; version-->3.2.7
robin
parents: 4181
diff changeset
    86
        return False
c0e82d246798 add justifyBreaks & justifyLastLine ParagraphStyle attributes; version-->3.2.7
robin
parents: 4181
diff changeset
    87
    raise ValueError('cannot convert %r to bool value' % s)
c0e82d246798 add justifyBreaks & justifyLastLine ParagraphStyle attributes; version-->3.2.7
robin
parents: 4181
diff changeset
    88
3434
3c14212cc997 platypus: preliminary working version of % height/width for <img> tag
rgbecker
parents: 3368
diff changeset
    89
def _numpct(s,unit=1,allowRelative=False):
    """Convert s to a length or, when it ends in '%', to a _PCT.

    Plain lengths are handed to _num with the given unit.  For a percentage
    the text before the '%' is converted by _convnum (the unit argument is
    not applied) and wrapped in _PCT.
    """
    if not s.endswith('%'):
        return _num(s,unit,allowRelative)
    percent = _convnum(s[:-1],allowRelative=allowRelative)
    return _PCT(percent)
2857
487dc2450eec reprotlab: inline images horizontal positioning OK
rgbecker
parents: 2836
diff changeset
    94
4315
7c65c6e52b13 fix paragraph splitting bug (reporters Olivia Zhang & Echo Bell); version-->3.3.31
robin <robin@reportlab.com>
parents: 4277
diff changeset
    95
class _PCT(float):
7c65c6e52b13 fix paragraph splitting bug (reporters Olivia Zhang & Echo Bell); version-->3.3.31
robin <robin@reportlab.com>
parents: 4277
diff changeset
    96
    def __new__(cls,v):
7c65c6e52b13 fix paragraph splitting bug (reporters Olivia Zhang & Echo Bell); version-->3.3.31
robin <robin@reportlab.com>
parents: 4277
diff changeset
    97
        self = float.__new__(cls,v*0.01)
7c65c6e52b13 fix paragraph splitting bug (reporters Olivia Zhang & Echo Bell); version-->3.3.31
robin <robin@reportlab.com>
parents: 4277
diff changeset
    98
        self._normalizer = 1.0
7c65c6e52b13 fix paragraph splitting bug (reporters Olivia Zhang & Echo Bell); version-->3.3.31
robin <robin@reportlab.com>
parents: 4277
diff changeset
    99
        self._value = v
7c65c6e52b13 fix paragraph splitting bug (reporters Olivia Zhang & Echo Bell); version-->3.3.31
robin <robin@reportlab.com>
parents: 4277
diff changeset
   100
        return self
2857
487dc2450eec reprotlab: inline images horizontal positioning OK
rgbecker
parents: 2836
diff changeset
   101
487dc2450eec reprotlab: inline images horizontal positioning OK
rgbecker
parents: 2836
diff changeset
   102
    def normalizedValue(self,normalizer):
4315
7c65c6e52b13 fix paragraph splitting bug (reporters Olivia Zhang & Echo Bell); version-->3.3.31
robin <robin@reportlab.com>
parents: 4277
diff changeset
   103
        if not normalizer:
7c65c6e52b13 fix paragraph splitting bug (reporters Olivia Zhang & Echo Bell); version-->3.3.31
robin <robin@reportlab.com>
parents: 4277
diff changeset
   104
            normaliser = self._normalizer
7c65c6e52b13 fix paragraph splitting bug (reporters Olivia Zhang & Echo Bell); version-->3.3.31
robin <robin@reportlab.com>
parents: 4277
diff changeset
   105
        r = _PCT(normalizer*self._value)
7c65c6e52b13 fix paragraph splitting bug (reporters Olivia Zhang & Echo Bell); version-->3.3.31
robin <robin@reportlab.com>
parents: 4277
diff changeset
   106
        r._value = self._value
7c65c6e52b13 fix paragraph splitting bug (reporters Olivia Zhang & Echo Bell); version-->3.3.31
robin <robin@reportlab.com>
parents: 4277
diff changeset
   107
        r._normalizer = normalizer
7c65c6e52b13 fix paragraph splitting bug (reporters Olivia Zhang & Echo Bell); version-->3.3.31
robin <robin@reportlab.com>
parents: 4277
diff changeset
   108
        return r
2857
487dc2450eec reprotlab: inline images horizontal positioning OK
rgbecker
parents: 2836
diff changeset
   109
4255
89ea1d46b4a0 make paraparser syntax errors real and fix <sup/sub> tags to have relative values; version-->3.3.1
robin
parents: 4252
diff changeset
   110
def fontSizeNormalize(frag,attr,default):
    """Evaluate the size stored in frag.<attr> against frag.fontSize.

    Returns default when frag has no such attribute.  Otherwise the
    attribute text is parsed with _numpct (relative values allowed) and
    - a relative value is added to frag.fontSize,
    - a percentage is normalized by frag.fontSize,
    - anything else is returned as parsed.
    """
    if not hasattr(frag,attr):
        return default
    size = _numpct(getattr(frag,attr),allowRelative=True)
    if isinstance(size,tuple):
        # ('relative', delta) from a signed value
        return size[1]+frag.fontSize
    if isinstance(size,_PCT):
        return size.normalizedValue(frag.fontSize)
    return size
89ea1d46b4a0 make paraparser syntax errors real and fix <sup/sub> tags to have relative values; version-->3.3.1
robin
parents: 4252
diff changeset
   114
89ea1d46b4a0 make paraparser syntax errors real and fix <sup/sub> tags to have relative values; version-->3.3.1
robin
parents: 4252
diff changeset
   115
class _CheckSup:
    '''Callable syntax checker for <sup> attribute values.

    kind is the name of the attribute being checked.  Calling the instance
    with a string validates it by running fontSizeNormalize against the
    checker itself (which carries a nominal fontSize of 10); if the check
    succeeds the string is returned unchanged for later evaluation,
    otherwise ValueError is raised.
    '''
    def __init__(self,kind):
        self.kind = kind
        self.fontSize = 10  # nominal size so relative and % values can be evaluated

    def __call__(self,s):
        # store the candidate under the attribute name so that
        # fontSizeNormalize can read it back with getattr
        setattr(self,self.kind,s)
        try:
            fontSizeNormalize(self,self.kind,None)
        except Exception:
            # Exception rather than a bare except: KeyboardInterrupt and
            # SystemExit must not be reported as a bad attribute value
            raise ValueError('<sup> invalid value %r for attribute %s' % (s,self.kind))
        return s
89ea1d46b4a0 make paraparser syntax errors real and fix <sup/sub> tags to have relative values; version-->3.3.1
robin
parents: 4252
diff changeset
   130
2857
487dc2450eec reprotlab: inline images horizontal positioning OK
rgbecker
parents: 2836
diff changeset
   131
def _valignpc(s):
    """Parse a vertical alignment value.

    The text is lower-cased first.  One of the alignment keywords is
    returned as that keyword; a value ending in '%' is returned as a _PCT;
    anything else is parsed as a length by _num.  A relative (signed)
    number is reduced to its plain value in both numeric cases.
    """
    value = s.lower()
    if value in ('baseline','sub','super','top','text-top','middle','bottom','text-bottom'):
        return value
    isPercent = value.endswith('%')
    n = _convnum(value[:-1]) if isPercent else _num(value)
    if isinstance(n,tuple):
        # drop the 'relative' marker, keep the number
        n = n[1]
    return _PCT(n) if isPercent else n
119
b4dc589c8364 <para> tag added in layout.py paraparser.py
rgbecker
parents: 115
diff changeset
   144
2836
66fb84201abe platypus: add support for autoLeading
rgbecker
parents: 2748
diff changeset
   145
def _autoLeading(x):
66fb84201abe platypus: add support for autoLeading
rgbecker
parents: 2748
diff changeset
   146
    x = x.lower()
66fb84201abe platypus: add support for autoLeading
rgbecker
parents: 2748
diff changeset
   147
    if x in ('','min','max','off'):
66fb84201abe platypus: add support for autoLeading
rgbecker
parents: 2748
diff changeset
   148
        return x
66fb84201abe platypus: add support for autoLeading
rgbecker
parents: 2748
diff changeset
   149
    raise ValueError('Invalid autoLeading=%r' % x )
66fb84201abe platypus: add support for autoLeading
rgbecker
parents: 2748
diff changeset
   150
119
b4dc589c8364 <para> tag added in layout.py paraparser.py
rgbecker
parents: 115
diff changeset
   151
def _align(s):
    """Map an alignment name (case-insensitive) to its TA_* constant.

    'left', 'right' and 'justify' are accepted along with both 'centre'
    and 'center'; any other name raises ValueError.
    """
    name = s.lower()
    alignments = {
        'left': TA_LEFT,
        'right': TA_RIGHT,
        'justify': TA_JUSTIFY,
        'centre': TA_CENTER,
        'center': TA_CENTER,
        }
    if name in alignments:
        return alignments[name]
    raise ValueError('illegal alignment %r' % name)
16f067cf3dae added rl_settings.decimalSymbol & support for simple bullet anchoring, version-->3.1.35
robin
parents: 4130
diff changeset
   158
16f067cf3dae added rl_settings.decimalSymbol & support for simple bullet anchoring, version-->3.1.35
robin
parents: 4130
diff changeset
   159
def _bAnchor(s):
16f067cf3dae added rl_settings.decimalSymbol & support for simple bullet anchoring, version-->3.1.35
robin
parents: 4130
diff changeset
   160
    s = s.lower()
16f067cf3dae added rl_settings.decimalSymbol & support for simple bullet anchoring, version-->3.1.35
robin
parents: 4130
diff changeset
   161
    if not s in ('start','middle','end','numeric'):
16f067cf3dae added rl_settings.decimalSymbol & support for simple bullet anchoring, version-->3.1.35
robin
parents: 4130
diff changeset
   162
        raise ValueError('illegal bullet anchor %r' % s)
16f067cf3dae added rl_settings.decimalSymbol & support for simple bullet anchoring, version-->3.1.35
robin
parents: 4130
diff changeset
   163
    return s
119
b4dc589c8364 <para> tag added in layout.py paraparser.py
rgbecker
parents: 115
diff changeset
   164
4220
c0e82d246798 add justifyBreaks & justifyLastLine ParagraphStyle attributes; version-->3.2.7
robin
parents: 4181
diff changeset
   165
def _wordWrapConv(s):
c0e82d246798 add justifyBreaks & justifyLastLine ParagraphStyle attributes; version-->3.2.7
robin
parents: 4181
diff changeset
   166
    s = s.upper().strip()
c0e82d246798 add justifyBreaks & justifyLastLine ParagraphStyle attributes; version-->3.2.7
robin
parents: 4181
diff changeset
   167
    if not s: return None
c0e82d246798 add justifyBreaks & justifyLastLine ParagraphStyle attributes; version-->3.2.7
robin
parents: 4181
diff changeset
   168
    if s not in ('CJK','RTL','LTR'):
c0e82d246798 add justifyBreaks & justifyLastLine ParagraphStyle attributes; version-->3.2.7
robin
parents: 4181
diff changeset
   169
        raise ValueError('cannot convert wordWrap=%r' % s)
c0e82d246798 add justifyBreaks & justifyLastLine ParagraphStyle attributes; version-->3.2.7
robin
parents: 4181
diff changeset
   170
    return s
c0e82d246798 add justifyBreaks & justifyLastLine ParagraphStyle attributes; version-->3.2.7
robin
parents: 4181
diff changeset
   171
c0e82d246798 add justifyBreaks & justifyLastLine ParagraphStyle attributes; version-->3.2.7
robin
parents: 4181
diff changeset
   172
def _textTransformConv(s):
c0e82d246798 add justifyBreaks & justifyLastLine ParagraphStyle attributes; version-->3.2.7
robin
parents: 4181
diff changeset
   173
    s = s.lower().strip()
c0e82d246798 add justifyBreaks & justifyLastLine ParagraphStyle attributes; version-->3.2.7
robin
parents: 4181
diff changeset
   174
    if not s: return None
c0e82d246798 add justifyBreaks & justifyLastLine ParagraphStyle attributes; version-->3.2.7
robin
parents: 4181
diff changeset
   175
    if s not in ('uppercase','lowercase','capitalize','none'):
c0e82d246798 add justifyBreaks & justifyLastLine ParagraphStyle attributes; version-->3.2.7
robin
parents: 4181
diff changeset
   176
        raise ValueError('cannot convert wordWrap=%r' % s)
c0e82d246798 add justifyBreaks & justifyLastLine ParagraphStyle attributes; version-->3.2.7
robin
parents: 4181
diff changeset
   177
    return s
c0e82d246798 add justifyBreaks & justifyLastLine ParagraphStyle attributes; version-->3.2.7
robin
parents: 4181
diff changeset
   178
119
b4dc589c8364 <para> tag added in layout.py paraparser.py
rgbecker
parents: 115
diff changeset
   179
_paraAttrMap = {'font': ('fontName', None),
1677
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   180
                'face': ('fontName', None),
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   181
                'fontsize': ('fontSize', _num),
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   182
                'size': ('fontSize', _num),
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   183
                'leading': ('leading', _num),
2836
66fb84201abe platypus: add support for autoLeading
rgbecker
parents: 2748
diff changeset
   184
                'autoleading': ('autoLeading', _autoLeading),
1677
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   185
                'lindent': ('leftIndent', _num),
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   186
                'rindent': ('rightIndent', _num),
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   187
                'findent': ('firstLineIndent', _num),
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   188
                'align': ('alignment', _align),
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   189
                'spaceb': ('spaceBefore', _num),
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   190
                'spacea': ('spaceAfter', _num),
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   191
                'bfont': ('bulletFontName', None),
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   192
                'bfontsize': ('bulletFontSize',_num),
2860
3f14d66194c2 platypus: added bulletOffsetY inspired by haraldarminmassa@gmail.com
rgbecker
parents: 2857
diff changeset
   193
                'boffsety': ('bulletOffsetY',_num),
1677
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   194
                'bindent': ('bulletIndent',_num),
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   195
                'bcolor': ('bulletColor',toColor),
4136
16f067cf3dae added rl_settings.decimalSymbol & support for simple bullet anchoring, version-->3.1.35
robin
parents: 4130
diff changeset
   196
                'banchor': ('bulletAnchor',_bAnchor),
1677
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   197
                'color':('textColor',toColor),
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   198
                'backcolor':('backColor',toColor),
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   199
                'bgcolor':('backColor',toColor),
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   200
                'bg':('backColor',toColor),
1940
baa0abc136c4 Henning von Bargen's caseSensitive flag
rgbecker
parents: 1932
diff changeset
   201
                'fg': ('textColor',toColor),
4220
c0e82d246798 add justifyBreaks & justifyLastLine ParagraphStyle attributes; version-->3.2.7
robin
parents: 4181
diff changeset
   202
                'justifybreaks': ('justifyBreaks',_bool),
c0e82d246798 add justifyBreaks & justifyLastLine ParagraphStyle attributes; version-->3.2.7
robin
parents: 4181
diff changeset
   203
                'justifylastline': ('justifyLastLine',_int),
c0e82d246798 add justifyBreaks & justifyLastLine ParagraphStyle attributes; version-->3.2.7
robin
parents: 4181
diff changeset
   204
                'wordwrap': ('wordWrap',_wordWrapConv),
c0e82d246798 add justifyBreaks & justifyLastLine ParagraphStyle attributes; version-->3.2.7
robin
parents: 4181
diff changeset
   205
                'allowwidows': ('allowWidows',_bool),
c0e82d246798 add justifyBreaks & justifyLastLine ParagraphStyle attributes; version-->3.2.7
robin
parents: 4181
diff changeset
   206
                'alloworphans': ('allowOrphans',_bool),
c0e82d246798 add justifyBreaks & justifyLastLine ParagraphStyle attributes; version-->3.2.7
robin
parents: 4181
diff changeset
   207
                'splitlongwords': ('splitLongWords',_bool),
c0e82d246798 add justifyBreaks & justifyLastLine ParagraphStyle attributes; version-->3.2.7
robin
parents: 4181
diff changeset
   208
                'borderwidth': ('borderWidth',_num),
c0e82d246798 add justifyBreaks & justifyLastLine ParagraphStyle attributes; version-->3.2.7
robin
parents: 4181
diff changeset
   209
                'borderpadding': ('borderpadding',_num),
c0e82d246798 add justifyBreaks & justifyLastLine ParagraphStyle attributes; version-->3.2.7
robin
parents: 4181
diff changeset
   210
                'bordercolor': ('borderColor',toColor),
c0e82d246798 add justifyBreaks & justifyLastLine ParagraphStyle attributes; version-->3.2.7
robin
parents: 4181
diff changeset
   211
                'borderradius': ('borderRadius',_num),
c0e82d246798 add justifyBreaks & justifyLastLine ParagraphStyle attributes; version-->3.2.7
robin
parents: 4181
diff changeset
   212
                'texttransform':('textTransform',_textTransformConv),
c0e82d246798 add justifyBreaks & justifyLastLine ParagraphStyle attributes; version-->3.2.7
robin
parents: 4181
diff changeset
   213
                'enddots':('endDots',None),
c0e82d246798 add justifyBreaks & justifyLastLine ParagraphStyle attributes; version-->3.2.7
robin
parents: 4181
diff changeset
   214
                'underlineproportion':('underlineProportion',_num),
4277
838129322a55 really merge para-measure-fix; version-->3.3.11
robin
parents: 4255
diff changeset
   215
                'spaceshrinkage':('spaceShrinkage',_num),
1940
baa0abc136c4 Henning von Bargen's caseSensitive flag
rgbecker
parents: 1932
diff changeset
   216
                }
119
b4dc589c8364 <para> tag added in layout.py paraparser.py
rgbecker
parents: 115
diff changeset
   217
250
a1bcf9c6c21e <bullet> xml tag added
rgbecker
parents: 248
diff changeset
   218
#things which are valid bullet attributes
#each key is an accepted spelling; each value is a 2-tuple whose first item
#is the target attribute name (second item presumably a converter or None;
#the consumer is outside this excerpt)
_bulletAttrMap = {
    #font name
    'font': ('bulletFontName', None),
    'face': ('bulletFontName', None),
    #font size
    'size': ('bulletFontSize', _num),
    'fontsize': ('bulletFontSize', _num),
    #placement
    'offsety': ('bulletOffsetY', _num),
    'indent': ('bulletIndent', _num),
    #colour
    'color': ('bulletColor', toColor),
    'fg': ('bulletColor', toColor),
    #anchoring
    'anchor': ('bulletAnchor', _bAnchor),
}
250
a1bcf9c6c21e <bullet> xml tag added
rgbecker
parents: 248
diff changeset
   229
119
b4dc589c8364 <para> tag added in layout.py paraparser.py
rgbecker
parents: 115
diff changeset
   230
#things which are valid font attributes
b4dc589c8364 <para> tag added in layout.py paraparser.py
rgbecker
parents: 115
diff changeset
   231
_fontAttrMap = {
    #size and font name ('face' and 'name' are two spellings of one target)
    'size': ('fontSize', _num),
    'face': ('fontName', None),
    'name': ('fontName', None),
    #foreground colour
    'fg': ('textColor', toColor),
    'color': ('textColor', toColor),
    #background colour
    'backcolor': ('backColor', toColor),
    'bgcolor': ('backColor', toColor),
}
3552
20ecbcc53c15 paraparser.py add support for <span style=stylename>
rgbecker
parents: 3440
diff changeset
   239
#things which are valid span attributes
20ecbcc53c15 paraparser.py add support for <span style=stylename>
rgbecker
parents: 3440
diff changeset
   240
_spanAttrMap = {
    #size and font name ('face' and 'name' are two spellings of one target)
    'size': ('fontSize', _num),
    'face': ('fontName', None),
    'name': ('fontName', None),
    #foreground colour
    'fg': ('textColor', toColor),
    'color': ('textColor', toColor),
    #background colour
    'backcolor': ('backColor', toColor),
    'bgcolor': ('backColor', toColor),
    #the one entry this map has beyond the font map's set
    'style': ('style', None),
}
2575
0cba68b93555 reportlab-utf8 moved to trunk
rgbecker
parents: 2446
diff changeset
   249
#things which are valid link attributes
0cba68b93555 reportlab-utf8 moved to trunk
rgbecker
parents: 2446
diff changeset
   250
_linkAttrMap = {
    #size and font name ('face' and 'name' are two spellings of one target)
    'size': ('fontSize', _num),
    'face': ('fontName', None),
    'name': ('fontName', None),
    #foreground colour
    'fg': ('textColor', toColor),
    'color': ('textColor', toColor),
    #background colour
    'backcolor': ('backColor', toColor),
    'bgcolor': ('backColor', toColor),
    #four spellings that all feed the single 'link' target
    'dest': ('link', None),
    'destination': ('link', None),
    'target': ('link', None),
    'href': ('link', None),
}
2744
9472eedb9702 reportlab/platypus: add two way <a> tag
rgbecker
parents: 2742
diff changeset
   262
#things which are valid anchor attributes
#unlike the maps above, the font keys here are spelt in camelCase and
#'name'/'href' map to targets of the same name
_anchorAttrMap = {
    'fontSize': ('fontSize', _num),
    'fontName': ('fontName', None),
    'name': ('name', None),
    #foreground colour
    'fg': ('textColor', toColor),
    'color': ('textColor', toColor),
    #background colour
    'backcolor': ('backColor', toColor),
    'bgcolor': ('backColor', toColor),
    'href': ('href', None),
}
2857
487dc2450eec reprotlab: inline images horizontal positioning OK
rgbecker
parents: 2836
diff changeset
   271
#things which are valid img attributes; every key maps to a target of the
#same name
_imgAttrMap = {
    'src': ('src', None),
    #both dimensions share _numpct
    'width': ('width', _numpct),
    'height': ('height', _numpct),
    'valign': ('valign', _valignpc),
}
3165
cbda9e7d0ee3 reportlab: new index support
rgbecker
parents: 3137
diff changeset
   277
#things which are valid index attributes; every key maps to a target of the
#same name and the second item is always None
_indexAttrMap = {
    'name': ('name', None),
    'item': ('item', None),
    'offset': ('offset', None),
    'format': ('format', None),
}
4249
8fc7d11bdee0 add support for rml <sup>/<sub> rise and size attributes; version --> 3.2.17
robin
parents: 4220
diff changeset
   283
#things which are valid sup attributes; each entry gets its own _CheckSup
#instance built with the attribute's own name
_supAttrMap = {
    'rise': ('supr', _CheckSup('rise')),
    'size': ('sups', _CheckSup('size')),
}
119
b4dc589c8364 <para> tag added in layout.py paraparser.py
rgbecker
parents: 115
diff changeset
   287
b4dc589c8364 <para> tag added in layout.py paraparser.py
rgbecker
parents: 115
diff changeset
   288
def _addAttributeNames(m):
    '''Extend the attribute map m in place with alias keys.

    For every entry present in m when the call starts, the first item of
    its value and the lower-cased form of that item are added as extra keys
    referring to the very same value object.  A key that already exists is
    never overwritten.  Returns None.
    '''
    #snapshot the values first: m grows while we walk it
    for entry in list(m.values()):
        name = entry[0]
        #setdefault only inserts when the key is missing
        m.setdefault(name, entry)
        m.setdefault(name.lower(), entry)
119
b4dc589c8364 <para> tag added in layout.py paraparser.py
rgbecker
parents: 115
diff changeset
   295
b4dc589c8364 <para> tag added in layout.py paraparser.py
rgbecker
parents: 115
diff changeset
   296
#add the attribute-name aliases to each map that takes them, in the
#original order
for _aliasTargetMap in (
        _paraAttrMap,
        _fontAttrMap,
        _spanAttrMap,
        _bulletAttrMap,
        _anchorAttrMap,
        _linkAttrMap,
        ):
    _addAttributeNames(_aliasTargetMap)
del _aliasTargetMap     #keep the loop variable out of the module namespace
119
b4dc589c8364 <para> tag added in layout.py paraparser.py
rgbecker
parents: 115
diff changeset
   302
b4dc589c8364 <para> tag added in layout.py paraparser.py
rgbecker
parents: 115
diff changeset
   303
def _applyAttributes(obj, attr):
    '''Set every key/value pair of the mapping attr as an attribute on obj.

    A list or tuple value whose first item is 'relative' is treated as an
    offset: its second item is added to obj's current value for that
    attribute when obj already has one, otherwise the second item is used
    as is.  Any other value, including an empty list or tuple, is assigned
    unchanged.
    '''
    for k, v in attr.items():
        #the emptiness test stops v[0] raising IndexError for () or []
        if isinstance(v,(list,tuple)) and v and v[0]=='relative':
            delta = v[1]
            v = delta+getattr(obj,k) if hasattr(obj,k) else delta
        setattr(obj,k,v)
102
1818e7fa3738 Added clone method to ParaFrag
rgbecker
parents: 96
diff changeset
   311
1931
784fce255e2d Added in more special entities as suggested by Christoph Zwerschke
rgbecker
parents: 1736
diff changeset
   312
#Named character entities intended to be supported from the special font
2200
be0cfccc662a Fixed up tabs and whitespace in all source files
andy_robinson
parents: 2053
diff changeset
   313
#with additions suggested by Christoph Zwerschke who also suggested the
1931
784fce255e2d Added in more special entities as suggested by Christoph Zwerschke
rgbecker
parents: 1736
diff changeset
   314
#numeric entity names that follow.
96
2a9cca4c5cf0 Beginnings of a paragraph parser
rgbecker
parents:
diff changeset
   315
greeks = {
3957
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   316
    'Aacute': u'\xc1',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   317
    'aacute': u'\xe1',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   318
    'Acirc': u'\xc2',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   319
    'acirc': u'\xe2',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   320
    'acute': u'\xb4',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   321
    'AElig': u'\xc6',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   322
    'aelig': u'\xe6',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   323
    'Agrave': u'\xc0',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   324
    'agrave': u'\xe0',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   325
    'alefsym': u'\u2135',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   326
    'Alpha': u'\u0391',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   327
    'alpha': u'\u03b1',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   328
    'and': u'\u2227',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   329
    'ang': u'\u2220',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   330
    'Aring': u'\xc5',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   331
    'aring': u'\xe5',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   332
    'asymp': u'\u2248',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   333
    'Atilde': u'\xc3',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   334
    'atilde': u'\xe3',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   335
    'Auml': u'\xc4',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   336
    'auml': u'\xe4',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   337
    'bdquo': u'\u201e',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   338
    'Beta': u'\u0392',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   339
    'beta': u'\u03b2',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   340
    'brvbar': u'\xa6',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   341
    'bull': u'\u2022',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   342
    'cap': u'\u2229',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   343
    'Ccedil': u'\xc7',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   344
    'ccedil': u'\xe7',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   345
    'cedil': u'\xb8',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   346
    'cent': u'\xa2',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   347
    'Chi': u'\u03a7',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   348
    'chi': u'\u03c7',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   349
    'circ': u'\u02c6',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   350
    'clubs': u'\u2663',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   351
    'cong': u'\u2245',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   352
    'copy': u'\xa9',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   353
    'crarr': u'\u21b5',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   354
    'cup': u'\u222a',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   355
    'curren': u'\xa4',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   356
    'dagger': u'\u2020',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   357
    'Dagger': u'\u2021',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   358
    'darr': u'\u2193',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   359
    'dArr': u'\u21d3',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   360
    'deg': u'\xb0',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   361
    'delta': u'\u03b4',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   362
    'Delta': u'\u2206',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   363
    'diams': u'\u2666',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   364
    'divide': u'\xf7',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   365
    'Eacute': u'\xc9',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   366
    'eacute': u'\xe9',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   367
    'Ecirc': u'\xca',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   368
    'ecirc': u'\xea',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   369
    'Egrave': u'\xc8',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   370
    'egrave': u'\xe8',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   371
    'empty': u'\u2205',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   372
    'emsp': u'\u2003',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   373
    'ensp': u'\u2002',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   374
    'Epsilon': u'\u0395',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   375
    'epsilon': u'\u03b5',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   376
    'epsiv': u'\u03b5',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   377
    'equiv': u'\u2261',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   378
    'Eta': u'\u0397',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   379
    'eta': u'\u03b7',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   380
    'ETH': u'\xd0',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   381
    'eth': u'\xf0',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   382
    'Euml': u'\xcb',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   383
    'euml': u'\xeb',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   384
    'euro': u'\u20ac',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   385
    'exist': u'\u2203',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   386
    'fnof': u'\u0192',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   387
    'forall': u'\u2200',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   388
    'frac12': u'\xbd',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   389
    'frac14': u'\xbc',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   390
    'frac34': u'\xbe',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   391
    'frasl': u'\u2044',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   392
    'Gamma': u'\u0393',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   393
    'gamma': u'\u03b3',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   394
    'ge': u'\u2265',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   395
    'harr': u'\u2194',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   396
    'hArr': u'\u21d4',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   397
    'hearts': u'\u2665',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   398
    'hellip': u'\u2026',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   399
    'Iacute': u'\xcd',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   400
    'iacute': u'\xed',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   401
    'Icirc': u'\xce',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   402
    'icirc': u'\xee',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   403
    'iexcl': u'\xa1',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   404
    'Igrave': u'\xcc',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   405
    'igrave': u'\xec',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   406
    'image': u'\u2111',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   407
    'infin': u'\u221e',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   408
    'int': u'\u222b',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   409
    'Iota': u'\u0399',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   410
    'iota': u'\u03b9',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   411
    'iquest': u'\xbf',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   412
    'isin': u'\u2208',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   413
    'Iuml': u'\xcf',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   414
    'iuml': u'\xef',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   415
    'Kappa': u'\u039a',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   416
    'kappa': u'\u03ba',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   417
    'Lambda': u'\u039b',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   418
    'lambda': u'\u03bb',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   419
    'lang': u'\u2329',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   420
    'laquo': u'\xab',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   421
    'larr': u'\u2190',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   422
    'lArr': u'\u21d0',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   423
    'lceil': u'\uf8ee',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   424
    'ldquo': u'\u201c',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   425
    'le': u'\u2264',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   426
    'lfloor': u'\uf8f0',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   427
    'lowast': u'\u2217',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   428
    'loz': u'\u25ca',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   429
    'lrm': u'\u200e',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   430
    'lsaquo': u'\u2039',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   431
    'lsquo': u'\u2018',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   432
    'macr': u'\xaf',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   433
    'mdash': u'\u2014',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   434
    'micro': u'\xb5',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   435
    'middot': u'\xb7',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   436
    'minus': u'\u2212',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   437
    'mu': u'\xb5',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   438
    'Mu': u'\u039c',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   439
    'nabla': u'\u2207',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   440
    'nbsp': u'\xa0',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   441
    'ndash': u'\u2013',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   442
    'ne': u'\u2260',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   443
    'ni': u'\u220b',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   444
    'notin': u'\u2209',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   445
    'not': u'\xac',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   446
    'nsub': u'\u2284',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   447
    'Ntilde': u'\xd1',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   448
    'ntilde': u'\xf1',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   449
    'Nu': u'\u039d',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   450
    'nu': u'\u03bd',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   451
    'Oacute': u'\xd3',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   452
    'oacute': u'\xf3',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   453
    'Ocirc': u'\xd4',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   454
    'ocirc': u'\xf4',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   455
    'OElig': u'\u0152',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   456
    'oelig': u'\u0153',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   457
    'Ograve': u'\xd2',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   458
    'ograve': u'\xf2',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   459
    'oline': u'\uf8e5',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   460
    'omega': u'\u03c9',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   461
    'Omega': u'\u2126',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   462
    'Omicron': u'\u039f',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   463
    'omicron': u'\u03bf',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   464
    'oplus': u'\u2295',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   465
    'ordf': u'\xaa',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   466
    'ordm': u'\xba',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   467
    'or': u'\u2228',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   468
    'Oslash': u'\xd8',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   469
    'oslash': u'\xf8',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   470
    'Otilde': u'\xd5',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   471
    'otilde': u'\xf5',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   472
    'otimes': u'\u2297',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   473
    'Ouml': u'\xd6',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   474
    'ouml': u'\xf6',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   475
    'para': u'\xb6',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   476
    'part': u'\u2202',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   477
    'permil': u'\u2030',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   478
    'perp': u'\u22a5',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   479
    'phis': u'\u03c6',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   480
    'Phi': u'\u03a6',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   481
    'phi': u'\u03d5',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   482
    'piv': u'\u03d6',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   483
    'Pi': u'\u03a0',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   484
    'pi': u'\u03c0',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   485
    'plusmn': u'\xb1',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   486
    'pound': u'\xa3',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   487
    'prime': u'\u2032',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   488
    'Prime': u'\u2033',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   489
    'prod': u'\u220f',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   490
    'prop': u'\u221d',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   491
    'Psi': u'\u03a8',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   492
    'psi': u'\u03c8',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   493
    'radic': u'\u221a',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   494
    'rang': u'\u232a',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   495
    'raquo': u'\xbb',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   496
    'rarr': u'\u2192',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   497
    'rArr': u'\u21d2',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   498
    'rceil': u'\uf8f9',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   499
    'rdquo': u'\u201d',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   500
    'real': u'\u211c',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   501
    'reg': u'\xae',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   502
    'rfloor': u'\uf8fb',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   503
    'Rho': u'\u03a1',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   504
    'rho': u'\u03c1',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   505
    'rlm': u'\u200f',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   506
    'rsaquo': u'\u203a',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   507
    'rsquo': u'\u2019',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   508
    'sbquo': u'\u201a',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   509
    'Scaron': u'\u0160',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   510
    'scaron': u'\u0161',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   511
    'sdot': u'\u22c5',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   512
    'sect': u'\xa7',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   513
    'shy': u'\xad',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   514
    'sigmaf': u'\u03c2',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   515
    'sigmav': u'\u03c2',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   516
    'Sigma': u'\u03a3',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   517
    'sigma': u'\u03c3',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   518
    'sim': u'\u223c',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   519
    'spades': u'\u2660',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   520
    'sube': u'\u2286',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   521
    'sub': u'\u2282',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   522
    'sum': u'\u2211',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   523
    'sup1': u'\xb9',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   524
    'sup2': u'\xb2',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   525
    'sup3': u'\xb3',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   526
    'supe': u'\u2287',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   527
    'sup': u'\u2283',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   528
    'szlig': u'\xdf',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   529
    'Tau': u'\u03a4',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   530
    'tau': u'\u03c4',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   531
    'there4': u'\u2234',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   532
    'thetasym': u'\u03d1',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   533
    'thetav': u'\u03d1',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   534
    'Theta': u'\u0398',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   535
    'theta': u'\u03b8',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   536
    'thinsp': u'\u2009',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   537
    'THORN': u'\xde',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   538
    'thorn': u'\xfe',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   539
    'tilde': u'\u02dc',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   540
    'times': u'\xd7',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   541
    'trade': u'\uf8ea',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   542
    'Uacute': u'\xda',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   543
    'uacute': u'\xfa',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   544
    'uarr': u'\u2191',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   545
    'uArr': u'\u21d1',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   546
    'Ucirc': u'\xdb',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   547
    'ucirc': u'\xfb',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   548
    'Ugrave': u'\xd9',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   549
    'ugrave': u'\xf9',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   550
    'uml': u'\xa8',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   551
    'upsih': u'\u03d2',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   552
    'Upsilon': u'\u03a5',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   553
    'upsilon': u'\u03c5',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   554
    'Uuml': u'\xdc',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   555
    'uuml': u'\xfc',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   556
    'weierp': u'\u2118',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   557
    'Xi': u'\u039e',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   558
    'xi': u'\u03be',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   559
    'Yacute': u'\xdd',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   560
    'yacute': u'\xfd',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   561
    'yen': u'\xa5',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   562
    'yuml': u'\xff',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   563
    'Yuml': u'\u0178',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   564
    'Zeta': u'\u0396',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   565
    'zeta': u'\u03b6',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   566
    'zwj': u'\u200d',
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   567
    'zwnj': u'\u200c',
1931
784fce255e2d Added in more special entities as suggested by Christoph Zwerschke
rgbecker
parents: 1736
diff changeset
   568
    }
96
2a9cca4c5cf0 Beginnings of a paragraph parser
rgbecker
parents:
diff changeset
   569
4004
ebd460ef8d2f rename utils.py UniChr --> uniChr
robin
parents: 3994
diff changeset
   570
# Seed the entity table from name2codepoint, turning each code point into
# its character with uniChr.
known_entities = {_name: uniChr(_cp) for _name, _cp in name2codepoint.items()}
# greeks only fills gaps: an entity already present keeps its value.
for _name in greeks:
    if _name in known_entities:
        continue
    known_entities[_name] = greeks[_name]
# Register every entity a second time under the converted form of its name
# (asBytes on Python 3, asUnicode otherwise) so either key type can be used.
_alt = asBytes if isPy3 else asUnicode
for _name in tuple(known_entities):
    known_entities[_alt(_name)] = known_entities[_name]
# keep the module namespace free of the temporaries used above
del _name, _alt
3957
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
   579
96
2a9cca4c5cf0 Beginnings of a paragraph parser
rgbecker
parents:
diff changeset
   580
#------------------------------------------------------------------------
518
5be3fcb26c78 Semantic Name changes
rgbecker
parents: 514
diff changeset
   581
class ParaFrag(ABag):
    """Intermediate representation of a string segment while it is being
    parsed by the ParaParser.

    The class adds nothing to ABag; it is used as an attribute container.
    Attributes listed by the original documentation:
    fontname, fontSize, rise, textColor, cbDefn
    """
96
2a9cca4c5cf0 Beginnings of a paragraph parser
rgbecker
parents:
diff changeset
   586
2584
0fed2bd8ef90 reportlab: fixed <greek> added <unichar [name=..|code=../> to paragraph
rgbecker
parents: 2575
diff changeset
   587
# lazily built lookup table used by _greekConvert; None until the first call
_greek2Utf8=None
def _greekConvert(data):
    """Translate every character of data through the 'symbol' codec table.

    The lookup table is built on the first call and cached in the module
    level _greek2Utf8.  It starts as an identity mapping for codes 32-255,
    marks codes 0-31 as unmapped and is then overridden by the first element
    of the 'symbol' entry in RL_Codecs' codec data (presumably its decoding
    map).  Unmapped codes translate to '\\0'.

    Returns the joined translations; a character that has no entry in the
    table raises KeyError.
    """
    global _greek2Utf8
    if not _greek2Utf8:
        import codecs
        from reportlab.pdfbase.rl_codecs import RL_Codecs
        #our decoding map: identity for 32..255, nothing for the control codes
        decoding = codecs.make_identity_dict(range(32,256))
        decoding.update(dict.fromkeys(range(0,32)))
        decoding.update(RL_Codecs._RL_Codecs__rl_codecs_data['symbol'][0])
        _greek2Utf8 = {}
        for code, target in decoding.items():
            if target:
                # text on Python 3, utf8 encoded bytes otherwise
                converted = chr(target) if isPy3 else unichr(target).encode('utf8')
            else:
                converted = '\0'
            _greek2Utf8[chr(code)] = converted
    return ''.join([_greek2Utf8[ch] for ch in data])
0fed2bd8ef90 reportlab: fixed <greek> added <unichar [name=..|code=../> to paragraph
rgbecker
parents: 2575
diff changeset
   609
96
2a9cca4c5cf0 Beginnings of a paragraph parser
rgbecker
parents:
diff changeset
   610
#------------------------------------------------------------------
267
52a348f6c4c3 noted replication of XML markup comment between paraparser.py and paragraph.py
aaron_watters
parents: 266
diff changeset
   611
# !!! NOTE !!! THIS TEXT IS NOW REPLICATED IN PARAGRAPH.PY !!!
2584
0fed2bd8ef90 reportlab: fixed <greek> added <unichar [name=..|code=../> to paragraph
rgbecker
parents: 2575
diff changeset
   612
# The ParaFormatter will be able to format the following
96
2a9cca4c5cf0 Beginnings of a paragraph parser
rgbecker
parents:
diff changeset
   613
# tags:
1677
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   614
#       < b > < /b > - bold
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   615
#       < i > < /i > - italics
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   616
#       < u > < /u > - underline
2644
e762ad1c8909 reportlab: add support for strike through
rgbecker
parents: 2594
diff changeset
   617
#       < strike > < /strike > - strike through
4249
8fc7d11bdee0 add support for rml <sup>/<sub> rise and size attributes; version --> 3.2.17
robin
parents: 4220
diff changeset
   618
#       < super [size="pts"] [rise="pts"]> < /super > - superscript
8fc7d11bdee0 add support for rml <sup>/<sub> rise and size attributes; version --> 3.2.17
robin
parents: 4220
diff changeset
   619
#       < sup [size="pts"] [rise="pts"]> < /sup > - superscript
8fc7d11bdee0 add support for rml <sup>/<sub> rise and size attributes; version --> 3.2.17
robin
parents: 4220
diff changeset
   620
#       < sub [size="pts"] [rise="pts"]> < /sub > - subscript
1677
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   621
#       <font name=fontfamily/fontname color=colorname size=float>
3552
20ecbcc53c15 paraparser.py add support for <span style=stylename>
rgbecker
parents: 3440
diff changeset
   622
#        <span name=fontfamily/fontname color=colorname backcolor=colorname size=float style=stylename>
1677
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   623
#       < bullet > </bullet> - bullet text (at head of para only)
3165
cbda9e7d0ee3 reportlab: new index support
rgbecker
parents: 3137
diff changeset
   624
#       <onDraw name=callable label="a label"/>
cbda9e7d0ee3 reportlab: new index support
rgbecker
parents: 3137
diff changeset
   625
#       <index [name="callablecanvasattribute"] label="a label"/>
2670
3fdd642a7b76 minor cosmetic changes
rgbecker
parents: 2664
diff changeset
   626
#       <link>link text</link>
3fdd642a7b76 minor cosmetic changes
rgbecker
parents: 2664
diff changeset
   627
#           attributes of links 
3fdd642a7b76 minor cosmetic changes
rgbecker
parents: 2664
diff changeset
   628
#               size/fontSize=num
3fdd642a7b76 minor cosmetic changes
rgbecker
parents: 2664
diff changeset
   629
#               name/face/fontName=name
3fdd642a7b76 minor cosmetic changes
rgbecker
parents: 2664
diff changeset
   630
#               fg/textColor/color=color
3fdd642a7b76 minor cosmetic changes
rgbecker
parents: 2664
diff changeset
   631
#               backcolor/backColor/bgcolor=color
3fdd642a7b76 minor cosmetic changes
rgbecker
parents: 2664
diff changeset
   632
#               dest/destination/target/href/link=target
2745
0b44535fa1a5 paraparser/paragraph.py: fix comments
rgbecker
parents: 2744
diff changeset
   633
#       <a>anchor text</a>
2744
9472eedb9702 reportlab/platypus: add two way <a> tag
rgbecker
parents: 2742
diff changeset
   634
#           attributes of anchors 
9472eedb9702 reportlab/platypus: add two way <a> tag
rgbecker
parents: 2742
diff changeset
   635
#               fontSize=num
9472eedb9702 reportlab/platypus: add two way <a> tag
rgbecker
parents: 2742
diff changeset
   636
#               fontName=name
9472eedb9702 reportlab/platypus: add two way <a> tag
rgbecker
parents: 2742
diff changeset
   637
#               fg/textColor/color=color
9472eedb9702 reportlab/platypus: add two way <a> tag
rgbecker
parents: 2742
diff changeset
   638
#               backcolor/backColor/bgcolor=color
9472eedb9702 reportlab/platypus: add two way <a> tag
rgbecker
parents: 2742
diff changeset
   639
#               href=href
9472eedb9702 reportlab/platypus: add two way <a> tag
rgbecker
parents: 2742
diff changeset
   640
#       <a name="anchorpoint"/>
2584
0fed2bd8ef90 reportlab: fixed <greek> added <unichar [name=..|code=../> to paragraph
rgbecker
parents: 2575
diff changeset
   641
#       <unichar name="unicode character name"/>
0fed2bd8ef90 reportlab: fixed <greek> added <unichar [name=..|code=../> to paragraph
rgbecker
parents: 2575
diff changeset
   642
#       <unichar code="unicode code point"/>
2857
487dc2450eec reprotlab: inline images horizontal positioning OK
rgbecker
parents: 2836
diff changeset
   643
#       <img src="path" width="1in" height="1in" valign="bottom"/>
3440
739ddbe7feab paaraparser/paragraph.py: add info re percentage in <img> for idea contributed by Roberto Alsina <ralsina@netmanagers.com.ar>
rgbecker
parents: 3434
diff changeset
   644
#               width="w%" --> fontSize*w/100   idea from Roberto Alsina
739ddbe7feab paaraparser/paragraph.py: add info re percentage in <img> for idea contributed by Roberto Alsina <ralsina@netmanagers.com.ar>
rgbecker
parents: 3434
diff changeset
   645
#               height="h%" --> linewidth*h/100 <ralsina@netmanagers.com.ar>
2584
0fed2bd8ef90 reportlab: fixed <greek> added <unichar [name=..|code=../> to paragraph
rgbecker
parents: 2575
diff changeset
   646
#       <greek> - </greek>
1683
7fa753e4420a Removed all trailing whitespace
andy_robinson
parents: 1677
diff changeset
   647
#
1677
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   648
#       The whole may be surrounded by <para> </para> tags
119
b4dc589c8364 <para> tag added in layout.py paraparser.py
rgbecker
parents: 115
diff changeset
   649
#
96
2a9cca4c5cf0 Beginnings of a paragraph parser
rgbecker
parents:
diff changeset
   650
# It will also be able to handle any MathML specified Greek characters.
2a9cca4c5cf0 Beginnings of a paragraph parser
rgbecker
parents:
diff changeset
   651
#------------------------------------------------------------------
3954
44dbe56eb858 first attempt at HTMLParser-based paraparser
Andy Robinson <andy@reportlab.com>
parents: 3937
diff changeset
   652
class ParaParser(HTMLParser):
96
2a9cca4c5cf0 Beginnings of a paragraph parser
rgbecker
parents:
diff changeset
   653
1677
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   654
    #----------------------------------------------------------
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   655
    # First we will define all of the xml tag handler functions.
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   656
    #
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   657
    # start_<tag>(attributes)
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   658
    # end_<tag>()
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   659
    #
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   660
    # While parsing the xml ParaFormatter will call these
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   661
    # functions to handle the string formatting tags.
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   662
    # At the start of each tag the corresponding field will
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   663
    # be set to 1 and at the end tag the corresponding field will
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   664
    # be set to 0.  Then when handle_data is called the options
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   665
    # for that data will be apparent by the current settings.
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   666
    #----------------------------------------------------------
96
2a9cca4c5cf0 Beginnings of a paragraph parser
rgbecker
parents:
diff changeset
   667
1940
baa0abc136c4 Henning von Bargen's caseSensitive flag
rgbecker
parents: 1932
diff changeset
   668
    def __getattr__( self, attrName ):
baa0abc136c4 Henning von Bargen's caseSensitive flag
rgbecker
parents: 1932
diff changeset
   669
        """This way we can handle <TAG> the same way as <tag> (ignoring case)."""
2369
f3cc620c14ed paraparser.py: minor speedup
rgbecker
parents: 2368
diff changeset
   670
        if attrName!=attrName.lower() and attrName!="caseSensitive" and not self.caseSensitive and \
f3cc620c14ed paraparser.py: minor speedup
rgbecker
parents: 2368
diff changeset
   671
            (attrName.startswith("start_") or attrName.startswith("end_")):
f3cc620c14ed paraparser.py: minor speedup
rgbecker
parents: 2368
diff changeset
   672
                return getattr(self,attrName.lower())
3721
0c93dd8ff567 initial changes from 2to3-3.3
rptlab
parents: 3656
diff changeset
   673
        raise AttributeError(attrName)
1940
baa0abc136c4 Henning von Bargen's caseSensitive flag
rgbecker
parents: 1932
diff changeset
   674
1677
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   675
    #### bold
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   676
    def start_b( self, attributes ):
4097
1c2ebf285cb7 paraparser.py: force assertion of tag rather than value checking in end_<tag>
robin
parents: 4077
diff changeset
   677
        self._push('b',bold=1)
96
2a9cca4c5cf0 Beginnings of a paragraph parser
rgbecker
parents:
diff changeset
   678
1677
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   679
    def end_b( self ):
4097
1c2ebf285cb7 paraparser.py: force assertion of tag rather than value checking in end_<tag>
robin
parents: 4077
diff changeset
   680
        self._pop('b')
96
2a9cca4c5cf0 Beginnings of a paragraph parser
rgbecker
parents:
diff changeset
   681
1940
baa0abc136c4 Henning von Bargen's caseSensitive flag
rgbecker
parents: 1932
diff changeset
   682
    def start_strong( self, attributes ):
4097
1c2ebf285cb7 paraparser.py: force assertion of tag rather than value checking in end_<tag>
robin
parents: 4077
diff changeset
   683
        self._push('strong',bold=1)
1940
baa0abc136c4 Henning von Bargen's caseSensitive flag
rgbecker
parents: 1932
diff changeset
   684
baa0abc136c4 Henning von Bargen's caseSensitive flag
rgbecker
parents: 1932
diff changeset
   685
    def end_strong( self ):
4097
1c2ebf285cb7 paraparser.py: force assertion of tag rather than value checking in end_<tag>
robin
parents: 4077
diff changeset
   686
        self._pop('strong')
1940
baa0abc136c4 Henning von Bargen's caseSensitive flag
rgbecker
parents: 1932
diff changeset
   687
1677
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   688
    #### italics
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   689
    def start_i( self, attributes ):
4097
1c2ebf285cb7 paraparser.py: force assertion of tag rather than value checking in end_<tag>
robin
parents: 4077
diff changeset
   690
        self._push('i',italic=1)
96
2a9cca4c5cf0 Beginnings of a paragraph parser
rgbecker
parents:
diff changeset
   691
1677
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   692
    def end_i( self ):
4097
1c2ebf285cb7 paraparser.py: force assertion of tag rather than value checking in end_<tag>
robin
parents: 4077
diff changeset
   693
        self._pop('i')
96
2a9cca4c5cf0 Beginnings of a paragraph parser
rgbecker
parents:
diff changeset
   694
1940
baa0abc136c4 Henning von Bargen's caseSensitive flag
rgbecker
parents: 1932
diff changeset
   695
    def start_em( self, attributes ):
4097
1c2ebf285cb7 paraparser.py: force assertion of tag rather than value checking in end_<tag>
robin
parents: 4077
diff changeset
   696
        self._push('em', italic=1)
1940
baa0abc136c4 Henning von Bargen's caseSensitive flag
rgbecker
parents: 1932
diff changeset
   697
baa0abc136c4 Henning von Bargen's caseSensitive flag
rgbecker
parents: 1932
diff changeset
   698
    def end_em( self ):
4097
1c2ebf285cb7 paraparser.py: force assertion of tag rather than value checking in end_<tag>
robin
parents: 4077
diff changeset
   699
        self._pop('em')
1940
baa0abc136c4 Henning von Bargen's caseSensitive flag
rgbecker
parents: 1932
diff changeset
   700
1677
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   701
    #### underline
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   702
    def start_u( self, attributes ):
4097
1c2ebf285cb7 paraparser.py: force assertion of tag rather than value checking in end_<tag>
robin
parents: 4077
diff changeset
   703
        self._push('u',underline=1)
96
2a9cca4c5cf0 Beginnings of a paragraph parser
rgbecker
parents:
diff changeset
   704
1677
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   705
    def end_u( self ):
4097
1c2ebf285cb7 paraparser.py: force assertion of tag rather than value checking in end_<tag>
robin
parents: 4077
diff changeset
   706
        self._pop('u')
96
2a9cca4c5cf0 Beginnings of a paragraph parser
rgbecker
parents:
diff changeset
   707
2644
e762ad1c8909 reportlab: add support for strike through
rgbecker
parents: 2594
diff changeset
   708
    #### strike
e762ad1c8909 reportlab: add support for strike through
rgbecker
parents: 2594
diff changeset
   709
    def start_strike( self, attributes ):
4097
1c2ebf285cb7 paraparser.py: force assertion of tag rather than value checking in end_<tag>
robin
parents: 4077
diff changeset
   710
        self._push('strike',strike=1)
2644
e762ad1c8909 reportlab: add support for strike through
rgbecker
parents: 2594
diff changeset
   711
e762ad1c8909 reportlab: add support for strike through
rgbecker
parents: 2594
diff changeset
   712
    def end_strike( self ):
4097
1c2ebf285cb7 paraparser.py: force assertion of tag rather than value checking in end_<tag>
robin
parents: 4077
diff changeset
   713
        self._pop('strike')
2644
e762ad1c8909 reportlab: add support for strike through
rgbecker
parents: 2594
diff changeset
   714
2575
0cba68b93555 reportlab-utf8 moved to trunk
rgbecker
parents: 2446
diff changeset
   715
    #### link
0cba68b93555 reportlab-utf8 moved to trunk
rgbecker
parents: 2446
diff changeset
   716
    def start_link(self, attributes):
        """Open a <link> tag.

        The raw attributes are translated through _linkAttrMap and the
        resulting values become the keyword arguments of the 'link' frame.
        """
        linkAttrs = self.getAttributes(attributes, _linkAttrMap)
        self._push('link', **linkAttrs)
2575
0cba68b93555 reportlab-utf8 moved to trunk
rgbecker
parents: 2446
diff changeset
   718
0cba68b93555 reportlab-utf8 moved to trunk
rgbecker
parents: 2446
diff changeset
   719
    def end_link(self):
4097
1c2ebf285cb7 paraparser.py: force assertion of tag rather than value checking in end_<tag>
robin
parents: 4077
diff changeset
   720
        if self._pop('link').link is None:
1c2ebf285cb7 paraparser.py: force assertion of tag rather than value checking in end_<tag>
robin
parents: 4077
diff changeset
   721
            raise ValueError('<link> has no target or href')
2575
0cba68b93555 reportlab-utf8 moved to trunk
rgbecker
parents: 2446
diff changeset
   722
2744
9472eedb9702 reportlab/platypus: add two way <a> tag
rgbecker
parents: 2742
diff changeset
   723
    #### anchor
9472eedb9702 reportlab/platypus: add two way <a> tag
rgbecker
parents: 2742
diff changeset
   724
    def start_a(self, attributes):
        """Open an <a> tag in either of its two forms.

        With a name attribute the tag is the self-closing anchor variant
        (<a name="..."/>): a blank name or any extra attribute is reported via
        _syntax_error, extra attributes are dropped, and the frame is marked
        with _selfClosingTag='anchor'.  Without a name the tag is a link: the
        stripped href (or '' when absent) is stored as 'link' and the href key
        is removed.
        """
        anchorAttrs = self.getAttributes(attributes, _anchorAttrMap)
        anchorName = anchorAttrs.get('name')
        if anchorName is None:
            # link form: convert href to our link form
            anchorAttrs['link'] = anchorAttrs.get('href', '').strip()
            anchorAttrs.pop('href', None)
        else:
            if not anchorName.strip():
                self._syntax_error('<a name="..."/> anchor variant requires non-blank name')
            if len(anchorAttrs) > 1:
                self._syntax_error('<a name="..."/> anchor variant only allows name attribute')
                # keep only the (unstripped) name
                anchorAttrs = {'name': anchorAttrs['name']}
            anchorAttrs['_selfClosingTag'] = 'anchor'
        self._push('a', **anchorAttrs)
2744
9472eedb9702 reportlab/platypus: add two way <a> tag
rgbecker
parents: 2742
diff changeset
   740
9472eedb9702 reportlab/platypus: add two way <a> tag
rgbecker
parents: 2742
diff changeset
   741
    def end_a(self):
9472eedb9702 reportlab/platypus: add two way <a> tag
rgbecker
parents: 2742
diff changeset
   742
        frag = self._stack[-1]
9472eedb9702 reportlab/platypus: add two way <a> tag
rgbecker
parents: 2742
diff changeset
   743
        sct = getattr(frag,'_selfClosingTag','')
9472eedb9702 reportlab/platypus: add two way <a> tag
rgbecker
parents: 2742
diff changeset
   744
        if sct:
4097
1c2ebf285cb7 paraparser.py: force assertion of tag rather than value checking in end_<tag>
robin
parents: 4077
diff changeset
   745
            if not (sct=='anchor' and frag.name):
1c2ebf285cb7 paraparser.py: force assertion of tag rather than value checking in end_<tag>
robin
parents: 4077
diff changeset
   746
                raise ValueError('Parser failure in <a/>')
2744
9472eedb9702 reportlab/platypus: add two way <a> tag
rgbecker
parents: 2742
diff changeset
   747
            defn = frag.cbDefn = ABag()
9472eedb9702 reportlab/platypus: add two way <a> tag
rgbecker
parents: 2742
diff changeset
   748
            defn.label = defn.kind = 'anchor'
9472eedb9702 reportlab/platypus: add two way <a> tag
rgbecker
parents: 2742
diff changeset
   749
            defn.name = frag.name
9472eedb9702 reportlab/platypus: add two way <a> tag
rgbecker
parents: 2742
diff changeset
   750
            del frag.name, frag._selfClosingTag
9472eedb9702 reportlab/platypus: add two way <a> tag
rgbecker
parents: 2742
diff changeset
   751
            self.handle_data('')
4097
1c2ebf285cb7 paraparser.py: force assertion of tag rather than value checking in end_<tag>
robin
parents: 4077
diff changeset
   752
            self._pop('a')
2744
9472eedb9702 reportlab/platypus: add two way <a> tag
rgbecker
parents: 2742
diff changeset
   753
        else:
4097
1c2ebf285cb7 paraparser.py: force assertion of tag rather than value checking in end_<tag>
robin
parents: 4077
diff changeset
   754
            if self._pop('a').link is None:
1c2ebf285cb7 paraparser.py: force assertion of tag rather than value checking in end_<tag>
robin
parents: 4077
diff changeset
   755
                raise ValueError('<link> has no href')
2742
8edd54153201 paraparser: allow <a> as alias for <link>
rgbecker
parents: 2694
diff changeset
   756
2857
487dc2450eec reprotlab: inline images horizontal positioning OK
rgbecker
parents: 2836
diff changeset
   757
    def start_img(self,attributes):
        """Open an <img> tag.

        Attributes are translated through _imgAttrMap; a missing or empty src
        is reported via _syntax_error.  The frame is marked with
        _selfClosingTag='img'.
        """
        imgAttrs = self.getAttributes(attributes, _imgAttrMap)
        src = imgAttrs.get('src')
        if not src:
            self._syntax_error('<img> needs src attribute')
        imgAttrs['_selfClosingTag'] = 'img'
        self._push('img', **imgAttrs)
2857
487dc2450eec reprotlab: inline images horizontal positioning OK
rgbecker
parents: 2836
diff changeset
   763
487dc2450eec reprotlab: inline images horizontal positioning OK
rgbecker
parents: 2836
diff changeset
   764
    def end_img(self):
487dc2450eec reprotlab: inline images horizontal positioning OK
rgbecker
parents: 2836
diff changeset
   765
        frag = self._stack[-1]
4097
1c2ebf285cb7 paraparser.py: force assertion of tag rather than value checking in end_<tag>
robin
parents: 4077
diff changeset
   766
        if not getattr(frag,'_selfClosingTag',''):
1c2ebf285cb7 paraparser.py: force assertion of tag rather than value checking in end_<tag>
robin
parents: 4077
diff changeset
   767
            raise ValueError('Parser failure in <img/>')
2857
487dc2450eec reprotlab: inline images horizontal positioning OK
rgbecker
parents: 2836
diff changeset
   768
        defn = frag.cbDefn = ABag()
487dc2450eec reprotlab: inline images horizontal positioning OK
rgbecker
parents: 2836
diff changeset
   769
        defn.kind = 'img'
487dc2450eec reprotlab: inline images horizontal positioning OK
rgbecker
parents: 2836
diff changeset
   770
        defn.src = getattr(frag,'src',None)
487dc2450eec reprotlab: inline images horizontal positioning OK
rgbecker
parents: 2836
diff changeset
   771
        defn.image = ImageReader(defn.src)
487dc2450eec reprotlab: inline images horizontal positioning OK
rgbecker
parents: 2836
diff changeset
   772
        size = defn.image.getSize()
487dc2450eec reprotlab: inline images horizontal positioning OK
rgbecker
parents: 2836
diff changeset
   773
        defn.width = getattr(frag,'width',size[0])
487dc2450eec reprotlab: inline images horizontal positioning OK
rgbecker
parents: 2836
diff changeset
   774
        defn.height = getattr(frag,'height',size[1])
487dc2450eec reprotlab: inline images horizontal positioning OK
rgbecker
parents: 2836
diff changeset
   775
        defn.valign = getattr(frag,'valign','bottom')
487dc2450eec reprotlab: inline images horizontal positioning OK
rgbecker
parents: 2836
diff changeset
   776
        del frag._selfClosingTag
487dc2450eec reprotlab: inline images horizontal positioning OK
rgbecker
parents: 2836
diff changeset
   777
        self.handle_data('')
4097
1c2ebf285cb7 paraparser.py: force assertion of tag rather than value checking in end_<tag>
robin
parents: 4077
diff changeset
   778
        self._pop('img')
2857
487dc2450eec reprotlab: inline images horizontal positioning OK
rgbecker
parents: 2836
diff changeset
   779
1677
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   780
    #### super script
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   781
    def start_super( self, attributes ):
        """Open a <super> tag: push a 'super' frame with sup=1 plus any
        attributes translated through _supAttrMap."""
        supAttrs = self.getAttributes(attributes, _supAttrMap)
        supAttrs['sup'] = 1
        self._push('super', **supAttrs)
96
2a9cca4c5cf0 Beginnings of a paragraph parser
rgbecker
parents:
diff changeset
   785
1677
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   786
    def end_super( self ):
4097
1c2ebf285cb7 paraparser.py: force assertion of tag rather than value checking in end_<tag>
robin
parents: 4077
diff changeset
   787
        self._pop('super')
96
2a9cca4c5cf0 Beginnings of a paragraph parser
rgbecker
parents:
diff changeset
   788
4097
1c2ebf285cb7 paraparser.py: force assertion of tag rather than value checking in end_<tag>
robin
parents: 4077
diff changeset
   789
    def start_sup( self, attributes ):
        """Open a <sup> tag: push a 'sup' frame with sup=1 plus any
        attributes translated through _supAttrMap."""
        supAttrs = self.getAttributes(attributes, _supAttrMap)
        supAttrs['sup'] = 1
        self._push('sup', **supAttrs)
4097
1c2ebf285cb7 paraparser.py: force assertion of tag rather than value checking in end_<tag>
robin
parents: 4077
diff changeset
   793
1c2ebf285cb7 paraparser.py: force assertion of tag rather than value checking in end_<tag>
robin
parents: 4077
diff changeset
   794
    def end_sup( self ):
1c2ebf285cb7 paraparser.py: force assertion of tag rather than value checking in end_<tag>
robin
parents: 4077
diff changeset
   795
        self._pop('sup')
1736
dafc17db33d2 Attempt to use sup as well as super
rgbecker
parents: 1683
diff changeset
   796
1677
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   797
    #### sub script
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   798
    def start_sub( self, attributes ):
        """Open a <sub> tag: push a 'sub' frame with sub=1 plus any
        attributes translated through _supAttrMap (the same map the
        superscript handlers use)."""
        subAttrs = self.getAttributes(attributes, _supAttrMap)
        subAttrs['sub'] = 1
        self._push('sub', **subAttrs)
96
2a9cca4c5cf0 Beginnings of a paragraph parser
rgbecker
parents:
diff changeset
   802
1677
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   803
    def end_sub( self ):
4097
1c2ebf285cb7 paraparser.py: force assertion of tag rather than value checking in end_<tag>
robin
parents: 4077
diff changeset
   804
        self._pop('sub')
96
2a9cca4c5cf0 Beginnings of a paragraph parser
rgbecker
parents:
diff changeset
   805
1677
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   806
    #### greek script
2376
7e70411a7236 flowables.py: minor change to PTOContainer
rgbecker
parents: 2369
diff changeset
   807
    #### add symbol encoding
7e70411a7236 flowables.py: minor change to PTOContainer
rgbecker
parents: 2369
diff changeset
   808
    def handle_charref(self, name):
7e70411a7236 flowables.py: minor change to PTOContainer
rgbecker
parents: 2369
diff changeset
   809
        try:
2575
0cba68b93555 reportlab-utf8 moved to trunk
rgbecker
parents: 2446
diff changeset
   810
            if name[0]=='x':
0cba68b93555 reportlab-utf8 moved to trunk
rgbecker
parents: 2446
diff changeset
   811
                n = int(name[1:],16)
1931
784fce255e2d Added in more special entities as suggested by Christoph Zwerschke
rgbecker
parents: 1736
diff changeset
   812
            else:
2575
0cba68b93555 reportlab-utf8 moved to trunk
rgbecker
parents: 2446
diff changeset
   813
                n = int(name)
0cba68b93555 reportlab-utf8 moved to trunk
rgbecker
parents: 2446
diff changeset
   814
        except ValueError:
2376
7e70411a7236 flowables.py: minor change to PTOContainer
rgbecker
parents: 2369
diff changeset
   815
            self.unknown_charref(name)
7e70411a7236 flowables.py: minor change to PTOContainer
rgbecker
parents: 2369
diff changeset
   816
            return
4004
ebd460ef8d2f rename utils.py UniChr --> uniChr
robin
parents: 3994
diff changeset
   817
        self.handle_data(uniChr(n))   #.encode('utf8'))
134
60e8e0aee073 Fixed syntax_error handling
rgbecker
parents: 133
diff changeset
   818
2376
7e70411a7236 flowables.py: minor change to PTOContainer
rgbecker
parents: 2369
diff changeset
   819
    def syntax_error(self,lineno,message):
7e70411a7236 flowables.py: minor change to PTOContainer
rgbecker
parents: 2369
diff changeset
   820
        self._syntax_error(message)
134
60e8e0aee073 Fixed syntax_error handling
rgbecker
parents: 133
diff changeset
   821
1677
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   822
    def _syntax_error(self,message):
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   823
        if message[:10]=="attribute " and message[-17:]==" value not quoted": return
4255
89ea1d46b4a0 make paraparser syntax errors real and fix <sup/sub> tags to have relative values; version-->3.3.1
robin
parents: 4252
diff changeset
   824
        if self._crashOnError:
89ea1d46b4a0 make paraparser syntax errors real and fix <sup/sub> tags to have relative values; version-->3.3.1
robin
parents: 4252
diff changeset
   825
            raise ValueError('paraparser: syntax error: %s' % message)
1677
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   826
        self.errors.append(message)
134
60e8e0aee073 Fixed syntax_error handling
rgbecker
parents: 133
diff changeset
   827
2584
0fed2bd8ef90 reportlab: fixed <greek> added <unichar [name=..|code=../> to paragraph
rgbecker
parents: 2575
diff changeset
   828
    def start_greek(self, attr):
4097
1c2ebf285cb7 paraparser.py: force assertion of tag rather than value checking in end_<tag>
robin
parents: 4077
diff changeset
   829
        self._push('greek',greek=1)
96
2a9cca4c5cf0 Beginnings of a paragraph parser
rgbecker
parents:
diff changeset
   830
1677
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   831
    def end_greek(self):
4097
1c2ebf285cb7 paraparser.py: force assertion of tag rather than value checking in end_<tag>
robin
parents: 4077
diff changeset
   832
        self._pop('greek')
96
2a9cca4c5cf0 Beginnings of a paragraph parser
rgbecker
parents:
diff changeset
   833
2584
0fed2bd8ef90 reportlab: fixed <greek> added <unichar [name=..|code=../> to paragraph
rgbecker
parents: 2575
diff changeset
   834
    def start_unichar(self, attr):
        """Handle <unichar name="..."/> or <unichar code="..."/>.

        Exactly one of the two attributes is expected:

        name -- a Unicode character name, resolved with unicodedata.lookup.
        code -- an integer code point written as an integer literal
                (decimal, or with a 0x/0o/0b prefix).

        The resolved character is passed to handle_data and a self-closing
        'unichar' fragment is pushed.  Invalid input is reported through
        _syntax_error; if that call returns, a NUL character is emitted in
        place of the character that could not be resolved.  With neither
        attribute present nothing is emitted, and any other attribute is
        reported as invalid.
        """
        if 'name' in attr:
            if 'code' in attr:
                self._syntax_error('<unichar/> invalid with both name and code attributes')
            try:
                v = unicodedata.lookup(attr['name'])
            except KeyError:
                self._syntax_error('<unichar/> invalid name attribute\n"%s"' % ascii(attr['name']))
                v = '\0'
        elif 'code' in attr:
            try:
                # The value comes straight from paragraph markup, so it is
                # parsed strictly as an integer literal (base 0 accepts
                # decimal and 0x/0o/0b prefixes) instead of being handed to
                # eval(), which would execute arbitrary expressions.
                v = int(attr['code'], 0)
                v = chr(v) if isPy3 else unichr(v)
            except Exception:
                # best effort: any failure to turn the attribute into a
                # character is reported as a markup error
                self._syntax_error('<unichar/> invalid code attribute %s' % ascii(attr['code']))
                v = '\0'
        else:
            v = None
            if attr:
                self._syntax_error('<unichar/> invalid attribute %s' % list(attr.keys())[0])

        if v is not None:
            self.handle_data(v)
        self._push('unichar',_selfClosingTag='unichar')
2584
0fed2bd8ef90 reportlab: fixed <greek> added <unichar [name=..|code=../> to paragraph
rgbecker
parents: 2575
diff changeset
   858
0fed2bd8ef90 reportlab: fixed <greek> added <unichar [name=..|code=../> to paragraph
rgbecker
parents: 2575
diff changeset
   859
    def end_unichar(self):
        """Close a <unichar/> tag by popping the 'unichar' entry."""
        closing = 'unichar'
        self._pop(closing)
2584
0fed2bd8ef90 reportlab: fixed <greek> added <unichar [name=..|code=../> to paragraph
rgbecker
parents: 2575
diff changeset
   861
1677
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   862
    def start_font(self,attr):
        """Open a <font> tag.

        The tag attributes are translated through getAttributes using
        _spanAttrMap.  When a fontName results, it is run through ps2tt and
        the three values returned are stored as fontName, bold and italic.
        The resulting settings are pushed as a 'font' fragment.
        """
        params = self.getAttributes(attr,_spanAttrMap)
        if 'fontName' in params:
            family, bold, italic = ps2tt(params['fontName'])
            params['fontName'] = family
            params['bold'] = bold
            params['italic'] = italic
        self._push('font',**params)
96
2a9cca4c5cf0 Beginnings of a paragraph parser
rgbecker
parents:
diff changeset
   867
1677
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   868
    def end_font(self):
        """Close a <font> tag by popping the 'font' entry."""
        closing = 'font'
        self._pop(closing)
96
2a9cca4c5cf0 Beginnings of a paragraph parser
rgbecker
parents:
diff changeset
   870
3552
20ecbcc53c15 paraparser.py add support for <span style=stylename>
rgbecker
parents: 3440
diff changeset
   871
    def start_span(self,attr):
        """Open a <span> tag, optionally seeded from a named span style.

        The tag attributes are translated through getAttributes using
        _spanAttrMap.  If a 'style' entry is present it is removed and looked
        up with findSpanStyle; the style's fontName, fontSize, textColor and
        backColor (those it actually has) become defaults which the explicit
        tag attributes then override.  A resulting fontName is run through
        ps2tt and stored as fontName, bold and italic.  The settings are
        pushed as a 'span' fragment.
        """
        params = self.getAttributes(attr,_spanAttrMap)
        if 'style' in params:
            spanStyle = self.findSpanStyle(params.pop('style'))
            merged = {}
            for key in ('fontName', 'fontSize', 'textColor', 'backColor'):
                # self doubles as the "attribute not present" sentinel
                value = getattr(spanStyle,key,self)
                if value is not self:
                    merged[key] = value
            # explicit tag attributes win over values taken from the style
            merged.update(params)
            params = merged
        if 'fontName' in params:
            family, bold, italic = ps2tt(params['fontName'])
            params['fontName'] = family
            params['bold'] = bold
            params['italic'] = italic
        self._push('span',**params)
3552
20ecbcc53c15 paraparser.py add support for <span style=stylename>
rgbecker
parents: 3440
diff changeset
   885
4097
1c2ebf285cb7 paraparser.py: force assertion of tag rather than value checking in end_<tag>
robin
parents: 4077
diff changeset
   886
    def end_span(self):
        """Close a <span> tag by popping the 'span' entry."""
        closing = 'span'
        self._pop(closing)
3552
20ecbcc53c15 paraparser.py add support for <span style=stylename>
rgbecker
parents: 3440
diff changeset
   888
2663
927cc273c5a5 <br/> work in progress
andy
parents: 2646
diff changeset
   889
    def start_br(self, attr):
        """Open a <br/> tag by pushing a self-closing line-break fragment.

        The fragment is pushed with _selfClosingTag='br', lineBreak=True and
        an empty text.  attr is not inspected.
        """
        breakProps = dict(_selfClosingTag='br', lineBreak=True, text='')
        self._push('br', **breakProps)
3954
44dbe56eb858 first attempt at HTMLParser-based paraparser
Andy Robinson <andy@reportlab.com>
parents: 3937
diff changeset
   891
        
2663
927cc273c5a5 <br/> work in progress
andy
parents: 2646
diff changeset
   892
    def end_br(self):
        """Close a <br/> tag.

        The fragment on top of the stack must be the self-closing line-break
        fragment; otherwise ValueError is raised.  Its _selfClosingTag marker
        is removed, an empty data string is handled, and the 'br' entry is
        popped.
        """
        top = self._stack[-1]
        if top._selfClosingTag!='br' or not top.lineBreak:
            raise ValueError('Parser failure in <br/>')
        del top._selfClosingTag
        self.handle_data('')
        self._pop('br')
2663
927cc273c5a5 <br/> work in progress
andy
parents: 2646
diff changeset
   900
1677
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   901
    def _initial_frag(self,attr,attrMap,bullet=0):
        """Create the starting ParaFrag for a paragraph or a bullet.

        attr    -- tag attributes; when not equal to {} the current style is
                   deep-copied, the attributes (translated via getAttributes
                   with attrMap) are applied to the copy, and the copy
                   becomes self._style.
        attrMap -- attribute map handed to getAttributes.
        bullet  -- when true, font name, size and colour are taken from the
                   style's bullet settings instead of its text settings.

        Returns the new fragment with its flags zeroed and link set to None.
        """
        style = self._style
        if attr!={}:
            # apply the tag attributes to a private copy of the style
            style = copy.deepcopy(style)
            _applyAttributes(style,self.getAttributes(attr,attrMap))
            self._style = style

        # initialize semantic values
        frag = ParaFrag()
        for flag in ('sub', 'sup', 'rise', 'underline', 'strike', 'greek'):
            setattr(frag, flag, 0)
        frag.link = None
        if not bullet:
            frag.fontName, frag.bold, frag.italic = ps2tt(style.fontName)
            frag.fontSize = style.fontSize
            frag.textColor = style.textColor
        else:
            frag.fontName, frag.bold, frag.italic = ps2tt(style.bulletFontName)
            frag.fontSize = style.bulletFontSize
            # a missing or false bulletColor falls back to the text colour
            bulletColor = hasattr(style,'bulletColor') and style.bulletColor
            frag.textColor = bulletColor if bulletColor else style.textColor
        return frag
250
a1bcf9c6c21e <bullet> xml tag added
rgbecker
parents: 248
diff changeset
   926
1677
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   927
    def start_para(self,attr):
        """Open a <para> tag.

        Builds the initial fragment from attr using _paraAttrMap, tags it as
        'para', and makes it the sole entry of a fresh stack.
        """
        root = self._initial_frag(attr,_paraAttrMap)
        root.__tag__ = 'para'
        self._stack = [root]
119
b4dc589c8364 <para> tag added in layout.py paraparser.py
rgbecker
parents: 115
diff changeset
   931
1677
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   932
    def end_para(self):
        """Close a <para> tag by popping the 'para' entry."""
        closing = 'para'
        self._pop(closing)
119
b4dc589c8364 <para> tag added in layout.py paraparser.py
rgbecker
parents: 115
diff changeset
   934
1677
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   935
    def start_bullet(self,attr):
        """Open a <bullet> tag.

        A second <bullet> (detected by bFragList already existing) is
        reported through _syntax_error.  bFragList is then (re)set to an
        empty list, and a bullet-flavoured initial fragment built from attr
        with _bulletAttrMap is marked isBullet, tagged 'bullet' and appended
        to the stack.
        """
        alreadySeen = hasattr(self,'bFragList')
        if alreadySeen:
            self._syntax_error('only one <bullet> tag allowed')
        self.bFragList = []
        bulletFrag = self._initial_frag(attr,_bulletAttrMap,1)
        bulletFrag.isBullet = 1
        bulletFrag.__tag__ = 'bullet'
        self._stack.append(bulletFrag)
250
a1bcf9c6c21e <bullet> xml tag added
rgbecker
parents: 248
diff changeset
   943
1677
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   944
    def end_bullet(self):
        """Close a <bullet> tag by popping the 'bullet' entry."""
        closing = 'bullet'
        self._pop(closing)
250
a1bcf9c6c21e <bullet> xml tag added
rgbecker
parents: 248
diff changeset
   946
1677
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   947
    #---------------------------------------------------------------
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   948
    def start_seqdefault(self, attr):
        """Handle <seqdefault id="..."/>.

        Passes the 'id' attribute (None when absent) to the sequencer's
        setDefaultCounter.
        """
        counter = attr['id'] if 'id' in attr else None
        self._seq.setDefaultCounter(counter)
266
081154da1a78 Added Sequencer and associated XML tags
andy_robinson
parents: 253
diff changeset
   954
1677
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   955
    def end_seqdefault(self):
        """Close a <seqdefault> tag; nothing needs to be done."""
        return None
1683
7fa753e4420a Removed all trailing whitespace
andy_robinson
parents: 1677
diff changeset
   957
1677
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   958
    def start_seqreset(self, attr):
        """Handle <seqreset id="..." base="..."/>.

        id   -- counter passed to the sequencer's reset; None when absent.
        base -- value to reset to; a missing or non-integer value falls
                back to 0.
        """
        counter = attr.get('id')
        try:
            base = int(attr['base'])
        except (KeyError, TypeError, ValueError):
            # best effort: an absent or malformed base silently means 0;
            # unrelated exceptions are no longer swallowed
            base = 0
        self._seq.reset(counter, base)
266
081154da1a78 Added Sequencer and associated XML tags
andy_robinson
parents: 253
diff changeset
   968
1677
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   969
    def end_seqreset(self):
        """Close a sequence tag; nothing needs to be done."""
        return None
744
2abd99baf95b Accepts seqdefault/seqDefault and seqreset/seqReset
andy_robinson
parents: 677
diff changeset
   971
2368
791a362e9cae added seqchain/format tags
rgbecker
parents: 2341
diff changeset
   972
    def start_seqchain(self, attr):
        """Handle <seqchain order="a b c"/>.

        The whitespace-separated names in 'order' (empty when absent) are
        chained pairwise, in order, through the sequencer's chain method:
        (a, b), then (b, c), and so on.
        """
        names = (attr['order'] if 'order' in attr else '').split()
        seq = self._seq
        for parent, child in zip(names, names[1:]):
            seq.chain(parent, child)
791a362e9cae added seqchain/format tags
rgbecker
parents: 2341
diff changeset
   981
    end_seqchain = end_seqreset
791a362e9cae added seqchain/format tags
rgbecker
parents: 2341
diff changeset
   982
791a362e9cae added seqchain/format tags
rgbecker
parents: 2341
diff changeset
   983
    def start_seqformat(self, attr):
        """Handle <seqformat id="..." value="..."/>.

        Passes the 'id' attribute (None when absent) and the 'value'
        attribute ('1' when absent) to the sequencer's setFormat.
        """
        counter = attr['id'] if 'id' in attr else None
        fmt = attr['value'] if 'value' in attr else '1'
        self._seq.setFormat(counter,fmt)
791a362e9cae added seqchain/format tags
rgbecker
parents: 2341
diff changeset
   993
    end_seqformat = end_seqreset
791a362e9cae added seqchain/format tags
rgbecker
parents: 2341
diff changeset
   994
1677
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   995
    # AR hacking in aliases to allow the proper casing for RML.
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
   996
    # the above ones should be deprecated over time. 2001-03-22
2368
791a362e9cae added seqchain/format tags
rgbecker
parents: 2341
diff changeset
   997
    start_seqDefault = start_seqdefault
791a362e9cae added seqchain/format tags
rgbecker
parents: 2341
diff changeset
   998
    end_seqDefault = end_seqdefault
791a362e9cae added seqchain/format tags
rgbecker
parents: 2341
diff changeset
   999
    start_seqReset = start_seqreset
791a362e9cae added seqchain/format tags
rgbecker
parents: 2341
diff changeset
  1000
    end_seqReset = end_seqreset
791a362e9cae added seqchain/format tags
rgbecker
parents: 2341
diff changeset
  1001
    start_seqChain = start_seqchain
791a362e9cae added seqchain/format tags
rgbecker
parents: 2341
diff changeset
  1002
    end_seqChain = end_seqchain
791a362e9cae added seqchain/format tags
rgbecker
parents: 2341
diff changeset
  1003
    start_seqFormat = start_seqformat
791a362e9cae added seqchain/format tags
rgbecker
parents: 2341
diff changeset
  1004
    end_seqFormat = end_seqformat
1683
7fa753e4420a Removed all trailing whitespace
andy_robinson
parents: 1677
diff changeset
  1005
1677
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
  1006
    def start_seq(self, attr):
        """Handle a <seq> tag and emit the resulting text via handle_data.

        template -- if present, the template is %-formatted with the
                    sequencer and emitted; nothing else is looked at.
        id       -- counter to use; None when absent.
        inc      -- absent/empty: emit nextf(id).  "no" (any case): emit
                    thisf(id).  Anything else must be an integer: thisf(id)
                    is emitted and the counter is then reset to the value
                    returned by _this() plus that integer.
        """
        if 'template' in attr:
            self.handle_data(attr['template'] % self._seq)
            return

        counter = attr['id'] if 'id' in attr else None
        step = attr.get('inc', None)
        if not step:
            text = self._seq.nextf(counter)
        elif step.lower() == 'no':
            # "no" means show the current value without incrementing
            text = self._seq.thisf(counter)
        else:
            # an integer step; thus 0 and 1 increment by the right amounts
            delta = int(step)
            text = self._seq.thisf(counter)
            # NOTE(review): _this() is called without the counter id while
            # thisf/reset receive it -- confirm against the Sequencer API
            # that this reads the intended counter.
            self._seq.reset(counter, self._seq._this() + delta)
        self.handle_data(text)
1683
7fa753e4420a Removed all trailing whitespace
andy_robinson
parents: 1677
diff changeset
  1030
1677
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
  1031
    def end_seq(self):
        """Close a <seq> tag; nothing needs to be done."""
        return None
266
081154da1a78 Added Sequencer and associated XML tags
andy_robinson
parents: 253
diff changeset
  1033
4059
9b48d0d9f137 paraparse.py: add ondraw=onDraw aliases
robin
parents: 4004
diff changeset
  1034
    def start_ondraw(self,attr):
        """Handle an <onDraw name="..." label="..."/> tag.

        Builds a callback definition bag of kind 'onDraw' carrying the tag's
        name and label (None when absent).  A missing name is reported
        through _syntax_error.  The definition is pushed as an 'ondraw'
        fragment, an empty data string is handled, and the entry is popped
        again straight away.
        """
        defn = ABag()
        if 'name' not in attr:
            self._syntax_error('<onDraw> needs at least a name attribute')
        else:
            defn.name = attr['name']

        defn.label = attr.get('label',None)
        defn.kind='onDraw'
        self._push('ondraw',cbDefn=defn)
        # emit an empty piece of data while the definition is on the stack
        self.handle_data('')
        self._pop('ondraw')
4059
9b48d0d9f137 paraparse.py: add ondraw=onDraw aliases
robin
parents: 4004
diff changeset
  1044
    start_onDraw=start_ondraw 
9b48d0d9f137 paraparse.py: add ondraw=onDraw aliases
robin
parents: 4004
diff changeset
  1045
    end_onDraw=end_ondraw=end_seq
3111
86a3158c50bd reportlab: improved support for onDraw and SimpleIndex
rgbecker
parents: 3032
diff changeset
  1046
3165
cbda9e7d0ee3 reportlab: new index support
rgbecker
parents: 3137
diff changeset
  1047
    def start_index(self,attr):
        '''Handle an <index> tag by emitting one empty 'index' callback fragment.

        attr holds the raw tag attributes; they are filtered through
        getAttributes using _indexAttrMap.  The keys used afterwards are
            item    required, becomes the first element of the encoded label
            name    optional, defaults to DEFAULT_INDEX_NAME
            format  optional, one of '123', 'I', 'i', 'ABC' or 'abc'
            offset  optional, must be convertible with int()

        Raises ValueError when format is not one of the accepted values or
        when offset cannot be converted to an int.
        '''
        attr=self.getAttributes(attr,_indexAttrMap)
        if 'item' not in attr:
            self._syntax_error('<index> needs at least an item attribute')
            #only reached when _syntax_error reported the problem without
            #raising; there is no label to encode, so emit nothing.  The
            #push/pop below is self contained, so the stack stays balanced.
            return
        label = attr['item']
        name = attr.get('name',DEFAULT_INDEX_NAME)

        #fmt rather than format so the builtin is not shadowed
        fmt = attr.get('format',None)
        if fmt is not None and fmt not in ('123','I','i','ABC','abc'):
            #report the offending format (this used to reference offset before
            #it was assigned and so failed with an UnboundLocalError)
            raise ValueError('index tag format is %r not valid 123 I i ABC or abc' % (fmt,))

        offset = attr.get('offset',None)
        if offset is not None:
            try:
                offset = int(offset)
            except (TypeError, ValueError, OverflowError):
                #only conversion failures; KeyboardInterrupt etc pass through
                raise ValueError('index tag offset is %r not an int' % (offset,))

        defn = ABag()
        defn.label = encode_label((label,fmt,offset))
        defn.name = name
        defn.kind='index'

        #the tag has no content: push, emit a single empty fragment, pop
        self._push('index',cbDefn=defn)
        self.handle_data('')
        self._pop('index',)
3165
cbda9e7d0ee3 reportlab: new index support
rgbecker
parents: 3137
diff changeset
  1073
    #the closing </index> tag reuses end_seq as its end handler
    end_index=end_seq
2663
927cc273c5a5 <br/> work in progress
andy
parents: 2646
diff changeset
  1074
3826
02f216b6e38e paraparser.py: support old behaviours eg ignore unknown tags etc etc
robin
parents: 3812
diff changeset
  1075
    def start_unknown(self,attr):
        '''Start handler for the 'unknown' tag name; attr is ignored and nothing is done.'''
        return None
02f216b6e38e paraparser.py: support old behaviours eg ignore unknown tags etc etc
robin
parents: 3812
diff changeset
  1077
    #the closing tag for 'unknown' reuses end_seq as its end handler
    end_unknown=end_seq
02f216b6e38e paraparser.py: support old behaviours eg ignore unknown tags etc etc
robin
parents: 3812
diff changeset
  1078
1677
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
  1079
    #---------------------------------------------------------------
4097
1c2ebf285cb7 paraparser.py: force assertion of tag rather than value checking in end_<tag>
robin
parents: 4077
diff changeset
  1080
    def _push(self,tag,**attr):
        '''Push a new fragment for tag onto the parse stack.

        The new fragment is a shallow copy of the current stack top.  It is
        marked with tag in its __tag__ attribute, then updated from the
        keyword arguments by _applyAttributes, and finally appended to
        self._stack.
        '''
        parent = self._stack[-1]
        child = copy.copy(parent)
        child.__tag__ = tag
        _applyAttributes(child,attr)
        self._stack.append(child)
96
2a9cca4c5cf0 Beginnings of a paragraph parser
rgbecker
parents:
diff changeset
  1085
4097
1c2ebf285cb7 paraparser.py: force assertion of tag rather than value checking in end_<tag>
robin
parents: 4077
diff changeset
  1086
    def _pop(self,tag):
1c2ebf285cb7 paraparser.py: force assertion of tag rather than value checking in end_<tag>
robin
parents: 4077
diff changeset
  1087
        frag = self._stack.pop()
1c2ebf285cb7 paraparser.py: force assertion of tag rather than value checking in end_<tag>
robin
parents: 4077
diff changeset
  1088
        if tag==frag.__tag__: return frag
1c2ebf285cb7 paraparser.py: force assertion of tag rather than value checking in end_<tag>
robin
parents: 4077
diff changeset
  1089
        raise ValueError('Parse error: saw </%s> instead of expected </%s>' % (tag,frag.__tag__))
96
2a9cca4c5cf0 Beginnings of a paragraph parser
rgbecker
parents:
diff changeset
  1090
1677
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
  1091
    def getAttributes(self,attr,attrMap):
        '''Translate raw tag attributes into internal names and values.

        attr maps attribute names, as given for the tag, to raw values.
        attrMap maps each accepted name to a sequence whose first item is
        the output key and whose second item is either None (keep the raw
        value) or a callable applied to the raw value.  Names are lower
        cased before the lookup unless self.caseSensitive is true.  A name
        missing from attrMap is reported through self._syntax_error and
        contributes nothing to the returned dict.
        '''
        result = {}
        for rawName, rawValue in attr.items():
            key = rawName if self.caseSensitive else rawName.lower()
            if key not in attrMap:
                self._syntax_error('invalid attribute name %s attrMap=%r'% (key,sorted(attrMap.keys())))
                continue
            entry = attrMap[key]
            convert = entry[1]
            if convert is None:
                result[entry[0]] = rawValue
            else:
                result[entry[0]] = convert(rawValue)
        return result
119
b4dc589c8364 <para> tag added in layout.py paraparser.py
rgbecker
parents: 115
diff changeset
  1103
1677
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
  1104
    #----------------------------------------------------------------
96
2a9cca4c5cf0 Beginnings of a paragraph parser
rgbecker
parents:
diff changeset
  1105
4255
89ea1d46b4a0 make paraparser syntax errors real and fix <sup/sub> tags to have relative values; version-->3.3.1
robin
parents: 4252
diff changeset
  1106
    def __init__(self,verbose=0, caseSensitive=0, ignoreUnknownTags=1, crashOnError=True):
4077
ac3fcb7cc6f4 paraparser.py: fix convert_charrefs usage in HTMLParser.__init__; pointed out by Ivan Tchomgue @ bitbucket
robin
parents: 4067
diff changeset
  1107
        HTMLParser.__init__(self,
ac3fcb7cc6f4 paraparser.py: fix convert_charrefs usage in HTMLParser.__init__; pointed out by Ivan Tchomgue @ bitbucket
robin
parents: 4067
diff changeset
  1108
            **(dict(convert_charrefs=False) if sys.version_info>=(3,4) else {}))
3826
02f216b6e38e paraparser.py: support old behaviours eg ignore unknown tags etc etc
robin
parents: 3812
diff changeset
  1109
        self.verbose = verbose
4077
ac3fcb7cc6f4 paraparser.py: fix convert_charrefs usage in HTMLParser.__init__; pointed out by Ivan Tchomgue @ bitbucket
robin
parents: 4067
diff changeset
  1110
        #HTMLParser is case insenstive anyway, but the rml interface still needs this
4059
9b48d0d9f137 paraparse.py: add ondraw=onDraw aliases
robin
parents: 4004
diff changeset
  1111
        #all start/end_ methods should have a lower case version for HMTMParser
3826
02f216b6e38e paraparser.py: support old behaviours eg ignore unknown tags etc etc
robin
parents: 3812
diff changeset
  1112
        self.caseSensitive = caseSensitive
02f216b6e38e paraparser.py: support old behaviours eg ignore unknown tags etc etc
robin
parents: 3812
diff changeset
  1113
        self.ignoreUnknownTags = ignoreUnknownTags
4255
89ea1d46b4a0 make paraparser syntax errors real and fix <sup/sub> tags to have relative values; version-->3.3.1
robin
parents: 4252
diff changeset
  1114
        self._crashOnError = crashOnError
266
081154da1a78 Added Sequencer and associated XML tags
andy_robinson
parents: 253
diff changeset
  1115
1677
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
  1116
    def _iReset(self):
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
  1117
        self.fragList = []
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
  1118
        if hasattr(self, 'bFragList'): delattr(self,'bFragList')
250
a1bcf9c6c21e <bullet> xml tag added
rgbecker
parents: 248
diff changeset
  1119
1677
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
  1120
    def _reset(self, style):
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
  1121
        '''reset the parser'''
96
2a9cca4c5cf0 Beginnings of a paragraph parser
rgbecker
parents:
diff changeset
  1122
3957
c7cedb1dde29 paraparser.py: remove usage of pyRXPU, remove attempted optimization of texts
robin
parents: 3956
diff changeset
  1123
        HTMLParser.reset(self)
1677
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
  1124
        # initialize list of string segments to empty
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
  1125
        self.errors = []
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
  1126
        self._style = style
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
  1127
        self._iReset()
96
2a9cca4c5cf0 Beginnings of a paragraph parser
rgbecker
parents:
diff changeset
  1128
1677
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
  1129
    #----------------------------------------------------------------
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
  1130
    def handle_data(self,data):
        '''Create a fragment for a run of character data.

        A shallow copy of the fragment on top of the stack is adjusted for
        sub/sup and greek, has its fontName passed through tt2ps with its
        bold and italic flags, gets data as its text and is appended to
        bFragList (when it carries isBullet, which is removed) or otherwise
        to fragList.  Every call that gets that far appends one fragment;
        nothing here merges the text of successive calls.

        A fragment with a cbDefn must have empty data, otherwise a syntax
        error is reported.  Inside a self closing tag any data is reported
        as a syntax error and no fragment is appended.
        '''
        piece = copy.copy(self._stack[-1])
        if hasattr(piece,'cbDefn'):
            tagKind = piece.cbDefn.kind
            if data:
                self._syntax_error('Only empty <%s> tag allowed' % tagKind)
        elif hasattr(piece,'_selfClosingTag'):
            if data!='':
                self._syntax_error('No content allowed in %s tag' % piece._selfClosingTag)
            return
        else:
            #sub and sup both switched on cancel each other out
            if piece.sub == 1 and piece.sup == 1:
                piece.sub = piece.sup = 0

            if piece.sub or piece.sup:
                #sub takes precedence; it lowers the text, sup raises it
                if piece.sub:
                    piece.rise = -fontSizeNormalize(piece,'supr',piece.fontSize*subFraction)
                else:
                    piece.rise = fontSizeNormalize(piece,'supr',piece.fontSize*supFraction)
                #both use the same size computation
                piece.fontSize = fontSizeNormalize(piece,'sups',piece.fontSize-min(sizeDelta,0.2*piece.fontSize))

            if piece.greek:
                piece.fontName = 'symbol'
                data = _greekConvert(data)

        #resolve the font name from the bold and italic flags
        piece.fontName = tt2ps(piece.fontName,piece.bold,piece.italic)

        #store the text on the fragment
        piece.text = data

        if hasattr(piece,'isBullet'):
            delattr(piece,'isBullet')
            target = self.bFragList
        else:
            target = self.fragList
        target.append(piece)
96
2a9cca4c5cf0 Beginnings of a paragraph parser
rgbecker
parents:
diff changeset
  1174
1677
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1160
diff changeset
  1175
    def handle_cdata(self,data):
        '''Pass CDATA content straight to handle_data; nothing is returned.'''
        self.handle_data(data)
211
52541f1643b6 CDATA handler added
rgbecker
parents: 209
diff changeset
  1177
2376
7e70411a7236 flowables.py: minor change to PTOContainer
rgbecker
parents: 2369
diff changeset
  1178
    def _setup_for_parse(self,style):
        '''Prepare for a parse: store the sequencer as self._seq, then reset for style.'''
        sequencer = reportlab.lib.sequencer.getSequencer()
        self._seq = sequencer
        #reinitialise the parser
        self._reset(style)
7e70411a7236 flowables.py: minor change to PTOContainer
rgbecker
parents: 2369
diff changeset
  1181
7e70411a7236 flowables.py: minor change to PTOContainer
rgbecker
parents: 2369
diff changeset
  1182
    def _complete_parse(self):
3954
44dbe56eb858 first attempt at HTMLParser-based paraparser
Andy Robinson <andy@reportlab.com>
parents: 3937
diff changeset
  1183
        "Reset after parsing, to be ready for next parag