src/reportlab/lib/PyFontify.py
author rptlab
Tue, 30 Apr 2013 14:28:14 +0100
branchpy33
changeset 3723 99aa837b6703
parent 3721 0c93dd8ff567
child 4252 fe660f227cac
permissions -rw-r--r--
second stage of port to Python 3.3; working hello world
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
3617
ae5744e97c42 reportlab: copyright date changes
robin
parents: 3326
diff changeset
     1
#Copyright ReportLab Europe Ltd. 2000-2012
3029
eded59f94021 adding docstrings to lib
andy
parents: 3028
diff changeset
     2
#see license.txt for license details
3031
6f90e7668adb docstrings cleaned up for epydoc
tim
parents: 3029
diff changeset
     3
__version__=''' $Id$ '''
3029
eded59f94021 adding docstrings to lib
andy
parents: 3028
diff changeset
     4
__doc__="""
3028
082f5208644e docstring modifications to adhere to restructuredtext
damian
parents: 2964
diff changeset
     5
Module to analyze Python source code; for syntax coloring tools.
1108
0fe339abfbfd Added Just's PyFontify 0.4 (now using re).
dinu_gherman
parents:
diff changeset
     6
3028
082f5208644e docstring modifications to adhere to restructuredtext
damian
parents: 2964
diff changeset
     7
Interface::
3031
6f90e7668adb docstrings cleaned up for epydoc
tim
parents: 3029
diff changeset
     8
1677
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1602
diff changeset
     9
    tags = fontify(pytext, searchfrom, searchto)
1108
0fe339abfbfd Added Just's PyFontify 0.4 (now using re).
dinu_gherman
parents:
diff changeset
    10
3028
082f5208644e docstring modifications to adhere to restructuredtext
damian
parents: 2964
diff changeset
    11
 - The 'pytext' argument is a string containing Python source code.
082f5208644e docstring modifications to adhere to restructuredtext
damian
parents: 2964
diff changeset
    12
 - The (optional) arguments 'searchfrom' and 'searchto' delimit the slice of pytext that is searched.
082f5208644e docstring modifications to adhere to restructuredtext
damian
parents: 2964
diff changeset
    13
 - The returned value is a list of tuples, formatted like this::
1677
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1602
diff changeset
    14
    [('keyword', 0, 6, None), ('keyword', 11, 17, None), ('comment', 23, 53, None), etc. ]
3028
082f5208644e docstring modifications to adhere to restructuredtext
damian
parents: 2964
diff changeset
    15
082f5208644e docstring modifications to adhere to restructuredtext
damian
parents: 2964
diff changeset
    16
 - The tuple contents are always like this::
1677
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1602
diff changeset
    17
    (tag, startindex, endindex, sublist)
3028
082f5208644e docstring modifications to adhere to restructuredtext
damian
parents: 2964
diff changeset
    18
082f5208644e docstring modifications to adhere to restructuredtext
damian
parents: 2964
diff changeset
    19
 - tag is one of 'keyword', 'string', 'comment' or 'identifier'
082f5208644e docstring modifications to adhere to restructuredtext
damian
parents: 2964
diff changeset
    20
 - sublist is not used, hence always None.
1108
0fe339abfbfd Added Just's PyFontify 0.4 (now using re).
dinu_gherman
parents:
diff changeset
    21
"""
0fe339abfbfd Added Just's PyFontify 0.4 (now using re).
dinu_gherman
parents:
diff changeset
    22
0fe339abfbfd Added Just's PyFontify 0.4 (now using re).
dinu_gherman
parents:
diff changeset
    23
# Based on FontText.py by Mitchell S. Chapman,
0fe339abfbfd Added Just's PyFontify 0.4 (now using re).
dinu_gherman
parents:
diff changeset
    24
# which was modified by Zachary Roadhouse,
0fe339abfbfd Added Just's PyFontify 0.4 (now using re).
dinu_gherman
parents:
diff changeset
    25
# then un-Tk'd by Just van Rossum.
0fe339abfbfd Added Just's PyFontify 0.4 (now using re).
dinu_gherman
parents:
diff changeset
    26
# Many thanks for regular expression debugging & authoring are due to:
1677
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1602
diff changeset
    27
#   Tim (the-incredib-ly y'rs) Peters and Christian Tismer
1108
0fe339abfbfd Added Just's PyFontify 0.4 (now using re).
dinu_gherman
parents:
diff changeset
    28
# So, who owns the copyright? ;-) How about this:
1683
7fa753e4420a Removed all trailing whitespace
andy_robinson
parents: 1677
diff changeset
    29
# Copyright 1996-2001:
1677
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1602
diff changeset
    30
#   Mitchell S. Chapman,
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1602
diff changeset
    31
#   Zachary Roadhouse,
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1602
diff changeset
    32
#   Tim Peters,
1450177dd19e Exterminated all tab characters and added a test to make sure
andy_robinson
parents: 1602
diff changeset
    33
#   Just van Rossum
1108
0fe339abfbfd Added Just's PyFontify 0.4 (now using re).
dinu_gherman
parents:
diff changeset
    34
0fe339abfbfd Added Just's PyFontify 0.4 (now using re).
dinu_gherman
parents:
diff changeset
    35
__version__ = "0.4"
0fe339abfbfd Added Just's PyFontify 0.4 (now using re).
dinu_gherman
parents:
diff changeset
    36
0fe339abfbfd Added Just's PyFontify 0.4 (now using re).
dinu_gherman
parents:
diff changeset
    37
import re
0fe339abfbfd Added Just's PyFontify 0.4 (now using re).
dinu_gherman
parents:
diff changeset
    38
0fe339abfbfd Added Just's PyFontify 0.4 (now using re).
dinu_gherman
parents:
diff changeset
    39
# First a little helper, since I don't like to repeat things. (Tismer speaking)
2703
6c68afa0c7cb PyFontify: eliminate soon to be keyword usage(with)
rgbecker
parents: 1683
diff changeset
    40
def replace(src, sep, rep):
    """Return a copy of *src* with every occurrence of *sep* replaced by *rep*.

    Implemented as split-and-join, so an empty *sep* raises ValueError
    (from ``str.split``) instead of inserting *rep* between characters.
    """
    return rep.join(src.split(sep))
1108
0fe339abfbfd Added Just's PyFontify 0.4 (now using re).
dinu_gherman
parents:
diff changeset
    42
0fe339abfbfd Added Just's PyFontify 0.4 (now using re).
dinu_gherman
parents:
diff changeset
    43
# This list of keywords started from ref/node13.html of the
# Python 1.3 HTML documentation. ("access" is intentionally omitted.)
# The Python 2 statement keywords "exec" and "print" are kept, and
# "with" and "nonlocal" are listed so newer sources are coloured too.
keywordsList = [
    "as", "assert", "exec",
    "del", "from", "lambda", "return",
    "and", "elif", "global", "not", "try",
    "break", "else", "if", "or", "while",
    "class", "except", "import", "pass",
    "continue", "finally", "in", "print",
    "def", "for", "is", "raise", "yield",
    "with", "nonlocal"]

# Build up a regular expression which will match anything
# interesting, including multi-line triple-quoted strings.
commentPat = r"#[^\n]*"

# Template for a one-line string literal; every "q" is a placeholder that
# is substituted with the actual quote character below.
pat = r"q[^\\q\n]*(\\[\000-\377][^\\q\n]*)*q"
quotePat = pat.replace("q", "'") + "|" + pat.replace("q", '"')

# Way to go, Tim!
# Template for a triple-quoted literal: inside the body, one or two quote
# characters are only accepted when followed by an escape pair or by a
# character that is neither a backslash nor a quote.
pat = r"""
    qqq
    [^\\q]*
    (
        (   \\[\000-\377]
        |   q
            (   \\[\000-\377]
            |   [^\\q]
            |   q
                (   \\[\000-\377]
                |   [^\\q]
                )
            )
        )
        [^\\q]*
    )*
    qqq
"""
pat = ''.join(pat.split())  # get rid of whitespace
tripleQuotePat = pat.replace("q", "'") + "|" + pat.replace("q", '"')

# Build up a regular expression which matches all and only
# Python keywords. This will let us skip the uninteresting
# identifier references.
# nonKeyPat identifies characters which may legally precede
# a keyword pattern.
nonKeyPat = r"(^|[^a-zA-Z0-9_.\"'])"
# What may follow a keyword: the same delimiter class, or the end of the
# text, so that a keyword ending the input is still recognised.
_keyEndPat = r"($|[^a-zA-Z0-9_.\"'])"

# The keyword itself is captured in a named group so that fontify() can
# read its exact span instead of guessing how many delimiter characters
# surround it in the overall match.
keyPat = nonKeyPat + "(?P<keyword>" + "|".join(keywordsList) + ")" + _keyEndPat

matchPat = commentPat + "|" + keyPat + "|" + tripleQuotePat + "|" + quotePat
matchRE = re.compile(matchPat)

idKeyPat = "[ \t]*[A-Za-z_][A-Za-z_0-9.]*"  # Ident w. leading whitespace.
idRE = re.compile(idKeyPat)


def fontify(pytext, searchfrom = 0, searchto = None):
    """Return syntax-colouring tags for the Python source in *pytext*.

    pytext     -- string containing Python source code.
    searchfrom -- index at which scanning starts (default 0).
    searchto   -- scanning stops at the first match that starts at or
                  after this index (default: len(pytext)).  A match that
                  starts before it may extend past it.

    The result is a list of ``(tag, startindex, endindex, None)`` tuples in
    order of appearance, where tag is 'keyword', 'string', 'comment' or
    'identifier'.  An 'identifier' tag is emitted for the name directly
    following a ``def`` or ``class`` keyword; its span includes the blanks
    or tabs between the keyword and the name.

    Note: a keyword needs a preceding delimiter inside the scanned region
    (or the real start of the text), so a keyword beginning exactly at a
    non-zero *searchfrom* is not reported.
    """
    if searchto is None:
        searchto = len(pytext)
    # Cache a few attributes for quicker reference.
    search = matchRE.search
    idMatch = idRE.match

    tags = []
    tags_append = tags.append
    commentTag = 'comment'
    stringTag = 'string'
    keywordTag = 'keyword'
    identifierTag = 'identifier'

    end = searchfrom
    while True:
        m = search(pytext, end)
        if m is None:
            break   # EXIT LOOP
        start = m.start()
        if start >= searchto:
            break   # EXIT LOOP
        keyword = m.group("keyword")
        if keyword is not None:
            # Use the span of the keyword group itself; the overall match
            # may carry a delimiter character on either side.  Resuming at
            # the keyword's end leaves the trailing delimiter available as
            # the leading delimiter of the next keyword.
            start, end = m.span("keyword")
            tags_append((keywordTag, start, end, None))
            # If this was a defining keyword, look ahead to the
            # following identifier.
            if keyword in ("def", "class"):
                m = idMatch(pytext, end)   # anchored right after the keyword
                if m is not None:
                    start, end = m.span()
                    tags_append((identifierTag, start, end, None))
        else:
            end = m.end()
            if pytext[start] == "#":
                tags_append((commentTag, start, end, None))
            else:
                tags_append((stringTag, start, end, None))
    return tags
1108
0fe339abfbfd Added Just's PyFontify 0.4 (now using re).
dinu_gherman
parents:
diff changeset
   152
0fe339abfbfd Added Just's PyFontify 0.4 (now using re).
dinu_gherman
parents:
diff changeset
   153
0fe339abfbfd Added Just's PyFontify 0.4 (now using re).
dinu_gherman
parents:
diff changeset
   154
def test(path):
    """Fontify the file at *path* and print each tag with the text it covers.

    One line is printed per tag: the tag name followed by the repr of the
    corresponding slice of the file's text.
    """
    # The context manager closes the file even if read() raises.
    with open(path) as f:
        text = f.read()
    tags = fontify(text)
    for tag, start, end, sublist in tags:
        print(tag, repr(text[start:end]))