demos/gadflypaper/gfe.py
author robin <robin@reportlab.com>
Tue, 07 Mar 2017 10:00:34 +0000
changeset 4330 617ffa6bbdc8
parent 4252 fe660f227cac
child 4584 5c65456396b9
permissions -rw-r--r--
changes for release 3.4.0
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
4330
617ffa6bbdc8 changes for release 3.4.0
robin <robin@reportlab.com>
parents: 4252
diff changeset
     1
#Copyright ReportLab Europe Ltd. 2000-2017
2963
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
     2
#see license.txt for license details
2967
ea62529bd1df reportlab-2.2: first stage changes in on the trunk
rgbecker
parents: 2963
diff changeset
     3
__doc__=''
4252
fe660f227cac changes for release 3.3.0
robin
parents: 3617
diff changeset
     4
__version__='3.3.0'
2963
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
     5
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
     6
#REPORTLAB_TEST_SCRIPT
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
     7
import sys
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
     8
from reportlab.platypus import *
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
     9
from reportlab.lib.styles import getSampleStyleSheet
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    10
from reportlab.rl_config import defaultPageSize
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    11
PAGE_HEIGHT=defaultPageSize[1]
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    12
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    13
styles = getSampleStyleSheet()
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    14
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    15
Title = "Integrating Diverse Data Sources with Gadfly 2"
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    16
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    17
Author = "Aaron Watters"
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    18
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    19
URL = "http://www.chordate.com/"
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    20
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    21
email = "arw@ifu.net"
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    22
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    23
Abstract = """This paper describes the primative methods underlying the implementation
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    24
of SQL query evaluation in Gadfly 2, a database management system implemented
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    25
in Python [Van Rossum]. The major design goals behind
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    26
the architecture described here are to simplify the implementation
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    27
and to permit flexible and efficient extensions to the gadfly
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    28
engine. Using this architecture and its interfaces programmers
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    29
can add functionality to the engine such as alternative disk based
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    30
indexed table implementations, dynamic interfaces to remote data
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    31
bases or or other data sources, and user defined computations."""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    32
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    33
from reportlab.lib.units import inch
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    34
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    35
pageinfo = "%s / %s / %s" % (Author, email, Title)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    36
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    37
def myFirstPage(canvas, doc):
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    38
    canvas.saveState()
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    39
    #canvas.setStrokeColorRGB(1,0,0)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    40
    #canvas.setLineWidth(5)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    41
    #canvas.line(66,72,66,PAGE_HEIGHT-72)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    42
    canvas.setFont('Times-Bold',16)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    43
    canvas.drawString(108, PAGE_HEIGHT-108, Title)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    44
    canvas.setFont('Times-Roman',9)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    45
    canvas.drawString(inch, 0.75 * inch, "First Page / %s" % pageinfo)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    46
    canvas.restoreState()
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    47
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    48
def myLaterPages(canvas, doc):
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    49
    #canvas.drawImage("snkanim.gif", 36, 36)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    50
    canvas.saveState()
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    51
    #canvas.setStrokeColorRGB(1,0,0)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    52
    #canvas.setLineWidth(5)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    53
    #canvas.line(66,72,66,PAGE_HEIGHT-72)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    54
    canvas.setFont('Times-Roman',9)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    55
    canvas.drawString(inch, 0.75 * inch, "Page %d %s" % (doc.page, pageinfo))
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    56
    canvas.restoreState()
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    57
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    58
def go():
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    59
    Elements.insert(0,Spacer(0,inch))
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    60
    doc = SimpleDocTemplate('gfe.pdf')
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    61
    doc.build(Elements,onFirstPage=myFirstPage, onLaterPages=myLaterPages)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    62
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    63
Elements = []
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    64
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    65
HeaderStyle = styles["Heading1"] # XXXX
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    66
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    67
def header(txt, style=HeaderStyle, klass=Paragraph, sep=0.3):
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    68
    s = Spacer(0.2*inch, sep*inch)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    69
    Elements.append(s)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    70
    para = klass(txt, style)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    71
    Elements.append(para)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    72
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    73
ParaStyle = styles["Normal"]
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    74
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    75
def p(txt):
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    76
    return header(txt, style=ParaStyle, sep=0.1)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    77
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    78
#pre = p # XXX
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    79
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    80
PreStyle = styles["Code"]
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    81
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    82
def pre(txt):
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    83
    s = Spacer(0.1*inch, 0.1*inch)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    84
    Elements.append(s)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    85
    p = Preformatted(txt, PreStyle)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    86
    Elements.append(p)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    87
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    88
#header(Title, sep=0.1. style=ParaStyle)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    89
header(Author, sep=0.1, style=ParaStyle)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    90
header(URL, sep=0.1, style=ParaStyle)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    91
header(email, sep=0.1, style=ParaStyle)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    92
header("ABSTRACT")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    93
p(Abstract)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    94
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    95
header("Backgrounder")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    96
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    97
p("""\
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    98
The term "database" usually refers to a persistent
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
    99
collection of data.  Data is persistent if it continues
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   100
to exist whether or not it is associated with a running
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   101
process on the computer, or even if the computer is
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   102
shut down and restarted at some future time.  Database
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   103
management systems provide support for constructing databases,
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   104
maintaining databases, and extracting information from databases.""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   105
p("""\
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   106
Relational databases manipulate and store persistent
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   107
table structures called relations, such as the following
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   108
three tables""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   109
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   110
pre("""\
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   111
 -- drinkers who frequent bars (this is a comment)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   112
 select * from frequents
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   113
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   114
 DRINKER | PERWEEK | BAR
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   115
 ============================
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   116
 adam    | 1       | lolas
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   117
 woody   | 5       | cheers
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   118
 sam     | 5       | cheers
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   119
 norm    | 3       | cheers
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   120
 wilt    | 2       | joes
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   121
 norm    | 1       | joes
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   122
 lola    | 6       | lolas
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   123
 norm    | 2       | lolas
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   124
 woody   | 1       | lolas
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   125
 pierre  | 0       | frankies
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   126
)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   127
""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   128
pre("""\
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   129
 -- drinkers who like beers
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   130
 select * from likes
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   131
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   132
 DRINKER | PERDAY | BEER
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   133
 ===============================
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   134
 adam    | 2      | bud
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   135
 wilt    | 1      | rollingrock
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   136
 sam     | 2      | bud
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   137
 norm    | 3      | rollingrock
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   138
 norm    | 2      | bud
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   139
 nan     | 1      | sierranevada
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   140
 woody   | 2      | pabst
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   141
 lola    | 5      | mickies
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   142
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   143
""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   144
pre("""\
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   145
 -- beers served from bars
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   146
 select * from serves
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   147
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   148
 BAR      | QUANTITY | BEER
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   149
 =================================
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   150
 cheers   | 500      | bud
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   151
 cheers   | 255      | samadams
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   152
 joes     | 217      | bud
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   153
 joes     | 13       | samadams
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   154
 joes     | 2222     | mickies
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   155
 lolas    | 1515     | mickies
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   156
 lolas    | 333      | pabst
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   157
 winkos   | 432      | rollingrock
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   158
 frankies | 5        | snafu
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   159
""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   160
p("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   161
The relational model for database structures makes
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   162
the simplifying assumption that all data in a database
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   163
can be represented in simple table structures
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   164
such as these.  Although this assumption seems extreme
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   165
it provides a good foundation for defining solid and
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   166
well defined database management systems and some
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   167
of the most successful software companies in the
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   168
world, such as Oracle, Sybase, IBM, and Microsoft,
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   169
have marketed database management systems based on
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   170
the relational model quite successfully.
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   171
""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   172
p("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   173
SQL stands for Structured Query Language.
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   174
The SQL language defines industry standard
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   175
mechanisms for creating, querying, and modified
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   176
relational tables. Several years ago SQL was one
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   177
of many Relational Database Management System
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   178
(RDBMS) query languages in use, and many would
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   179
argue not the best on. Now, largely due
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   180
to standardization efforts and the
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   181
backing of IBM, SQL is THE standard way to talk
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   182
to database systems.
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   183
""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   184
p("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   185
There are many advantages SQL offers over other
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   186
database query languages and alternative paradigms
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   187
at this time (please see [O'Neill] or [Korth and Silberschatz]
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   188
for more extensive discussions and comparisons between the
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   189
SQL/relational approach and others.)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   190
""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   191
p("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   192
The chief advantage over all contenders at this time
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   193
is that SQL and the relational model are now widely
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   194
used as interfaces and back end data stores to many
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   195
different products with different performance characteristics,
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   196
user interfaces, and other qualities: Oracle, Sybase,
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   197
Ingres, SQL Server, Access, Outlook,
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   198
Excel, IBM DB2, Paradox, MySQL, MSQL, POSTgres, and many
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   199
others.  For this reason a program designed to use
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   200
an SQL database as its data storage mechanism can
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   201
easily be ported from one SQL data manager to another,
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   202
possibly on different platforms.  In fact the same
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   203
program can seamlessly use several backends and/or
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   204
import/export data between different data base platforms
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   205
with trivial ease.
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   206
No other paradigm offers such flexibility at the moment.
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   207
""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   208
p("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   209
Another advantage which is not as immediately
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   210
obvious is that the relational model and the SQL
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   211
query language are easily understood by semi-technical
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   212
and non-technical professionals, such as business
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   213
people and accountants.  Human resources managers
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   214
who would be terrified by an object model diagram
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   215
or a snippet of code that resembles a conventional
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   216
programming language will frequently feel quite at
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   217
ease with a relational model which resembles the
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   218
sort of tabular data they deal with on paper in
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   219
reports and forms on a daily basis.  With a little training the
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   220
same HR managers may be able to translate the request
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   221
"Who are the drinkers who like bud and frequent cheers?"
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   222
into the SQL query
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   223
""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   224
pre("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   225
    select drinker
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   226
    from frequents
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   227
    where bar='cheers'
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   228
      and drinker in (
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   229
          select drinker
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   230
          from likes
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   231
          where beer='bud')
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   232
""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   233
p("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   234
(or at least they have some hope of understanding
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   235
the query once it is written by a technical person
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   236
or generated by a GUI interface tool).  Thus the use
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   237
of SQL and the relational model enables communication
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   238
between different communities which must understand
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   239
and interact with stored information. In contrast
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   240
many other approaches cannot be understood easily
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   241
by people without extensive programming experience.
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   242
""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   243
p("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   244
Furthermore the declarative nature of SQL
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   245
lends itself to automatic query optimization,
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   246
and engines such as Gadfly can automatically translate a user query
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   247
into an optimized query plan which takes
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   248
advantage of available indices and other data characteristics.
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   249
In contrast more navigational techniques require the application
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   250
program itself to optimize the accesses to the database and
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   251
explicitly make use of indices.
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   252
""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   253
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   254
# HACK
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   255
Elements.append(PageBreak())
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   256
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   257
p("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   258
While it must be admitted that there are application
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   259
domains such as computer aided engineering design where
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   260
the relational model is unnatural, it is also important
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   261
to recognize that for many application domains (such
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   262
as scheduling, accounting, inventory, finance, personal
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   263
information management, electronic mail) the relational
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   264
model is a very natural fit and the SQL query language
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   265
make most accesses to the underlying data (even sophisticated
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   266
ones) straightforward.  """)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   267
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   268
p("""For an example of a moderately
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   269
sophisticated query using the tables given above,
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   270
the following query lists the drinkers who frequent lolas bar
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   271
and like at least two beers not served by lolas
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   272
""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   273
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   274
if 0:
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   275
   go()
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   276
   sys.exit(1)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   277
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   278
pre("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   279
    select f.drinker
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   280
    from frequents f, likes l
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   281
    where f.drinker=l.drinker and f.bar='lolas'
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   282
      and l.beer not in
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   283
       (select beer from serves where bar='lolas')
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   284
    group by f.drinker
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   285
    having count(distinct beer)>=2
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   286
""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   287
p("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   288
yielding the result
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   289
""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   290
pre("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   291
    DRINKER
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   292
    =======
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   293
    norm
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   294
""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   295
p("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   296
Experience shows that queries of this sort are actually
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   297
quite common in many applications, and are often much more
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   298
difficult to formulate using some navigational database
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   299
organizations, such as some "object oriented" database
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   300
paradigms.
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   301
""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   302
p("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   303
Certainly,
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   304
SQL does not provide all you need to interact with
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   305
databases -- in order to do "real work" with SQL you
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   306
need to use SQL and at least one other language
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   307
(such as C, Pascal, C++, Perl, Python, TCL, Visual Basic
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   308
or others) to do work (such as readable formatting a report
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   309
from raw data) that SQL was not designed to do.
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   310
""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   311
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   312
header("Why Gadfly 1?")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   313
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   314
p("""Gadfly 1.0 is an SQL based relational database implementation
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   315
implemented entirely in the Python programming language, with
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   316
optional fast data structure accellerators implemented in the
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   317
C programming language. Gadfly is relatively small, highly portable,
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   318
very easy to use (especially for programmers with previous experience
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   319
with SQL databases such as MS Access or Oracle), and reasonably
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   320
fast (especially when the kjbuckets C accellerators are used).
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   321
For moderate sized problems Gadfly offers a fairly complete
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   322
set of features such as transaction semantics, failure recovery,
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   323
and a TCP/IP based client/server mode (Please see [Gadfly] for
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   324
detailed discussion).""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   325
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   326
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   327
header("Why Gadfly 2?")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   328
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   329
p("""Gadfly 1.0 also has significant limitations. An active Gadfly
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   330
1.0 database keeps all data in (virtual) memory, and hence a Gadfly
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   331
1.0 database is limited in size to available virtual memory. Important
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   332
features such as date/time/interval operations, regular expression
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   333
matching and other standard SQL features are not implemented in
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   334
Gadfly 1.0. The optimizer and the query evaluator perform optimizations
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   335
using properties of the equality predicate but do not optimize
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   336
using properties of inequalities such as BETWEEN or less-than.
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   337
It is possible to add "extension views" to a Gadfly
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   338
1.0 database, but the mechanism is somewhat clumsy and indices
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   339
over extension views are not well supported. The features of Gadfly
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   340
2.0 discussed here attempt to address these deficiencies by providing
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   341
a uniform extension model that permits addition of alternate table,
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   342
function, and predicate implementations.""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   343
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   344
p("""Other deficiencies, such as missing constructs like "ALTER
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   345
TABLE" and the lack of outer joins and NULL values are not
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   346
addressed here, although they may be addressed in Gadfly 2.0 or
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   347
a later release. This paper also does not intend to explain
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   348
the complete operations of the internals; it is intended to provide
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   349
at least enough information to understand the basic mechanisms
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   350
for extending gadfly.""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   351
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   352
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   353
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   354
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   355
p("""Some concepts and definitions provided next help with the description
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   356
of the gadfly interfaces. [Note: due to the terseness of this
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   357
format the ensuing is not a highly formal presentation, but attempts
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   358
to approach precision where precision is important.]""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   359
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   360
header("The semilattice of substitutions")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   361
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   362
p("""Underlying the gadfly implementation are the basic concepts
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   363
associated with substitutions. A substitution is a mapping
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   364
of attribute names to values (implemented in gadfly using kjbuckets.kjDict
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   365
objects). Here an attribute refers to some sort of "descriptive
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   366
variable", such as NAME and a value is an assignment for that variable,
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   367
like "Dave Ascher".  In Gadfly a table is implemented as a sequence
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   368
of substitutions, and substitutions are used in many other ways as well.
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   369
""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   370
p("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   371
For example consider the substitutions""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   372
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   373
pre("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   374
    A = [DRINKER=>'sam']
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   375
    B = [DRINKER=>'sam', BAR=>'cheers']
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   376
    C = [DRINKER=>'woody', BEER=>'bud']
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   377
    D = [DRINKER=>'sam', BEER=>'mickies']
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   378
    E = [DRINKER=>'sam', BAR=>'cheers', BEER=>'mickies']
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   379
    F = [DRINKER=>'sam', BEER=>'mickies']
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   380
    G = [BEER=>'bud', BAR=>'lolas']
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   381
    H = [] # the empty substitution
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   382
    I = [BAR=>'cheers', CAPACITY=>300]""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   383
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   384
p("""A trivial but important observation is that since substitutions
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   385
are mappings, no attribute can assume more than one value in a
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   386
substitution. In the operations described below whenever an operator
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   387
"tries" to assign more than one value to an attribute
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   388
the operator yields an "overdefined" or "inconsistent"
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   389
result.""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   390
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   391
header("Information Semi-order:")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   392
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   393
p("""Substitution B is said to be
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   394
more informative than A because B agrees with all assignments
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   395
in A (in addition to providing more information as well). Similarly
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   396
we say that E is more informative than A, B, D, F. and H but E
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   397
is not more informative than the others since, for example G disagrees
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   398
with E on the value assigned to the BEER attribute and I provides
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   399
additional CAPACITY information not provided in E.""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   400
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   401
header("Joins and Inconsistency:")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   402
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   403
p("""A join of two substitutions
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   404
X and Y is the least informative substitution Z such that Z is
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   405
more informative (or equally informative) than both X and Y. For
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   406
example B is the join of B with A, E is the join of B with D and""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   407
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   408
pre("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   409
    E join I =
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   410
      [DRINKER=>'sam', BAR=>'cheers', BEER=>'mickies', CAPACITY=>300]""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   411
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   412
p("""For any two substitutions either (1) they disagree on the value
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   413
assigned to some attribute and have no join or (2) they agree
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   414
on all common attributes (if there are any) and their join is
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   415
the union of all (name, value) assignments in both substitutions.
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   416
Written in terms of kjbucket.kjDict operations two kjDicts X and
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   417
Y have a join Z = (X+Y) if and only if Z.Clean() is not None.
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   418
Two substitutions that have no join are said to be inconsistent.
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   419
For example I and G are inconsistent since they disagree on
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   420
the value assigned to the BAR attribute and therefore have no
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   421
join. The algebra of substitutions with joins technically defines
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   422
an abstract algebraic structure called a semilattice.""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   423
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   424
header("Name space remapping")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   425
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   426
p("""Another primitive operation over substitutions is the remap
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   427
operation S2 = S.remap(R) where S is a substitution and R is a
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   428
graph of attribute names and S2 is a substitution. This operation
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   429
is defined to produce the substitution S2 such that""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   430
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   431
pre("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   432
    Name=>Value in S2 if and only if
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   433
        Name1=>Value in S and Name<=Name1 in R
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   434
""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   435
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   436
p("""or if there is no such substitution S2 the remap value is said
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   437
to be overdefined.""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   438
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   439
p("""For example the remap operation may be used to eliminate attributes
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   440
from a substitution. For example""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   441
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   442
pre("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   443
    E.remap([DRINKER<=DRINKER, BAR<=BAR])
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   444
       = [DRINKER=>'sam', BAR=>'cheers']
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   445
""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   446
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   447
p("""Illustrating that remapping using the [DRINKER&lt;=DRINKER,
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   448
BAR&lt;=BAR] graph eliminates all attributes except DRINKER and
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   449
BAR, such as BEER. More generally remap can be used in this way
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   450
to implement the classical relational projection operation. (See [Korth and Silberschatz]
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   451
for a detailed discussion of the projection operator and other relational
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   452
algebra operators such as selection, rename, difference and joins.)""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   453
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   454
p("""The remap operation can also be used to implement "selection
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   455
on attribute equality". For example if we are interested
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   456
in the employee names of employees who are their own bosses we
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   457
can use the remapping graph""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   458
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   459
pre("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   460
    R1 = [NAME<=NAME, NAME<=BOSS]
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   461
""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   462
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   463
p("""and reject substitutions where remapping using R1 is overdefined.
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   464
For example""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   465
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   466
pre("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   467
    S1 = [NAME=>'joe', BOSS=>'joe']
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   468
    S1.remap(R1) = [NAME=>'joe']
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   469
    S2 = [NAME=>'fred', BOSS=>'joe']
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   470
    S2.remap(R1) is overdefined.
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   471
""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   472
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   473
p("""The last remap is overdefined because the NAME attribute cannot
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   474
assume both the values 'fred' and 'joe' in a substitution.""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   475
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   476
p("""Furthermore, of course, the remap operation can be used to
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   477
"rename attributes" or "copy attribute values"
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   478
in substitutions. Note below that the missing attribute CAPACITY
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   479
in B is effectively ignored in the remapping operation.""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   480
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   481
pre("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   482
    B.remap([D<=DRINKER, B<=BAR, B2<=BAR, C<=CAPACITY])
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   483
       = [D=>'sam', B=>'cheers', B2=>'cheers']
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   484
""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   485
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   486
p("""More interestingly, a single remap operation can be used to
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   487
perform a combination of renaming, projection, value copying,
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   488
and attribute equality selection as one operation. In kjbuckets the remapper
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   489
graph is implemented using a kjbuckets.kjGraph and the remap operation
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   490
is an intrinsic method of kjbuckets.kjDict objects.""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   491
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   492
header("Generalized Table Joins and the Evaluator Mainloop""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   493
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   494
p("""Strictly speaking the Gadfly 2.0 query evaluator only uses
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   495
the join and remap operations as its "basic assembly language"
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   496
-- all other computations, including inequality comparisons and
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   497
arithmetic, are implemented externally to the evaluator as "generalized
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   498
table joins." """)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   499
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   500
p("""A table is a sequence of substitutions (which in keeping with
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   501
SQL semantics may contain redundant entries). The join between
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   502
two tables T1 and T2 is the sequence of all possible defined joins
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   503
between pairs of elements from the two tables. Procedurally we
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   504
might compute the join as""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   505
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   506
pre("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   507
    T1JoinT2 = empty
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   508
    for t1 in T1:
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   509
        for t2 in T2:
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   510
            if t1 join t2 is defined:
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   511
                add t1 join t2 to T1joinT2""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   512
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   513
p("""In general circumstances this intuitive implementation is a
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   514
very inefficient way to compute the join, and Gadfly almost always
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   515
uses other methods, particularly since, as described below, a
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   516
"generalized table" can have an "infinite"
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   517
number of entries.""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   518
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   519
p("""For an example of a table join consider the EMPLOYEES table
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   520
containing""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   521
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   522
pre("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   523
    [NAME=>'john', JOB=>'executive']
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   524
    [NAME=>'sue', JOB=>'programmer']
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   525
    [NAME=>'eric', JOB=>'peon']
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   526
    [NAME=>'bill', JOB=>'peon']
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   527
""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   528
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   529
p("""and the ACTIVITIES table containing""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   530
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   531
pre("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   532
     [JOB=>'peon', DOES=>'windows']
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   533
     [JOB=>'peon', DOES=>'floors']
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   534
     [JOB=>'programmer', DOES=>'coding']
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   535
     [JOB=>'secretary', DOES=>'phone']""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   536
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   537
p("""then the join between EMPLOYEES and ACTIVITIES must containining""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   538
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   539
pre("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   540
    [NAME=>'sue', JOB=>'programmer', DOES=>'coding']
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   541
    [NAME=>'eric', JOB=>'peon', DOES=>'windows']
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   542
    [NAME=>'bill', JOB=>'peon', DOES=>'windows']
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   543
    [NAME=>'eric', JOB=>'peon', DOES=>'floors']
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   544
    [NAME=>'bill', JOB=>'peon', DOES=>'floors']""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   545
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   546
p("""A compiled gadfly subquery ultimately appears to the evaluator
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   547
as a sequence of generalized tables that must be joined (in combination
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   548
with certain remapping operations that are beyond the scope of
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   549
this discussion). The Gadfly mainloop proceeds following the very
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   550
loose pseudocode:""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   551
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   552
pre("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   553
    Subs = [ [] ] # the unary sequence containing "true"
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   554
    While some table hasn't been chosen yet:
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   555
        Choose an unchosen table with the least cost join estimate.
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   556
        Subs = Subs joined with the chosen table
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   557
    return Subs""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   558
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   559
p("""[Note that it is a property of the join operation that the
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   560
order in which the joins are carried out will not affect the result,
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   561
so the greedy strategy of evaluating the "cheapest join next"
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   562
will not effect the result. Also note that the treatment of logical
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   563
OR and NOT as well as EXIST, IN, UNION, and aggregation and so
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   564
forth are not discussed here, even though they do fit into this
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   565
approach.]""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   566
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   567
p("""The actual implementation is a bit more complex than this,
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   568
but the above outline may provide some useful intuition. The "cost
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   569
estimation" step and the implementation of the join operation
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   570
itself are left up to the generalized table object implementation.
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   571
A table implementation has the ability to give an "infinite"
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   572
cost estimate, which essentially means "don't join me in
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   573
yet under any circumstances." """)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   574
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   575
header("Implementing Functions")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   576
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   577
p("""As mentioned above operations such as arithmetic are implemented
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   578
using generalized tables. For example the arithmetic Add operation
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   579
is implemented in Gadfly internally as an "infinite generalized
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   580
table" containing all possible substitutions""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   581
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   582
pre("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   583
    ARG0=>a, ARG1=>b, RESULT=>a+b]
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   584
""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   585
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   586
p("""Where a and b are all possible values which can be summed.
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   587
Clearly, it is not possible to enumerate this table, but given
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   588
a sequence of substitutions with defined values for ARG0 and ARG1
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   589
such as""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   590
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   591
pre("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   592
    [ARG0=>1, ARG1=-4]
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   593
    [ARG0=>2.6, ARG1=50]
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   594
    [ARG0=>99, ARG1=1]
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   595
""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   596
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   597
p("""it is possible to implement a "join operation" against
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   598
this sequence that performs the same augmentation as a join with
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   599
the infinite table defined above:""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   600
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   601
pre("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   602
    [ARG0=>1, ARG1=-4, RESULT=-3]
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   603
    [ARG0=>2.6, ARG1=50, RESULT=52.6]
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   604
    [ARG0=>99, ARG1=1, RESULT=100]
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   605
""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   606
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   607
p("""Furthermore by giving an "infinite estimate" for
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   608
all attempts to evaluate the join where ARG0 and ARG1 are not
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   609
available the generalized table implementation for the addition
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   610
operation can refuse to compute an "infinite join." """)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   611
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   612
p("""More generally all functions f(a,b,c,d) are represented in
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   613
gadfly as generalized tables containing all possible relevant
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   614
entries""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   615
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   616
pre("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   617
    [ARG0=>a, ARG1=>b, ARG2=>c, ARG3=>d, RESULT=>f(a,b,c,d)]""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   618
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   619
p("""and the join estimation function refuses all attempts to perform
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   620
a join unless all the arguments are provided by the input substitution
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   621
sequence.""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   622
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   623
header("Implementing Predicates")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   624
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   625
p("""Similarly to functions, predicates such as less-than and BETWEEN
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   626
and LIKE are implemented using the generalized table mechanism.
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   627
For example the "x BETWEEN y AND z" predicate is implemented
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   628
as a generalized table "containing" all possible""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   629
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   630
pre("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   631
    [ARG0=>a, ARG1=>b, ARG2=>c]""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   632
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   633
p("""where b&lt;a&lt;c. Furthermore joins with this table are not
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   634
permitted unless all three arguments are available in the sequence
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   635
of input substitutions.""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   636
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   637
header("Some Gadfly extension interfaces")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   638
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   639
p("""A gadfly database engine may be extended with user defined
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   640
functions, predicates, and alternative table and index implementations.
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   641
This section snapshots several Gadfly 2.0 interfaces, currently under
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   642
development and likely to change before the package is released.""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   643
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   644
p("""The basic interface for adding functions and predicates (logical tests)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   645
to a gadfly engine are relatively straightforward.  For example to add the
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   646
ability to match a regular expression within a gadfly query use the
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   647
following implementation.""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   648
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   649
pre("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   650
   from re import match
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   651
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   652
   def addrematch(gadflyinstance):
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   653
       gadflyinstance.add_predicate("rematch", match)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   654
""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   655
p("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   656
Then upon connecting to the database execute
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   657
""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   658
pre("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   659
   g = gadfly(...)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   660
   ...
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   661
   addrematch(g)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   662
""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   663
p("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   664
In this case the "semijoin operation" associated with the new predicate
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   665
"rematch" is automatically generated, and after the add_predicate
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   666
binding operation the gadfly instance supports queries such as""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   667
pre("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   668
   select drinker, beer
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   669
   from likes
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   670
   where rematch('b*', beer) and drinker not in
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   671
     (select drinker from frequents where rematch('c*', bar))
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   672
""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   673
p("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   674
By embedding the "rematch" operation within the query the SQL
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   675
engine can do "more work" for the programmer and reduce or eliminate the
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   676
need to process the query result externally to the engine.
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   677
""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   678
p("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   679
In a similar manner functions may be added to a gadfly instance,""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   680
pre("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   681
   def modulo(x,y):
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   682
       return x % y
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   683
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   684
   def addmodulo(gadflyinstance):
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   685
       gadflyinstance.add_function("modulo", modulo)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   686
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   687
   ...
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   688
   g = gadfly(...)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   689
   ...
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   690
   addmodulo(g)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   691
""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   692
p("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   693
Then after the binding the modulo function can be used whereever
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   694
an SQL expression can occur.
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   695
""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   696
p("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   697
Adding alternative table implementations to a Gadfly instance
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   698
is more interesting and more difficult.  An "extension table" implementation
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   699
must conform to the following interface:""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   700
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   701
pre("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   702
    # get the kjbuckets.kjSet set of attribute names for this table
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   703
    names = table.attributes()
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   704
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   705
    # estimate the difficulty of evaluating a join given known attributes
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   706
    #  return None for "impossible" or n>=0 otherwise with larger values
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   707
    #    indicating greater difficulty or expense
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   708
    estimate = table.estimate(known_attributes)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   709
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   710
    # return the join of the rows of the table with
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   711
    # the list of kjbuckets.kjDict mappings as a list of mappings.
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   712
    resultmappings = table.join(listofmappings)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   713
""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   714
p("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   715
In this case add the table to a gadfly instance using""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   716
pre("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   717
    gadflyinstance.add_table("table_name", table)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   718
""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   719
p("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   720
For example to add a table which automatically queries filenames
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   721
in the filesystems of the host computer a gadfly instance could
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   722
be augmented with a GLOB table implemented using the standard
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   723
library function glob.glob as follows:""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   724
pre("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   725
   import kjbuckets
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   726
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   727
   class GlobTable:
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   728
       def __init__(self): pass
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   729
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   730
       def attributes(self):
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   731
           return kjbuckets.kjSet("PATTERN", "NAME")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   732
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   733
       def estimate(self, known_attributes):
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   734
           if known_attributes.member("PATTERN"):
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   735
               return 66 # join not too difficult
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   736
           else:
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   737
               return None # join is impossible (must have PATTERN)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   738
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   739
       def join(self, listofmappings):
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   740
           from glob import glob
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   741
           result = []
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   742
           for m in listofmappings:
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   743
               pattern = m["PATTERN"]
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   744
               for name in glob(pattern):
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   745
                   newmapping = kjbuckets.kjDict(m)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   746
                   newmapping["NAME"] = name
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   747
                   if newmapping.Clean():
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   748
                       result.append(newmapping)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   749
           return result
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   750
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   751
   ...
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   752
   gadfly_instance.add_table("GLOB", GlobTable())
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   753
""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   754
p("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   755
Then one could formulate queries such as "list the files in directories
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   756
associated with packages installed by guido"
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   757
""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   758
pre("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   759
   select g.name as filename
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   760
   from packages p, glob g
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   761
   where p.installer = 'guido' and g.pattern=p.root_directory
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   762
""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   763
p("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   764
Note that conceptually the GLOB table is an infinite table including
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   765
all filenames on the current computer in the "NAME" column, paired with
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   766
a potentially infinite number of patterns.
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   767
""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   768
p("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   769
More interesting examples would allow queries to remotely access
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   770
data served by an HTTP server, or from any other resource.
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   771
""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   772
p("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   773
Furthermore an extension table can be augmented with update methods
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   774
""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   775
pre("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   776
      table.insert_rows(listofmappings)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   777
      table.update_rows(oldlist, newlist)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   778
      table.delete_rows(oldlist)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   779
""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   780
p("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   781
Note: at present the implementation does not enforce recovery or
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   782
transaction semantics for updates to extension tables, although this
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   783
may change in the final release.
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   784
""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   785
p("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   786
The table implementation is free to provide its own implementations of
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   787
indices which take advantage of data provided by the join argument.
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   788
""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   789
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   790
header("Efficiency Notes")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   791
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   792
p("""The following thought experiment attempts to explain why the
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   793
Gadfly implementation is surprisingly fast considering that it
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   794
is almost entirely implemented in Python (an interpreted programming
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   795
language which is not especially fast when compared to alternatives).
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   796
Although Gadfly is quite complex, at an abstract level the process
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   797
of query evaluation boils down to a series of embedded loops.
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   798
Consider the following nested loops:""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   799
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   800
pre("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   801
   iterate 1000:
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   802
   f(...) # fixed cost of outer loop
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   803
   iterate 10:
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   804
      g(...) # fixed cost of middle loop
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   805
      iterate 10:
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   806
         # the real work (string parse, matrix mul, query eval...)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   807
         h(...)""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   808
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   809
p("""In my experience many computations follow this pattern where
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   810
f, g, are complex, dynamic, special purpose and h is simple, general
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   811
purpose, static. Some example computations that follow this pattern
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   812
include: file massaging (perl), matrix manipulation (python, tcl),
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   813
database/cgi page generation, and vector graphics/imaging.""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   814
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   815
p("""Suppose implementing f, g, h in python is easy but result in
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   816
execution times10 times slower than a much harder implementation
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   817
in C, choosing arbitrary and debatable numbers assume each function
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   818
call consumes 1 tick in C, 5 ticks in java, 10 ticks in python
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   819
for a straightforward implementation of each function f, g, and
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   820
h. Under these conditions we get the following cost analysis,
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   821
eliminating some uninteresting combinations, of implementing the
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   822
function f, g, and h in combinations of Python, C and java:""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   823
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   824
pre("""
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   825
COST    | FLANG  | GLANG  | HLANG
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   826
==================================
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   827
111000  | C      | C      | C
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   828
115000  | java   | C      | C
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   829
120000  | python | C      | C
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   830
155000  | java   | java   | C
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   831
210000  | python | python | C
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   832
555000  | java   | java   | java
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   833
560000  | python | java   | java
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   834
610000  | python | python | java
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   835
1110000 | python | python | python
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   836
""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   837
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   838
p("""Note that moving only the innermost loop to C (python/python/C)
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   839
speeds up the calculation by half an order of magnitude compared
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   840
to the python-only implementation and brings the speed to within
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   841
a factor of 2 of an implementation done entirely in C.""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   842
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   843
p("""Although this artificial and contrived thought experiment is
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   844
far from conclusive, we may be tempted to draw the conclusion
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   845
that generally programmers should focus first on obtaining a working
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   846
implementation (because as John Ousterhout is reported to have
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   847
said "the biggest performance improvement is the transition
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   848
from non-working to working") using the methodology that
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   849
is most likely to obtain a working solution the quickest (Python). Only then if the performance
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   850
is inadequate should the programmer focus on optimizing
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   851
the inner most loops, perhaps moving them to a very efficient
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   852
implementation (C). Optimizing the outer loops will buy little
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   853
improvement, and should be done later, if ever.""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   854
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   855
p("""This was precisely the strategy behind the gadfly implementations,
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   856
where most of the inner loops are implemented in the kjbuckets
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   857
C extension module and the higher level logic is all in Python.
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   858
This also explains why gadfly appears to be "slower"
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   859
for simple queries over small data sets, but seems to be relatively
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   860
"faster" for more complex queries over larger data sets,
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   861
since larger queries and data sets take better advantage of the
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   862
optimized inner loops.""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   863
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   864
header("A Gadfly variant for OLAP?")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   865
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   866
p("""In private correspondence Andy Robinson points out that the
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   867
basic logical design underlying Gadfly could be adapted to provide
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   868
Online Analytical Processing (OLAP) and other forms of data warehousing
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   869
and data mining. Since SQL is not particularly well suited for
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   870
the kinds of requests common in these domains the higher level
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   871
interfaces would require modification, but the underlying logic
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   872
of substitutions and name mappings seems to be appropriate.""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   873
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   874
header("Conclusion")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   875
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   876
p("""The revamped query engine design in Gadfly 2 supports
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   877
a flexible and general extension methodology that permits programmers
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   878
to extend the gadfly engine to include additional computations
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   879
and access to remote data sources. Among other possibilities this
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   880
will permit the gadfly engine to make use of disk based indexed
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   881
tables and to dynamically retrieve information from remote data
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   882
sources (such as an Excel spreadsheet or an Oracle database).
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   883
These features will make gadfly a very useful tool for data manipulation
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   884
and integration.""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   885
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   886
header("References")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   887
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   888
p("""[Van Rossum] Van Rossum, Python Reference Manual, Tutorial, and Library Manuals,
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   889
please look to http://www.python.org
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   890
for the latest versions, downloads and links to printed versions.""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   891
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   892
p("""[O'Neill] O'Neill, P., Data Base Principles, Programming, Performance,
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   893
Morgan Kaufmann Publishers, San Francisco, 1994.""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   894
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   895
p("""[Korth and Silberschatz] Korth, H. and Silberschatz, A. and Sudarshan, S.
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   896
Data Base System Concepts, McGraw-Hill Series in Computer Science, Boston,
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   897
1997""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   898
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   899
p("""[Gadfly]Gadfly: SQL Relational Database in Python,
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   900
http://www.chordate.com/kwParsing/gadfly.html""")
c414c0ab69e7 reportlab-2.2: first stage of major re-org
rgbecker
parents:
diff changeset
   901
2967
ea62529bd1df reportlab-2.2: first stage changes in on the trunk
rgbecker
parents: 2963
diff changeset
   902
go()