usage.rst: updates docs to import pyRXPU rather than pyRXP and show all output example strings as unicode strings
author Matthew Pitkin <pitkin@gmail.com>
Sat, 05 Mar 2016 16:40:51 +0000
changeset 54 a7ff548d8a39
parent 53 3a3f172a62bf
child 55 420b7bf3500d
usage.rst: updates docs to import pyRXPU rather than pyRXP and show all output example strings as unicode strings
docs/usage.rst
--- a/docs/usage.rst	Sat Mar 05 16:28:38 2016 +0000
+++ b/docs/usage.rst	Sat Mar 05 16:40:51 2016 +0000
@@ -15,10 +15,10 @@
 
 ::
 
-    >>> import pyRXP
-    >>> pyRXP.version
+    >>> import pyRXPU
+    >>> pyRXPU.version
     '1.16'
-    >>> pyRXP.RXPVersion
+    >>> pyRXPU.RXPVersion
     'RXP 1.5.0 Copyright Richard Tobin, LTG, HCRC, University of Edinburgh'
 
 Once you have imported pyRXP, you can instantiate a parser instance
@@ -26,7 +26,7 @@
 
 ::
 
-    >>>rxp=pyRXP.Parser()
+    >>>rxp=pyRXPU.Parser()
 
 
 To parse some XML, you use the ``parse`` method, passing a string as the first argument and
@@ -34,19 +34,22 @@
 
 ::
 
-    >>> rxp=pyRXP.Parser()
+    >>> rxp=pyRXPU.Parser()
     >>> rxp.parse('<a>some text</a>')
-    ('a', None, ['some text'], None)
+    (u'a', None, [u'some text'], None)
 
 
 As a shortcut, you can call the instance directly:
 
 ::
 
-    >>> rxp=pyRXP.Parser()
+    >>> rxp=pyRXPU.Parser()
     >>> rxp('<a>some text</a>')
-    ('a', None, ['some text'], None)
+    (u'a', None, [u'some text'], None)
+
 
+The current version of PyRXP only contains pyRXPU, which is the 16-bit Unicode aware
+version of pyRXP, and all returned strings are Unicode strings.
 
 **Note**:
 Throughout this documentation, we'll use the explicit call syntax for clarity.
@@ -60,7 +63,7 @@
 ::
 
     >>> rxp.parse('<tag>content</tag>')
-    ('tag', None, ['content'], None)
+    (u'tag', None, [u'content'], None)
 
 
 Each element ("tag") in the XML is represented as a tuple of 4 elements:
@@ -81,18 +84,18 @@
 ::
 
     >>> rxp.parse('<tag1><tag2>content</tag2></tag1>')
-    ('tag1', None, [('tag2', None, ['content'], None)], None)
+    (u'tag1', None, [(u'tag2', None, [u'content'], None)], None)
 
 This may be easier to understand if we lay it out differently:
 
 ::
 
     >>> rxp.parse('<tag1><tag2>content</tag2></tag1>')
-    ('tag1',
+    (u'tag1',
      None,
-         [('tag2',
+         [(u'tag2',
            None,
-           ['content'],
+           [u'content'],
            None)
          ],
     None)
@@ -118,13 +121,13 @@
 ::
 
     >>> rxp.parse('<tag>my contents</tag>')
-    ('tag', None, ['my contents'], None)
+    (u'tag', None, [u'my contents'], None)
 
     >>> rxp.parse('<tag></tag>')
-    ('tag', None, [], None)
+    (u'tag', None, [], None)
 
     >>> rxp.parse('<tag/>')
-    ('tag', None, None, None)
+    (u'tag', None, None, None)
 
 Notice how the contents list is handled differently for the last two
 examples. This is how we can tell the difference between an empty tag
@@ -138,21 +141,21 @@
 ::
 
     >>>rxp.parse('<outerTag><innerTag>bb</innerTag>aaa<singleTag/></outerTag>')
-    ('outerTag', None, [('innerTag', None, ['bb'], None), 'aaa', ('singleTag',
+    (u'outerTag', None, [(u'innerTag', None, [u'bb'], None), u'aaa', (u'singleTag',
     None, None, None)], None)
 
 Again, this is more understandable if we show it like this:
 
 ::
 
-    ('outerTag',
+    (u'outerTag',
      None,
-         [('innerTag',
+         [(u'innerTag',
            None,
-           ['bb'],
+           [u'bb'],
            None),
-              'aaa',
-                  ('singleTag',
+              u'aaa',
+                  (u'singleTag',
                    None,
                    None,
                    None)
@@ -178,14 +181,14 @@
 ::
 
     >>> rxp.parse('<a>some text</a>')
-    ('a', None, ['some text'], None)
+    (u'a', None, [u'some text'], None)
 
 Explicitly setting ExpandEmpty to 1 gives us these:
 
 ::
 
     >>> rxp.parse('<a>some text</a>', ExpandEmpty=1)
-    ('a', {}, ['some text'], None)
+    (u'a', {}, [u'some text'], None)
 
 Notice how the None from the first example is being returned as an empty
 dictionary in the second version. ``ExpandEmpty`` makes sure that the
@@ -198,12 +201,12 @@
 ::
 
     >>> rxp.parse('<b/>', ExpandEmpty=0)
-    ('b', None, None, None)
+    (u'b', None, None, None)
 
 ::
 
     >>> rxp.parse('<b/>', ExpandEmpty=1)
-    ('b', {}, [], None)
+    (u'b', {}, [], None)
 
 Again, notice how the Nones have been expanded.
 
@@ -213,26 +216,26 @@
 ::
 
     >>> rxp.parse('<a>some text<b>Hello</b></a>', ExpandEmpty=0)
-    ('a', None, ['some text', ('b', None, ['Hello'], None)], None)
+    (u'a', None, [u'some text', (u'b', None, [u'Hello'], None)], None)
 
     >>> rxp.parse('<a>some text<b>Hello</b></a>', ExpandEmpty=1)
-    ('a', {}, ['some text', ('b', {}, ['Hello'], None)], None)
+    (u'a', {}, [u'some text', (u'b', {}, [u'Hello'], None)], None)
 
 ::
 
     >>> rxp.parse('<a>some text<b></b></a>', ExpandEmpty=0)
-    ('a', None, ['some text', ('b', None, [], None)], None)
+    (u'a', None, [u'some text', (u'b', None, [], None)], None)
 
     >>> rxp.parse('<a>some text<b></b></a>', ExpandEmpty=1)
-    ('a', {}, ['some text', ('b', {}, [], None)], None)
+    (u'a', {}, [u'some text', (u'b', {}, [], None)], None)
 
 ::
 
     >>> rxp.parse('<a>some text<b/></a>', ExpandEmpty=0)
-    ('a', None, ['some text', ('b', None, None, None)], None)
+    (u'a', None, [u'some text', (u'b', None, None, None)], None)
 
     >>> rxp.parse('<a>some text<b/></a>', ExpandEmpty=1)
-    ('a', {}, ['some text', ('b', {}, [], None)], None)
+    (u'a', {}, [u'some text', (u'b', {}, [], None)], None)
 
 3.1.4 Processing instructions
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -248,10 +251,10 @@
 ::
 
     >>> rxp.parse('<a><?works document="hello.doc"?></a>')
-    ('a', None, [], None)
+    (u'a', None, [], None)
     >>> #vanishes - like a comment
     >>> rxp.parse('<a><?works document="hello.doc"?></a>', ReturnProcessingInstructions=1)
-    ('a', None, [('<?', {'name': 'works'}, ['document="hello.doc"'], None)], None)
+    (u'a', None, [(u'<?', {u'name': u'works'}, [u'document="hello.doc"'], None)], None)
     >>>
 
 
@@ -261,7 +264,7 @@
 ::
 
     >>> pyRXPU.piTagName
-    '<?'
+    u'<?'
 
 You can test against ``piTagName`` - but don't try and change it. See the
 section on trying to change ``commentTagName`` for an example of what would
@@ -289,7 +292,7 @@
 ::
 
     >>> rxp.parse('<tag><!-- this is a comment about the tag --></tag>')
-    ('tag', None, [], None)
+    (u'tag', None, [], None)
 
     >>> rxp.parse('<!-- this is a comment -->')
     Traceback (most recent call last):
@@ -309,7 +312,7 @@
 
 
     >>> rxp.parse('<tag><!-- this is a comment about the tag --></tag>', ReturnComments=1)
-    ('tag', None, [('<!--', None, [' this is a comment about the tag '], None)], None)
+    (u'tag', None, [(u'<!--', None, [u' this is a comment about the tag '], None)], None)
 
 Using ``ReturnComments``, the comments are returned in the same way as an
 ordinary tag, except that the tag has a special name. This special name
@@ -322,23 +325,23 @@
       File "<stdin>", line 1, in <module>
     AttributeError: commentTagName
 
-    >>> pyRXP.commentTagName
-    '<!--'
+    >>> pyRXPU.commentTagName
+    u'<!--'
 
 Please note that changing ``commentTagName`` won't work: what would be changed is simply the
 Python representation, while the underlying C object would remain untouched:
 
 ::
 
-    >>> import pyRXP
-    >>> p=pyRXP.Parser()
-    >>> pyRXP.commentTagName = "##" # THIS WON'T WORK!
-    >>> pyRXP.commentTagName
+    >>> import pyRXPU
+    >>> p=pyRXPU.Parser()
+    >>> pyRXPU.commentTagName = "##" # THIS WON'T WORK!
+    >>> pyRXPU.commentTagName
     '##'
     >>> #LOOKS LIKE IT WORKS - BUT SEE BELOW FOR WHY IT DOESN'T
     >>> rxp.parse('<a><!-- this is another comment comment --></a>', ReturnComments = 1)
     >>> # DOESN'T WORK!
-    >>> ('a', None, [('<!--', None, [' this is another comment comment '], None)], None)
+    >>> (u'a', None, [(u'<!--', None, [u' this is another comment comment '], None)], None)
     >>> #SEE?
 
 What it is useful for is to check against to see if you have been
@@ -347,9 +350,9 @@
 ::
 
     >>> rxp.parse('<a><!-- comment --></a>', ReturnComments=1)
-    ('a', None, [('<!--', None, [' comment '], None)], None)
+    (u'a', None, [(u'<!--', None, [u' comment '], None)], None)
     >>> rxp.parse('<a><!-- comment --></a>', ReturnComments=1)[2][0][0]
-    '<!--'
+    u'<!--'
     >>> #this returns the comment name tag from the tuple tree...
     >>> rxp.parse('<a><!-- comment --></a>', ReturnComments=1)[2][0][0] is pyRXPU.commentTagName
     1
@@ -365,14 +368,14 @@
 
 
     >>> rxp.parse('<tag/><!-- this is a comment about the tag -->', ReturnComments=1)
-    ('tag', None, None, None)
+    (u'tag', None, None, None)
 
 To get around this, you need to use the ``ReturnList`` attribute:
 
 ::
 
     >>> rxp.parse('<tag/><!-- this is a comment about the tag -->', ReturnComments=1, ReturnList=1)
-    [('tag', None, None, None), ('<!--', None, [' this is a comment about the tag '], None)]
+    [(u'tag', None, None, None), (u'<!--', None, [u' this is a comment about the tag '], None)]
     >>>
 
 Since we've seen a number of errors in the preceding paragraphs, it
@@ -412,40 +415,7 @@
     Parse Failed!
 
     >>> rxp.parse('<outer><a></a><b></b></outer>')
-    ('outer', None, [('a', None, [], None), ('b', None, [], None)], None)
-
-3.1.6 A brief note on pyRXPU
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-PyRXPU is the 16-bit Unicode aware version of pyRXP.
-
-In most cases, the only difference in behaviour between pyRXP and pyRXPU
-is that pyRXPU returns Unicode strings. This may be inconveneient for
-some applications as Python doesn't yet handle unicode filenames etc
-terribly well. A work around is to get pyRXPU to return **utf8** using
-the *ReturnUTF8* boolean argument in the parser creation or call. Then
-all values are returned as utf8 encoded strings.
-
-pyRXPU is built to try and do the right thing with both unicode and
-non-unicode strings.
-
-::
-
-    >>> import pyRXPU
-    >>> pyRXPU.Parser()('<a><?works document="hello.doc"?></a>', ReturnProcessingInstructions=1)
-    (u'a', None, [(u'<?', {'name': u'works'}, [u'document="hello.doc"'], None)], None)
-
-In most cases, the only way to tell the difference (*other* than sending
-in Unicode) is by the module name.
-
-::
-
-    >>> import pyRXPU
-    >>> pyRXPU.__name__
-    'pyRXPU'
-    >>> import pyRXP
-    >>> pyRXP.__name__
-    'pyRXP'
+    (u'outer', None, [(u'a', None, [], None), (u'b', None, [], None)], None)
 
 3.2. Validating against a DTD
 -------------------------------------------------------------------------
@@ -495,9 +465,9 @@
 
 
     >>> rxp.parse(fn)
-    ('a',
+    (u'a',
      None,
-     ['\n', ('b', None, ['This tag is the contents'], None), '\n'],
+     [u'\n', (u'b', None, [u'This tag is the contents'], None), u'\n'],
      None)
     >>>
 
@@ -875,7 +845,7 @@
 
     >>> rxp.ErrorOnBadCharacterEntities=0
     >>> rxp.parse('<a>&#999;</a>')
-    ('a', None, [''], None)
+    (u'a', None, [u''], None)
 
     >>> rxp.ErrorOnBadCharacterEntities=1
     >>> rxp.parse('<a>&#999;</a>')
@@ -932,7 +902,7 @@
 
     >>> rxp.ErrorOnUndefinedEntities=0
     >>> rxp.parse('<a>&dud;</a>')
-    ('a', None, ['&dud;'], None)
+    (u'a', None, [u'&dud;'], None)
 
     >>> rxp.ErrorOnUndefinedEntities=1
     >>> rxp.parse('<a>&dud;</a>')
@@ -1000,11 +970,11 @@
 
     >>> rxp.ExpandCharacterEntities=1
     >>> rxp.parse('<a>&#109;</a>')
-    ('a', None, ['m'], None)
+    (u'a', None, [u'm'], None)
 
     >>> rxp.ExpandCharacterEntities=0
     >>> rxp.parse('<a>&#109;</a>')
-    ('a', None, ['&#109;'], None)
+    (u'a', None, [u'&#109;'], None)
 
 
 
@@ -1030,11 +1000,11 @@
 
     >>> rxp.ExpandGeneralEntities=0
     >>> rxp.parse('<a>&amp;</a>')
-    ('a', None, ['&amp;'], None)
+    (u'a', None, [u'&amp;'], None)
 
     >>> rxp.ExpandGeneralEntities=1
     >>> rxp.parse('<a>&amp;</a>')
-    ('a', None, ['&#38;'], None)
+    (u'a', None, [u'&#38;'], None)
 
 .. _IgnoreEntities:
 
@@ -1054,11 +1024,11 @@
 
     >>> rxp.IgnoreEntities=0
     >>> rxp.parse('<a>&amp;</a>')
-    ('a', None, ['&#38;'], None)
+    (u'a', None, [u'&#38;'], None)
 
     >>> rxp.IgnoreEntities=1
     >>> rxp.parse('<a>&amp;</a>')
-    ('a', None, ['&amp;'], None)
+    (u'a', None, [u'&amp;'], None)
 
 .. _IgnorePlacementErrors:
 
@@ -1169,7 +1139,7 @@
 Description:
 
 If this is set, comments are returned as nodes with tag name
-``pyRXP.commentTagName``, otherwise they are ignored.
+``pyRXPU.commentTagName``, otherwise they are ignored.
 
 Example:
 
@@ -1218,11 +1188,11 @@
     >>> rxp.ReturnComments=1
     >>> rxp.ReturnList=1
     >>> rxp.parse('<!-- comment --><a>Some Text</a><!-- another comment -->')
-    [('<!--', None, [' comment '], None), ('a', None, ['Some Text'], None), ('<!--',
-     None, [' another comment '], None)]
+    [(u'<!--', None, [u' comment '], None), (u'a', None, [u'Some Text'], None), (u'<!--',
+     None, [u' another comment '], None)]
     >>> rxp.ReturnList=0
     >>> rxp.parse('<!-- comment --><a>Some Text</a><!-- another comment -->')
-    ('a', None, ['Some Text'], None)
+    (u'a', None, [u'Some Text'], None)
     >>>
 
 See also: :ref:`ReturnComments`
@@ -1248,7 +1218,7 @@
 Description:
 
 If this is set, processing instructions are returned as nodes with
-tagname ``pyRXP.piTagname``, otherwise they are ignored.
+tagname ``pyRXPU.piTagName``, otherwise they are ignored.
 
 .. _SimpleErrorFormat:
 
@@ -1391,7 +1361,7 @@
 
     >>> rxp.XMLPredefinedEntities=1
     >>> rxp.parse('<a>&amp;</a>')
-    ('a', None, ['&'], None)
+    (u'a', None, [u'&'], None)
 
     >>> rxp.XMLPredefinedEntities=0
     >>> rxp.parse('<a>&amp;</a>')