Re-add Weather bots
author Michael Schultheiss <schultmc@gmail.com>
Thu Oct 22 10:14:56 2009 -0400 (2 years ago)
branch bots
changeset 95fe849bb9f86f
parent 948d837005bcd0
child 9690bfa26a2ac1
Re-add Weather
trunk/quahog/plugins/Weather
trunk/quahog/plugins/Weather/README.txt
trunk/quahog/plugins/Weather/__init__.py
trunk/quahog/plugins/Weather/config.py
trunk/quahog/plugins/Weather/local/BeautifulSoup.py
trunk/quahog/plugins/Weather/local/__init__.py
trunk/quahog/plugins/Weather/local/feedparser.py
trunk/quahog/plugins/Weather/local/simplejson/__init__.py
trunk/quahog/plugins/Weather/local/simplejson/decoder.py
trunk/quahog/plugins/Weather/local/simplejson/encoder.py
trunk/quahog/plugins/Weather/local/simplejson/scanner.py
trunk/quahog/plugins/Weather/local/simplejson/tool.py
trunk/quahog/plugins/Weather/plugin.py
trunk/quahog/plugins/Weather/test.py
       1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
       2 +++ b/trunk/quahog/plugins/Weather/README.txt	Thu Oct 22 10:14:56 2009 -0400
       3 @@ -0,0 +1,9 @@
       4 +Plugin which allows users to query weather conditions from various
       5 +websites.  The weather command will try each supported weather site
       6 +until it gets a valid response.  One can also query a specific weather
       7 +site using the appropriate command.
       8 +
       9 +Dependencies:
      10 +- feedparser <http://www.feedparser.org/>
      11 +- simplejson <http://undefined.org/python/#simplejson> (unless Python
      12 +  2.6 or newer is being used)
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/trunk/quahog/plugins/Weather/__init__.py	Thu Oct 22 10:14:56 2009 -0400
     1.3 @@ -0,0 +1,70 @@
     1.4 +###
     1.5 +# Copyright (c) 2005, James Vega
     1.6 +# All rights reserved.
     1.7 +#
     1.8 +# Redistribution and use in source and binary forms, with or without
     1.9 +# modification, are permitted provided that the following conditions are met:
    1.10 +#
    1.11 +#   * Redistributions of source code must retain the above copyright notice,
    1.12 +#     this list of conditions, and the following disclaimer.
    1.13 +#   * Redistributions in binary form must reproduce the above copyright notice,
    1.14 +#     this list of conditions, and the following disclaimer in the
    1.15 +#     documentation and/or other materials provided with the distribution.
    1.16 +#   * Neither the name of the author of this software nor the name of
    1.17 +#     contributors to this software may be used to endorse or promote products
    1.18 +#     derived from this software without specific prior written consent.
    1.19 +#
    1.20 +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    1.21 +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    1.22 +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    1.23 +# ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
    1.24 +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
    1.25 +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
    1.26 +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
    1.27 +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
    1.28 +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
    1.29 +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    1.30 +# POSSIBILITY OF SUCH DAMAGE.
    1.31 +###
    1.32 +
    1.33 +"""
    1.34 +This plugin does weather-related stuff.  It can't change the weather, though,
    1.35 +so don't get your hopes up.  We just report it.
    1.36 +"""
    1.37 +
    1.38 +import supybot
    1.39 +import supybot.world as world
    1.40 +
    1.41 +# Use this for the version of this plugin.  You may wish to put a CVS keyword
    1.42 +# in here if you're keeping the plugin in CVS or some similar system.
    1.43 +__version__ = "%%VERSION%%"
    1.44 +
    1.45 +__author__ = supybot.authors.unknown
    1.46 +
    1.47 +supybot.authors.mtughan = supybot.Author('Michael Tughan', 'mtughan', 'michaelsprogramming@gmail.com')
    1.48 +
    1.49 +# This is a dictionary mapping supybot.Author instances to lists of
    1.50 +# contributions.
    1.51 +__contributors__ = {
    1.52 +    supybot.authors.jamessan: ['cnn', 'wunder', 'wunder.rss',
    1.53 +                               'temperatureUnit configuration variable',
    1.54 +                               'convert configuration variable'],
    1.55 +    supybot.authors.jemfinch: ['weather'],
    1.56 +    supybot.authors.bwp: ['ham'],
    1.57 +    supybot.authors.mtughan: ['cnn', 'wunder', 'wunder.rss', 'ham'],
    1.58 +    }
    1.59 +
    1.60 +import config
    1.61 +import plugin
    1.62 +reload(plugin) # In case we're being reloaded.
    1.63 +# Add more reloads here if you add third-party modules and want them to be
    1.64 +# reloaded when this plugin is reloaded.  Don't forget to import them as well!
    1.65 +
    1.66 +if world.testing:
    1.67 +    import test
    1.68 +
    1.69 +Class = plugin.Class
    1.70 +configure = config.configure
    1.71 +
    1.72 +
    1.73 +# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79:
     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/trunk/quahog/plugins/Weather/config.py	Thu Oct 22 10:14:56 2009 -0400
     2.3 @@ -0,0 +1,73 @@
     2.4 +###
     2.5 +# Copyright (c) 2005, James Vega
     2.6 +# All rights reserved.
     2.7 +#
     2.8 +# Redistribution and use in source and binary forms, with or without
     2.9 +# modification, are permitted provided that the following conditions are met:
    2.10 +#
    2.11 +#   * Redistributions of source code must retain the above copyright notice,
    2.12 +#     this list of conditions, and the following disclaimer.
    2.13 +#   * Redistributions in binary form must reproduce the above copyright notice,
    2.14 +#     this list of conditions, and the following disclaimer in the
    2.15 +#     documentation and/or other materials provided with the distribution.
    2.16 +#   * Neither the name of the author of this software nor the name of
    2.17 +#     contributors to this software may be used to endorse or promote products
    2.18 +#     derived from this software without specific prior written consent.
    2.19 +#
    2.20 +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    2.21 +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    2.22 +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    2.23 +# ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
    2.24 +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
    2.25 +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
    2.26 +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
    2.27 +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
    2.28 +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
    2.29 +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    2.30 +# POSSIBILITY OF SUCH DAMAGE.
    2.31 +###
    2.32 +
    2.33 +import plugin
    2.34 +
    2.35 +import supybot.conf as conf
    2.36 +import supybot.utils as utils
    2.37 +import supybot.registry as registry
    2.38 +
    2.39 +def configure(advanced):
    2.40 +    # This will be called by supybot to configure this module.  advanced is
    2.41 +    # a bool that specifies whether the user identified himself as an advanced
    2.42 +    # user or not.  You should effect your configuration by manipulating the
    2.43 +    # registry as appropriate.
    2.44 +    from supybot.questions import expect, anything, something, yn
    2.45 +    conf.registerPlugin('Weather', True)
    2.46 +
    2.47 +class WeatherUnit(registry.String):
    2.48 +    def setValue(self, s):
    2.49 +        s = s.capitalize()
    2.50 +        if s not in plugin.unitAbbrevs:
    2.51 +            raise registry.InvalidRegistryValue,\
    2.52 +                  'Unit must be one of Fahrenheit, Celsius, or Kelvin.'
    2.53 +        s = plugin.unitAbbrevs[s]
    2.54 +        registry.String.setValue(self, s)
    2.55 +
    2.56 +class WeatherCommand(registry.OnlySomeStrings):
    2.57 +    validStrings = plugin.Weather.weatherCommands
    2.58 +
    2.59 +Weather = conf.registerPlugin('Weather')
    2.60 +conf.registerChannelValue(Weather, 'temperatureUnit',
    2.61 +    WeatherUnit('Fahrenheit', """Sets the default temperature unit to use when
    2.62 +    reporting the weather."""))
    2.63 +conf.registerChannelValue(Weather, 'command',
    2.64 +    WeatherCommand('wunder', """Sets the default command to use when retrieving
    2.65 +    the weather.  Command must be one of %s.""" %
    2.66 +    utils.str.commaAndify(plugin.Weather.weatherCommands, And='or')))
    2.67 +conf.registerChannelValue(Weather, 'convert',
    2.68 +    registry.Boolean(True, """Determines whether the weather commands will
    2.69 +    automatically convert weather units to the unit specified in
    2.70 +    supybot.plugins.Weather.temperatureUnit."""))
    2.71 +
    2.72 +conf.registerUserValue(conf.users.plugins.Weather, 'lastLocation',
    2.73 +    registry.String('', ''))
    2.74 +
    2.75 +
    2.76 +# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79:
     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/trunk/quahog/plugins/Weather/local/BeautifulSoup.py	Thu Oct 22 10:14:56 2009 -0400
     3.3 @@ -0,0 +1,1080 @@
     3.4 +"""Beautiful Soup
     3.5 +Elixir and Tonic
     3.6 +"The Screen-Scraper's Friend"
     3.7 +v2.1.1
     3.8 +http://www.crummy.com/software/BeautifulSoup/
     3.9 +
    3.10 +Beautiful Soup parses arbitrarily invalid XML- or HTML-like substance
    3.11 +into a tree representation. It provides methods and Pythonic idioms
    3.12 +that make it easy to search and modify the tree.
    3.13 +
    3.14 +A well-formed XML/HTML document will yield a well-formed data
    3.15 +structure. An ill-formed XML/HTML document will yield a
    3.16 +correspondingly ill-formed data structure. If your document is only
    3.17 +locally well-formed, you can use this library to find and process the
    3.18 +well-formed part of it. The BeautifulSoup class has heuristics for
    3.19 +obtaining a sensible parse tree in the face of common HTML errors.
    3.20 +
    3.21 +Beautiful Soup has no external dependencies. It works with Python 2.2
    3.22 +and up.
    3.23 +
    3.24 +Beautiful Soup defines classes for four different parsing strategies:
    3.25 +
    3.26 + * BeautifulStoneSoup, for parsing XML, SGML, or your domain-specific
    3.27 +   language that kind of looks like XML.
    3.28 +
    3.29 + * BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid
    3.30 +   or invalid.
    3.31 +
    3.32 + * ICantBelieveItsBeautifulSoup, for parsing valid but bizarre HTML
    3.33 +   that trips up BeautifulSoup.
    3.34 +
    3.35 + * BeautifulSOAP, for making it easier to parse XML documents that use
    3.36 +   lots of subelements containing a single string, where you'd prefer
    3.37 +   they put that string into an attribute (such as SOAP messages).
    3.38 +
    3.39 +You can subclass BeautifulStoneSoup or BeautifulSoup to create a
    3.40 +parsing strategy specific to an XML schema or a particular bizarre
    3.41 +HTML document. Typically your subclass would just override
    3.42 +SELF_CLOSING_TAGS and/or NESTABLE_TAGS.
    3.43 +"""
    3.44 +from __future__ import generators
    3.45 +
    3.46 +__author__ = "Leonard Richardson (leonardr@segfault.org)"
    3.47 +__version__ = "2.1.1"
    3.48 +__date__ = "$Date: 2004/10/18 00:14:20 $"
    3.49 +__copyright__ = "Copyright (c) 2004-2005 Leonard Richardson"
    3.50 +__license__ = "PSF"
    3.51 +
    3.52 +from sgmllib import SGMLParser, SGMLParseError
    3.53 +import types
    3.54 +import re
    3.55 +import sgmllib
    3.56 +
    3.57 +#This code makes Beautiful Soup able to parse XML with namespaces
    3.58 +sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*')
    3.59 +
    3.60 +class NullType(object):
    3.61 +
    3.62 +    """Similar to NoneType with a corresponding singleton instance
    3.63 +    'Null' that, unlike None, accepts any message and returns itself.
    3.64 +
    3.65 +    Examples:
    3.66 +    >>> Null("send", "a", "message")("and one more",
    3.67 +    ...      "and what you get still") is Null
    3.68 +    True
    3.69 +    """
    3.70 +
    3.71 +    def __new__(cls):                    return Null
    3.72 +    def __call__(self, *args, **kwargs): return Null
    3.73 +##    def __getstate__(self, *args):       return Null
    3.74 +    def __getattr__(self, attr):         return Null
    3.75 +    def __getitem__(self, item):         return Null
    3.76 +    def __setattr__(self, attr, value):  pass
    3.77 +    def __setitem__(self, item, value):  pass
    3.78 +    def __len__(self):                   return 0
    3.79 +    # FIXME: is this a python bug? otherwise ``for x in Null: pass``
    3.80 +    #        never terminates...
    3.81 +    def __iter__(self):                  return iter([])
    3.82 +    def __contains__(self, item):        return False
    3.83 +    def __repr__(self):                  return "Null"
    3.84 +Null = object.__new__(NullType)
    3.85 +
    3.86 +class PageElement:
    3.87 +    """Contains the navigational information for some part of the page
    3.88 +    (either a tag or a piece of text)"""
    3.89 +
    3.90 +    def setup(self, parent=Null, previous=Null):
    3.91 +        """Sets up the initial relations between this element and
    3.92 +        other elements."""
    3.93 +        self.parent = parent
    3.94 +        self.previous = previous
    3.95 +        self.next = Null
    3.96 +        self.previousSibling = Null
    3.97 +        self.nextSibling = Null
    3.98 +        if self.parent and self.parent.contents:
    3.99 +            self.previousSibling = self.parent.contents[-1]
   3.100 +            self.previousSibling.nextSibling = self
   3.101 +
   3.102 +    def findNext(self, name=None, attrs={}, text=None):
   3.103 +        """Returns the first item that matches the given criteria and
   3.104 +        appears after this Tag in the document."""
   3.105 +        return self._first(self.fetchNext, name, attrs, text)
   3.106 +    firstNext = findNext
   3.107 +
   3.108 +    def fetchNext(self, name=None, attrs={}, text=None, limit=None):
   3.109 +        """Returns all items that match the given criteria and appear
   3.110 +        after this Tag in the document."""
   3.111 +        return self._fetch(name, attrs, text, limit, self.nextGenerator)
   3.112 +
   3.113 +    def findNextSibling(self, name=None, attrs={}, text=None):
   3.114 +        """Returns the closest sibling to this Tag that matches the
   3.115 +        given criteria and appears after this Tag in the document."""
   3.116 +        return self._first(self.fetchNextSiblings, name, attrs, text)
   3.117 +    firstNextSibling = findNextSibling
   3.118 +
   3.119 +    def fetchNextSiblings(self, name=None, attrs={}, text=None, limit=None):
   3.120 +        """Returns the siblings of this Tag that match the given
   3.121 +        criteria and appear after this Tag in the document."""
   3.122 +        return self._fetch(name, attrs, text, limit, self.nextSiblingGenerator)
   3.123 +
   3.124 +    def findPrevious(self, name=None, attrs={}, text=None):
   3.125 +        """Returns the first item that matches the given criteria and
   3.126 +        appears before this Tag in the document."""
   3.127 +        return self._first(self.fetchPrevious, name, attrs, text)
   3.128 +
   3.129 +    def fetchPrevious(self, name=None, attrs={}, text=None, limit=None):
   3.130 +        """Returns all items that match the given criteria and appear
   3.131 +        before this Tag in the document."""
   3.132 +        return self._fetch(name, attrs, text, limit, self.previousGenerator)
   3.133 +    firstPrevious = findPrevious
   3.134 +
   3.135 +    def findPreviousSibling(self, name=None, attrs={}, text=None):
   3.136 +        """Returns the closest sibling to this Tag that matches the
   3.137 +        given criteria and appears before this Tag in the document."""
   3.138 +        return self._first(self.fetchPreviousSiblings, name, attrs, text)
   3.139 +    firstPreviousSibling = findPreviousSibling
   3.140 +
   3.141 +    def fetchPreviousSiblings(self, name=None, attrs={}, text=None,
   3.142 +                              limit=None):
   3.143 +        """Returns the siblings of this Tag that match the given
   3.144 +        criteria and appear before this Tag in the document."""
   3.145 +        return self._fetch(name, attrs, text, limit,
   3.146 +                           self.previousSiblingGenerator)
   3.147 +
   3.148 +    def findParent(self, name=None, attrs={}):
   3.149 +        """Returns the closest parent of this Tag that matches the given
   3.150 +        criteria."""
   3.151 +        r = Null
   3.152 +        l = self.fetchParents(name, attrs, 1)
   3.153 +        if l:
   3.154 +            r = l[0]
   3.155 +        return r
   3.156 +    firstParent = findParent
   3.157 +
   3.158 +    def fetchParents(self, name=None, attrs={}, limit=None):
   3.159 +        """Returns the parents of this Tag that match the given
   3.160 +        criteria."""
   3.161 +        return self._fetch(name, attrs, None, limit, self.parentGenerator)
   3.162 +
   3.163 +    #These methods do the real heavy lifting.
   3.164 +
   3.165 +    def _first(self, method, name, attrs, text):
   3.166 +        r = Null
   3.167 +        l = method(name, attrs, text, 1)
   3.168 +        if l:
   3.169 +            r = l[0]
   3.170 +        return r
   3.171 +    
   3.172 +    def _fetch(self, name, attrs, text, limit, generator):
   3.173 +        "Iterates over a generator looking for things that match."
   3.174 +        if not hasattr(attrs, 'items'):
   3.175 +            attrs = {'class' : attrs}
   3.176 +
   3.177 +        results = []
   3.178 +        g = generator()
   3.179 +        while True:
   3.180 +            try:
   3.181 +                i = g.next()
   3.182 +            except StopIteration:
   3.183 +                break
   3.184 +            found = None
   3.185 +            if isinstance(i, Tag):
   3.186 +                if not text:
   3.187 +                    if not name or self._matches(i, name):
   3.188 +                        match = True
   3.189 +                        for attr, matchAgainst in attrs.items():
   3.190 +                            check = i.get(attr)
   3.191 +                            if not self._matches(check, matchAgainst):
   3.192 +                                match = False
   3.193 +                                break
   3.194 +                        if match:
   3.195 +                            found = i
   3.196 +            elif text:
   3.197 +                if self._matches(i, text):
   3.198 +                    found = i                    
   3.199 +            if found:
   3.200 +                results.append(found)
   3.201 +                if limit and len(results) >= limit:
   3.202 +                    break
   3.203 +        return results
   3.204 +
   3.205 +    #Generators that can be used to navigate starting from both
   3.206 +    #NavigableTexts and Tags.                
   3.207 +    def nextGenerator(self):
   3.208 +        i = self
   3.209 +        while i:
   3.210 +            i = i.next
   3.211 +            yield i
   3.212 +
   3.213 +    def nextSiblingGenerator(self):
   3.214 +        i = self
   3.215 +        while i:
   3.216 +            i = i.nextSibling
   3.217 +            yield i
   3.218 +
   3.219 +    def previousGenerator(self):
   3.220 +        i = self
   3.221 +        while i:
   3.222 +            i = i.previous
   3.223 +            yield i
   3.224 +
   3.225 +    def previousSiblingGenerator(self):
   3.226 +        i = self
   3.227 +        while i:
   3.228 +            i = i.previousSibling
   3.229 +            yield i
   3.230 +
   3.231 +    def parentGenerator(self):
   3.232 +        i = self
   3.233 +        while i:
   3.234 +            i = i.parent
   3.235 +            yield i
   3.236 +
   3.237 +    def _matches(self, chunk, howToMatch):
   3.238 +        #print 'looking for %s in %s' % (howToMatch, chunk)
   3.239 +        #
   3.240 +        # If given a list of items, return true if the list contains a
   3.241 +        # text element that matches.
   3.242 +        if isList(chunk) and not isinstance(chunk, Tag):
   3.243 +            for tag in chunk:
   3.244 +                if isinstance(tag, NavigableText) and self._matches(tag, howToMatch):
   3.245 +                    return True
   3.246 +            return False
   3.247 +        if callable(howToMatch):
   3.248 +            return howToMatch(chunk)
   3.249 +        if isinstance(chunk, Tag):
   3.250 +            #Custom match methods take the tag as an argument, but all other
   3.251 +            #ways of matching match the tag name as a string
   3.252 +            chunk = chunk.name
   3.253 +        #Coerce chunk to a string if it is not one already
   3.254 +        if not isinstance(chunk, basestring):
   3.255 +            chunk = str(chunk)
   3.256 +        if hasattr(howToMatch, 'match'):
   3.257 +            # It's a regexp object.
   3.258 +            return howToMatch.search(chunk)
   3.259 +        if isList(howToMatch):
   3.260 +            return chunk in howToMatch
   3.261 +        if hasattr(howToMatch, 'items'):
   3.262 +            return howToMatch.has_key(chunk)
   3.263 +        #It's just a string
   3.264 +        return str(howToMatch) == chunk
   3.265 +
   3.266 +class NavigableText(PageElement):
   3.267 +
   3.268 +    def __getattr__(self, attr):
   3.269 +        "For backwards compatibility, text.string gives you text"
   3.270 +        if attr == 'string':
   3.271 +            return self
   3.272 +        else:
   3.273 +            raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr)
   3.274 +        
   3.275 +class NavigableString(str, NavigableText):
   3.276 +    pass
   3.277 +
   3.278 +class NavigableUnicodeString(unicode, NavigableText):
   3.279 +    pass
   3.280 +
   3.281 +class Tag(PageElement):
   3.282 +
   3.283 +    """Represents a found HTML tag with its attributes and contents."""
   3.284 +
   3.285 +    def __init__(self, name, attrs=None, parent=Null, previous=Null):
   3.286 +        "Basic constructor."
   3.287 +        self.name = name
   3.288 +        if attrs == None:
   3.289 +            attrs = []
   3.290 +        self.attrs = attrs
   3.291 +        self.contents = []
   3.292 +        self.setup(parent, previous)
   3.293 +        self.hidden = False
   3.294 +
   3.295 +    def get(self, key, default=None):
   3.296 +        """Returns the value of the 'key' attribute for the tag, or
   3.297 +        the value given for 'default' if it doesn't have that
   3.298 +        attribute."""
   3.299 +        return self._getAttrMap().get(key, default)    
   3.300 +
   3.301 +    def __getitem__(self, key):
   3.302 +        """tag[key] returns the value of the 'key' attribute for the tag,
   3.303 +        and throws an exception if it's not there."""
   3.304 +        return self._getAttrMap()[key]
   3.305 +
   3.306 +    def __iter__(self):
   3.307 +        "Iterating over a tag iterates over its contents."
   3.308 +        return iter(self.contents)
   3.309 +
   3.310 +    def __len__(self):
   3.311 +        "The length of a tag is the length of its list of contents."
   3.312 +        return len(self.contents)
   3.313 +
   3.314 +    def __contains__(self, x):
   3.315 +        return x in self.contents
   3.316 +
   3.317 +    def __nonzero__(self):
   3.318 +        "A tag is always true in a boolean context, even if it has no contents."
   3.319 +        return True
   3.320 +
   3.321 +    def __setitem__(self, key, value):        
   3.322 +        """Setting tag[key] sets the value of the 'key' attribute for the
   3.323 +        tag."""
   3.324 +        self._getAttrMap()
   3.325 +        self.attrMap[key] = value
   3.326 +        found = False
   3.327 +        for i in range(0, len(self.attrs)):
   3.328 +            if self.attrs[i][0] == key:
   3.329 +                self.attrs[i] = (key, value)
   3.330 +                found = True
   3.331 +        if not found:
   3.332 +            self.attrs.append((key, value))
   3.333 +        self._getAttrMap()[key] = value
   3.334 +
   3.335 +    def __delitem__(self, key):
   3.336 +        "Deleting tag[key] deletes all 'key' attributes for the tag."
   3.337 +        for item in self.attrs:
   3.338 +            if item[0] == key:
   3.339 +                self.attrs.remove(item)
   3.340 +                #We don't break because bad HTML can define the same
   3.341 +                #attribute multiple times.
   3.342 +            self._getAttrMap()
   3.343 +            if self.attrMap.has_key(key):
   3.344 +                del self.attrMap[key]
   3.345 +
   3.346 +    def __call__(self, *args, **kwargs):
   3.347 +        """Calling a tag like a function is the same as calling its
   3.348 +        fetch() method. Eg. tag('a') returns a list of all the A tags
   3.349 +        found within this tag."""
   3.350 +        return apply(self.fetch, args, kwargs)
   3.351 +
   3.352 +    def __getattr__(self, tag):
   3.353 +        if len(tag) > 3 and tag.rfind('Tag') == len(tag)-3:
   3.354 +            return self.first(tag[:-3])
   3.355 +        elif tag.find('__') != 0:
   3.356 +            return self.first(tag)
   3.357 +
   3.358 +    def __eq__(self, other):
   3.359 +        """Returns true iff this tag has the same name, the same attributes,
   3.360 +        and the same contents (recursively) as the given tag.
   3.361 +
   3.362 +        NOTE: right now this will return false if two tags have the
   3.363 +        same attributes in a different order. Should this be fixed?"""
   3.364 +        if not hasattr(other, 'name') or not hasattr(other, 'attrs') or not hasattr(other, 'contents') or self.name != other.name or self.attrs != other.attrs or len(self) != len(other):
   3.365 +            return False
   3.366 +        for i in range(0, len(self.contents)):
   3.367 +            if self.contents[i] != other.contents[i]:
   3.368 +                return False
   3.369 +        return True
   3.370 +
   3.371 +    def __ne__(self, other):
   3.372 +        """Returns true iff this tag is not identical to the other tag,
   3.373 +        as defined in __eq__."""
   3.374 +        return not self == other
   3.375 +
   3.376 +    def __repr__(self):
   3.377 +        """Renders this tag as a string."""
   3.378 +        return str(self)
   3.379 +
   3.380 +    def __unicode__(self):
   3.381 +        return self.__str__(1)
   3.382 +
   3.383 +    def __str__(self, needUnicode=None, showStructureIndent=None):
   3.384 +        """Returns a string or Unicode representation of this tag and
   3.385 +        its contents.
   3.386 +
   3.387 +        NOTE: since Python's HTML parser consumes whitespace, this
   3.388 +        method is not certain to reproduce the whitespace present in
   3.389 +        the original string."""
   3.390 +        
   3.391 +        attrs = []
   3.392 +        if self.attrs:
   3.393 +            for key, val in self.attrs:
   3.394 +                attrs.append('%s="%s"' % (key, val))
   3.395 +        close = ''
   3.396 +        closeTag = ''
   3.397 +        if self.isSelfClosing():
   3.398 +            close = ' /'
   3.399 +        else:
   3.400 +            closeTag = '</%s>' % self.name
   3.401 +        indentIncrement = None        
   3.402 +        if showStructureIndent != None:
   3.403 +            indentIncrement = showStructureIndent
   3.404 +            if not self.hidden:
   3.405 +                indentIncrement += 1
   3.406 +        contents = self.renderContents(indentIncrement, needUnicode=needUnicode)        
   3.407 +        if showStructureIndent:
   3.408 +            space = '\n%s' % (' ' * showStructureIndent)
   3.409 +        if self.hidden:
   3.410 +            s = contents
   3.411 +        else:
   3.412 +            s = []
   3.413 +            attributeString = ''
   3.414 +            if attrs:
   3.415 +                attributeString = ' ' + ' '.join(attrs)            
   3.416 +            if showStructureIndent:
   3.417 +                s.append(space)
   3.418 +            s.append('<%s%s%s>' % (self.name, attributeString, close))
   3.419 +            s.append(contents)
   3.420 +            if closeTag and showStructureIndent != None:
   3.421 +                s.append(space)
   3.422 +            s.append(closeTag)
   3.423 +            s = ''.join(s)
   3.424 +        isUnicode = type(s) == types.UnicodeType
   3.425 +        if needUnicode and not isUnicode:
   3.426 +            s = unicode(s)
   3.427 +        elif isUnicode and needUnicode==False:
   3.428 +            s = str(s)
   3.429 +        return s
   3.430 +
   3.431 +    def prettify(self, needUnicode=None):
   3.432 +        return self.__str__(needUnicode, showStructureIndent=True)
   3.433 +
   3.434 +    def renderContents(self, showStructureIndent=None, needUnicode=None):
   3.435 +        """Renders the contents of this tag as a (possibly Unicode) 
   3.436 +        string."""
   3.437 +        s=[]
   3.438 +        for c in self:
   3.439 +            text = None
   3.440 +            if isinstance(c, NavigableUnicodeString) or type(c) == types.UnicodeType:
   3.441 +                text = unicode(c)
   3.442 +            elif isinstance(c, Tag):
   3.443 +                s.append(c.__str__(needUnicode, showStructureIndent))
   3.444 +            elif needUnicode:
   3.445 +                text = unicode(c)
   3.446 +            else:
   3.447 +                text = str(c)
   3.448 +            if text:
   3.449 +                if showStructureIndent != None:
   3.450 +                    if text[-1] == '\n':
   3.451 +                        text = text[:-1]
   3.452 +                s.append(text)
   3.453 +        return ''.join(s)    
   3.454 +
   3.455 +    #Soup methods
   3.456 +
   3.457 +    def firstText(self, text, recursive=True):
   3.458 +        """Convenience method to retrieve the first piece of text matching the
   3.459 +        given criteria. 'text' can be a string, a regular expression object,
   3.460 +        a callable that takes a string and returns whether or not the
   3.461 +        string 'matches', etc."""
   3.462 +        return self.first(recursive=recursive, text=text)
   3.463 +
   3.464 +    def fetchText(self, text, recursive=True, limit=None):
   3.465 +        """Convenience method to retrieve all pieces of text matching the
   3.466 +        given criteria. 'text' can be a string, a regular expression object,
   3.467 +        a callable that takes a string and returns whether or not the
   3.468 +        string 'matches', etc."""
   3.469 +        return self.fetch(recursive=recursive, text=text, limit=limit)
   3.470 +
   3.471 +    def first(self, name=None, attrs={}, recursive=True, text=None):
   3.472 +        """Return only the first child of this
   3.473 +        Tag matching the given criteria."""
   3.474 +        r = Null
   3.475 +        l = self.fetch(name, attrs, recursive, text, 1)
   3.476 +        if l:
   3.477 +            r = l[0]
   3.478 +        return r
   3.479 +    findChild = first
   3.480 +
   3.481 +    def fetch(self, name=None, attrs={}, recursive=True, text=None,
   3.482 +              limit=None):
   3.483 +        """Extracts a list of Tag objects that match the given
   3.484 +        criteria.  You can specify the name of the Tag and any
   3.485 +        attributes you want the Tag to have.
   3.486 +
   3.487 +        The value of a key-value pair in the 'attrs' map can be a
   3.488 +        string, a list of strings, a regular expression object, or a
   3.489 +        callable that takes a string and returns whether or not the
   3.490 +        string matches for some custom definition of 'matches'. The
   3.491 +        same is true of the tag name."""
   3.492 +        generator = self.recursiveChildGenerator
   3.493 +        if not recursive:
   3.494 +            generator = self.childGenerator
   3.495 +        return self._fetch(name, attrs, text, limit, generator)
   3.496 +    fetchChildren = fetch
   3.497 +    
   3.498 +    #Utility methods
   3.499 +
   3.500 +    def isSelfClosing(self):
   3.501 +        """Returns true iff this is a self-closing tag as defined in the HTML
   3.502 +        standard.
   3.503 +
   3.504 +        TODO: This is specific to BeautifulSoup and its subclasses, but it's
   3.505 +        used by __str__"""
   3.506 +        return self.name in BeautifulSoup.SELF_CLOSING_TAGS
   3.507 +
   3.508 +    def append(self, tag):
   3.509 +        """Appends the given tag to the contents of this tag."""
   3.510 +        self.contents.append(tag)
   3.511 +
   3.512 +    #Private methods
   3.513 +
   3.514 +    def _getAttrMap(self):
   3.515 +        """Initializes a map representation of this tag's attributes,
   3.516 +        if not already initialized."""
   3.517 +        if not getattr(self, 'attrMap'):
   3.518 +            self.attrMap = {}
   3.519 +            for (key, value) in self.attrs:
   3.520 +                self.attrMap[key] = value 
   3.521 +        return self.attrMap
   3.522 +
   3.523 +    #Generator methods
   3.524 +    def childGenerator(self):
   3.525 +        for i in range(0, len(self.contents)):
   3.526 +            yield self.contents[i]
   3.527 +        raise StopIteration
   3.528 +    
   3.529 +    def recursiveChildGenerator(self):
   3.530 +        stack = [(self, 0)]
   3.531 +        while stack:
   3.532 +            tag, start = stack.pop()
   3.533 +            if isinstance(tag, Tag):            
   3.534 +                for i in range(start, len(tag.contents)):
   3.535 +                    a = tag.contents[i]
   3.536 +                    yield a
   3.537 +                    if isinstance(a, Tag) and tag.contents:
   3.538 +                        if i < len(tag.contents) - 1:
   3.539 +                            stack.append((tag, i+1))
   3.540 +                        stack.append((a, 0))
   3.541 +                        break
   3.542 +        raise StopIteration
   3.543 +
   3.544 +
   3.545 +def isList(l):
   3.546 +    """Convenience method that works with all 2.x versions of Python
   3.547 +    to determine whether or not something is listlike."""
   3.548 +    return hasattr(l, '__iter__') \
   3.549 +           or (type(l) in (types.ListType, types.TupleType))
   3.550 +
   3.551 +def buildTagMap(default, *args):
   3.552 +    """Turns a list of maps, lists, or scalars into a single map.
   3.553 +    Used to build the SELF_CLOSING_TAGS and NESTABLE_TAGS maps out
   3.554 +    of lists and partial maps."""
   3.555 +    built = {}
   3.556 +    for portion in args:
   3.557 +        if hasattr(portion, 'items'):
   3.558 +            #It's a map. Merge it.
   3.559 +            for k,v in portion.items():
   3.560 +                built[k] = v
   3.561 +        elif isList(portion):
   3.562 +            #It's a list. Map each item to the default.
   3.563 +            for k in portion:
   3.564 +                built[k] = default
   3.565 +        else:
   3.566 +            #It's a scalar. Map it to the default.
   3.567 +            built[portion] = default
   3.568 +    return built
   3.569 +
   3.570 +class BeautifulStoneSoup(Tag, SGMLParser):
   3.571 +
   3.572 +    """This class contains the basic parser and fetch code. It defines
   3.573 +    a parser that knows nothing about tag behavior except for the
   3.574 +    following:
   3.575 +   
   3.576 +      You can't close a tag without closing all the tags it encloses.
   3.577 +      That is, "<foo><bar></foo>" actually means
   3.578 +      "<foo><bar></bar></foo>".
   3.579 +
   3.580 +    [Another possible explanation is "<foo><bar /></foo>", but since
   3.581 +    this class defines no SELF_CLOSING_TAGS, it will never use that
   3.582 +    explanation.]
   3.583 +
   3.584 +    This class is useful for parsing XML or made-up markup languages,
   3.585 +    or when BeautifulSoup makes an assumption counter to what you were
   3.586 +    expecting."""
   3.587 +
   3.588 +    SELF_CLOSING_TAGS = {}
   3.589 +    NESTABLE_TAGS = {}
   3.590 +    RESET_NESTING_TAGS = {}
   3.591 +    QUOTE_TAGS = {}
   3.592 +
   3.593 +    #As a public service we will by default silently replace MS smart quotes
   3.594 +    #and similar characters with their HTML or ASCII equivalents.
   3.595 +    MS_CHARS = { '\x80' : '&euro;',
   3.596 +                 '\x81' : ' ',
   3.597 +                 '\x82' : '&sbquo;',
   3.598 +                 '\x83' : '&fnof;',
   3.599 +                 '\x84' : '&bdquo;',
   3.600 +                 '\x85' : '&hellip;',
   3.601 +                 '\x86' : '&dagger;',
   3.602 +                 '\x87' : '&Dagger;',
   3.603 +                 '\x88' : '&caret;',
   3.604 +                 '\x89' : '%',
   3.605 +                 '\x8A' : '&Scaron;',
   3.606 +                 '\x8B' : '&lt;',
   3.607 +                 '\x8C' : '&OElig;',
   3.608 +                 '\x8D' : '?',
   3.609 +                 '\x8E' : 'Z',
   3.610 +                 '\x8F' : '?',
   3.611 +                 '\x90' : '?',
   3.612 +                 '\x91' : '&lsquo;',
   3.613 +                 '\x92' : '&rsquo;',
   3.614 +                 '\x93' : '&ldquo;',
   3.615 +                 '\x94' : '&rdquo;',
   3.616 +                 '\x95' : '&bull;',
   3.617 +                 '\x96' : '&ndash;',
   3.618 +                 '\x97' : '&mdash;',
   3.619 +                 '\x98' : '&tilde;',
   3.620 +                 '\x99' : '&trade;',
   3.621 +                 '\x9a' : '&scaron;',
   3.622 +                 '\x9b' : '&gt;',
   3.623 +                 '\x9c' : '&oelig;',
   3.624 +                 '\x9d' : '?',
   3.625 +                 '\x9e' : 'z',
   3.626 +                 '\x9f' : '&Yuml;',}
   3.627 +
   3.628 +    PARSER_MASSAGE = [(re.compile('(<[^<>]*)/>'),
   3.629 +                       lambda(x):x.group(1) + ' />'),
   3.630 +                      (re.compile('<!\s+([^<>]*)>'),
   3.631 +                       lambda(x):'<!' + x.group(1) + '>'),
   3.632 +                      (re.compile("([\x80-\x9f])"),
   3.633 +                       lambda(x): BeautifulStoneSoup.MS_CHARS.get(x.group(1)))
   3.634 +                      ]
   3.635 +
   3.636 +    ROOT_TAG_NAME = '[document]'
   3.637 +
   3.638 +    def __init__(self, text=None, avoidParserProblems=True,
   3.639 +                 initialTextIsEverything=True):
   3.640 +        """Initialize this as the 'root tag' and feed in any text to
   3.641 +        the parser.
   3.642 +
   3.643 +        NOTE about avoidParserProblems: sgmllib will process most bad
   3.644 +        HTML, and BeautifulSoup has tricks for dealing with some HTML
   3.645 +        that kills sgmllib, but Beautiful Soup can nonetheless choke
   3.646 +        or lose data if your data uses self-closing tags or
   3.647 +        declarations incorrectly. By default, Beautiful Soup sanitizes
   3.648 +        its input to avoid the vast majority of these problems. The
   3.649 +        problems are relatively rare, even in bad HTML, so feel free
   3.650 +        to pass in False to avoidParserProblems if they don't apply to
   3.651 +        you, and you'll get better performance. The only reason I have
   3.652 +        this turned on by default is so I don't get so many tech
   3.653 +        support questions.
   3.654 +
   3.655 +        The two most common instances of invalid HTML that will choke
   3.656 +        sgmllib are fixed by the default parser massage techniques:
   3.657 +
   3.658 +         <br/> (No space between name of closing tag and tag close)
   3.659 +         <! --Comment--> (Extraneous whitespace in declaration)
   3.660 +
   3.661 +        You can pass in a custom list of (RE object, replace method)
   3.662 +        tuples to get Beautiful Soup to scrub your input the way you
   3.663 +        want."""
   3.664 +        Tag.__init__(self, self.ROOT_TAG_NAME)
   3.665 +        if avoidParserProblems \
   3.666 +           and not isList(avoidParserProblems):
   3.667 +            avoidParserProblems = self.PARSER_MASSAGE            
   3.668 +        self.avoidParserProblems = avoidParserProblems
   3.669 +        SGMLParser.__init__(self)
   3.670 +        self.quoteStack = []
   3.671 +        self.hidden = 1
   3.672 +        self.reset()
   3.673 +        if hasattr(text, 'read'):
   3.674 +            #It's a file-type object.
   3.675 +            text = text.read()
   3.676 +        if text:
   3.677 +            self.feed(text)
   3.678 +        if initialTextIsEverything:
   3.679 +            self.done()
   3.680 +
   3.681 +    def __getattr__(self, methodName):
   3.682 +        """This method routes method call requests to either the SGMLParser
   3.683 +        superclass or the Tag superclass, depending on the method name."""
   3.684 +        if methodName.find('start_') == 0 or methodName.find('end_') == 0 \
   3.685 +               or methodName.find('do_') == 0:
   3.686 +            return SGMLParser.__getattr__(self, methodName)
   3.687 +        elif methodName.find('__') != 0:
   3.688 +            return Tag.__getattr__(self, methodName)
   3.689 +        else:
   3.690 +            raise AttributeError
   3.691 +
   3.692 +    def feed(self, text):
   3.693 +        if self.avoidParserProblems:
   3.694 +            for fix, m in self.avoidParserProblems:
   3.695 +                text = fix.sub(m, text)
   3.696 +        SGMLParser.feed(self, text)
   3.697 +
   3.698 +    def done(self):
   3.699 +        """Called when you're done parsing, so that the unclosed tags can be
   3.700 +        correctly processed."""
   3.701 +        self.endData() #NEW
   3.702 +        while self.currentTag.name != self.ROOT_TAG_NAME:
   3.703 +            self.popTag()
   3.704 +            
   3.705 +    def reset(self):
   3.706 +        SGMLParser.reset(self)
   3.707 +        self.currentData = []
   3.708 +        self.currentTag = None
   3.709 +        self.tagStack = []
   3.710 +        self.pushTag(self)        
   3.711 +    
   3.712 +    def popTag(self):
   3.713 +        tag = self.tagStack.pop()
   3.714 +        # Tags with just one string-owning child get the child as a
   3.715 +        # 'string' property, so that soup.tag.string is shorthand for
   3.716 +        # soup.tag.contents[0]
   3.717 +        if len(self.currentTag.contents) == 1 and \
   3.718 +           isinstance(self.currentTag.contents[0], NavigableText):
   3.719 +            self.currentTag.string = self.currentTag.contents[0]
   3.720 +
   3.721 +        #print "Pop", tag.name
   3.722 +        if self.tagStack:
   3.723 +            self.currentTag = self.tagStack[-1]
   3.724 +        return self.currentTag
   3.725 +
   3.726 +    def pushTag(self, tag):
   3.727 +        #print "Push", tag.name
   3.728 +        if self.currentTag:
   3.729 +            self.currentTag.append(tag)
   3.730 +        self.tagStack.append(tag)
   3.731 +        self.currentTag = self.tagStack[-1]
   3.732 +
   3.733 +    def endData(self):
   3.734 +        currentData = ''.join(self.currentData)
   3.735 +        if currentData:
   3.736 +            if not currentData.strip():
   3.737 +                if '\n' in currentData:
   3.738 +                    currentData = '\n'
   3.739 +                else:
   3.740 +                    currentData = ' '
   3.741 +            c = NavigableString
   3.742 +            if type(currentData) == types.UnicodeType:
   3.743 +                c = NavigableUnicodeString
   3.744 +            o = c(currentData)
   3.745 +            o.setup(self.currentTag, self.previous)
   3.746 +            if self.previous:
   3.747 +                self.previous.next = o
   3.748 +            self.previous = o
   3.749 +            self.currentTag.contents.append(o)
   3.750 +        self.currentData = []
   3.751 +
   3.752 +    def _popToTag(self, name, inclusivePop=True):
   3.753 +        """Pops the tag stack up to and including the most recent
   3.754 +        instance of the given tag. If inclusivePop is false, pops the tag
   3.755 +        stack up to but *not* including the most recent instance of
   3.756 +        the given tag."""
   3.757 +        if name == self.ROOT_TAG_NAME:
   3.758 +            return            
   3.759 +
   3.760 +        numPops = 0
   3.761 +        mostRecentTag = None
   3.762 +        for i in range(len(self.tagStack)-1, 0, -1):
   3.763 +            if name == self.tagStack[i].name:
   3.764 +                numPops = len(self.tagStack)-i
   3.765 +                break
   3.766 +        if not inclusivePop:
   3.767 +            numPops = numPops - 1
   3.768 +
   3.769 +        for i in range(0, numPops):
   3.770 +            mostRecentTag = self.popTag()
   3.771 +        return mostRecentTag    
   3.772 +
   3.773 +    def _smartPop(self, name):
   3.774 +
   3.775 +        """We need to pop up to the previous tag of this type, unless
   3.776 +        one of this tag's nesting reset triggers comes between this
   3.777 +        tag and the previous tag of this type, OR unless this tag is a
   3.778 +        generic nesting trigger and another generic nesting trigger
   3.779 +        comes between this tag and the previous tag of this type.
   3.780 +
   3.781 +        Examples:
   3.782 +         <p>Foo<b>Bar<p> should pop to 'p', not 'b'.
   3.783 +         <p>Foo<table>Bar<p> should pop to 'table', not 'p'.
   3.784 +         <p>Foo<table><tr>Bar<p> should pop to 'tr', not 'p'.
   3.785 +         <p>Foo<b>Bar<p> should pop to 'p', not 'b'.
   3.786 +
   3.787 +         <li><ul><li> *<li>* should pop to 'ul', not the first 'li'.
   3.788 +         <tr><table><tr> *<tr>* should pop to 'table', not the first 'tr'
   3.789 +         <td><tr><td> *<td>* should pop to 'tr', not the first 'td'
   3.790 +        """
   3.791 +
   3.792 +        nestingResetTriggers = self.NESTABLE_TAGS.get(name)
   3.793 +        isNestable = nestingResetTriggers != None
   3.794 +        isResetNesting = self.RESET_NESTING_TAGS.has_key(name)
   3.795 +        popTo = None
   3.796 +        inclusive = True
   3.797 +        for i in range(len(self.tagStack)-1, 0, -1):
   3.798 +            p = self.tagStack[i]
   3.799 +            if (not p or p.name == name) and not isNestable:
   3.800 +                #Non-nestable tags get popped to the top or to their
   3.801 +                #last occurrence.
   3.802 +                popTo = name
   3.803 +                break
   3.804 +            if (nestingResetTriggers != None
   3.805 +                and p.name in nestingResetTriggers) \
   3.806 +                or (nestingResetTriggers == None and isResetNesting
   3.807 +                    and self.RESET_NESTING_TAGS.has_key(p.name)):
   3.808 +                
   3.809 +                #If we encounter one of the nesting reset triggers
   3.810 +                #peculiar to this tag, or we encounter another tag
   3.811 +                #that causes nesting to reset, pop up to but not
   3.812 +                #including that tag.
   3.813 +
   3.814 +                popTo = p.name
   3.815 +                inclusive = False
   3.816 +                break
   3.817 +            p = p.parent
   3.818 +        if popTo:
   3.819 +            self._popToTag(popTo, inclusive)
   3.820 +
   3.821 +    def unknown_starttag(self, name, attrs, selfClosing=0):
   3.822 +        #print "Start tag %s" % name
   3.823 +        if self.quoteStack:
   3.824 +            #This is not a real tag.
   3.825 +            #print "<%s> is not real!" % name
   3.826 +            attrs = ''.join(map(lambda(x, y): ' %s="%s"' % (x, y), attrs))
   3.827 +            self.handle_data('<%s%s>' % (name, attrs))
   3.828 +            return
   3.829 +        self.endData()
   3.830 +        if not name in self.SELF_CLOSING_TAGS and not selfClosing:
   3.831 +            self._smartPop(name)
   3.832 +        tag = Tag(name, attrs, self.currentTag, self.previous)        
   3.833 +        if self.previous:
   3.834 +            self.previous.next = tag
   3.835 +        self.previous = tag
   3.836 +        self.pushTag(tag)
   3.837 +        if selfClosing or name in self.SELF_CLOSING_TAGS:
   3.838 +            self.popTag()                
   3.839 +        if name in self.QUOTE_TAGS:
   3.840 +            #print "Beginning quote (%s)" % name
   3.841 +            self.quoteStack.append(name)
   3.842 +            self.literal = 1
   3.843 +
   3.844 +    def unknown_endtag(self, name):
   3.845 +        if self.quoteStack and self.quoteStack[-1] != name:
   3.846 +            #This is not a real end tag.
   3.847 +            #print "</%s> is not real!" % name
   3.848 +            self.handle_data('</%s>' % name)
   3.849 +            return
   3.850 +        self.endData()
   3.851 +        self._popToTag(name)
   3.852 +        if self.quoteStack and self.quoteStack[-1] == name:
   3.853 +            self.quoteStack.pop()
   3.854 +            self.literal = (len(self.quoteStack) > 0)
   3.855 +
   3.856 +    def handle_data(self, data):
   3.857 +        self.currentData.append(data)
   3.858 +
   3.859 +    def handle_pi(self, text):
   3.860 +        "Propagate processing instructions right through."
   3.861 +        self.handle_data("<?%s>" % text)
   3.862 +
   3.863 +    def handle_comment(self, text):
   3.864 +        "Propagate comments right through."
   3.865 +        self.handle_data("<!--%s-->" % text)
   3.866 +
   3.867 +    def handle_charref(self, ref):
   3.868 +        "Propagate char refs right through."
   3.869 +        self.handle_data('&#%s;' % ref)
   3.870 +
   3.871 +    def handle_entityref(self, ref):
   3.872 +        "Propagate entity refs right through."
   3.873 +        self.handle_data('&%s;' % ref)
   3.874 +        
   3.875 +    def handle_decl(self, data):
   3.876 +        "Propagate DOCTYPEs and the like right through."
   3.877 +        self.handle_data('<!%s>' % data)
   3.878 +
   3.879 +    def parse_declaration(self, i):
   3.880 +        """Treat a bogus SGML declaration as raw data. Treat a CDATA
   3.881 +        declaration as regular data."""
   3.882 +        j = None
   3.883 +        if self.rawdata[i:i+9] == '<![CDATA[':
   3.884 +             k = self.rawdata.find(']]>', i)
   3.885 +             if k == -1:
   3.886 +                 k = len(self.rawdata)
   3.887 +             self.handle_data(self.rawdata[i+9:k])
   3.888 +             j = k+3
   3.889 +        else:
   3.890 +            try:
   3.891 +                j = SGMLParser.parse_declaration(self, i)
   3.892 +            except SGMLParseError:
   3.893 +                toHandle = self.rawdata[i:]
   3.894 +                self.handle_data(toHandle)
   3.895 +                j = i + len(toHandle)
   3.896 +        return j
   3.897 +
   3.898 +class BeautifulSoup(BeautifulStoneSoup):
   3.899 +
   3.900 +    """This parser knows the following facts about HTML:
   3.901 +
   3.902 +    * Some tags have no closing tag and should be interpreted as being
   3.903 +      closed as soon as they are encountered.
   3.904 +
   3.905 +    * The text inside some tags (e.g. 'script') may contain tags which
   3.906 +      are not really part of the document and which should be parsed
   3.907 +      as text, not tags. If you want to parse the text as tags, you can
   3.908 +      always fetch it and parse it explicitly.
   3.909 +
   3.910 +    * Tag nesting rules:
   3.911 +
   3.912 +      Most tags can't be nested at all. For instance, the occurrence of
   3.913 +      a <p> tag should implicitly close the previous <p> tag.
   3.914 +
   3.915 +       <p>Para1<p>Para2
   3.916 +        should be transformed into:
   3.917 +       <p>Para1</p><p>Para2
   3.918 +
   3.919 +      Some tags can be nested arbitrarily. For instance, the occurrence
   3.920 +      of a <blockquote> tag should _not_ implicitly close the previous
   3.921 +      <blockquote> tag.
   3.922 +
   3.923 +       Alice said: <blockquote>Bob said: <blockquote>Blah
   3.924 +        should NOT be transformed into:
   3.925 +       Alice said: <blockquote>Bob said: </blockquote><blockquote>Blah
   3.926 +
   3.927 +      Some tags can be nested, but the nesting is reset by the
   3.928 +      interposition of other tags. For instance, a <tr> tag should
   3.929 +      implicitly close the previous <tr> tag within the same <table>,
   3.930 +      but not close a <tr> tag in another table.
   3.931 +
   3.932 +       <table><tr>Blah<tr>Blah
   3.933 +        should be transformed into:
   3.934 +       <table><tr>Blah</tr><tr>Blah
   3.935 +        but,
   3.936 +       <tr>Blah<table><tr>Blah
   3.937 +        should NOT be transformed into
   3.938 +       <tr>Blah<table></tr><tr>Blah
   3.939 +
   3.940 +    Differing assumptions about tag nesting rules are a major source
   3.941 +    of problems with the BeautifulSoup class. If BeautifulSoup is not
   3.942 +    treating as nestable a tag your page author treats as nestable,
   3.943 +    try ICantBelieveItsBeautifulSoup before writing your own
   3.944 +    subclass."""
   3.945 +
   3.946 +    SELF_CLOSING_TAGS = buildTagMap(None, ['br' , 'hr', 'input', 'img', 'meta',
   3.947 +                                           'spacer', 'link', 'frame', 'base'])
   3.948 +
   3.949 +    QUOTE_TAGS = {'script': None}
   3.950 +    
   3.951 +    #According to the HTML standard, each of these inline tags can
   3.952 +    #contain another tag of the same type. Furthermore, it's common
   3.953 +    #to actually use these tags this way.
   3.954 +    NESTABLE_INLINE_TAGS = ['span', 'font', 'q', 'object', 'bdo', 'sub', 'sup',
   3.955 +                            'center']
   3.956 +
   3.957 +    #According to the HTML standard, these block tags can contain
   3.958 +    #another tag of the same type. Furthermore, it's common
   3.959 +    #to actually use these tags this way.
   3.960 +    NESTABLE_BLOCK_TAGS = ['blockquote', 'div', 'fieldset', 'ins', 'del']
   3.961 +
   3.962 +    #Lists can contain other lists, but there are restrictions.    
   3.963 +    NESTABLE_LIST_TAGS = { 'ol' : [],
   3.964 +                           'ul' : [],
   3.965 +                           'li' : ['ul', 'ol'],
   3.966 +                           'dl' : [],
   3.967 +                           'dd' : ['dl'],
   3.968 +                           'dt' : ['dl'] }
   3.969 +
   3.970 +    #Tables can contain other tables, but there are restrictions.    
   3.971 +    NESTABLE_TABLE_TAGS = {'table' : [], 
   3.972 +                           'tr' : ['table', 'tbody', 'tfoot', 'thead'],
   3.973 +                           'td' : ['tr'],
   3.974 +                           'th' : ['tr'],
   3.975 +                           }
   3.976 +
   3.977 +    NON_NESTABLE_BLOCK_TAGS = ['address', 'form', 'p', 'pre']
   3.978 +
   3.979 +    #If one of these tags is encountered, all tags up to the next tag of
   3.980 +    #this type are popped.
   3.981 +    RESET_NESTING_TAGS = buildTagMap(None, NESTABLE_BLOCK_TAGS, 'noscript',
   3.982 +                                     NON_NESTABLE_BLOCK_TAGS,
   3.983 +                                     NESTABLE_LIST_TAGS,
   3.984 +                                     NESTABLE_TABLE_TAGS)
   3.985 +
   3.986 +    NESTABLE_TAGS = buildTagMap([], NESTABLE_INLINE_TAGS, NESTABLE_BLOCK_TAGS,
   3.987 +                                NESTABLE_LIST_TAGS, NESTABLE_TABLE_TAGS)
   3.988 +    
   3.989 +class ICantBelieveItsBeautifulSoup(BeautifulSoup):
   3.990 +
   3.991 +    """The BeautifulSoup class is oriented towards skipping over
   3.992 +    common HTML errors like unclosed tags. However, sometimes it makes
   3.993 +    errors of its own. For instance, consider this fragment:
   3.994 +
   3.995 +     <b>Foo<b>Bar</b></b>
   3.996 +
   3.997 +    This is perfectly valid (if bizarre) HTML. However, the
   3.998 +    BeautifulSoup class will implicitly close the first b tag when it
   3.999 +    encounters the second 'b'. It will think the author wrote
  3.1000 +    "<b>Foo<b>Bar", and didn't close the first 'b' tag, because
  3.1001 +    there's no real-world reason to bold something that's already
  3.1002 +    bold. When it encounters '</b></b>' it will close two more 'b'
  3.1003 +    tags, for a grand total of three tags closed instead of two. This
  3.1004 +    can throw off the rest of your document structure. The same is
  3.1005 +    true of a number of other tags, listed below.
  3.1006 +
  3.1007 +    It's much more common for someone to forget to close (eg.) a 'b'
  3.1008 +    tag than to actually use nested 'b' tags, and the BeautifulSoup
  3.1009 +    class handles the common case. This class handles the
  3.1010 +    not-so-common case: where you can't believe someone wrote what
  3.1011 +    they did, but it's valid HTML and BeautifulSoup screwed up by
  3.1012 +    assuming it wouldn't be.
  3.1013 +
  3.1014 +    If this doesn't do what you need, try subclassing this class or
  3.1015 +    BeautifulSoup, and providing your own list of NESTABLE_TAGS."""
  3.1016 +
  3.1017 +    I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS = \
  3.1018 +     ['em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'strong',
  3.1019 +      'cite', 'code', 'dfn', 'kbd', 'samp', 'strong', 'var', 'b',
  3.1020 +      'big']
  3.1021 +
  3.1022 +    I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS = ['noscript']
  3.1023 +
  3.1024 +    NESTABLE_TAGS = buildTagMap([], BeautifulSoup.NESTABLE_TAGS,
  3.1025 +                                I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS,
  3.1026 +                                I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS)
  3.1027 +
  3.1028 +class BeautifulSOAP(BeautifulStoneSoup):
  3.1029 +    """This class will push a tag with only a single string child into
  3.1030 +    the tag's parent as an attribute. The attribute's name is the tag
  3.1031 +    name, and the value is the string child. An example should give
  3.1032 +    the flavor of the change:
  3.1033 +
  3.1034 +    <foo><bar>baz</bar></foo>
  3.1035 +     =>
  3.1036 +    <foo bar="baz"><bar>baz</bar></foo>
  3.1037 +
  3.1038 +    You can then access fooTag['bar'] instead of fooTag.barTag.string.
  3.1039 +
  3.1040 +    This is, of course, useful for scraping structures that tend to
  3.1041 +    use subelements instead of attributes, such as SOAP messages. Note
  3.1042 +    that it modifies its input, so don't print the modified version
  3.1043 +    out.
  3.1044 +
  3.1045 +    I'm not sure how many people really want to use this class; let me
  3.1046 +    know if you do. Mainly I like the name."""
  3.1047 +
  3.1048 +    def popTag(self):
  3.1049 +        if len(self.tagStack) > 1:
  3.1050 +            tag = self.tagStack[-1]
  3.1051 +            parent = self.tagStack[-2]
  3.1052 +            parent._getAttrMap()
  3.1053 +            if (isinstance(tag, Tag) and len(tag.contents) == 1 and
  3.1054 +                isinstance(tag.contents[0], NavigableText) and 
  3.1055 +                not parent.attrMap.has_key(tag.name)):
  3.1056 +                parent[tag.name] = tag.contents[0]
  3.1057 +        BeautifulStoneSoup.popTag(self)
  3.1058 +
  3.1059 +#Enterprise class names! It has come to our attention that some people
  3.1060 +#think the names of the Beautiful Soup parser classes are too silly
  3.1061 +#and "unprofessional" for use in enterprise screen-scraping. We feel
  3.1062 +#your pain! For such-minded folk, the Beautiful Soup Consortium And
  3.1063 +#All-Night Kosher Bakery recommends renaming this file to
  3.1064 +#"RobustParser.py" (or, in cases of extreme enterprisitude,
  3.1065 +#"RobustParserBeanInterface.class") and using the following
  3.1066 +#enterprise-friendly class aliases:
  3.1067 +class RobustXMLParser(BeautifulStoneSoup):
  3.1068 +    pass
  3.1069 +class RobustHTMLParser(BeautifulSoup):
  3.1070 +    pass
  3.1071 +class RobustWackAssHTMLParser(ICantBelieveItsBeautifulSoup):
  3.1072 +    pass
  3.1073 +class SimplifyingSOAPParser(BeautifulSOAP):
  3.1074 +    pass
  3.1075 +
  3.1076 +###
  3.1077 +
  3.1078 +
  3.1079 +#By default, act as an HTML pretty-printer.
  3.1080 +if __name__ == '__main__':
  3.1081 +    import sys
  3.1082 +    soup = BeautifulStoneSoup(sys.stdin.read())
  3.1083 +    print soup.prettify()
     4.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.2 +++ b/trunk/quahog/plugins/Weather/local/__init__.py	Thu Oct 22 10:14:56 2009 -0400
     4.3 @@ -0,0 +1,1 @@
     4.4 +# Stub so local is a module, used for third-party modules
     5.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.2 +++ b/trunk/quahog/plugins/Weather/local/feedparser.py	Thu Oct 22 10:14:56 2009 -0400
     5.3 @@ -0,0 +1,2858 @@
     5.4 +#!/usr/bin/env python
     5.5 +"""Universal feed parser
     5.6 +
     5.7 +Handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds
     5.8 +
     5.9 +Visit http://feedparser.org/ for the latest version
    5.10 +Visit http://feedparser.org/docs/ for the latest documentation
    5.11 +
    5.12 +Required: Python 2.1 or later
    5.13 +Recommended: Python 2.3 or later
    5.14 +Recommended: CJKCodecs and iconv_codec <http://cjkpython.i18n.org/>
    5.15 +"""
    5.16 +
    5.17 +__version__ = "4.1"# + "$Revision: 1.92 $"[11:15] + "-cvs"
    5.18 +__license__ = """Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved.
    5.19 +
    5.20 +Redistribution and use in source and binary forms, with or without modification,
    5.21 +are permitted provided that the following conditions are met:
    5.22 +
    5.23 +* Redistributions of source code must retain the above copyright notice,
    5.24 +  this list of conditions and the following disclaimer.
    5.25 +* Redistributions in binary form must reproduce the above copyright notice,
    5.26 +  this list of conditions and the following disclaimer in the documentation
    5.27 +  and/or other materials provided with the distribution.
    5.28 +
    5.29 +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
    5.30 +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    5.31 +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    5.32 +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
    5.33 +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
    5.34 +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
    5.35 +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
    5.36 +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
    5.37 +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
    5.38 +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    5.39 +POSSIBILITY OF SUCH DAMAGE."""
    5.40 +__author__ = "Mark Pilgrim <http://diveintomark.org/>"
    5.41 +__contributors__ = ["Jason Diamond <http://injektilo.org/>",
    5.42 +                    "John Beimler <http://john.beimler.org/>",
    5.43 +                    "Fazal Majid <http://www.majid.info/mylos/weblog/>",
    5.44 +                    "Aaron Swartz <http://aaronsw.com/>",
    5.45 +                    "Kevin Marks <http://epeus.blogspot.com/>"]
    5.46 +_debug = 0 # non-zero enables trace output on sys.stderr (and chardet's debug flag when chardet is importable)
    5.47 +
    5.48 +# HTTP "User-Agent" header to send to servers when downloading feeds.
    5.49 +# If you are embedding feedparser in a larger application, you should
    5.50 +# change this to your application name and URL.
    5.51 +USER_AGENT = "UniversalFeedParser/%s +http://feedparser.org/" % __version__
    5.52 +
    5.53 +# HTTP "Accept" header to send to servers when downloading feeds.  If you don't
    5.54 +# want to send an Accept header, set this to None.
    5.55 +ACCEPT_HEADER = "application/atom+xml,application/rdf+xml,application/rss+xml,application/x-netcdf,application/xml;q=0.9,text/xml;q=0.2,*/*;q=0.1"
    5.56 +
    5.57 +# List of preferred XML parsers, by SAX driver name.  These will be tried first,
    5.58 +# but if they're not installed, Python will keep searching through its own list
    5.59 +# of pre-installed parsers until it finds one that supports everything we need.
    5.60 +PREFERRED_XML_PARSERS = ["drv_libxml2"]
    5.61 +
    5.62 +# If you want feedparser to automatically run HTML markup through HTML Tidy, set
    5.63 +# this to 1.  Requires mxTidy <http://www.egenix.com/files/python/mxTidy.html>
    5.64 +# or utidylib <http://utidylib.berlios.de/>.
    5.65 +TIDY_MARKUP = 0
    5.66 +
    5.67 +# List of Python interfaces for HTML Tidy, in order of preference.  Only useful
    5.68 +# if TIDY_MARKUP = 1
    5.69 +PREFERRED_TIDY_INTERFACES = ["uTidy", "mxTidy"]
    5.70 +
    5.71 +# ---------- required modules (should come with any Python distribution) ----------
    5.72 +import sgmllib, re, sys, copy, urlparse, time, rfc822, types, cgi, urllib, urllib2
    5.73 +try:
    5.74 +    from cStringIO import StringIO as _StringIO
    5.75 +except:
    5.76 +    from StringIO import StringIO as _StringIO
    5.77 +
    5.78 +# ---------- optional modules (feedparser will work without these, but with reduced functionality) ----------
    5.79 +
    5.80 +# gzip is included with most Python distributions, but may not be available if you compiled your own
    5.81 +try:
    5.82 +    import gzip
    5.83 +except:
    5.84 +    gzip = None
    5.85 +try:
    5.86 +    import zlib
    5.87 +except:
    5.88 +    zlib = None
    5.89 +
    5.90 +# If a real XML parser is available, feedparser will attempt to use it.  feedparser has
    5.91 +# been tested with the built-in SAX parser, PyXML, and libxml2.  On platforms where the
    5.92 +# Python distribution does not come with an XML parser (such as Mac OS X 10.2 and some
    5.93 +# versions of FreeBSD), feedparser will quietly fall back on regex-based parsing.
    5.94 +try:
    5.95 +    import xml.sax
    5.96 +    xml.sax.make_parser(PREFERRED_XML_PARSERS) # test for valid parsers
    5.97 +    from xml.sax.saxutils import escape as _xmlescape
    5.98 +    _XML_AVAILABLE = 1
    5.99 +except: # any exception raised above selects the fallback below
   5.100 +    _XML_AVAILABLE = 0
   5.101 +    def _xmlescape(data): # stand-in for saxutils.escape: escapes '&', '>' and '<' only
   5.102 +        data = data.replace('&', '&amp;') # '&' first, so the entities added below are not escaped again
   5.103 +        data = data.replace('>', '&gt;')
   5.104 +        data = data.replace('<', '&lt;')
   5.105 +        return data
   5.106 +
   5.107 +# base64 support for Atom feeds that contain embedded binary data
   5.108 +try:
   5.109 +    import base64, binascii
   5.110 +except:
   5.111 +    base64 = binascii = None
   5.112 +
   5.113 +# cjkcodecs and iconv_codec provide support for more character encodings.
   5.114 +# Both are available from http://cjkpython.i18n.org/
   5.115 +try:
   5.116 +    import cjkcodecs.aliases
   5.117 +except:
   5.118 +    pass
   5.119 +try:
   5.120 +    import iconv_codec
   5.121 +except:
   5.122 +    pass
   5.123 +
   5.124 +# chardet library auto-detects character encodings
   5.125 +# Download from http://chardet.feedparser.org/
   5.126 +try:
   5.127 +    import chardet
   5.128 +    if _debug:
   5.129 +        import chardet.constants
   5.130 +        chardet.constants._debug = 1
   5.131 +except:
   5.132 +    chardet = None
   5.133 +
   5.134 +# ---------- don't touch these ----------
   5.135 +class ThingsNobodyCaresAboutButMe(Exception): pass # common base class of the next three exceptions
   5.136 +class CharacterEncodingOverride(ThingsNobodyCaresAboutButMe): pass
   5.137 +class CharacterEncodingUnknown(ThingsNobodyCaresAboutButMe): pass
   5.138 +class NonXMLContentType(ThingsNobodyCaresAboutButMe): pass
   5.139 +class UndeclaredNamespace(Exception): pass # derives from Exception directly, not from the base above
   5.140 +
   5.141 +sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*') # replaces sgmllib's module-level pattern; tag names may contain '-', '_', '.' and ':'
   5.142 +sgmllib.special = re.compile('<!') # replaces sgmllib's module-level pattern
   5.143 +sgmllib.charref = re.compile('&#(x?[0-9A-Fa-f]+)[^0-9A-Fa-f]') # replaces sgmllib's module-level pattern; also accepts hexadecimal references (&#x...)
   5.144 +
         +# Maps the short version key stored in the parser's 'version' attribute
         +# to a human-readable format name; '' stands for an undetected format.
   5.145 +SUPPORTED_VERSIONS = {'': 'unknown',
   5.146 +                      'rss090': 'RSS 0.90',
   5.147 +                      'rss091n': 'RSS 0.91 (Netscape)',
   5.148 +                      'rss091u': 'RSS 0.91 (Userland)',
   5.149 +                      'rss092': 'RSS 0.92',
   5.150 +                      'rss093': 'RSS 0.93',
   5.151 +                      'rss094': 'RSS 0.94',
   5.152 +                      'rss20': 'RSS 2.0',
   5.153 +                      'rss10': 'RSS 1.0',
   5.154 +                      'rss': 'RSS (unknown version)',
   5.155 +                      'atom01': 'Atom 0.1',
   5.156 +                      'atom02': 'Atom 0.2',
   5.157 +                      'atom03': 'Atom 0.3',
   5.158 +                      'atom10': 'Atom 1.0',
   5.159 +                      'atom': 'Atom (unknown version)',
   5.160 +                      'cdf': 'CDF',
   5.161 +                      'hotrss': 'Hot RSS'
   5.162 +                      }
   5.163 +
         +# Choose the base class for FeedParserDict: the built-in dict when the
         +# name exists, otherwise the UserDict module's class plus a minimal
         +# dict() replacement.
   5.164 +try:
   5.165 +    UserDict = dict
   5.166 +except NameError:
   5.167 +    # Python 2.1 does not have dict
   5.168 +    from UserDict import UserDict
   5.169 +    def dict(aList): # builds a plain dictionary from a sequence of (key, value) pairs
   5.170 +        rc = {}
   5.171 +        for k, v in aList:
   5.172 +            rc[k] = v
   5.173 +        return rc
   5.174 +
   5.175 +class FeedParserDict(UserDict):
         +    # Dictionary with two conveniences layered on top of UserDict:
         +    #   * keys can also be read and written as attributes (d.title);
         +    #   * legacy key names are redirected through keymap.
         +    # A keymap value that is a list means: on read, return the first
         +    # listed key that is present; on write, store under the first one.
   5.176 +    keymap = {'channel': 'feed',
   5.177 +              'items': 'entries',
   5.178 +              'guid': 'id',
   5.179 +              'date': 'updated',
   5.180 +              'date_parsed': 'updated_parsed',
   5.181 +              'description': ['subtitle', 'summary'],
   5.182 +              'url': ['href'],
   5.183 +              'modified': 'updated',
   5.184 +              'modified_parsed': 'updated_parsed',
   5.185 +              'issued': 'published',
   5.186 +              'issued_parsed': 'published_parsed',
   5.187 +              'copyright': 'rights',
   5.188 +              'copyright_detail': 'rights_detail',
   5.189 +              'tagline': 'subtitle',
   5.190 +              'tagline_detail': 'subtitle_detail'}
   5.191 +    def __getitem__(self, key):
         +        # 'category' and 'categories' are not stored; they are derived
         +        # from the 'tags' list on every lookup
   5.192 +        if key == 'category':
   5.193 +            return UserDict.__getitem__(self, 'tags')[0]['term']
   5.194 +        if key == 'categories':
   5.195 +            return [(tag['scheme'], tag['term']) for tag in UserDict.__getitem__(self, 'tags')]
   5.196 +        realkey = self.keymap.get(key, key)
   5.197 +        if type(realkey) == types.ListType:
   5.198 +            for k in realkey:
   5.199 +                if UserDict.has_key(self, k):
   5.200 +                    return UserDict.__getitem__(self, k)
         +        # a value stored under the literal key wins over the alias target
   5.201 +        if UserDict.has_key(self, key):
   5.202 +            return UserDict.__getitem__(self, key)
   5.203 +        return UserDict.__getitem__(self, realkey)
   5.204 +
   5.205 +    def __setitem__(self, key, value):
         +        # store under the canonical name; for a list-valued alias that
         +        # is the first entry of the list
         +        realkey = self.keymap.get(key, key)
         +        if type(realkey) == types.ListType:
         +            realkey = realkey[0]
         +        return UserDict.__setitem__(self, realkey, value)
   5.212 +
   5.213 +    def get(self, key, default=None):
         +        # Return self[key], or default when the lookup fails.  The lookup
         +        # itself is attempted (rather than asking has_key first) so that
         +        # aliases and the derived 'category' keys behave like __getitem__.
         +        try:
         +            return self[key]
         +        except (KeyError, IndexError, TypeError):
         +            return default
   5.218 +
   5.219 +    def setdefault(self, key, value):
   5.220 +        if not self.has_key(key):
   5.221 +            self[key] = value
   5.222 +        return self[key]
   5.223 +        
   5.224 +    def has_key(self, key):
         +        # True when self[key] would succeed.  This used to consult
         +        # hasattr(self, key), which is also true for every dict method
         +        # name ('items', 'keys', 'update', ...), so such keys were
         +        # reported as present and get() then raised KeyError.
         +        try:
         +            self.__getitem__(key)
         +        except (KeyError, IndexError, TypeError):
         +            return False
         +        return True
   5.229 +        
   5.230 +    def __getattr__(self, key):
         +        # only reached when normal attribute lookup fails; names starting
         +        # with '_' are never treated as dictionary keys
   5.231 +        try:
   5.232 +            return self.__dict__[key]
   5.233 +        except KeyError:
   5.234 +            pass
   5.235 +        try:
   5.236 +            assert not key.startswith('_')
   5.237 +            return self.__getitem__(key)
   5.238 +        except:
   5.239 +            raise AttributeError, "object has no attribute '%s'" % key
   5.240 +
   5.241 +    def __setattr__(self, key, value):
         +        # '_private' names and 'data' become real instance attributes;
         +        # every other attribute assignment is stored as a dictionary item
   5.242 +        if key.startswith('_') or key == 'data':
   5.243 +            self.__dict__[key] = value
   5.244 +        else:
   5.245 +            return self.__setitem__(key, value)
   5.246 +
   5.247 +    def __contains__(self, key):
   5.248 +        return self.has_key(key)
   5.249 +
   5.250 +def zopeCompatibilityHack():
         +    # Replace the module-level FeedParserDict class with a factory
         +    # function of the same name that returns plain dictionaries.
   5.251 +    global FeedParserDict
   5.252 +    del FeedParserDict
   5.253 +    def FeedParserDict(aDict=None):
         +        # copy of aDict as a plain dict; a false aDict gives an empty one
         +        plain = {}
         +        plain.update(aDict or {})
         +        return plain
   5.258 +
   5.259 +_ebcdic_to_ascii_map = None
   5.260 +def _ebcdic_to_ascii(s):
         +    # Return s with every byte replaced through a fixed 256-entry table
         +    # (for example byte 193 becomes 65, 'A').  The translation string
         +    # is built on the first call and cached in _ebcdic_to_ascii_map.
   5.261 +    global _ebcdic_to_ascii_map
   5.262 +    if not _ebcdic_to_ascii_map:
         +        import string
         +        codes = (
   5.264 +            0,1,2,3,156,9,134,127,151,141,142,11,12,13,14,15,
   5.265 +            16,17,18,19,157,133,8,135,24,25,146,143,28,29,30,31,
   5.266 +            128,129,130,131,132,10,23,27,136,137,138,139,140,5,6,7,
   5.267 +            144,145,22,147,148,149,150,4,152,153,154,155,20,21,158,26,
   5.268 +            32,160,161,162,163,164,165,166,167,168,91,46,60,40,43,33,
   5.269 +            38,169,170,171,172,173,174,175,176,177,93,36,42,41,59,94,
   5.270 +            45,47,178,179,180,181,182,183,184,185,124,44,37,95,62,63,
   5.271 +            186,187,188,189,190,191,192,193,194,96,58,35,64,39,61,34,
   5.272 +            195,97,98,99,100,101,102,103,104,105,196,197,198,199,200,201,
   5.273 +            202,106,107,108,109,110,111,112,113,114,203,204,205,206,207,208,
   5.274 +            209,126,115,116,117,118,119,120,121,122,210,211,212,213,214,215,
   5.275 +            216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,
   5.276 +            123,65,66,67,68,69,70,71,72,73,232,233,234,235,236,237,
   5.277 +            125,74,75,76,77,78,79,80,81,82,238,239,240,241,242,243,
   5.278 +            92,159,83,84,85,86,87,88,89,90,244,245,246,247,248,249,
   5.279 +            48,49,50,51,52,53,54,55,56,57,250,251,252,253,254,255
   5.280 +            )
         +        source_bytes = ''.join(map(chr, range(256)))
         +        target_bytes = ''.join(map(chr, codes))
         +        _ebcdic_to_ascii_map = string.maketrans(source_bytes, target_bytes)
   5.284 +    return s.translate(_ebcdic_to_ascii_map)
   5.285 +
   5.286 +_urifixer = re.compile('^([A-Za-z][A-Za-z0-9+-.]*://)(/*)(.*?)')
   5.287 +def _urljoin(base, uri):
   5.288 +    uri = _urifixer.sub(r'\1\3', uri)
   5.289 +    return urlparse.urljoin(base, uri)
   5.290 +
   5.291 +class _FeedParserMixin:
   5.292 +    namespaces = {'': '',
   5.293 +                  'http://backend.userland.com/rss': '',
   5.294 +                  'http://blogs.law.harvard.edu/tech/rss': '',
   5.295 +                  'http://purl.org/rss/1.0/': '',
   5.296 +                  'http://my.netscape.com/rdf/simple/0.9/': '',
   5.297 +                  'http://example.com/newformat#': '',
   5.298 +                  'http://example.com/necho': '',
   5.299 +                  'http://purl.org/echo/': '',
   5.300 +                  'uri/of/echo/namespace#': '',
   5.301 +                  'http://purl.org/pie/': '',
   5.302 +                  'http://purl.org/atom/ns#': '',
   5.303 +                  'http://www.w3.org/2005/Atom': '',
   5.304 +                  'http://purl.org/rss/1.0/modules/rss091#': '',
   5.305 +                  
   5.306 +                  'http://webns.net/mvcb/':                               'admin',
   5.307 +                  'http://purl.org/rss/1.0/modules/aggregation/':         'ag',
   5.308 +                  'http://purl.org/rss/1.0/modules/annotate/':            'annotate',
   5.309 +                  'http://media.tangent.org/rss/1.0/':                    'audio',
   5.310 +                  'http://backend.userland.com/blogChannelModule':        'blogChannel',
   5.311 +                  'http://web.resource.org/cc/':                          'cc',
   5.312 +                  'http://backend.userland.com/creativeCommonsRssModule': 'creativeCommons',
   5.313 +                  'http://purl.org/rss/1.0/modules/company':              'co',
   5.314 +                  'http://purl.org/rss/1.0/modules/content/':             'content',
   5.315 +                  'http://my.theinfo.org/changed/1.0/rss/':               'cp',
   5.316 +                  'http://purl.org/dc/elements/1.1/':                     'dc',
   5.317 +                  'http://purl.org/dc/terms/':                            'dcterms',
   5.318 +                  'http://purl.org/rss/1.0/modules/email/':               'email',
   5.319 +                  'http://purl.org/rss/1.0/modules/event/':               'ev',
   5.320 +                  'http://rssnamespace.org/feedburner/ext/1.0':           'feedburner',
   5.321 +                  'http://freshmeat.net/rss/fm/':                         'fm',
   5.322 +                  'http://xmlns.com/foaf/0.1/':                           'foaf',
   5.323 +                  'http://www.w3.org/2003/01/geo/wgs84_pos#':             'geo',
   5.324 +                  'http://postneo.com/icbm/':                             'icbm',
   5.325 +                  'http://purl.org/rss/1.0/modules/image/':               'image',
   5.326 +                  'http://www.itunes.com/DTDs/PodCast-1.0.dtd':           'itunes',
   5.327 +                  'http://example.com/DTDs/PodCast-1.0.dtd':              'itunes',
   5.328 +                  'http://purl.org/rss/1.0/modules/link/':                'l',
   5.329 +                  'http://search.yahoo.com/mrss':                         'media',
   5.330 +                  'http://madskills.com/public/xml/rss/module/pingback/': 'pingback',
   5.331 +                  'http://prismstandard.org/namespaces/1.2/basic/':       'prism',
   5.332 +                  'http://www.w3.org/1999/02/22-rdf-syntax-ns#':          'rdf',
   5.333 +                  'http://www.w3.org/2000/01/rdf-schema#':                'rdfs',
   5.334 +                  'http://purl.org/rss/1.0/modules/reference/':           'ref',
   5.335 +                  'http://purl.org/rss/1.0/modules/richequiv/':           'reqv',
   5.336 +                  'http://purl.org/rss/1.0/modules/search/':              'search',
   5.337 +                  'http://purl.org/rss/1.0/modules/slash/':               'slash',
   5.338 +                  'http://schemas.xmlsoap.org/soap/envelope/':            'soap',
   5.339 +                  'http://purl.org/rss/1.0/modules/servicestatus/':       'ss',
   5.340 +                  'http://hacks.benhammersley.com/rss/streaming/':        'str',
   5.341 +                  'http://purl.org/rss/1.0/modules/subscription/':        'sub',
   5.342 +                  'http://purl.org/rss/1.0/modules/syndication/':         'sy',
   5.343 +                  'http://purl.org/rss/1.0/modules/taxonomy/':            'taxo',
   5.344 +                  'http://purl.org/rss/1.0/modules/threading/':           'thr',
   5.345 +                  'http://purl.org/rss/1.0/modules/textinput/':           'ti',
   5.346 +                  'http://madskills.com/public/xml/rss/module/trackback/':'trackback',
   5.347 +                  'http://wellformedweb.org/commentAPI/':                 'wfw',
   5.348 +                  'http://purl.org/rss/1.0/modules/wiki/':                'wiki',
   5.349 +                  'http://www.w3.org/1999/xhtml':                         'xhtml',
   5.350 +                  'http://www.w3.org/XML/1998/namespace':                 'xml',
   5.351 +                  'http://schemas.pocketsoap.com/rss/myDescModule/':      'szf'
   5.352 +}
   5.353 +    _matchnamespaces = {}
   5.354 +
   5.355 +    can_be_relative_uri = ['link', 'id', 'wfw_comment', 'wfw_commentrss', 'docs', 'url', 'href', 'comments', 'license', 'icon', 'logo']
   5.356 +    can_contain_relative_uris = ['content', 'title', 'summary', 'info', 'tagline', 'subtitle', 'copyright', 'rights', 'description']
   5.357 +    can_contain_dangerous_markup = ['content', 'title', 'summary', 'info', 'tagline', 'subtitle', 'copyright', 'rights', 'description']
   5.358 +    html_types = ['text/html', 'application/xhtml+xml']
   5.359 +    
   5.360 +    def __init__(self, baseuri=None, baselang=None, encoding='utf-8'):
         +        # Create an empty parse result and reset all parser state.
         +        #   baseuri  - initial base URI ('' is used when it is false)
         +        #   baselang - initial language; when true it is also stored as
         +        #              feeddata['language']
         +        #   encoding - stored as self.encoding
         +        if _debug:
         +            sys.stderr.write('initializing FeedParser\n')
         +        # _matchnamespaces is a class attribute mutated in place, so the
         +        # lowercased lookup table is filled by the first instance only
         +        if not self._matchnamespaces:
         +            for uri, prefix in self.namespaces.items():
         +                self._matchnamespaces[uri.lower()] = prefix
   5.365 +        self.feeddata = FeedParserDict() # feed-level data
   5.366 +        self.encoding = encoding # character encoding
   5.367 +        self.entries = [] # list of entry-level data
   5.368 +        self.version = '' # feed type/version, see SUPPORTED_VERSIONS
   5.369 +        self.namespacesInUse = {} # dictionary of namespaces defined by the feed
   5.370 +
   5.371 +        # the following are used internally to track state;
   5.372 +        # this is really out of control and should be refactored
         +        for flag in ('infeed', 'inentry', 'incontent', 'intextinput',
         +                     'inimage', 'inauthor', 'incontributor',
         +                     'inpublisher', 'insource'):
         +            setattr(self, flag, 0)
   5.382 +        self.sourcedata = FeedParserDict()
   5.383 +        self.contentparams = FeedParserDict()
   5.384 +        self._summaryKey = None
   5.385 +        self.namespacemap = {}
         +        # one list per stack; entries are pushed and popped per element
   5.386 +        self.elementstack = []
   5.387 +        self.basestack = []
   5.388 +        self.langstack = []
   5.389 +        self.baseuri = baseuri or ''
   5.390 +        self.lang = baselang or None
   5.391 +        if baselang:
   5.392 +            self.feeddata['language'] = baselang
   5.393 +
   5.394 +    def unknown_starttag(self, tag, attrs):
         +        # Handle a start tag: update the xml:base / xml:lang scopes, record
         +        # namespace declarations, then either copy the tag into inline
         +        # XHTML content or dispatch to a _start_<name> handler.
         +        #   tag   - element name, possibly in 'prefix:name' form
         +        #   attrs - sequence of (name, value) pairs
   5.395 +        if _debug: sys.stderr.write('start %s with %s\n' % (tag, attrs))
   5.396 +        # normalize attrs
         +        # (names are lowercased; values are lowercased for rel and type only)
   5.397 +        attrs = [(k.lower(), v) for k, v in attrs]
   5.398 +        attrs = [(k, k in ('rel', 'type') and v.lower() or v) for k, v in attrs]
   5.399 +        
   5.400 +        # track xml:base and xml:lang
         +        # (plain 'base' / 'lang' attributes are accepted as fallbacks)
   5.401 +        attrsD = dict(attrs)
   5.402 +        baseuri = attrsD.get('xml:base', attrsD.get('base')) or self.baseuri
   5.403 +        self.baseuri = _urljoin(self.baseuri, baseuri)
   5.404 +        lang = attrsD.get('xml:lang', attrsD.get('lang'))
   5.405 +        if lang == '':
   5.406 +            # xml:lang could be explicitly set to '', we need to capture that
   5.407 +            lang = None
   5.408 +        elif lang is None:
   5.409 +            # if no xml:lang is specified, use parent lang
   5.410 +            lang = self.lang
   5.411 +        if lang:
   5.412 +            if tag in ('feed', 'rss', 'rdf:RDF'):
   5.413 +                self.feeddata['language'] = lang
   5.414 +        self.lang = lang
         +        # one entry is pushed on each stack per start tag; unknown_endtag pops them
   5.415 +        self.basestack.append(self.baseuri)
   5.416 +        self.langstack.append(lang)
   5.417 +        
   5.418 +        # track namespaces
   5.419 +        for prefix, uri in attrs:
   5.420 +            if prefix.startswith('xmlns:'):
   5.421 +                self.trackNamespace(prefix[6:], uri)
   5.422 +            elif prefix == 'xmlns':
   5.423 +                self.trackNamespace(None, uri)
   5.424 +
   5.425 +        # track inline content
   5.426 +        if self.incontent and self.contentparams.has_key('type') and not self.contentparams.get('type', 'xml').endswith('xml'):
   5.427 +            # element declared itself as escaped markup, but it isn't really
   5.428 +            self.contentparams['type'] = 'application/xhtml+xml'
   5.429 +        if self.incontent and self.contentparams.get('type') == 'application/xhtml+xml':
   5.430 +            # Note: probably shouldn't simply recreate localname here, but
   5.431 +            # our namespace handling isn't actually 100% correct in cases where
   5.432 +            # the feed redefines the default namespace (which is actually
   5.433 +            # the usual case for inline content, thanks Sam), so here we
   5.434 +            # cheat and just reconstruct the element based on localname
   5.435 +            # because that compensates for the bugs in our namespace handling.
   5.436 +            # This will horribly munge inline content with non-empty qnames,
   5.437 +            # but nobody actually does that, so I'm not fixing it.
   5.438 +            tag = tag.split(':')[-1]
         +            # NOTE(review): attribute values are interpolated as-is here,
         +            # without any escaping; this path returns before handler dispatch
   5.439 +            return self.handle_data('<%s%s>' % (tag, ''.join([' %s="%s"' % t for t in attrs])), escape=0)
   5.440 +
   5.441 +        # match namespaces
         +        # (a declared prefix is replaced by its namespacemap entry)
   5.442 +        if tag.find(':') <> -1:
   5.443 +            prefix, suffix = tag.split(':', 1)
   5.444 +        else:
   5.445 +            prefix, suffix = '', tag
   5.446 +        prefix = self.namespacemap.get(prefix, prefix)
   5.447 +        if prefix:
   5.448 +            prefix = prefix + '_'
   5.449 +
   5.450 +        # special hack for better tracking of empty textinput/image elements in illformed feeds
   5.451 +        if (not prefix) and tag not in ('title', 'link', 'description', 'name'):
   5.452 +            self.intextinput = 0
   5.453 +        if (not prefix) and tag not in ('title', 'link', 'description', 'url', 'href', 'width', 'height'):
   5.454 +            self.inimage = 0
   5.455 +        
   5.456 +        # call special handler (if defined) or default handler
         +        # NOTE: an AttributeError raised inside the handler itself is also
         +        # caught below and ends in the default push()
   5.457 +        methodname = '_start_' + prefix + suffix
   5.458 +        try:
   5.459 +            method = getattr(self, methodname)
   5.460 +            return method(attrsD)
   5.461 +        except AttributeError:
   5.462 +            return self.push(prefix + suffix, 1)
   5.463 +
   5.464 +    def unknown_endtag(self, tag):
         +        # Handle an end tag: dispatch to a _end_<name> handler (or pop the
         +        # element), echo the closing tag into inline XHTML content, then
         +        # leave the xml:base / xml:lang scope entered by the start tag.
         +        #   tag - element name, possibly in 'prefix:name' form
   5.465 +        if _debug: sys.stderr.write('end %s\n' % tag)
   5.466 +        # match namespaces
   5.467 +        if tag.find(':') <> -1:
   5.468 +            prefix, suffix = tag.split(':', 1)
   5.469 +        else:
   5.470 +            prefix, suffix = '', tag
   5.471 +        prefix = self.namespacemap.get(prefix, prefix)
   5.472 +        if prefix:
   5.473 +            prefix = prefix + '_'
   5.474 +
   5.475 +        # call special handler (if defined) or default handler
         +        # NOTE: an AttributeError raised inside the handler itself is also
         +        # caught below and ends in the default pop()
   5.476 +        methodname = '_end_' + prefix + suffix
   5.477 +        try:
   5.478 +            method = getattr(self, methodname)
   5.479 +            method()
   5.480 +        except AttributeError:
   5.481 +            self.pop(prefix + suffix)
   5.482 +
   5.483 +        # track inline content
         +        # (unlike unknown_starttag, this runs after the handler dispatch)
   5.484 +        if self.incontent and self.contentparams.has_key('type') and not self.contentparams.get('type', 'xml').endswith('xml'):
   5.485 +            # element declared itself as escaped markup, but it isn't really
   5.486 +            self.contentparams['type'] = 'application/xhtml+xml'
   5.487 +        if self.incontent and self.contentparams.get('type') == 'application/xhtml+xml':
   5.488 +            tag = tag.split(':')[-1]
   5.489 +            self.handle_data('</%s>' % tag, escape=0)
   5.490 +
   5.491 +        # track xml:base and xml:lang going out of scope
         +        # (baseuri is restored only from a non-empty stack entry; lang is
         +        # restored even when the entry is None)
   5.492 +        if self.basestack:
   5.493 +            self.basestack.pop()
   5.494 +            if self.basestack and self.basestack[-1]:
   5.495 +                self.baseuri = self.basestack[-1]
   5.496 +        if self.langstack:
   5.497 +            self.langstack.pop()
   5.498 +            if self.langstack: # and (self.langstack[-1] is not None):
   5.499 +                self.lang = self.langstack[-1]
   5.500 +
   5.501 +    def handle_charref(self, ref):
         +        # Called for each character reference; for '&#160;' ref is '160'.
         +        # Nothing is recorded while no element is open.  References to the
         +        # five markup characters (quote, ampersand, apostrophe, '<', '>')
         +        # are kept as references; any other reference is appended as the
         +        # UTF-8 encoding of the character it names.
         +        if not self.elementstack:
         +            return
         +        ref = ref.lower()
         +        if ref in ('34', '38', '39', '60', '62', 'x22', 'x26', 'x27', 'x3c', 'x3e'):
         +            piece = '&#%s;' % ref
         +        elif ref[0] == 'x':
         +            piece = unichr(int(ref[1:], 16)).encode('utf-8')
         +        else:
         +            piece = unichr(int(ref)).encode('utf-8')
         +        self.elementstack[-1][2].append(piece)
   5.514 +
   5.515 +    def handle_entityref(self, ref):
         +        # Called for each entity reference; for '&copy;' ref is 'copy'.
         +        # Nothing is recorded while no element is open.  The five XML
         +        # entities and names unknown to htmlentitydefs are appended
         +        # unchanged as '&name;'; any other entity is appended as the UTF-8
         +        # encoding of its character.
         +        if not self.elementstack:
         +            return
         +        if _debug:
         +            sys.stderr.write('entering handle_entityref with %s\n' % ref)
         +        piece = '&%s;' % ref
         +        if ref not in ('lt', 'gt', 'quot', 'amp', 'apos'):
         +            # entity resolution graciously donated by Aaron Swartz
         +            import htmlentitydefs
         +            try:
         +                if hasattr(htmlentitydefs, 'name2codepoint'): # requires Python 2.3
         +                    codepoint = htmlentitydefs.name2codepoint[ref]
         +                else:
         +                    definition = htmlentitydefs.entitydefs[ref]
         +                    if definition.startswith('&#') and definition.endswith(';'):
         +                        codepoint = int(definition[2:-1]) # not in latin-1
         +                    else:
         +                        codepoint = ord(definition)
         +            except KeyError:
         +                pass
         +            else:
         +                piece = unichr(codepoint).encode('utf-8')
         +        self.elementstack[-1][2].append(piece)
   5.535 +
   5.536 +    def handle_data(self, text, escape=1):
         +        # Called for each block of plain text, i.e. outside of any tag and
         +        # not containing any character or entity references.  The text is
         +        # appended to the innermost open element, XML-escaped first when
         +        # escape is true and the current content type is XHTML.
         +        if not self.elementstack:
         +            return
         +        needs_escape = escape and self.contentparams.get('type') == 'application/xhtml+xml'
         +        if needs_escape:
         +            text = _xmlescape(text)
         +        pieces = self.elementstack[-1][2]
         +        pieces.append(text)
   5.543 +
   5.544 +    def handle_comment(self, text):
         +        # called for each comment, e.g. <!-- insert message here -->;
         +        # the comment text is discarded
         +        return None
   5.547 +
   5.548 +    def handle_pi(self, text):
         +        # called for each processing instruction, e.g. <?instruction>;
         +        # the instruction is discarded
         +        return None
   5.551 +
   5.552 +    def handle_decl(self, text):
         +        # the declaration text is discarded
         +        return None
   5.554 +
   5.555 +    def parse_declaration(self, i):
   5.556 +        # override internal declaration handler to handle CDATA blocks
         +        #   i - offset in self.rawdata where the '<!' construct starts
         +        # Returns the offset just past the construct, or -1 when a
         +        # non-CDATA declaration has no closing '>' in the buffer.
   5.557 +        if _debug: sys.stderr.write('entering parse_declaration\n')
   5.558 +        if self.rawdata[i:i+9] == '<![CDATA[':
   5.559 +            k = self.rawdata.find(']]>', i)
         +            # an unterminated CDATA section runs to the end of the buffer
   5.560 +            if k == -1: k = len(self.rawdata)
   5.561 +            self.handle_data(_xmlescape(self.rawdata[i+9:k]), 0)
   5.562 +            return k+3
   5.563 +        else:
   5.564 +            k = self.rawdata.find('>', i)
         +            if k == -1:
         +                # find() failed: report "incomplete" with a negative
         +                # offset.  Returning k+1 here gave 0, which sent the
         +                # caller back to the start of the buffer.
         +                return -1
   5.565 +            return k+1
   5.566 +
   5.567 +    def mapContentType(self, contentType):
         +        # Lowercase contentType and expand the shorthands 'text', 'html'
         +        # and 'xhtml' to full MIME types; anything else is returned
         +        # lowercased but otherwise unchanged.
         +        shorthand = {'text': 'text/plain',
         +                     'html': 'text/html',
         +                     'xhtml': 'application/xhtml+xml'}
         +        lowered = contentType.lower()
         +        return shorthand.get(lowered, lowered)
   5.576 +    
   5.577 +    def trackNamespace(self, prefix, uri):
         +        # Record a namespace declaration.
         +        #   prefix - declared prefix, or None for the default namespace
         +        #   uri    - namespace URI (matched case-insensitively)
         +        # A few URIs also fix the feed version when none is set yet.  A URI
         +        # listed in _matchnamespaces maps the declared prefix to the
         +        # canonical one; any other URI is only noted in namespacesInUse.
         +        loweruri = uri.lower()
         +        if not self.version:
         +            if (prefix, loweruri) == (None, 'http://my.netscape.com/rdf/simple/0.9/'):
         +                self.version = 'rss090'
         +            elif loweruri == 'http://purl.org/rss/1.0/':
         +                self.version = 'rss10'
         +            elif loweruri == 'http://www.w3.org/2005/atom':
         +                self.version = 'atom10'
         +        if loweruri.find('backend.userland.com/rss') != -1:
         +            # match any backend.userland.com namespace
         +            uri = 'http://backend.userland.com/rss'
         +            loweruri = uri
         +        canonical = self._matchnamespaces.get(loweruri)
         +        if canonical is None:
         +            self.namespacesInUse[prefix or ''] = uri
         +        else:
         +            self.namespacemap[prefix] = canonical
         +            self.namespacesInUse[canonical] = uri
   5.594 +
   5.595 +    def resolveURI(self, uri):
         +        # resolve uri against the current base URI ('' when none is set)
         +        base = self.baseuri or ''
         +        return _urljoin(base, uri)
   5.597 +    
   5.598 +    def decodeEntities(self, element, data):
         +        # this implementation returns data unchanged; element is unused
         +        unchanged = data
         +        return unchanged
   5.600 +
   5.601 +    def push(self, element, expectingText):
         +        # open a new stack frame: [element name, expectingText flag,
         +        # list of text pieces collected so far]
         +        frame = [element, expectingText, []]
         +        self.elementstack.append(frame)
   5.603 +
    def pop(self, element, stripWhitespace=1):
        """Close the innermost frame for ``element`` and file its text.

        Returns None when the element stack is empty or when the innermost
        frame was opened for a different element.  Otherwise the frame is
        removed, its text pieces are joined (and stripped unless
        ``stripWhitespace`` is false) and the result is returned.

        When the frame was pushed with a false ``expectingText`` the joined
        text is returned without any further processing or storage.  For
        all other frames the text runs through, in this order: optional
        base64 decoding, relative-URI resolution, entity decoding,
        relative-URI resolution and sanitizing of embedded markup, and
        conversion to unicode.  The result is then stored in the current
        entry or in the feed/source context, except for 'category', which
        is returned unstored.
        """
        # Nothing to close, or the innermost open frame is for another element.
        if not self.elementstack: return
        if self.elementstack[-1][0] != element: return
        
        element, expectingText, pieces = self.elementstack.pop()
        output = ''.join(pieces)
        if stripWhitespace:
            output = output.strip()
        # Frames pushed with a false expectingText are returned raw.
        if not expectingText: return output

        # decode base64 content
        # (the "base64 and" guard skips decoding when the module name is falsy)
        if base64 and self.contentparams.get('base64', 0):
            try:
                output = base64.decodestring(output)
            except binascii.Error:
                # undecodable payload: keep the text as it was
                pass
            except binascii.Incomplete:
                pass
                
        # resolve relative URIs
        if (element in self.can_be_relative_uri) and output:
            output = self.resolveURI(output)
        
        # decode entities within embedded markup
        if not self.contentparams.get('base64', 0):
            output = self.decodeEntities(element, output)

        # remove temporary cruft from contentparams
        # ('mode' and 'base64' are dropped before contentparams is copied below)
        try:
            del self.contentparams['mode']
        except KeyError:
            pass
        try:
            del self.contentparams['base64']
        except KeyError:
            pass

        # resolve relative URIs within embedded markup
        # (a missing 'type' is treated as 'text/html' here)
        if self.mapContentType(self.contentparams.get('type', 'text/html')) in self.html_types:
            if element in self.can_contain_relative_uris:
                output = _resolveRelativeURIs(output, self.baseuri, self.encoding)
        
        # sanitize embedded markup
        if self.mapContentType(self.contentparams.get('type', 'text/html')) in self.html_types:
            if element in self.can_contain_dangerous_markup:
                output = _sanitizeHTML(output, self.encoding)

        # Best-effort conversion to unicode; any failure leaves output unchanged.
        if self.encoding and type(output) != type(u''):
            try:
                output = unicode(output, self.encoding)
            except:
                pass

        # categories/tags/keywords/whatever are handled in _end_category
        if element == 'category':
            return output
        
        # store output in appropriate place(s)
        if self.inentry and not self.insource:
            if element == 'content':
                # 'content' accumulates a list of contentparams copies,
                # each carrying its text under 'value'.
                self.entries[-1].setdefault(element, [])
                contentparams = copy.deepcopy(self.contentparams)
                contentparams['value'] = output
                self.entries[-1][element].append(contentparams)
            elif element == 'link':
                self.entries[-1][element] = output
                if output:
                    # also fill in the href of the most recently added link
                    self.entries[-1]['links'][-1]['href'] = output
            else:
                # inside an entry, 'description' is stored as 'summary'
                if element == 'description':
                    element = 'summary'
                self.entries[-1][element] = output
                if self.incontent:
                    contentparams = copy.deepcopy(self.contentparams)
                    contentparams['value'] = output
                    self.entries[-1][element + '_detail'] = contentparams
        elif (self.infeed or self.insource) and (not self.intextinput) and (not self.inimage):
            context = self._getContext()
            # at feed/source level, 'description' is stored as 'subtitle'
            if element == 'description':
                element = 'subtitle'
            context[element] = output
            if element == 'link':
                context['links'][-1]['href'] = output
            elif self.incontent:
                contentparams = copy.deepcopy(self.contentparams)
                contentparams['value'] = output
                context[element + '_detail'] = contentparams
        return output
   5.692 +
   5.693 +    def pushContent(self, tag, attrsD, defaultContentType, expectingText):
   5.694 +        self.incontent += 1
   5.695 +        self.contentparams = FeedParserDict({
   5.696 +            'type': self.mapContentType(attrsD.get('type', defaultContentType)),
   5.697 +            'language': self.lang,
   5.698 +            'base': self.baseuri})
   5.699 +        self.contentparams['base64'] = self._isBase64(attrsD, self.contentparams)
   5.700 +        self.push(tag, expectingText)
   5.701 +
   5.702 +    def popContent(self, tag):
   5.703 +        value = self.pop(tag)
   5.704 +        self.incontent -= 1
   5.705 +        self.contentparams.clear()
   5.706 +        return value
   5.707 +        
   5.708 +    def _mapToStandardPrefix(self, name):
   5.709 +        colonpos = name.find(':')
   5.710 +        if colonpos <> -1:
   5.711 +            prefix = name[:colonpos]
   5.712 +            suffix = name[colonpos+1:]
   5.713 +            prefix = self.namespacemap.get(prefix, prefix)
   5.714 +            name = prefix + ':' + suffix
   5.715 +        return name
   5.716 +        
   5.717 +    def _getAttribute(self, attrsD, name):
   5.718 +        return attrsD.get(self._mapToStandardPrefix(name))
   5.719 +
   5.720 +    def _isBase64(self, attrsD, contentparams):
   5.721 +        if attrsD.get('mode', '') == 'base64':
   5.722 +            return 1
   5.723 +        if self.contentparams['type'].startswith('text/'):
   5.724 +            return 0
   5.725 +        if self.contentparams['type'].endswith('+xml'):
   5.726 +            return 0
   5.727 +        if self.contentparams['type'].endswith('/xml'):
   5.728 +            return 0
   5.729 +        return 1
   5.730 +
   5.731 +    def _itsAnHrefDamnIt(self, attrsD):
   5.732 +        href = attrsD.get('url', attrsD.get('uri', attrsD.get('href', None)))
   5.733 +        if href:
   5.734 +            try:
   5.735 +                del attrsD['url']
   5.736 +            except KeyError:
   5.737 +                pass
   5.738 +            try:
   5.739 +                del attrsD['uri']
   5.740 +            except KeyError:
   5.741 +                pass
   5.742 +            attrsD['href'] = href
   5.743 +        return attrsD
   5.744 +    
   5.745 +    def _save(self, key, value):
   5.746 +        context = self._getContext()
   5.747 +        context.setdefault(key, value)
   5.748 +
   5.749 +    def _start_rss(self, attrsD):
   5.750 +        versionmap = {'0.91': 'rss091u',
   5.751 +                      '0.92': 'rss092',
   5.752 +                      '0.93': 'rss093',
   5.753 +                      '0.94': 'rss094'}
   5.754 +        if not self.version:
   5.755 +            attr_version = attrsD.get('version', '')
   5.756 +            version = versionmap.get(attr_version)
   5.757 +            if version:
   5.758 +                self.version = version
   5.759 +            elif attr_version.startswith('2.'):
   5.760 +                self.version = 'rss20'
   5.761 +            else:
   5.762 +                self.version = 'rss'
   5.763 +    
   5.764 +    def _start_dlhottitles(self, attrsD):
   5.765 +        self.version = 'hotrss'
   5.766 +
   5.767 +    def _start_channel(self, attrsD):
   5.768 +        self.infeed = 1
   5.769 +        self._cdf_common(attrsD)
   5.770 +    _start_feedinfo = _start_channel
   5.771 +
   5.772 +    def _cdf_common(self, attrsD):
   5.773 +        if attrsD.has_key('lastmod'):
   5.774 +            self._start_modified({})
   5.775 +            self.elementstack[-1][-1] = attrsD['lastmod']
   5.776 +            self._end_modified()
   5.777 +        if attrsD.has_key('href'):
   5.778 +            self._start_link({})
   5.779 +            self.elementstack[-1][-1] = attrsD['href']
   5.780 +            self._end_link()
   5.781 +    
   5.782 +    def _start_feed(self, attrsD):
   5.783 +        self.infeed = 1
   5.784 +        versionmap = {'0.1': 'atom01',
   5.785 +                      '0.2': 'atom02',
   5.786 +                      '0.3': 'atom03'}
   5.787 +        if not self.version:
   5.788 +            attr_version = attrsD.get('version')
   5.789 +            version = versionmap.get(attr_version)
   5.790 +            if version:
   5.791 +                self.version = version
   5.792 +            else:
   5.793 +                self.version = 'atom'
   5.794 +
   5.795 +    def _end_channel(self):
   5.796 +        self.infeed = 0
   5.797 +    _end_feed = _end_channel
   5.798 +    
   5.799 +    def _start_image(self, attrsD):
   5.800 +        self.inimage = 1
   5.801 +        self.push('image', 0)
   5.802 +        context = self._getContext()
   5.803 +        context.setdefault('image', FeedParserDict())
   5.804 +            
   5.805 +    def _end_image(self):
   5.806 +        self.pop('image')
   5.807 +        self.inimage = 0
   5.808 +
   5.809 +    def _start_textinput(self, attrsD):
   5.810 +        self.intextinput = 1
   5.811 +        self.push('textinput', 0)
   5.812 +        context = self._getContext()
   5.813 +        context.setdefault('textinput', FeedParserDict())
   5.814 +    _start_textInput = _start_textinput
   5.815 +    
   5.816 +    def _end_textinput(self):
   5.817 +        self.pop('textinput')
   5.818 +        self.intextinput = 0
   5.819 +    _end_textInput = _end_textinput
   5.820 +
   5.821 +    def _start_author(self, attrsD):
   5.822 +        self.inauthor = 1
   5.823 +        self.push('author', 1)
   5.824 +    _start_managingeditor = _start_author
   5.825 +    _start_dc_author = _start_author
   5.826 +    _start_dc_creator = _start_author
   5.827 +    _start_itunes_author = _start_author
   5.828 +
   5.829 +    def _end_author(self):
   5.830 +        self.pop('author')
   5.831 +        self.inauthor = 0
   5.832 +        self._sync_author_detail()
   5.833 +    _end_managingeditor = _end_author
   5.834 +    _end_dc_author = _end_author
   5.835 +    _end_dc_creator = _end_author
   5.836 +    _end_itunes_author = _end_author
   5.837 +
   5.838 +    def _start_itunes_owner(self, attrsD):
   5.839 +        self.inpublisher = 1
   5.840 +        self.push('publisher', 0)
   5.841 +
   5.842 +    def _end_itunes_owner(self):
   5.843 +        self.pop('publisher')
   5.844 +        self.inpublisher = 0
   5.845 +        self._sync_author_detail('publisher')
   5.846 +
   5.847 +    def _start_contributor(self, attrsD):
   5.848 +        self.incontributor = 1
   5.849 +        context = self._getContext()
   5.850 +        context.setdefault('contributors', [])
   5.851 +        context['contributors'].append(FeedParserDict())
   5.852 +        self.push('contributor', 0)
   5.853 +
   5.854 +    def _end_contributor(self):
   5.855 +        self.pop('contributor')
   5.856 +        self.incontributor = 0
   5.857 +
   5.858 +    def _start_dc_contributor(self, attrsD):
   5.859 +        self.incontributor = 1
   5.860 +        context = self._getContext()
   5.861 +        context.setdefault('contributors', [])
   5.862 +        context['contributors'].append(FeedParserDict())
   5.863 +        self.push('name', 0)
   5.864 +
   5.865 +    def _end_dc_contributor(self):
   5.866 +        self._end_name()
   5.867 +        self.incontributor = 0
   5.868 +
   5.869 +    def _start_name(self, attrsD):
   5.870 +        self.push('name', 0)
   5.871 +    _start_itunes_name = _start_name
   5.872 +
   5.873 +    def _end_name(self):
   5.874 +        value = self.pop('name')
   5.875 +        if self.inpublisher:
   5.876 +            self._save_author('name', value, 'publisher')
   5.877 +        elif self.inauthor:
   5.878 +            self._save_author('name', value)
   5.879 +        elif self.incontributor:
   5.880 +            self._save_contributor('name', value)
   5.881 +        elif self.intextinput:
   5.882 +            context = self._getContext()
   5.883 +            context['textinput']['name'] = value
   5.884 +    _end_itunes_name = _end_name
   5.885 +
   5.886 +    def _start_width(self, attrsD):
   5.887 +        self.push('width', 0)
   5.888 +
   5.889 +    def _end_width(self):
   5.890 +        value = self.pop('width')
   5.891 +        try:
   5.892 +            value = int(value)
   5.893 +        except:
   5.894 +            value = 0
   5.895 +        if self.inimage:
   5.896 +            context = self._getContext()
   5.897 +            context['image']['width'] = value
   5.898 +
   5.899 +    def _start_height(self, attrsD):
   5.900 +        self.push('height', 0)
   5.901 +
   5.902 +    def _end_height(self):
   5.903 +        value = self.pop('height')
   5.904 +        try:
   5.905 +            value = int(value)
   5.906 +        except:
   5.907 +            value = 0
   5.908 +        if self.inimage:
   5.909 +            context = self._getContext()
   5.910 +            context['image']['height'] = value
   5.911 +
   5.912 +    def _start_url(self, attrsD):
   5.913 +        self.push('href', 1)
   5.914 +    _start_homepage = _start_url
   5.915 +    _start_uri = _start_url
   5.916 +
   5.917 +    def _end_url(self):
   5.918 +        value = self.pop('href')
   5.919 +        if self.inauthor:
   5.920 +            self._save_author('href', value)
   5.921 +        elif self.incontributor:
   5.922 +            self._save_contributor('href', value)
   5.923 +        elif self.inimage:
   5.924 +            context = self._getContext()
   5.925 +            context['image']['href'] = value
   5.926 +        elif self.intextinput:
   5.927 +            context = self._getContext()
   5.928 +            context['textinput']['link'] = value
   5.929 +    _end_homepage = _end_url
   5.930 +    _end_uri = _end_url
   5.931 +
   5.932 +    def _start_email(self, attrsD):
   5.933 +        self.push('email', 0)
   5.934 +    _start_itunes_email = _start_email
   5.935 +
   5.936 +    def _end_email(self):
   5.937 +        value = self.pop('email')
   5.938 +        if self.inpublisher:
   5.939 +            self._save_author('email', value, 'publisher')
   5.940 +        elif self.inauthor:
   5.941 +            self._save_author('email', value)
   5.942 +        elif self.incontributor:
   5.943 +            self._save_contributor('email', value)
   5.944 +    _end_itunes_email = _end_email
   5.945 +
   5.946 +    def _getContext(self):
   5.947 +        if self.insource:
   5.948 +            context = self.sourcedata
   5.949 +        elif self.inentry:
   5.950 +            context = self.entries[-1]
   5.951 +        else:
   5.952 +            context = self.feeddata
   5.953 +        return context
   5.954 +
   5.955 +    def _save_author(self, key, value, prefix='author'):
   5.956 +        context = self._getContext()
   5.957 +        context.setdefault(prefix + '_detail', FeedParserDict())
   5.958 +        context[prefix + '_detail'][key] = value
   5.959 +        self._sync_author_detail()
   5.960 +
   5.961 +    def _save_contributor(self, key, value):
   5.962 +        context = self._getContext()
   5.963 +        context.setdefault('contributors', [FeedParserDict()])
   5.964 +        context['contributors'][-1][key] = value
   5.965 +
   5.966 +    def _sync_author_detail(self, key='author'):
   5.967 +        context = self._getContext()
   5.968 +        detail = context.get('%s_detail' % key)
   5.969 +        if detail:
   5.970 +            name = detail.get('name')
   5.971 +            email = detail.get('email')
   5.972 +            if name and email:
   5.973 +                context[key] = '%s (%s)' % (name, email)
   5.974 +            elif name:
   5.975 +                context[key] = name
   5.976 +            elif email:
   5.977 +                context[key] = email
   5.978 +        else:
   5.979 +            author = context.get(key)
   5.980 +            if not author: return
   5.981 +            emailmatch = re.search(r'''(([a-zA-Z0-9\_\-\.\+]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?))''', author)
   5.982 +            if not emailmatch: return
   5.983 +            email = emailmatch.group(0)
   5.984 +            # probably a better way to do the following, but it passes all the tests
   5.985 +            author = author.replace(email, '')
   5.986 +            author = author.replace('()', '')
   5.987 +            author = author.strip()
   5.988 +            if author and (author[0] == '('):
   5.989 +                author = author[1:]
   5.990 +            if author and (author[-1] == ')'):
   5.991 +                author = author[:-1]
   5.992 +            author = author.strip()
   5.993 +            context.setdefault('%s_detail' % key, FeedParserDict())
   5.994 +            context['%s_detail' % key]['name'] = author
   5.995 +            context['%s_detail' % key]['email'] = email
   5.996 +
   5.997 +    def _start_subtitle(self, attrsD):
   5.998 +        self.pushContent('subtitle', attrsD, 'text/plain', 1)
   5.999 +    _start_tagline = _start_subtitle
  5.1000 +    _start_itunes_subtitle = _start_subtitle
  5.1001 +
  5.1002 +    def _end_subtitle(self):
  5.1003 +        self.popContent('subtitle')
  5.1004 +    _end_tagline = _end_subtitle
  5.1005 +    _end_itunes_subtitle = _end_subtitle
  5.1006 +            
  5.1007 +    def _start_rights(self, attrsD):
  5.1008 +        self.pushContent('rights', attrsD, 'text/plain', 1)
  5.1009 +    _start_dc_rights = _start_rights
  5.1010 +    _start_copyright = _start_rights
  5.1011 +
  5.1012 +    def _end_rights(self):
  5.1013 +        self.popContent('rights')
  5.1014 +    _end_dc_rights = _end_rights
  5.1015 +    _end_copyright = _end_rights
  5.1016 +
  5.1017 +    def _start_item(self, attrsD):
  5.1018 +        self.entries.append(FeedParserDict())
  5.1019 +        self.push('item', 0)
  5.1020 +        self.inentry = 1
  5.1021 +        self.guidislink = 0
  5.1022 +        id = self._getAttribute(attrsD, 'rdf:about')
  5.1023 +        if id:
  5.1024 +            context = self._getContext()
  5.1025 +            context['id'] = id
  5.1026 +        self._cdf_common(attrsD)
  5.1027 +    _start_entry = _start_item
  5.1028 +    _start_product = _start_item
  5.1029 +
  5.1030 +    def _end_item(self):
  5.1031 +        self.pop('item')
  5.1032 +        self.inentry = 0
  5.1033 +    _end_entry = _end_item
  5.1034 +
  5.1035 +    def _start_dc_language(self, attrsD):
  5.1036 +        self.push('language', 1)
  5.1037 +    _start_language = _start_dc_language
  5.1038 +
  5.1039 +    def _end_dc_language(self):
  5.1040 +        self.lang = self.pop('language')
  5.1041 +    _end_language = _end_dc_language
  5.1042 +
  5.1043 +    def _start_dc_publisher(self, attrsD):
  5.1044 +        self.push('publisher', 1)
  5.1045 +    _start_webmaster = _start_dc_publisher
  5.1046 +
  5.1047 +    def _end_dc_publisher(self):
  5.1048 +        self.pop('publisher')
  5.1049 +        self._sync_author_detail('publisher')
  5.1050 +    _end_webmaster = _end_dc_publisher
  5.1051 +
  5.1052 +    def _start_published(self, attrsD):
  5.1053 +        self.push('published', 1)
  5.1054 +    _start_dcterms_issued = _start_published
  5.1055 +    _start_issued = _start_published
  5.1056 +
  5.1057 +    def _end_published(self):
  5.1058 +        value = self.pop('published')
  5.1059 +        self._save('published_parsed', _parse_date(value))
  5.1060 +    _end_dcterms_issued = _end_published
  5.1061 +    _end_issued = _end_published
  5.1062 +
  5.1063 +    def _start_updated(self, attrsD):
  5.1064 +        self.push('updated', 1)
  5.1065 +    _start_modified = _start_updated
  5.1066 +    _start_dcterms_modified = _start_updated
  5.1067 +    _start_pubdate = _start_updated
  5.1068 +    _start_dc_date = _start_updated
  5.1069 +
  5.1070 +    def _end_updated(self):
  5.1071 +        value = self.pop('updated')
  5.1072 +        parsed_value = _parse_date(value)
  5.1073 +        self._save('updated_parsed', parsed_value)
  5.1074 +    _end_modified = _end_updated
  5.1075 +    _end_dcterms_modified = _end_updated
  5.1076 +    _end_pubdate = _end_updated
  5.1077 +    _end_dc_date = _end_updated
  5.1078 +
  5.1079 +    def _start_created(self, attrsD):
  5.1080 +        self.push('created', 1)
  5.1081 +    _start_dcterms_created = _start_created
  5.1082 +
  5.1083 +    def _end_created(self):
  5.1084 +        value = self.pop('created')
  5.1085 +        self._save('created_parsed', _parse_date(value))
  5.1086 +    _end_dcterms_created = _end_created
  5.1087 +
  5.1088 +    def _start_expirationdate(self, attrsD):
  5.1089 +        self.push('expired', 1)
  5.1090 +
  5.1091 +    def _end_expirationdate(self):
  5.1092 +        self._save('expired_parsed', _parse_date(self.pop('expired')))
  5.1093 +
  5.1094 +    def _start_cc_license(self, attrsD):
  5.1095 +        self.push('license', 1)
  5.1096 +        value = self._getAttribute(attrsD, 'rdf:resource')
  5.1097 +        if value:
  5.1098 +            self.elementstack[-1][2].append(value)
  5.1099 +        self.pop('license')
  5.1100 +        
  5.1101 +    def _start_creativecommons_license(self, attrsD):
  5.1102 +        self.push('license', 1)
  5.1103 +
  5.1104 +    def _end_creativecommons_license(self):
  5.1105 +        self.pop('license')
  5.1106 +
  5.1107 +    def _addTag(self, term, scheme, label):
  5.1108 +        context = self._getContext()
  5.1109 +        tags = context.setdefault('tags', [])
  5.1110 +        if (not term) and (not scheme) and (not label): return
  5.1111 +        value = FeedParserDict({'term': term, 'scheme': scheme, 'label': label})
  5.1112 +        if value not in tags:
  5.1113 +            tags.append(FeedParserDict({'term': term, 'scheme': scheme, 'label': label}))
  5.1114 +
  5.1115 +    def _start_category(self, attrsD):
  5.1116 +        if _debug: sys.stderr.write('entering _start_category with %s\n' % repr(attrsD))
  5.1117 +        term = attrsD.get('term')
  5.1118 +        scheme = attrsD.get('scheme', attrsD.get('domain'))
  5.1119 +        label = attrsD.get('label')
  5.1120 +        self._addTag(term, scheme, label)
  5.1121 +        self.push('category', 1)
  5.1122 +    _start_dc_subject = _start_category
  5.1123 +    _start_keywords = _start_category
  5.1124 +        
  5.1125 +    def _end_itunes_keywords(self):
  5.1126 +        for term in self.pop('itunes_keywords').split():
  5.1127 +            self._addTag(term, 'http://www.itunes.com/', None)
  5.1128 +        
  5.1129 +    def _start_itunes_category(self, attrsD):
  5.1130 +        self._addTag(attrsD.get('text'), 'http://www.itunes.com/', None)
  5.1131 +        self.push('category', 1)
  5.1132 +        
  5.1133 +    def _end_category(self):
  5.1134 +        value = self.pop('category')
  5.1135 +        if not value: return
  5.1136 +        context = self._getContext()
  5.1137 +        tags = context['tags']
  5.1138 +        if value and len(tags) and not tags[-1]['term']:
  5.1139 +            tags[-1]['term'] = value
  5.1140 +        else:
  5.1141 +            self._addTag(value, None, None)
  5.1142 +    _end_dc_subject = _end_category
  5.1143 +    _end_keywords = _end_category
  5.1144 +    _end_itunes_category = _end_category
  5.1145 +
  5.1146 +    def _start_cloud(self, attrsD):
  5.1147 +        self._getContext()['cloud'] = FeedParserDict(attrsD)
  5.1148 +        
  5.1149 +    def _start_link(self, attrsD):
  5.1150 +        attrsD.setdefault('rel', 'alternate')
  5.1151 +        attrsD.setdefault('type', 'text/html')
  5.1152 +        attrsD = self._itsAnHrefDamnIt(attrsD)
  5.1153 +        if attrsD.has_key('href'):
  5.1154 +            attrsD['href'] = self.resolveURI(attrsD['href'])
  5.1155 +        expectingText = self.infeed or self.inentry or self.insource
  5.1156 +        context = self._getContext()
  5.1157 +        context.setdefault('links', [])
  5.1158 +        context['links'].append(FeedParserDict(attrsD))
  5.1159 +        if attrsD['rel'] == 'enclosure':
  5.1160 +            self._start_enclosure(attrsD)
  5.1161 +        if attrsD.has_key('href'):
  5.1162 +            expectingText = 0
  5.1163 +            if (attrsD.get('rel') == 'alternate') and (self.mapContentType(attrsD.get('type')) in self.html_types):
  5.1164 +                context['link'] = attrsD['href']
  5.1165 +        else:
  5.1166 +            self.push('link', expectingText)
  5.1167 +    _start_producturl = _start_link
  5.1168 +
  5.1169 +    def _end_link(self):
  5.1170 +        value = self.pop('link')
  5.1171 +        context = self._getContext()
  5.1172 +        if self.intextinput:
  5.1173 +            context['textinput']['link'] = value
  5.1174 +        if self.inimage:
  5.1175 +            context['image']['link'] = value
  5.1176 +    _end_producturl = _end_link
  5.1177 +
  5.1178 +    def _start_guid(self, attrsD):
  5.1179 +        self.guidislink = (attrsD.get('ispermalink', 'true') == 'true')
  5.1180 +        self.push('id', 1)
  5.1181 +
  5.1182 +    def _end_guid(self):
  5.1183 +        value = self.pop('id')
  5.1184 +        self._save('guidislink', self.guidislink and not self._getContext().has_key('link'))
  5.1185 +        if self.guidislink:
  5.1186 +            # guid acts as link, but only if 'ispermalink' is not present or is 'true',
  5.1187 +            # and only if the item doesn't already have a link element
  5.1188 +            self._save('link', value)
  5.1189 +
  5.1190 +    def _start_title(self, attrsD):
  5.1191 +        self.pushContent('title', attrsD, 'text/plain', self.infeed or self.inentry or self.insource)
  5.1192 +    _start_dc_title = _start_title
  5.1193 +    _start_media_title = _start_title
  5.1194 +
  5.1195 +    def _end_title(self):
  5.1196 +        value = self.popContent('title')
  5.1197 +        context = self._getContext()
  5.1198 +        if self.intextinput:
  5.1199 +            context['textinput']['title'] = value
  5.1200 +        elif self.inimage:
  5.1201 +            context['image']['title'] = value
  5.1202 +    _end_dc_title = _end_title
  5.1203 +    _end_media_title = _end_title
  5.1204 +
  5.1205 +    def _start_description(self, attrsD):
  5.1206 +        context = self._getContext()
  5.1207 +        if context.has_key('summary'):
  5.1208 +            self._summaryKey = 'content'
  5.1209 +            self._start_content(attrsD)
  5.1210 +        else:
  5.1211 +            self.pushContent('description', attrsD, 'text/html', self.infeed or self.inentry or self.insource)
  5.1212 +
  5.1213 +    def _start_abstract(self, attrsD):
  5.1214 +        self.pushContent('description', attrsD, 'text/plain', self.infeed or self.inentry or self.insource)
  5.1215 +
  5.1216 +    def _end_description(self):
  5.1217 +        if self._summaryKey == 'content':
  5.1218 +            self._end_content()
  5.1219 +        else:
  5.1220 +            value = self.popContent('description')
  5.1221 +            context = self._getContext()
  5.1222 +            if self.intextinput:
  5.1223 +                context['textinput']['description'] = value
  5.1224 +            elif self.inimage:
  5.1225 +                context['image']['description'] = value
  5.1226 +        self._summaryKey = None
  5.1227 +    _end_abstract = _end_description
  5.1228 +
  5.1229 +    def _start_info(self, attrsD):
  5.1230 +        self.pushContent('info', attrsD, 'text/plain', 1)
  5.1231 +    _start_feedburner_browserfriendly = _start_info
  5.1232 +
  5.1233 +    def _end_info(self):
  5.1234 +        self.popContent('info')
  5.1235 +    _end_feedburner_browserfriendly = _end_info
  5.1236 +
  5.1237 +    def _start_generator(self, attrsD):
  5.1238 +        if attrsD:
  5.1239 +            attrsD = self._itsAnHrefDamnIt(attrsD)
  5.1240 +            if attrsD.has_key('href'):
  5.1241 +                attrsD['href'] = self.resolveURI(attrsD['href'])
  5.1242 +        self._getContext()['generator_detail'] = FeedParserDict(attrsD)
  5.1243 +        self.push('generator', 1)
  5.1244 +
  5.1245 +    def _end_generator(self):
  5.1246 +        value = self.pop('generator')
  5.1247 +        context = self._getContext()
  5.1248 +        if context.has_key('generator_detail'):
  5.1249 +            context['generator_detail']['name'] = value
  5.1250 +            
  5.1251 +    def _start_admin_generatoragent(self, attrsD):
  5.1252 +        self.push('generator', 1)
  5.1253 +        value = self._getAttribute(attrsD, 'rdf:resource')
  5.1254 +        if value:
  5.1255 +            self.elementstack[-1][2].append(value)
  5.1256 +        self.pop('generator')
  5.1257 +        self._getContext()['generator_detail'] = FeedParserDict({'href': value})
  5.1258 +
  5.1259 +    def _start_admin_errorreportsto(self, attrsD):
  5.1260 +        self.push('errorreportsto', 1)
  5.1261 +        value = self._getAttribute(attrsD, 'rdf:resource')
  5.1262 +        if value:
  5.1263 +            self.elementstack[-1][2].append(value)
  5.1264 +        self.pop('errorreportsto')
  5.1265 +        
  5.1266 +    def _start_summary(self, attrsD):
  5.1267 +        context = self._getContext()
  5.1268 +        if context.has_key('summary'):
  5.1269 +            self._summaryKey = 'content'
  5.1270 +            self._start_content(attrsD)
  5.1271 +        else:
  5.1272 +            self._summaryKey = 'summary'
  5.1273 +            self.pushContent(self._summaryKey, attrsD, 'text/plain', 1)
  5.1274 +    _start_itunes_summary = _start_summary
  5.1275 +
  5.1276 +    def _end_summary(self):
  5.1277 +        if self._summaryKey == 'content':
  5.1278 +            self._end_content()
  5.1279 +        else:
  5.1280 +            self.popContent(self._summaryKey or 'summary')
  5.1281 +        self._summaryKey = None
  5.1282 +    _end_itunes_summary = _end_summary
  5.1283 +        
  5.1284 +    def _start_enclosure(self, attrsD):
  5.1285 +        attrsD = self._itsAnHrefDamnIt(attrsD)
  5.1286 +        self._getContext().setdefault('enclosures', []).append(FeedParserDict(attrsD))
  5.1287 +        href = attrsD.get('href')
  5.1288 +        if href:
  5.1289 +            context = self._getContext()
  5.1290 +            if not context.get('id'):
  5.1291 +                context['id'] = href
  5.1292 +            
  5.1293 +    def _start_source(self, attrsD):
  5.1294 +        self.insource = 1
  5.1295 +
  5.1296 +    def _end_source(self):
  5.1297 +        self.insource = 0
  5.1298 +        self._getContext()['source'] = copy.deepcopy(self.sourcedata)
  5.1299 +        self.sourcedata.clear()
  5.1300 +
  5.1301 +    def _start_content(self, attrsD):
  5.1302 +        self.pushContent('content', attrsD, 'text/plain', 1)
  5.1303 +        src = attrsD.get('src')
  5.1304 +        if src:
  5.1305 +            self.contentparams['src'] = src
  5.1306 +        self.push('content', 1)
  5.1307 +
  5.1308 +    def _start_prodlink(self, attrsD):
  5.1309 +        self.pushContent('content', attrsD, 'text/html', 1)
  5.1310 +
  5.1311 +    def _start_body(self, attrsD):
  5.1312 +        self.pushContent('content', attrsD, 'application/xhtml+xml', 1)
  5.1313 +    _start_xhtml_body = _start_body
  5.1314 +
  5.1315 +    def _start_content_encoded(self, attrsD):
  5.1316 +        self.pushContent('content', attrsD, 'text/html', 1)
  5.1317 +    _start_fullitem = _start_content_encoded
  5.1318 +
  5.1319 +    def _end_content(self):
  5.1320 +        copyToDescription = self.mapContentType(self.contentparams.get('type')) in (['text/plain'] + self.html_types)
  5.1321 +        value = self.popContent('content')
  5.1322 +        if copyToDescription:
  5.1323 +            self._save('description', value)
  5.1324 +    _end_body = _end_content
  5.1325 +    _end_xhtml_body = _end_content
  5.1326 +    _end_content_encoded = _end_content
  5.1327 +    _end_fullitem = _end_content
  5.1328 +    _end_prodlink = _end_content
  5.1329 +
  5.1330 +    def _start_itunes_image(self, attrsD):
  5.1331 +        self.push('itunes_image', 0)
  5.1332 +        self._getContext()['image'] = FeedParserDict({'href': attrsD.get('href')})
  5.1333 +    _start_itunes_link = _start_itunes_image
  5.1334 +        
  5.1335 +    def _end_itunes_block(self):
  5.1336 +        value = self.pop('itunes_block', 0)
  5.1337 +        self._getContext()['itunes_block'] = (value == 'yes') and 1 or 0
  5.1338 +
  5.1339 +    def _end_itunes_explicit(self):
  5.1340 +        value = self.pop('itunes_explicit', 0)
  5.1341 +        self._getContext()['itunes_explicit'] = (value == 'yes') and 1 or 0
  5.1342 +
  5.1343 +if _XML_AVAILABLE:
  5.1344 +    class _StrictFeedParser(_FeedParserMixin, xml.sax.handler.ContentHandler):
  5.1345 +        def __init__(self, baseuri, baselang, encoding):
  5.1346 +            if _debug: sys.stderr.write('trying StrictFeedParser\n')
  5.1347 +            xml.sax.handler.ContentHandler.__init__(self)
  5.1348 +            _FeedParserMixin.__init__(self, baseuri, baselang, encoding)
  5.1349 +            self.bozo = 0
  5.1350 +            self.exc = None
  5.1351 +        
  5.1352 +        def startPrefixMapping(self, prefix, uri):
  5.1353 +            self.trackNamespace(prefix, uri)
  5.1354 +        
  5.1355 +        def startElementNS(self, name, qname, attrs):
  5.1356 +            namespace, localname = name
  5.1357 +            lowernamespace = str(namespace or '').lower()
  5.1358 +            if lowernamespace.find('backend.userland.com/rss') <> -1:
  5.1359 +                # match any backend.userland.com namespace
  5.1360 +                namespace = 'http://backend.userland.com/rss'
  5.1361 +                lowernamespace = namespace
  5.1362 +            if qname and qname.find(':') > 0:
  5.1363 +                givenprefix = qname.split(':')[0]
  5.1364 +            else:
  5.1365 +                givenprefix = None
  5.1366 +            prefix = self._matchnamespaces.get(lowernamespace, givenprefix)
  5.1367 +            if givenprefix and (prefix == None or (prefix == '' and lowernamespace == '')) and not self.namespacesInUse.has_key(givenprefix):
  5.1368 +                    raise UndeclaredNamespace, "'%s' is not associated with a namespace" % givenprefix
  5.1369 +            if prefix:
  5.1370 +                localname = prefix + ':' + localname
  5.1371 +            localname = str(localname).lower()
  5.1372 +            if _debug: sys.stderr.write('startElementNS: qname = %s, namespace = %s, givenprefix = %s, prefix = %s, attrs = %s, localname = %s\n' % (qname, namespace, givenprefix, prefix, attrs.items(), localname))
  5.1373 +
  5.1374 +            # qname implementation is horribly broken in Python 2.1 (it
  5.1375 +            # doesn't report any), and slightly broken in Python 2.2 (it
  5.1376 +            # doesn't report the xml: namespace). So we match up namespaces
  5.1377 +            # with a known list first, and then possibly override them with
  5.1378 +            # the qnames the SAX parser gives us (if indeed it gives us any
  5.1379 +            # at all).  Thanks to MatejC for helping me test this and
  5.1380 +            # tirelessly telling me that it didn't work yet.
  5.1381 +            attrsD = {}
  5.1382 +            for (namespace, attrlocalname), attrvalue in attrs._attrs.items():
  5.1383 +                lowernamespace = (namespace or '').lower()
  5.1384 +                prefix = self._matchnamespaces.get(lowernamespace, '')
  5.1385 +                if prefix:
  5.1386 +                    attrlocalname = prefix + ':' + attrlocalname
  5.1387 +                attrsD[str(attrlocalname).lower()] = attrvalue
  5.1388 +            for qname in attrs.getQNames():
  5.1389 +                attrsD[str(qname).lower()] = attrs.getValueByQName(qname)
  5.1390 +            self.unknown_starttag(localname, attrsD.items())
  5.1391 +
  5.1392 +        def characters(self, text):
  5.1393 +            self.handle_data(text)
  5.1394 +
  5.1395 +        def endElementNS(self, name, qname):
  5.1396 +            namespace, localname = name
  5.1397 +            lowernamespace = str(namespace or '').lower()
  5.1398 +            if qname and qname.find(':') > 0:
  5.1399 +                givenprefix = qname.split(':')[0]
  5.1400 +            else:
  5.1401 +                givenprefix = ''
  5.1402 +            prefix = self._matchnamespaces.get(lowernamespace, givenprefix)
  5.1403 +            if prefix:
  5.1404 +                localname = prefix + ':' + localname
  5.1405 +            localname = str(localname).lower()
  5.1406 +            self.unknown_endtag(localname)
  5.1407 +
  5.1408 +        def error(self, exc):
  5.1409 +            self.bozo = 1
  5.1410 +            self.exc = exc
  5.1411 +            
  5.1412 +        def fatalError(self, exc):
  5.1413 +            self.error(exc)
  5.1414 +            raise exc
  5.1415 +
  5.1416 +class _BaseHTMLProcessor(sgmllib.SGMLParser):
  5.1417 +    elements_no_end_tag = ['area', 'base', 'basefont', 'br', 'col', 'frame', 'hr',
  5.1418 +      'img', 'input', 'isindex', 'link', 'meta', 'param']
  5.1419 +    
  5.1420 +    def __init__(self, encoding):
  5.1421 +        self.encoding = encoding
  5.1422 +        if _debug: sys.stderr.write('entering BaseHTMLProcessor, encoding=%s\n' % self.encoding)
  5.1423 +        sgmllib.SGMLParser.__init__(self)
  5.1424 +        
  5.1425 +    def reset(self):
  5.1426 +        self.pieces = []
  5.1427 +        sgmllib.SGMLParser.reset(self)
  5.1428 +
  5.1429 +    def _shorttag_replace(self, match):
  5.1430 +        tag = match.group(1)
  5.1431 +        if tag in self.elements_no_end_tag:
  5.1432 +            return '<' + tag + ' />'
  5.1433 +        else:
  5.1434 +            return '<' + tag + '></' + tag + '>'
  5.1435 +        
  5.1436 +    def feed(self, data):
  5.1437 +        data = re.compile(r'<!((?!DOCTYPE|--|\[))', re.IGNORECASE).sub(r'&lt;!\1', data)
  5.1438 +        #data = re.sub(r'<(\S+?)\s*?/>', self._shorttag_replace, data) # bug [ 1399464 ] Bad regexp for _shorttag_replace
  5.1439 +        data = re.sub(r'<([^<\s]+?)\s*/>', self._shorttag_replace, data) 
  5.1440 +        data = data.replace('&#39;', "'")
  5.1441 +        data = data.replace('&#34;', '"')
  5.1442 +        if self.encoding and type(data) == type(u''):
  5.1443 +            data = data.encode(self.encoding)
  5.1444 +        sgmllib.SGMLParser.feed(self, data)
  5.1445 +
  5.1446 +    def normalize_attrs(self, attrs):
  5.1447 +        # utility method to be called by descendants
  5.1448 +        attrs = [(k.lower(), v) for k, v in attrs]
  5.1449 +        attrs = [(k, k in ('rel', 'type') and v.lower() or v) for k, v in attrs]
  5.1450 +        return attrs
  5.1451 +
  5.1452 +    def unknown_starttag(self, tag, attrs):
  5.1453 +        # called for each start tag
  5.1454 +        # attrs is a list of (attr, value) tuples
  5.1455 +        # e.g. for <pre class='screen'>, tag='pre', attrs=[('class', 'screen')]
  5.1456 +        if _debug: sys.stderr.write('_BaseHTMLProcessor, unknown_starttag, tag=%s\n' % tag)
  5.1457 +        uattrs = []
  5.1458 +        # thanks to Kevin Marks for this breathtaking hack to deal with (valid) high-bit attribute values in UTF-8 feeds
  5.1459 +        for key, value in attrs:
  5.1460 +            if type(value) != type(u''):
  5.1461 +                value = unicode(value, self.encoding)
  5.1462 +            uattrs.append((unicode(key, self.encoding), value))
  5.1463 +        strattrs = u''.join([u' %s="%s"' % (key, value) for key, value in uattrs]).encode(self.encoding)
  5.1464 +        if tag in self.elements_no_end_tag:
  5.1465 +            self.pieces.append('<%(tag)s%(strattrs)s />' % locals())
  5.1466 +        else:
  5.1467 +            self.pieces.append('<%(tag)s%(strattrs)s>' % locals())
  5.1468 +
  5.1469 +    def unknown_endtag(self, tag):
  5.1470 +        # called for each end tag, e.g. for </pre>, tag will be 'pre'
  5.1471 +        # Reconstruct the original end tag.
  5.1472 +        if tag not in self.elements_no_end_tag:
  5.1473 +            self.pieces.append("</%(tag)s>" % locals())
  5.1474 +
  5.1475 +    def handle_charref(self, ref):
  5.1476 +        # called for each character reference, e.g. for '&#160;', ref will be '160'
  5.1477 +        # Reconstruct the original character reference.
  5.1478 +        self.pieces.append('&#%(ref)s;' % locals())
  5.1479 +        
  5.1480 +    def handle_entityref(self, ref):
  5.1481 +        # called for each entity reference, e.g. for '&copy;', ref will be 'copy'
  5.1482 +        # Reconstruct the original entity reference.
  5.1483 +        self.pieces.append('&%(ref)s;' % locals())
  5.1484 +
  5.1485 +    def handle_data(self, text):
  5.1486 +        # called for each block of plain text, i.e. outside of any tag and
  5.1487 +        # not containing any character or entity references
  5.1488 +        # Store the original text verbatim.
  5.1489 +        if _debug: sys.stderr.write('_BaseHTMLProcessor, handle_text, text=%s\n' % text)
  5.1490 +        self.pieces.append(text)
  5.1491 +        
  5.1492 +    def handle_comment(self, text):
  5.1493 +        # called for each HTML comment, e.g. <!-- insert Javascript code here -->
  5.1494 +        # Reconstruct the original comment.
  5.1495 +        self.pieces.append('<!--%(text)s-->' % locals())
  5.1496 +        
  5.1497 +    def handle_pi(self, text):
  5.1498 +        # called for each processing instruction, e.g. <?instruction>
  5.1499 +        # Reconstruct original processing instruction.
  5.1500 +        self.pieces.append('<?%(text)s>' % locals())
  5.1501 +
  5.1502 +    def handle_decl(self, text):
  5.1503 +        # called for the DOCTYPE, if present, e.g.
  5.1504 +        # <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
  5.1505 +        #     "http://www.w3.org/TR/html4/loose.dtd">
  5.1506 +        # Reconstruct original DOCTYPE
  5.1507 +        self.pieces.append('<!%(text)s>' % locals())
  5.1508 +        
  5.1509 +    _new_declname_match = re.compile(r'[a-zA-Z][-_.a-zA-Z0-9:]*\s*').match
  5.1510 +    def _scan_name(self, i, declstartpos):
  5.1511 +        rawdata = self.rawdata
  5.1512 +        n = len(rawdata)
  5.1513 +        if i == n:
  5.1514 +            return None, -1
  5.1515 +        m = self._new_declname_match(rawdata, i)
  5.1516 +        if m:
  5.1517 +            s = m.group()
  5.1518 +            name = s.strip()
  5.1519 +            if (i + len(s)) == n:
  5.1520 +                return None, -1  # end of buffer
  5.1521 +            return name.lower(), m.end()
  5.1522 +        else:
  5.1523 +            self.handle_data(rawdata)
  5.1524 +#            self.updatepos(declstartpos, i)
  5.1525 +            return None, -1
  5.1526 +
  5.1527 +    def output(self):
  5.1528 +        '''Return processed HTML as a single string'''
  5.1529 +        return ''.join([str(p) for p in self.pieces])
  5.1530 +
  5.1531 +class _LooseFeedParser(_FeedParserMixin, _BaseHTMLProcessor):
  5.1532 +    def __init__(self, baseuri, baselang, encoding):
  5.1533 +        sgmllib.SGMLParser.__init__(self)
  5.1534 +        _FeedParserMixin.__init__(self, baseuri, baselang, encoding)
  5.1535 +
  5.1536 +    def decodeEntities(self, element, data):
  5.1537 +        data = data.replace('&#60;', '&lt;')
  5.1538 +        data = data.replace('&#x3c;', '&lt;')
  5.1539 +        data = data.replace('&#62;', '&gt;')
  5.1540 +        data = data.replace('&#x3e;', '&gt;')
  5.1541 +        data = data.replace('&#38;', '&amp;')
  5.1542 +        data = data.replace('&#x26;', '&amp;')
  5.1543 +        data = data.replace('&#34;', '&quot;')
  5.1544 +        data = data.replace('&#x22;', '&quot;')
  5.1545 +        data = data.replace('&#39;', '&apos;')
  5.1546 +        data = data.replace('&#x27;', '&apos;')
  5.1547 +        if self.contentparams.has_key('type') and not self.contentparams.get('type', 'xml').endswith('xml'):
  5.1548 +            data = data.replace('&lt;', '<')
  5.1549 +            data = data.replace('&gt;', '>')
  5.1550 +            data = data.replace('&amp;', '&')
  5.1551 +            data = data.replace('&quot;', '"')
  5.1552 +            data = data.replace('&apos;', "'")
  5.1553 +        return data
  5.1554 +        
  5.1555 +class _RelativeURIResolver(_BaseHTMLProcessor):
  5.1556 +    relative_uris = [('a', 'href'),
  5.1557 +                     ('applet', 'codebase'),
  5.1558 +                     ('area', 'href'),
  5.1559 +                     ('blockquote', 'cite'),
  5.1560 +                     ('body', 'background'),
  5.1561 +                     ('del', 'cite'),
  5.1562 +                     ('form', 'action'),
  5.1563 +                     ('frame', 'longdesc'),
  5.1564 +                     ('frame', 'src'),
  5.1565 +                     ('iframe', 'longdesc'),
  5.1566 +                     ('iframe', 'src'),
  5.1567 +                     ('head', 'profile'),
  5.1568 +                     ('img', 'longdesc'),
  5.1569 +                     ('img', 'src'),
  5.1570 +                     ('img', 'usemap'),
  5.1571 +                     ('input', 'src'),
  5.1572 +                     ('input', 'usemap'),
  5.1573 +                     ('ins', 'cite'),
  5.1574 +                     ('link', 'href'),
  5.1575 +                     ('object', 'classid'),
  5.1576 +                     ('object', 'codebase'),
  5.1577 +                     ('object', 'data'),
  5.1578 +                     ('object', 'usemap'),
  5.1579 +                     ('q', 'cite'),
  5.1580 +                     ('script', 'src')]
  5.1581 +
  5.1582 +    def __init__(self, baseuri, encoding):
  5.1583 +        _BaseHTMLProcessor.__init__(self, encoding)
  5.1584 +        self.baseuri = baseuri
  5.1585 +
  5.1586 +    def resolveURI(self, uri):
  5.1587 +        return _urljoin(self.baseuri, uri)
  5.1588 +    
  5.1589 +    def unknown_starttag(self, tag, attrs):
  5.1590 +        attrs = self.normalize_attrs(attrs)
  5.1591 +        attrs = [(key, ((tag, key) in self.relative_uris) and self.resolveURI(value) or value) for key, value in attrs]
  5.1592 +        _BaseHTMLProcessor.unknown_starttag(self, tag, attrs)
  5.1593 +        
  5.1594 +def _resolveRelativeURIs(htmlSource, baseURI, encoding):
  5.1595 +    if _debug: sys.stderr.write('entering _resolveRelativeURIs\n')
  5.1596 +    p = _RelativeURIResolver(baseURI, encoding)
  5.1597 +    p.feed(htmlSource)
  5.1598 +    return p.output()
  5.1599 +
  5.1600 +class _HTMLSanitizer(_BaseHTMLProcessor):
  5.1601 +    acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area', 'b', 'big',
  5.1602 +      'blockquote', 'br', 'button', 'caption', 'center', 'cite', 'code', 'col',
  5.1603 +      'colgroup', 'dd', 'del', 'dfn', 'dir', 'div', 'dl', 'dt', 'em', 'fieldset',
  5.1604 +      'font', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input',
  5.1605 +      'ins', 'kbd', 'label', 'legend', 'li', 'map', 'menu', 'ol', 'optgroup',
  5.1606 +      'option', 'p', 'pre', 'q', 's', 'samp', 'select', 'small', 'span', 'strike',
  5.1607 +      'strong', 'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'tfoot', 'th',
  5.1608 +      'thead', 'tr', 'tt', 'u', 'ul', 'var']
  5.1609 +
  5.1610 +    acceptable_attributes = ['abbr', 'accept', 'accept-charset', 'accesskey',
  5.1611 +      'action', 'align', 'alt', 'axis', 'border', 'cellpadding', 'cellspacing',
  5.1612 +      'char', 'charoff', 'charset', 'checked', 'cite', 'class', 'clear', 'cols',
  5.1613 +      'colspan', 'color', 'compact', 'coords', 'datetime', 'dir', 'disabled',
  5.1614 +      'enctype', 'for', 'frame', 'headers', 'height', 'href', 'hreflang', 'hspace',
  5.1615 +      'id', 'ismap', 'label', 'lang', 'longdesc', 'maxlength', 'media', 'method',
  5.1616 +      'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'prompt', 'readonly',
  5.1617 +      'rel', 'rev', 'rows', 'rowspan', 'rules', 'scope', 'selected', 'shape', 'size',
  5.1618 +      'span', 'src', 'start', 'summary', 'tabindex', 'target', 'title', 'type',
  5.1619 +      'usemap', 'valign', 'value', 'vspace', 'width']
  5.1620 +
  5.1621 +    unacceptable_elements_with_end_tag = ['script', 'applet']
  5.1622 +
  5.1623 +    def reset(self):
  5.1624 +        _BaseHTMLProcessor.reset(self)
  5.1625 +        self.unacceptablestack = 0
  5.1626 +        
  5.1627 +    def unknown_starttag(self, tag, attrs):
  5.1628 +        if not tag in self.acceptable_elements:
  5.1629 +            if tag in self.unacceptable_elements_with_end_tag:
  5.1630 +                self.unacceptablestack += 1
  5.1631 +            return
  5.1632 +        attrs = self.normalize_attrs(attrs)
  5.1633 +        attrs = [(key, value) for key, value in attrs if key in self.acceptable_attributes]
  5.1634 +        _BaseHTMLProcessor.unknown_starttag(self, tag, attrs)
  5.1635 +        
  5.1636 +    def unknown_endtag(self, tag):
  5.1637 +        if not tag in self.acceptable_elements:
  5.1638 +            if tag in self.unacceptable_elements_with_end_tag:
  5.1639 +                self.unacceptablestack -= 1
  5.1640 +            return
  5.1641 +        _BaseHTMLProcessor.unknown_endtag(self, tag)
  5.1642 +
  5.1643 +    def handle_pi(self, text):
  5.1644 +        pass
  5.1645 +
  5.1646 +    def handle_decl(self, text):
  5.1647 +        pass
  5.1648 +
  5.1649 +    def handle_data(self, text):
  5.1650 +        if not self.unacceptablestack:
  5.1651 +            _BaseHTMLProcessor.handle_data(self, text)
  5.1652 +
  5.1653 +def _sanitizeHTML(htmlSource, encoding):
  5.1654 +    p = _HTMLSanitizer(encoding)
  5.1655 +    p.feed(htmlSource)
  5.1656 +    data = p.output()
  5.1657 +    if TIDY_MARKUP:
  5.1658 +        # loop through list of preferred Tidy interfaces looking for one that's installed,
  5.1659 +        # then set up a common _tidy function to wrap the interface-specific API.
  5.1660 +        _tidy = None
  5.1661 +        for tidy_interface in PREFERRED_TIDY_INTERFACES:
  5.1662 +            try:
  5.1663 +                if tidy_interface == "uTidy":
  5.1664 +                    from tidy import parseString as _utidy
  5.1665 +                    def _tidy(data, **kwargs):
  5.1666 +                        return str(_utidy(data, **kwargs))
  5.1667 +                    break
  5.1668 +                elif tidy_interface == "mxTidy":
  5.1669 +                    from mx.Tidy import Tidy as _mxtidy
  5.1670 +                    def _tidy(data, **kwargs):
  5.1671 +                        nerrors, nwarnings, data, errordata = _mxtidy.tidy(data, **kwargs)
  5.1672 +                        return data
  5.1673 +                    break
  5.1674 +            except:
  5.1675 +                pass
  5.1676 +        if _tidy:
  5.1677 +            utf8 = type(data) == type(u'')
  5.1678 +            if utf8:
  5.1679 +                data = data.encode('utf-8')
  5.1680 +            data = _tidy(data, output_xhtml=1, numeric_entities=1, wrap=0, char_encoding="utf8")
  5.1681 +            if utf8:
  5.1682 +                data = unicode(data, 'utf-8')
  5.1683 +            if data.count('<body'):
  5.1684 +                data = data.split('<body', 1)[1]
  5.1685 +                if data.count('>'):
  5.1686 +                    data = data.split('>', 1)[1]
  5.1687 +            if data.count('</body'):
  5.1688 +                data = data.split('</body', 1)[0]
  5.1689 +    data = data.strip().replace('\r\n', '\n')
  5.1690 +    return data
  5.1691 +
  5.1692 +class _FeedURLHandler(urllib2.HTTPDigestAuthHandler, urllib2.HTTPRedirectHandler, urllib2.HTTPDefaultErrorHandler):
  5.1693 +    def http_error_default(self, req, fp, code, msg, headers):
  5.1694 +        if ((code / 100) == 3) and (code != 304):
  5.1695 +            return self.http_error_302(req, fp, code, msg, headers)
  5.1696 +        infourl = urllib.addinfourl(fp, headers, req.get_full_url())
  5.1697 +        infourl.status = code
  5.1698 +        return infourl
  5.1699 +
  5.1700 +    def http_error_302(self, req, fp, code, msg, headers):
  5.1701 +        if headers.dict.has_key('location'):
  5.1702 +            infourl = urllib2.HTTPRedirectHandler.http_error_302(self, req, fp, code, msg, headers)
  5.1703 +        else:
  5.1704 +            infourl = urllib.addinfourl(fp, headers, req.get_full_url())
  5.1705 +        if not hasattr(infourl, 'status'):
  5.1706 +            infourl.status = code
  5.1707 +        return infourl
  5.1708 +
  5.1709 +    def http_error_301(self, req, fp, code, msg, headers):
  5.1710 +        if headers.dict.has_key('location'):
  5.1711 +            infourl = urllib2.HTTPRedirectHandler.http_error_301(self, req, fp, code, msg, headers)
  5.1712 +        else:
  5.1713 +            infourl = urllib.addinfourl(fp, headers, req.get_full_url())
  5.1714 +        if not hasattr(infourl, 'status'):
  5.1715 +            infourl.status = code
  5.1716 +        return infourl
  5.1717 +
  5.1718 +    http_error_300 = http_error_302
  5.1719 +    http_error_303 = http_error_302
  5.1720 +    http_error_307 = http_error_302
  5.1721 +        
  5.1722 +    def http_error_401(self, req, fp, code, msg, headers):
  5.1723 +        # Check if
  5.1724 +        # - server requires digest auth, AND
  5.1725 +        # - we tried (unsuccessfully) with basic auth, AND
  5.1726 +        # - we're using Python 2.3.3 or later (digest auth is irreparably broken in earlier versions)
  5.1727 +        # If all conditions hold, parse authentication information
  5.1728 +        # out of the Authorization header we sent the first time
  5.1729 +        # (for the username and password) and the WWW-Authenticate
  5.1730 +        # header the server sent back (for the realm) and retry
  5.1731 +        # the request with the appropriate digest auth headers instead.
  5.1732 +        # This evil genius hack has been brought to you by Aaron Swartz.
  5.1733 +        host = urlparse.urlparse(req.get_full_url())[1]
  5.1734 +        try:
  5.1735 +            assert sys.version.split()[0] >= '2.3.3'
  5.1736 +            assert base64 != None
  5.1737 +            user, passw = base64.decodestring(req.headers['Authorization'].split(' ')[1]).split(':')
  5.1738 +            realm = re.findall('realm="([^"]*)"', headers['WWW-Authenticate'])[0]
  5.1739 +            self.add_password(realm, host, user, passw)
  5.1740 +            retry = self.http_error_auth_reqed('www-authenticate', host, req, headers)
  5.1741 +            self.reset_retry_count()
  5.1742 +            return retry
  5.1743 +        except:
  5.1744 +            return self.http_error_default(req, fp, code, msg, headers)
  5.1745 +
  5.1746 +def _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, handlers):
  5.1747 +    """URL, filename, or string --> stream
  5.1748 +
  5.1749 +    This function lets you define parsers that take any input source
  5.1750 +    (URL, pathname to local or network file, or actual data as a string)
  5.1751 +    and deal with it in a uniform manner.  Returned object is guaranteed
  5.1752 +    to have all the basic stdio read methods (read, readline, readlines).
  5.1753 +    Just .close() the object when you're done with it.
  5.1754 +
  5.1755 +    If the etag argument is supplied, it will be used as the value of an
  5.1756 +    If-None-Match request header.
  5.1757 +
  5.1758 +    If the modified argument is supplied, it must be a tuple of 9 integers
  5.1759 +    as returned by gmtime() in the standard Python time module. This MUST
  5.1760 +    be in GMT (Greenwich Mean Time). The formatted date/time will be used
  5.1761 +    as the value of an If-Modified-Since request header.
  5.1762 +
  5.1763 +    If the agent argument is supplied, it will be used as the value of a
  5.1764 +    User-Agent request header.
  5.1765 +
  5.1766 +    If the referrer argument is supplied, it will be used as the value of a
  5.1767 +    Referer[sic] request header.
  5.1768 +
  5.1769 +    If handlers is supplied, it is a list of handlers used to build a
  5.1770 +    urllib2 opener.
  5.1771 +    """
  5.1772 +
  5.1773 +    if hasattr(url_file_stream_or_string, 'read'):
  5.1774 +        return url_file_stream_or_string
  5.1775 +
  5.1776 +    if url_file_stream_or_string == '-':
  5.1777 +        return sys.stdin
  5.1778 +
  5.1779 +    if urlparse.urlparse(url_file_stream_or_string)[0] in ('http', 'https', 'ftp'):
  5.1780 +        if not agent:
  5.1781 +            agent = USER_AGENT
  5.1782 +        # test for inline user:password for basic auth
  5.1783 +        auth = None
  5.1784 +        if base64:
  5.1785 +            urltype, rest = urllib.splittype(url_file_stream_or_string)
  5.1786 +            realhost, rest = urllib.splithost(rest)
  5.1787 +            if realhost:
  5.1788 +                user_passwd, realhost = urllib.splituser(realhost)
  5.1789 +                if user_passwd:
  5.1790 +                    url_file_stream_or_string = '%s://%s%s' % (urltype, realhost, rest)
  5.1791 +                    auth = base64.encodestring(user_passwd).strip()
  5.1792 +        # try to open with urllib2 (to use optional headers)
  5.1793 +        request = urllib2.Request(url_file_stream_or_string)
  5.1794 +        request.add_header('User-Agent', agent)
  5.1795 +        if etag:
  5.1796 +            request.add_header('If-None-Match', etag)
  5.1797 +        if modified:
  5.1798 +            # format into an RFC 1123-compliant timestamp. We can't use
  5.1799 +            # time.strftime() since the %a and %b directives can be affected
  5.1800 +            # by the current locale, but RFC 2616 states that dates must be
  5.1801 +            # in English.
  5.1802 +            short_weekdays = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
  5.1803 +            months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
  5.1804 +            request.add_header('If-Modified-Since', '%s, %02d %s %04d %02d:%02d:%02d GMT' % (short_weekdays[modified[6]], modified[2], months[modified[1] - 1], modified[0], modified[3], modified[4], modified[5]))
  5.1805 +        if referrer:
  5.1806 +            request.add_header('Referer', referrer)
  5.1807 +        if gzip and zlib:
  5.1808 +            request.add_header('Accept-encoding', 'gzip, deflate')
  5.1809 +        elif gzip:
  5.1810 +            request.add_header('Accept-encoding', 'gzip')
  5.1811 +        elif zlib:
  5.1812 +            request.add_header('Accept-encoding', 'deflate')
  5.1813 +        else:
  5.1814 +            request.add_header('Accept-encoding', '')
  5.1815 +        if auth:
  5.1816 +            request.add_header('Authorization', 'Basic %s' % auth)
  5.1817 +        if ACCEPT_HEADER:
  5.1818 +            request.add_header('Accept', ACCEPT_HEADER)
  5.1819 +        request.add_header('A-IM', 'feed') # RFC 3229 support
  5.1820 +        opener = apply(urllib2.build_opener, tuple([_FeedURLHandler()] + handlers))
  5.1821 +        opener.addheaders = [] # RMK - must clear so we only send our custom User-Agent
  5.1822 +        try:
  5.1823 +            return opener.open(request)
  5.1824 +        finally:
  5.1825 +            opener.close() # JohnD
  5.1826 +    
  5.1827 +    # try to open with native open function (if url_file_stream_or_string is a filename)
  5.1828 +    try:
  5.1829 +        return open(url_file_stream_or_string)
  5.1830 +    except:
  5.1831 +        pass
  5.1832 +
  5.1833 +    # treat url_file_stream_or_string as string
  5.1834 +    return _StringIO(str(url_file_stream_or_string))
  5.1835 +
  5.1836 +_date_handlers = []
  5.1837 +def registerDateHandler(func):
  5.1838 +    '''Register a date handler function (takes string, returns 9-tuple date in GMT)'''
  5.1839 +    _date_handlers.insert(0, func)
  5.1840 +    
  5.1841 +# ISO-8601 date parsing routines written by Fazal Majid.
  5.1842 +# The ISO 8601 standard is very convoluted and irregular - a full ISO 8601
  5.1843 +# parser is beyond the scope of feedparser and would be a worthwhile addition
  5.1844 +# to the Python library.
  5.1845 +# A single regular expression cannot parse ISO 8601 date formats into groups
  5.1846 +# as the standard is highly irregular (for instance is 030104 2003-01-04 or
  5.1847 +# 0301-04-01), so we use templates instead.
  5.1848 +# Please note the order in templates is significant because we need a
  5.1849 +# greedy match.
  5.1850 +_iso8601_tmpl = ['YYYY-?MM-?DD', 'YYYY-MM', 'YYYY-?OOO',
  5.1851 +                'YY-?MM-?DD', 'YY-?OOO', 'YYYY', 
  5.1852 +                '-YY-?MM', '-OOO', '-YY',
  5.1853 +                '--MM-?DD', '--MM',
  5.1854 +                '---DD',
  5.1855 +                'CC', '']
  5.1856 +_iso8601_re = [
  5.1857 +    tmpl.replace(
  5.1858 +    'YYYY', r'(?P<year>\d{4})').replace(
  5.1859 +    'YY', r'(?P<year>\d\d)').replace(
  5.1860 +    'MM', r'(?P<month>[01]\d)').replace(
  5.1861 +    'DD', r'(?P<day>[0123]\d)').replace(
  5.1862 +    'OOO', r'(?P<ordinal>[0123]\d\d)').replace(
  5.1863 +    'CC', r'(?P<century>\d\d$)')
  5.1864 +    + r'(T?(?P<hour>\d{2}):(?P<minute>\d{2})'
  5.1865 +    + r'(:(?P<second>\d{2}))?'
  5.1866 +    + r'(?P<tz>[+-](?P<tzhour>\d{2})(:(?P<tzmin>\d{2}))?|Z)?)?'
  5.1867 +    for tmpl in _iso8601_tmpl]
  5.1868 +del tmpl
  5.1869 +_iso8601_matches = [re.compile(regex).match for regex in _iso8601_re]
  5.1870 +del regex
  5.1871 +def _parse_date_iso8601(dateString):
  5.1872 +    '''Parse a variety of ISO-8601-compatible formats like 20040105'''
  5.1873 +    m = None
  5.1874 +    for _iso8601_match in _iso8601_matches:
  5.1875 +        m = _iso8601_match(dateString)
  5.1876 +        if m: break
  5.1877 +    if not m: return
  5.1878 +    if m.span() == (0, 0): return
  5.1879 +    params = m.groupdict()
  5.1880 +    ordinal = params.get('ordinal', 0)
  5.1881 +    if ordinal:
  5.1882 +        ordinal = int(ordinal)
  5.1883 +    else:
  5.1884 +        ordinal = 0
  5.1885 +    year = params.get('year', '--')
  5.1886 +    if not year or year == '--':
  5.1887 +        year = time.gmtime()[0]
  5.1888 +    elif len(year) == 2:
  5.1889 +        # ISO 8601 assumes current century, i.e. 93 -> 2093, NOT 1993
  5.1890 +        year = 100 * int(time.gmtime()[0] / 100) + int(year)
  5.1891 +    else:
  5.1892 +        year = int(year)
  5.1893 +    month = params.get('month', '-')
  5.1894 +    if not month or month == '-':
  5.1895 +        # ordinals are NOT normalized by mktime, we simulate them
  5.1896 +        # by setting month=1, day=ordinal
  5.1897 +        if ordinal:
  5.1898 +            month = 1
  5.1899 +        else:
  5.1900 +            month = time.gmtime()[1]
  5.1901 +    month = int(month)
  5.1902 +    day = params.get('day', 0)
  5.1903 +    if not day:
  5.1904 +        # see above
  5.1905 +        if ordinal:
  5.1906 +            day = ordinal
  5.1907 +        elif params.get('century', 0) or \
  5.1908 +                 params.get('year', 0) or params.get('month', 0):
  5.1909 +            day = 1
  5.1910 +        else:
  5.1911 +            day = time.gmtime()[2]
  5.1912 +    else:
  5.1913 +        day = int(day)
  5.1914 +    # special case of the century - is the first year of the 21st century
  5.1915 +    # 2000 or 2001 ? The debate goes on...
  5.1916 +    if 'century' in params.keys():
  5.1917 +        year = (int(params['century']) - 1) * 100 + 1
  5.1918 +    # in ISO 8601 most fields are optional
  5.1919 +    for field in ['hour', 'minute', 'second', 'tzhour', 'tzmin']:
  5.1920 +        if not params.get(field, None):
  5.1921 +            params[field] = 0
  5.1922 +    hour = int(params.get('hour', 0))
  5.1923 +    minute = int(params.get('minute', 0))
  5.1924 +    second = int(params.get('second', 0))
  5.1925 +    # weekday is normalized by mktime(), we can ignore it
  5.1926 +    weekday = 0
  5.1927 +    # daylight savings is complex, but not needed for feedparser's purposes
  5.1928 +    # as time zones, if specified, include mention of whether it is active
  5.1929 +    # (e.g. PST vs. PDT, CET). Using -1 is implementation-dependent and
  5.1930 +    # and most implementations have DST bugs
  5.1931 +    daylight_savings_flag = 0
  5.1932 +    tm = [year, month, day, hour, minute, second, weekday,
  5.1933 +          ordinal, daylight_savings_flag]
  5.1934 +    # ISO 8601 time zone adjustments
  5.1935 +    tz = params.get('tz')
  5.1936 +    if tz and tz != 'Z':
  5.1937 +        if tz[0] == '-':
  5.1938 +            tm[3] += int(params.get('tzhour', 0))
  5.1939 +            tm[4] += int(params.get('tzmin', 0))
  5.1940 +        elif tz[0] == '+':
  5.1941 +            tm[3] -= int(params.get('tzhour', 0))
  5.1942 +            tm[4] -= int(params.get('tzmin', 0))
  5.1943 +        else:
  5.1944 +            return None
  5.1945 +    # Python's time.mktime() is a wrapper around the ANSI C mktime(3c)
  5.1946 +    # which is guaranteed to normalize d/m/y/h/m/s.
  5.1947 +    # Many implementations have bugs, but we'll pretend they don't.
  5.1948 +    return time.localtime(time.mktime(tm))
  5.1949 +registerDateHandler(_parse_date_iso8601)
  5.1950 +    
  5.1951 +# 8-bit date handling routines written by ytrewq1.
  5.1952 +_korean_year  = u'\ub144' # b3e2 in euc-kr
  5.1953 +_korean_month = u'\uc6d4' # bff9 in euc-kr
  5.1954 +_korean_day   = u'\uc77c' # c0cf in euc-kr
  5.1955 +_korean_am    = u'\uc624\uc804' # bfc0 c0fc in euc-kr
  5.1956 +_korean_pm    = u'\uc624\ud6c4' # bfc0 c8c4 in euc-kr
  5.1957 +
  5.1958 +_korean_onblog_date_re = \
  5.1959 +    re.compile('(\d{4})%s\s+(\d{2})%s\s+(\d{2})%s\s+(\d{2}):(\d{2}):(\d{2})' % \
  5.1960 +               (_korean_year, _korean_month, _korean_day))
  5.1961 +_korean_nate_date_re = \
  5.1962 +    re.compile(u'(\d{4})-(\d{2})-(\d{2})\s+(%s|%s)\s+(\d{,2}):(\d{,2}):(\d{,2})' % \
  5.1963 +               (_korean_am, _korean_pm))
  5.1964 +def _parse_date_onblog(dateString):
  5.1965 +    '''Parse a string according to the OnBlog 8-bit date format'''
  5.1966 +    m = _korean_onblog_date_re.match(dateString)
  5.1967 +    if not m: return
  5.1968 +    w3dtfdate = '%(year)s-%(month)s-%(day)sT%(hour)s:%(minute)s:%(second)s%(zonediff)s' % \
  5.1969 +                {'year': m.group(1), 'month': m.group(2), 'day': m.group(3),\
  5.1970 +                 'hour': m.group(4), 'minute': m.group(5), 'second': m.group(6),\
  5.1971 +                 'zonediff': '+09:00'}
  5.1972 +    if _debug: sys.stderr.write('OnBlog date parsed as: %s\n' % w3dtfdate)
  5.1973 +    return _parse_date_w3dtf(w3dtfdate)
  5.1974 +registerDateHandler(_parse_date_onblog)
  5.1975 +
  5.1976 +def _parse_date_nate(dateString):
  5.1977 +    '''Parse a string according to the Nate 8-bit date format'''
  5.1978 +    m = _korean_nate_date_re.match(dateString)
  5.1979 +    if not m: return
  5.1980 +    hour = int(m.group(5))
  5.1981 +    ampm = m.group(4)
  5.1982 +    if (ampm == _korean_pm):
  5.1983 +        hour += 12
  5.1984 +    hour = str(hour)
  5.1985 +    if len(hour) == 1:
  5.1986 +        hour = '0' + hour
  5.1987 +    w3dtfdate = '%(year)s-%(month)s-%(day)sT%(hour)s:%(minute)s:%(second)s%(zonediff)s' % \
  5.1988 +                {'year': m.group(1), 'month': m.group(2), 'day': m.group(3),\
  5.1989 +                 'hour': hour, 'minute': m.group(6), 'second': m.group(7),\
  5.1990 +                 'zonediff': '+09:00'}
  5.1991 +    if _debug: sys.stderr.write('Nate date parsed as: %s\n' % w3dtfdate)
  5.1992 +    return _parse_date_w3dtf(w3dtfdate)
  5.1993 +registerDateHandler(_parse_date_nate)
  5.1994 +
  5.1995 +_mssql_date_re = \
  5.1996 +    re.compile('(\d{4})-(\d{2})-(\d{2})\s+(\d{2}):(\d{2}):(\d{2})(\.\d+)?')
  5.1997 +def _parse_date_mssql(dateString):
  5.1998 +    '''Parse a string according to the MS SQL date format'''
  5.1999 +    m = _mssql_date_re.match(dateString)
  5.2000 +    if not m: return
  5.2001 +    w3dtfdate = '%(year)s-%(month)s-%(day)sT%(hour)s:%(minute)s:%(second)s%(zonediff)s' % \
  5.2002 +                {'year': m.group(1), 'month': m.group(2), 'day': m.group(3),\
  5.2003 +                 'hour': m.group(4), 'minute': m.group(5), 'second': m.group(6),\
  5.2004 +                 'zonediff': '+09:00'}
  5.2005 +    if _debug: sys.stderr.write('MS SQL date parsed as: %s\n' % w3dtfdate)
  5.2006 +    return _parse_date_w3dtf(w3dtfdate)
  5.2007 +registerDateHandler(_parse_date_mssql)
  5.2008 +
  5.2009 +# Unicode strings for Greek date strings
  5.2010 +_greek_months = \
  5.2011 +  { \
  5.2012 +   u'\u0399\u03b1\u03bd': u'Jan',       # c9e1ed in iso-8859-7
  5.2013 +   u'\u03a6\u03b5\u03b2': u'Feb',       # d6e5e2 in iso-8859-7
  5.2014 +   u'\u039c\u03ac\u03ce': u'Mar',       # ccdcfe in iso-8859-7
  5.2015 +   u'\u039c\u03b1\u03ce': u'Mar',       # cce1fe in iso-8859-7
  5.2016 +   u'\u0391\u03c0\u03c1': u'Apr',       # c1f0f1 in iso-8859-7
  5.2017 +   u'\u039c\u03ac\u03b9': u'May',       # ccdce9 in iso-8859-7
  5.2018 +   u'\u039c\u03b1\u03ca': u'May',       # cce1fa in iso-8859-7
  5.2019 +   u'\u039c\u03b1\u03b9': u'May',       # cce1e9 in iso-8859-7
  5.2020 +   u'\u0399\u03bf\u03cd\u03bd': u'Jun', # c9effded in iso-8859-7
  5.2021 +   u'\u0399\u03bf\u03bd': u'Jun',       # c9efed in iso-8859-7
  5.2022 +   u'\u0399\u03bf\u03cd\u03bb': u'Jul', # c9effdeb in iso-8859-7
  5.2023 +   u'\u0399\u03bf\u03bb': u'Jul',       # c9f9eb in iso-8859-7
  5.2024 +   u'\u0391\u03cd\u03b3': u'Aug',       # c1fde3 in iso-8859-7
  5.2025 +   u'\u0391\u03c5\u03b3': u'Aug',       # c1f5e3 in iso-8859-7
  5.2026 +   u'\u03a3\u03b5\u03c0': u'Sep',       # d3e5f0 in iso-8859-7
  5.2027 +   u'\u039f\u03ba\u03c4': u'Oct',       # cfeaf4 in iso-8859-7
  5.2028 +   u'\u039d\u03bf\u03ad': u'Nov',       # cdefdd in iso-8859-7
  5.2029 +   u'\u039d\u03bf\u03b5': u'Nov',       # cdefe5 in iso-8859-7
  5.2030 +   u'\u0394\u03b5\u03ba': u'Dec',       # c4e5ea in iso-8859-7
  5.2031 +  }
  5.2032 +
  5.2033 +_greek_wdays = \
  5.2034 +  { \
  5.2035 +   u'\u039a\u03c5\u03c1': u'Sun', # caf5f1 in iso-8859-7
  5.2036 +   u'\u0394\u03b5\u03c5': u'Mon', # c4e5f5 in iso-8859-7
  5.2037 +   u'\u03a4\u03c1\u03b9': u'Tue', # d4f1e9 in iso-8859-7
  5.2038 +   u'\u03a4\u03b5\u03c4': u'Wed', # d4e5f4 in iso-8859-7
  5.2039 +   u'\u03a0\u03b5\u03bc': u'Thu', # d0e5ec in iso-8859-7
  5.2040 +   u'\u03a0\u03b1\u03c1': u'Fri', # d0e1f1 in iso-8859-7
  5.2041 +   u'\u03a3\u03b1\u03b2': u'Sat', # d3e1e2 in iso-8859-7   
  5.2042 +  }
  5.2043 +
  5.2044 +_greek_date_format_re = \
  5.2045 +    re.compile(u'([^,]+),\s+(\d{2})\s+([^\s]+)\s+(\d{4})\s+(\d{2}):(\d{2}):(\d{2})\s+([^\s]+)')
  5.2046 +
  5.2047 +def _parse_date_greek(dateString):
  5.2048 +    '''Parse a string according to a Greek 8-bit date format.'''
  5.2049 +    m = _greek_date_format_re.match(dateString)
  5.2050 +    if not m: return
  5.2051 +    try:
  5.2052 +        wday = _greek_wdays[m.group(1)]
  5.2053 +        month = _greek_months[m.group(3)]
  5.2054 +    except:
  5.2055 +        return
  5.2056 +    rfc822date = '%(wday)s, %(day)s %(month)s %(year)s %(hour)s:%(minute)s:%(second)s %(zonediff)s' % \
  5.2057 +                 {'wday': wday, 'day': m.group(2), 'month': month, 'year': m.group(4),\
  5.2058 +                  'hour': m.group(5), 'minute': m.group(6), 'second': m.group(7),\
  5.2059 +                  'zonediff': m.group(8)}
  5.2060 +    if _debug: sys.stderr.write('Greek date parsed as: %s\n' % rfc822date)
  5.2061 +    return _parse_date_rfc822(rfc822date)
  5.2062 +registerDateHandler(_parse_date_greek)
  5.2063 +
  5.2064 +# Unicode strings for Hungarian date strings
  5.2065 +_hungarian_months = \
  5.2066 +  { \
  5.2067 +    u'janu\u00e1r':   u'01',  # e1 in iso-8859-2
  5.2068 +    u'febru\u00e1ri': u'02',  # e1 in iso-8859-2
  5.2069 +    u'm\u00e1rcius':  u'03',  # e1 in iso-8859-2
  5.2070 +    u'\u00e1prilis':  u'04',  # e1 in iso-8859-2
  5.2071 +    u'm\u00e1ujus':   u'05',  # e1 in iso-8859-2
  5.2072 +    u'j\u00fanius':   u'06',  # fa in iso-8859-2
  5.2073 +    u'j\u00falius':   u'07',  # fa in iso-8859-2
  5.2074 +    u'augusztus':     u'08',
  5.2075 +    u'szeptember':    u'09',
  5.2076 +    u'okt\u00f3ber':  u'10',  # f3 in iso-8859-2
  5.2077 +    u'november':      u'11',
  5.2078 +    u'december':      u'12',
  5.2079 +  }
  5.2080 +
  5.2081 +_hungarian_date_format_re = \
  5.2082 +  re.compile(u'(\d{4})-([^-]+)-(\d{,2})T(\d{,2}):(\d{2})((\+|-)(\d{,2}:\d{2}))')
  5.2083 +
  5.2084 +def _parse_date_hungarian(dateString):
  5.2085 +    '''Parse a string according to a Hungarian 8-bit date format.'''
  5.2086 +    m = _hungarian_date_format_re.match(dateString)
  5.2087 +    if not m: return
  5.2088 +    try:
  5.2089 +        month = _hungarian_months[m.group(2)]
  5.2090 +        day = m.group(3)
  5.2091 +        if len(day) == 1:
  5.2092 +            day = '0' + day
  5.2093 +        hour = m.group(4)
  5.2094 +        if len(hour) == 1:
  5.2095 +            hour = '0' + hour
  5.2096 +    except:
  5.2097 +        return
  5.2098 +    w3dtfdate = '%(year)s-%(month)s-%(day)sT%(hour)s:%(minute)s%(zonediff)s' % \
  5.2099 +                {'year': m.group(1), 'month': month, 'day': day,\
  5.2100 +                 'hour': hour, 'minute': m.group(5),\
  5.2101 +                 'zonediff': m.group(6)}
  5.2102 +    if _debug: sys.stderr.write('Hungarian date parsed as: %s\n' % w3dtfdate)
  5.2103 +    return _parse_date_w3dtf(w3dtfdate)
  5.2104 +registerDateHandler(_parse_date_hungarian)
  5.2105 +
  5.2106 +# W3DTF-style date parsing adapted from PyXML xml.utils.iso8601, written by
  5.2107 +# Drake and licensed under the Python license.  Removed all range checking
  5.2108 +# for month, day, hour, minute, and second, since mktime will normalize
  5.2109 +# these later
  5.2110 +def _parse_date_w3dtf(dateString):
  5.2111 +    def __extract_date(m):
  5.2112 +        year = int(m.group('year'))
  5.2113 +        if year < 100:
  5.2114 +            year = 100 * int(time.gmtime()[0] / 100) + int(year)
  5.2115 +        if year < 1000:
  5.2116 +            return 0, 0, 0
  5.2117 +        julian = m.group('julian')
  5.2118 +        if julian:
  5.2119 +            julian = int(julian)
  5.2120 +            month = julian / 30 + 1
  5.2121 +            day = julian % 30 + 1
  5.2122 +            jday = None
  5.2123 +            while jday != julian:
  5.2124 +                t = time.mktime((year, month, day, 0, 0, 0, 0, 0, 0))
  5.2125 +                jday = time.gmtime(t)[-2]
  5.2126 +                diff = abs(jday - julian)
  5.2127 +                if jday > julian:
  5.2128 +                    if diff < day:
  5.2129 +                        day = day - diff
  5.2130 +                    else:
  5.2131 +                        month = month - 1
  5.2132 +                        day = 31
  5.2133 +                elif jday < julian:
  5.2134 +                    if day + diff < 28:
  5.2135 +                       day = day + diff
  5.2136 +                    else:
  5.2137 +                        month = month + 1
  5.2138 +            return year, month, day
  5.2139 +        month = m.group('month')
  5.2140 +        day = 1
  5.2141 +        if month is None:
  5.2142 +            month = 1
  5.2143 +        else:
  5.2144 +            month = int(month)
  5.2145 +            day = m.group('day')
  5.2146 +            if day:
  5.2147 +                day = int(day)
  5.2148 +            else:
  5.2149 +                day = 1
  5.2150 +        return year, month, day
  5.2151 +
  5.2152 +    def __extract_time(m):
  5.2153 +        if not m:
  5.2154 +            return 0, 0, 0
  5.2155 +        hours = m.group('hours')
  5.2156 +        if not hours:
  5.2157 +            return 0, 0, 0
  5.2158 +        hours = int(hours)
  5.2159 +        minutes = int(m.group('minutes'))
  5.2160 +        seconds = m.group('seconds')
  5.2161 +        if seconds:
  5.2162 +            seconds = int(seconds)
  5.2163 +        else:
  5.2164 +            seconds = 0
  5.2165 +        return hours, minutes, seconds
  5.2166 +
  5.2167 +    def __extract_tzd(m):
  5.2168 +        '''Return the Time Zone Designator as an offset in seconds from UTC.'''
  5.2169 +        if not m:
  5.2170 +            return 0
  5.2171 +        tzd = m.group('tzd')
  5.2172 +        if not tzd:
  5.2173 +            return 0
  5.2174 +        if tzd == 'Z':
  5.2175 +            return 0
  5.2176 +        hours = int(m.group('tzdhours'))
  5.2177 +        minutes = m.group('tzdminutes')
  5.2178 +        if minutes:
  5.2179 +            minutes = int(minutes)
  5.2180 +        else:
  5.2181 +            minutes = 0
  5.2182 +        offset = (hours*60 + minutes) * 60
  5.2183 +        if tzd[0] == '+':
  5.2184 +            return -offset
  5.2185 +        return offset
  5.2186 +
  5.2187 +    __date_re = ('(?P<year>\d\d\d\d)'
  5.2188 +                 '(?:(?P<dsep>-|)'
  5.2189 +                 '(?:(?P<julian>\d\d\d)'
  5.2190 +                 '|(?P<month>\d\d)(?:(?P=dsep)(?P<day>\d\d))?))?')
  5.2191 +    __tzd_re = '(?P<tzd>[-+](?P<tzdhours>\d\d)(?::?(?P<tzdminutes>\d\d))|Z)'
  5.2192 +    __tzd_rx = re.compile(__tzd_re)
  5.2193 +    __time_re = ('(?P<hours>\d\d)(?P<tsep>:|)(?P<minutes>\d\d)'
  5.2194 +                 '(?:(?P=tsep)(?P<seconds>\d\d(?:[.,]\d+)?))?'
  5.2195 +                 + __tzd_re)
  5.2196 +    __datetime_re = '%s(?:T%s)?' % (__date_re, __time_re)
  5.2197 +    __datetime_rx = re.compile(__datetime_re)
  5.2198 +    m = __datetime_rx.match(dateString)
  5.2199 +    if (m is None) or (m.group() != dateString): return
  5.2200 +    gmt = __extract_date(m) + __extract_time(m) + (0, 0, 0)
  5.2201 +    if gmt[0] == 0: return
  5.2202 +    return time.gmtime(time.mktime(gmt) + __extract_tzd(m) - time.timezone)
  5.2203 +registerDateHandler(_parse_date_w3dtf)
  5.2204 +
  5.2205 +def _parse_date_rfc822(dateString):
  5.2206 +    '''Parse an RFC822, RFC1123, RFC2822, or asctime-style date'''
  5.2207 +    data = dateString.split()
  5.2208 +    if data[0][-1] in (',', '.') or data[0].lower() in rfc822._daynames:
  5.2209 +        del data[0]
  5.2210 +    if len(data) == 4:
  5.2211 +        s = data[3]
  5.2212 +        i = s.find('+')
  5.2213 +        if i > 0:
  5.2214 +            data[3:] = [s[:i], s[i+1:]]
  5.2215 +        else:
  5.2216 +            data.append('')
  5.2217 +        dateString = " ".join(data)
  5.2218 +    if len(data) < 5:
  5.2219 +        dateString += ' 00:00:00 GMT'
  5.2220 +    tm = rfc822.parsedate_tz(dateString)
  5.2221 +    if tm:
  5.2222 +        return time.gmtime(rfc822.mktime_tz(tm))
  5.2223 +# rfc822.py defines several time zones, but we define some extra ones.
  5.2224 +# 'ET' is equivalent to 'EST', etc.
  5.2225 +_additional_timezones = {'AT': -400, 'ET': -500, 'CT': -600, 'MT': -700, 'PT': -800}
  5.2226 +rfc822._timezones.update(_additional_timezones)
  5.2227 +registerDateHandler(_parse_date_rfc822)    
  5.2228 +
  5.2229 +def _parse_date(dateString):
  5.2230 +    '''Parses a variety of date formats into a 9-tuple in GMT'''
  5.2231 +    for handler in _date_handlers:
  5.2232 +        try:
  5.2233 +            date9tuple = handler(dateString)
  5.2234 +            if not date9tuple: continue
  5.2235 +            if len(date9tuple) != 9:
  5.2236 +                if _debug: sys.stderr.write('date handler function must return 9-tuple\n')
  5.2237 +                raise ValueError
  5.2238 +            map(int, date9tuple)
  5.2239 +            return date9tuple
  5.2240 +        except Exception, e:
  5.2241 +            if _debug: sys.stderr.write('%s raised %s\n' % (handler.__name__, repr(e)))
  5.2242 +            pass
  5.2243 +    return None
  5.2244 +
  5.2245 +def _getCharacterEncoding(http_headers, xml_data):
  5.2246 +    '''Get the character encoding of the XML document
  5.2247 +
  5.2248 +    http_headers is a dictionary
  5.2249 +    xml_data is a raw string (not Unicode)
  5.2250 +    
  5.2251 +    This is so much trickier than it sounds, it's not even funny.
  5.2252 +    According to RFC 3023 ('XML Media Types'), if the HTTP Content-Type
  5.2253 +    is application/xml, application/*+xml,
  5.2254 +    application/xml-external-parsed-entity, or application/xml-dtd,
  5.2255 +    the encoding given in the charset parameter of the HTTP Content-Type
  5.2256 +    takes precedence over the encoding given in the XML prefix within the
  5.2257 +    document, and defaults to 'utf-8' if neither are specified.  But, if
  5.2258 +    the HTTP Content-Type is text/xml, text/*+xml, or
  5.2259 +    text/xml-external-parsed-entity, the encoding given in the XML prefix
  5.2260 +    within the document is ALWAYS IGNORED and only the encoding given in
  5.2261 +    the charset parameter of the HTTP Content-Type header should be
  5.2262 +    respected, and it defaults to 'us-ascii' if not specified.
  5.2263 +
  5.2264 +    Furthermore, discussion on the atom-syntax mailing list with the
  5.2265 +    author of RFC 3023 leads me to the conclusion that any document
  5.2266 +    served with a Content-Type of text/* and no charset parameter
  5.2267 +    must be treated as us-ascii.  (We now do this.)  And also that it
  5.2268 +    must always be flagged as non-well-formed.  (We now do this too.)
  5.2269 +    
  5.2270 +    If Content-Type is unspecified (input was local file or non-HTTP source)
  5.2271 +    or unrecognized (server just got it totally wrong), then go by the
  5.2272 +    encoding given in the XML prefix of the document and default to
  5.2273 +    'iso-8859-1' as per the HTTP specification (RFC 2616).
  5.2274 +    
  5.2275 +    Then, assuming we didn't find a character encoding in the HTTP headers
  5.2276 +    (and the HTTP Content-type allowed us to look in the body), we need
  5.2277 +    to sniff the first few bytes of the XML data and try to determine
  5.2278 +    whether the encoding is ASCII-compatible.  Section F of the XML
  5.2279 +    specification shows the way here:
  5.2280 +    http://www.w3.org/TR/REC-xml/#sec-guessing-no-ext-info
  5.2281 +
  5.2282 +    If the sniffed encoding is not ASCII-compatible, we need to make it
  5.2283 +    ASCII compatible so that we can sniff further into the XML declaration
  5.2284 +    to find the encoding attribute, which will tell us the true encoding.
  5.2285 +
  5.2286 +    Of course, none of this guarantees that we will be able to parse the
  5.2287 +    feed in the declared character encoding (assuming it was declared
  5.2288 +    correctly, which many are not).  CJKCodecs and iconv_codec help a lot;
  5.2289 +    you should definitely install them if you can.
  5.2290 +    http://cjkpython.i18n.org/
  5.2291 +    '''
  5.2292 +
  5.2293 +    def _parseHTTPContentType(content_type):
  5.2294 +        '''takes HTTP Content-Type header and returns (content type, charset)
  5.2295 +
  5.2296 +        If no charset is specified, returns (content type, '')
  5.2297 +        If no content type is specified, returns ('', '')
  5.2298 +        Both return parameters are guaranteed to be lowercase strings
  5.2299 +        '''
  5.2300 +        content_type = content_type or ''
  5.2301 +        content_type, params = cgi.parse_header(content_type)
  5.2302 +        return content_type, params.get('charset', '').replace("'", '')
  5.2303 +
  5.2304 +    sniffed_xml_encoding = ''
  5.2305 +    xml_encoding = ''
  5.2306 +    true_encoding = ''
  5.2307 +    http_content_type, http_encoding = _parseHTTPContentType(http_headers.get('content-type'))
  5.2308 +    # Must sniff for non-ASCII-compatible character encodings before
  5.2309 +    # searching for XML declaration.  This heuristic is defined in
  5.2310 +    # section F of the XML specification:
  5.2311 +    # http://www.w3.org/TR/REC-xml/#sec-guessing-no-ext-info
  5.2312 +    try:
  5.2313 +        if xml_data[:4] == '\x4c\x6f\xa7\x94':
  5.2314 +            # EBCDIC
  5.2315 +            xml_data = _ebcdic_to_ascii(xml_data)
  5.2316 +        elif xml_data[:4] == '\x00\x3c\x00\x3f':
  5.2317 +            # UTF-16BE
  5.2318 +            sniffed_xml_encoding = 'utf-16be'
  5.2319 +            xml_data = unicode(xml_data, 'utf-16be').encode('utf-8')
  5.2320 +        elif (len(xml_data) >= 4) and (xml_data[:2] == '\xfe\xff') and (xml_data[2:4] != '\x00\x00'):
  5.2321 +            # UTF-16BE with BOM
  5.2322 +            sniffed_xml_encoding = 'utf-16be'
  5.2323 +            xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8')
  5.2324 +        elif xml_data[:4] == '\x3c\x00\x3f\x00':
  5.2325 +            # UTF-16LE
  5.2326 +            sniffed_xml_encoding = 'utf-16le'
  5.2327 +            xml_data = unicode(xml_data, 'utf-16le').encode('utf-8')
  5.2328 +        elif (len(xml_data) >= 4) and (xml_data[:2] == '\xff\xfe') and (xml_data[2:4] != '\x00\x00'):
  5.2329 +            # UTF-16LE with BOM
  5.2330 +            sniffed_xml_encoding = 'utf-16le'
  5.2331 +            xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8')
  5.2332 +        elif xml_data[:4] == '\x00\x00\x00\x3c':
  5.2333 +            # UTF-32BE
  5.2334 +            sniffed_xml_encoding = 'utf-32be'
  5.2335 +            xml_data = unicode(xml_data, 'utf-32be').encode('utf-8')
  5.2336 +        elif xml_data[:4] == '\x3c\x00\x00\x00':
  5.2337 +            # UTF-32LE
  5.2338 +            sniffed_xml_encoding = 'utf-32le'
  5.2339 +            xml_data = unicode(xml_data, 'utf-32le').encode('utf-8')
  5.2340 +        elif xml_data[:4] == '\x00\x00\xfe\xff':
  5.2341 +            # UTF-32BE with BOM
  5.2342 +            sniffed_xml_encoding = 'utf-32be'
  5.2343 +            xml_data = unicode(xml_data[4:], 'utf-32be').encode('utf-8')
  5.2344 +        elif xml_data[:4] == '\xff\xfe\x00\x00':
  5.2345 +            # UTF-32LE with BOM
  5.2346 +            sniffed_xml_encoding = 'utf-32le'
  5.2347 +            xml_data = unicode(xml_data[4:], 'utf-32le').encode('utf-8')
  5.2348 +        elif xml_data[:3] == '\xef\xbb\xbf':
  5.2349 +            # UTF-8 with BOM
  5.2350 +            sniffed_xml_encoding = 'utf-8'
  5.2351 +            xml_data = unicode(xml_data[3:], 'utf-8').encode('utf-8')
  5.2352 +        else:
  5.2353 +            # ASCII-compatible
  5.2354 +            pass
  5.2355 +        xml_encoding_match = re.compile('^<\?.*encoding=[\'"](.*?)[\'"].*\?>').match(xml_data)
  5.2356 +    except:
  5.2357 +        xml_encoding_match = None
  5.2358 +    if xml_encoding_match:
  5.2359 +        xml_encoding = xml_encoding_match.groups()[0].lower()
  5.2360 +        if sniffed_xml_encoding and (xml_encoding in ('iso-10646-ucs-2', 'ucs-2', 'csunicode', 'iso-10646-ucs-4', 'ucs-4', 'csucs4', 'utf-16', 'utf-32', 'utf_16', 'utf_32', 'utf16', 'u16')):
  5.2361 +            xml_encoding = sniffed_xml_encoding
  5.2362 +    acceptable_content_type = 0
  5.2363 +    application_content_types = ('application/xml', 'application/xml-dtd', 'application/xml-external-parsed-entity')
  5.2364 +    text_content_types = ('text/xml', 'text/xml-external-parsed-entity')
  5.2365 +    if (http_content_type in application_content_types) or \
  5.2366 +       (http_content_type.startswith('application/') and http_content_type.endswith('+xml')):
  5.2367 +        acceptable_content_type = 1
  5.2368 +        true_encoding = http_encoding or xml_encoding or 'utf-8'
  5.2369 +    elif (http_content_type in text_content_types) or \
  5.2370 +         (http_content_type.startswith('text/')) and http_content_type.endswith('+xml'):
  5.2371 +        acceptable_content_type = 1
  5.2372 +        true_encoding = http_encoding or 'us-ascii'
  5.2373 +    elif http_content_type.startswith('text/'):
  5.2374 +        true_encoding = http_encoding or 'us-ascii'
  5.2375 +    elif http_headers and (not http_headers.has_key('content-type')):
  5.2376 +        true_encoding = xml_encoding or 'iso-8859-1'
  5.2377 +    else:
  5.2378 +        true_encoding = xml_encoding or 'utf-8'
  5.2379 +    return true_encoding, http_encoding, xml_encoding, sniffed_xml_encoding, acceptable_content_type
  5.2380 +    
  5.2381 +def _toUTF8(data, encoding):
  5.2382 +    '''Changes an XML data stream on the fly to specify a new encoding
  5.2383 +
  5.2384 +    data is a raw sequence of bytes (not Unicode) that is presumed to be in %encoding already
  5.2385 +    encoding is a string recognized by encodings.aliases
  5.2386 +    '''
  5.2387 +    if _debug: sys.stderr.write('entering _toUTF8, trying encoding %s\n' % encoding)
  5.2388 +    # strip Byte Order Mark (if present)
  5.2389 +    if (len(data) >= 4) and (data[:2] == '\xfe\xff') and (data[2:4] != '\x00\x00'):
  5.2390 +        if _debug:
  5.2391 +            sys.stderr.write('stripping BOM\n')
  5.2392 +            if encoding != 'utf-16be':
  5.2393 +                sys.stderr.write('trying utf-16be instead\n')
  5.2394 +        encoding = 'utf-16be'
  5.2395 +        data = data[2:]
  5.2396 +    elif (len(data) >= 4) and (data[:2] == '\xff\xfe') and (data[2:4] != '\x00\x00'):
  5.2397 +        if _debug:
  5.2398 +            sys.stderr.write('stripping BOM\n')
  5.2399 +            if encoding != 'utf-16le':
  5.2400 +                sys.stderr.write('trying utf-16le instead\n')
  5.2401 +        encoding = 'utf-16le'
  5.2402 +        data = data[2:]
  5.2403 +    elif data[:3] == '\xef\xbb\xbf':
  5.2404 +        if _debug:
  5.2405 +            sys.stderr.write('stripping BOM\n')
  5.2406 +            if encoding != 'utf-8':
  5.2407 +                sys.stderr.write('trying utf-8 instead\n')
  5.2408 +        encoding = 'utf-8'
  5.2409 +        data = data[3:]
  5.2410 +    elif data[:4] == '\x00\x00\xfe\xff':
  5.2411 +        if _debug:
  5.2412 +            sys.stderr.write('stripping BOM\n')
  5.2413 +            if encoding != 'utf-32be':
  5.2414 +                sys.stderr.write('trying utf-32be instead\n')
  5.2415 +        encoding = 'utf-32be'
  5.2416 +        data = data[4:]
  5.2417 +    elif data[:4] == '\xff\xfe\x00\x00':
  5.2418 +        if _debug:
  5.2419 +            sys.stderr.write('stripping BOM\n')
  5.2420 +            if encoding != 'utf-32le':
  5.2421 +                sys.stderr.write('trying utf-32le instead\n')
  5.2422 +        encoding = 'utf-32le'
  5.2423 +        data = data[4:]
  5.2424 +    newdata = unicode(data, encoding)
  5.2425 +    if _debug: sys.stderr.write('successfully converted %s data to unicode\n' % encoding)
  5.2426 +    declmatch = re.compile('^<\?xml[^>]*?>')
  5.2427 +    newdecl = '''<?xml version='1.0' encoding='utf-8'?>'''
  5.2428 +    if declmatch.search(newdata):
  5.2429 +        newdata = declmatch.sub(newdecl, newdata)
  5.2430 +    else:
  5.2431 +        newdata = newdecl + u'\n' + newdata
  5.2432 +    return newdata.encode('utf-8')
  5.2433 +
  5.2434 +def _stripDoctype(data):
  5.2435 +    '''Strips DOCTYPE from XML document, returns (rss_version, stripped_data)
  5.2436 +
  5.2437 +    rss_version may be 'rss091n' or None
  5.2438 +    stripped_data is the same XML document, minus the DOCTYPE
  5.2439 +    '''
  5.2440 +    entity_pattern = re.compile(r'<!ENTITY([^>]*?)>', re.MULTILINE)
  5.2441 +    data = entity_pattern.sub('', data)
  5.2442 +    doctype_pattern = re.compile(r'<!DOCTYPE([^>]*?)>', re.MULTILINE)
  5.2443 +    doctype_results = doctype_pattern.findall(data)
  5.2444 +    doctype = doctype_results and doctype_results[0] or ''
  5.2445 +    if doctype.lower().count('netscape'):
  5.2446 +        version = 'rss091n'
  5.2447 +    else:
  5.2448 +        version = None
  5.2449 +    data = doctype_pattern.sub('', data)
  5.2450 +    return version, data
  5.2451 +    
  5.2452 +def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, referrer=None, handlers=[]):
  5.2453 +    '''Parse a feed from a URL, file, stream, or string'''
  5.2454 +    result = FeedParserDict()
  5.2455 +    result['feed'] = FeedParserDict()
  5.2456 +    result['entries'] = []
  5.2457 +    if _XML_AVAILABLE:
  5.2458 +        result['bozo'] = 0
  5.2459 +    if type(handlers) == types.InstanceType:
  5.2460 +        handlers = [handlers]
  5.2461 +    try:
  5.2462 +        f = _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, handlers)
  5.2463 +        data = f.read()
  5.2464 +    except Exception, e:
  5.2465 +        result['bozo'] = 1
  5.2466 +        result['bozo_exception'] = e
  5.2467 +        data = ''
  5.2468 +        f = None
  5.2469 +
  5.2470 +    # if feed is gzip-compressed, decompress it
  5.2471 +    if f and data and hasattr(f, 'headers'):
  5.2472 +        if gzip and f.headers.get('content-encoding', '') == 'gzip':
  5.2473 +            try:
  5.2474 +                data = gzip.GzipFile(fileobj=_StringIO(data)).read()
  5.2475 +            except Exception, e:
  5.2476 +                # Some feeds claim to be gzipped but they're not, so
  5.2477 +                # we get garbage.  Ideally, we should re-request the
  5.2478 +                # feed without the 'Accept-encoding: gzip' header,
  5.2479 +                # but we don't.
  5.2480 +                result['bozo'] = 1
  5.2481 +                result['bozo_exception'] = e
  5.2482 +                data = ''
  5.2483 +        elif zlib and f.headers.get('content-encoding', '') == 'deflate':
  5.2484 +            try:
  5.2485 +                data = zlib.decompress(data, -zlib.MAX_WBITS)
  5.2486 +            except Exception, e:
  5.2487 +                result['bozo'] = 1
  5.2488 +                result['bozo_exception'] = e
  5.2489 +                data = ''
  5.2490 +
  5.2491 +    # save HTTP headers
  5.2492 +    if hasattr(f, 'info'):
  5.2493 +        info = f.info()
  5.2494 +        result['etag'] = info.getheader('ETag')
  5.2495 +        last_modified = info.getheader('Last-Modified')
  5.2496 +        if last_modified:
  5.2497 +            result['modified'] = _parse_date(last_modified)
  5.2498 +    if hasattr(f, 'url'):
  5.2499 +        result['href'] = f.url
  5.2500 +        result['status'] = 200
  5.2501 +    if hasattr(f, 'status'):
  5.2502 +        result['status'] = f.status
  5.2503 +    if hasattr(f, 'headers'):
  5.2504 +        result['headers'] = f.headers.dict
  5.2505 +    if hasattr(f, 'close'):
  5.2506 +        f.close()
  5.2507 +
  5.2508 +    # there are four encodings to keep track of:
  5.2509 +    # - http_encoding is the encoding declared in the Content-Type HTTP header
  5.2510 +    # - xml_encoding is the encoding declared in the <?xml declaration
  5.2511 +    # - sniffed_encoding is the encoding sniffed from the first 4 bytes of the XML data
  5.2512 +    # - result['encoding'] is the actual encoding, as per RFC 3023 and a variety of other conflicting specifications
  5.2513 +    http_headers = result.get('headers', {})
  5.2514 +    result['encoding'], http_encoding, xml_encoding, sniffed_xml_encoding, acceptable_content_type = \
  5.2515 +        _getCharacterEncoding(http_headers, data)
  5.2516 +    if http_headers and (not acceptable_content_type):
  5.2517 +        if http_headers.has_key('content-type'):
  5.2518 +            bozo_message = '%s is not an XML media type' % http_headers['content-type']
  5.2519 +        else:
  5.2520 +            bozo_message = 'no Content-type specified'
  5.2521 +        result['bozo'] = 1
  5.2522 +        result['bozo_exception'] = NonXMLContentType(bozo_message)
  5.2523 +        
  5.2524 +    result['version'], data = _stripDoctype(data)
  5.2525 +
  5.2526 +    baseuri = http_headers.get('content-location', result.get('href'))
  5.2527 +    baselang = http_headers.get('content-language', None)
  5.2528 +
  5.2529 +    # if server sent 304, we're done
  5.2530 +    if result.get('status', 0) == 304:
  5.2531 +        result['version'] = ''
  5.2532 +        result['debug_message'] = 'The feed has not changed since you last checked, ' + \
  5.2533 +            'so the server sent no data.  This is a feature, not a bug!'
  5.2534 +        return result
  5.2535 +
  5.2536 +    # if there was a problem downloading, we're done
  5.2537 +    if not data:
  5.2538 +        return result
  5.2539 +
  5.2540 +    # determine character encoding
  5.2541 +    use_strict_parser = 0
  5.2542 +    known_encoding = 0
  5.2543 +    tried_encodings = []
  5.2544 +    # try: HTTP encoding, declared XML encoding, encoding sniffed from BOM
  5.2545 +    for proposed_encoding in (result['encoding'], xml_encoding, sniffed_xml_encoding):
  5.2546 +        if not proposed_encoding: continue
  5.2547 +        if proposed_encoding in tried_encodings: continue
  5.2548 +        tried_encodings.append(proposed_encoding)
  5.2549 +        try:
  5.2550 +            data = _toUTF8(data, proposed_encoding)
  5.2551 +            known_encoding = use_strict_parser = 1
  5.2552 +            break
  5.2553 +        except:
  5.2554 +            pass
  5.2555 +    # if no luck and we have auto-detection library, try that
  5.2556 +    if (not known_encoding) and chardet:
  5.2557 +        try:
  5.2558 +            proposed_encoding = chardet.detect(data)['encoding']
  5.2559 +            if proposed_encoding and (proposed_encoding not in tried_encodings):
  5.2560 +                tried_encodings.append(proposed_encoding)
  5.2561 +                data = _toUTF8(data, proposed_encoding)
  5.2562 +                known_encoding = use_strict_parser = 1
  5.2563 +        except:
  5.2564 +            pass
  5.2565 +    # if still no luck and we haven't tried utf-8 yet, try that
  5.2566 +    if (not known_encoding) and ('utf-8' not in tried_encodings):
  5.2567 +        try:
  5.2568 +            proposed_encoding = 'utf-8'
  5.2569 +            tried_encodings.append(proposed_encoding)
  5.2570 +            data = _toUTF8(data, proposed_encoding)
  5.2571 +            known_encoding = use_strict_parser = 1
  5.2572 +        except:
  5.2573 +            pass
  5.2574 +    # if still no luck and we haven't tried windows-1252 yet, try that
  5.2575 +    if (not known_encoding) and ('windows-1252' not in tried_encodings):
  5.2576 +        try:
  5.2577 +            proposed_encoding = 'windows-1252'
  5.2578 +            tried_encodings.append(proposed_encoding)
  5.2579 +            data = _toUTF8(data, proposed_encoding)
  5.2580 +            known_encoding = use_strict_parser = 1
  5.2581 +        except:
  5.2582 +            pass
  5.2583 +    # if still no luck, give up
  5.2584 +    if not known_encoding:
  5.2585 +        result['bozo'] = 1
  5.2586 +        result['bozo_exception'] = CharacterEncodingUnknown( \
  5.2587 +            'document encoding unknown, I tried ' + \
  5.2588 +            '%s, %s, utf-8, and windows-1252 but nothing worked' % \
  5.2589 +            (result['encoding'], xml_encoding))
  5.2590 +        result['encoding'] = ''
  5.2591 +    elif proposed_encoding != result['encoding']:
  5.2592 +        result['bozo'] = 1
  5.2593 +        result['bozo_exception'] = CharacterEncodingOverride( \
  5.2594 +            'documented declared as %s, but parsed as %s' % \
  5.2595 +            (result['encoding'], proposed_encoding))
  5.2596 +        result['encoding'] = proposed_encoding
  5.2597 +
  5.2598 +    if not _XML_AVAILABLE:
  5.2599 +        use_strict_parser = 0
  5.2600 +    if use_strict_parser:
  5.2601 +        # initialize the SAX parser
  5.2602 +        feedparser = _StrictFeedParser(baseuri, baselang, 'utf-8')
  5.2603 +        saxparser = xml.sax.make_parser(PREFERRED_XML_PARSERS)
  5.2604 +        saxparser.setFeature(xml.sax.handler.feature_namespaces, 1)
  5.2605 +        saxparser.setContentHandler(feedparser)
  5.2606 +        saxparser.setErrorHandler(feedparser)
  5.2607 +        source = xml.sax.xmlreader.InputSource()
  5.2608 +        source.setByteStream(_StringIO(data))
  5.2609 +        if hasattr(saxparser, '_ns_stack'):
  5.2610 +            # work around bug in built-in SAX parser (doesn't recognize xml: namespace)
  5.2611 +            # PyXML doesn't have this problem, and it doesn't have _ns_stack either
  5.2612 +            saxparser._ns_stack.append({'http://www.w3.org/XML/1998/namespace':'xml'})
  5.2613 +        try:
  5.2614 +            saxparser.parse(source)
  5.2615 +        except Exception, e:
  5.2616 +            if _debug:
  5.2617 +                import traceback
  5.2618 +                traceback.print_stack()
  5.2619 +                traceback.print_exc()
  5.2620 +                sys.stderr.write('xml parsing failed\n')
  5.2621 +            result['bozo'] = 1
  5.2622 +            result['bozo_exception'] = feedparser.exc or e
  5.2623 +            use_strict_parser = 0
  5.2624 +    if not use_strict_parser:
  5.2625 +        feedparser = _LooseFeedParser(baseuri, baselang, known_encoding and 'utf-8' or '')
  5.2626 +        feedparser.feed(data)
  5.2627 +    result['feed'] = feedparser.feeddata
  5.2628 +    result['entries'] = feedparser.entries
  5.2629 +    result['version'] = result['version'] or feedparser.version
  5.2630 +    result['namespaces'] = feedparser.namespacesInUse
  5.2631 +    return result
  5.2632 +
  5.2633 +if __name__ == '__main__':
  5.2634 +    if not sys.argv[1:]:
  5.2635 +        print __doc__
  5.2636 +        sys.exit(0)
  5.2637 +    else:
  5.2638 +        urls = sys.argv[1:]
  5.2639 +    zopeCompatibilityHack()
  5.2640 +    from pprint import pprint
  5.2641 +    for url in urls:
  5.2642 +        print url
  5.2643 +        print
  5.2644 +        result = parse(url)
  5.2645 +        pprint(result)
  5.2646 +        print
  5.2647 +
  5.2648 +#REVISION HISTORY
  5.2649 +#1.0 - 9/27/2002 - MAP - fixed namespace processing on prefixed RSS 2.0 elements,
  5.2650 +#  added Simon Fell's test suite
  5.2651 +#1.1 - 9/29/2002 - MAP - fixed infinite loop on incomplete CDATA sections
  5.2652 +#2.0 - 10/19/2002
  5.2653 +#  JD - use inchannel to watch out for image and textinput elements which can
  5.2654 +#  also contain title, link, and description elements
  5.2655 +#  JD - check for isPermaLink='false' attribute on guid elements
  5.2656 +#  JD - replaced openAnything with open_resource supporting ETag and
  5.2657 +#  If-Modified-Since request headers
  5.2658 +#  JD - parse now accepts etag, modified, agent, and referrer optional
  5.2659 +#  arguments
  5.2660 +#  JD - modified parse to return a dictionary instead of a tuple so that any
  5.2661 +#  etag or modified information can be returned and cached by the caller
  5.2662 +#2.0.1 - 10/21/2002 - MAP - changed parse() so that if we don't get anything
  5.2663 +#  because of etag/modified, return the old etag/modified to the caller to
  5.2664 +#  indicate why nothing is being returned
  5.2665 +#2.0.2 - 10/21/2002 - JB - added the inchannel to the if statement, otherwise it's
  5.2666 +#  useless.  Fixes the problem JD was addressing by adding it.
  5.2667 +#2.1 - 11/14/2002 - MAP - added gzip support
  5.2668 +#2.2 - 1/27/2003 - MAP - added attribute support, admin:generatorAgent.
  5.2669 +#  start_admingeneratoragent is an example of how to handle elements with
  5.2670 +#  only attributes, no content.
  5.2671 +#2.3 - 6/11/2003 - MAP - added USER_AGENT for default (if caller doesn't specify);
  5.2672 +#  also, make sure we send the User-Agent even if urllib2 isn't available.
  5.2673 +#  Match any variation of backend.userland.com/rss namespace.
  5.2674 +#2.3.1 - 6/12/2003 - MAP - if item has both link and guid, return both as-is.
  5.2675 +#2.4 - 7/9/2003 - MAP - added preliminary Pie/Atom/Echo support based on Sam Ruby's
  5.2676 +#  snapshot of July 1 <http://www.intertwingly.net/blog/1506.html>; changed
  5.2677 +#  project name
  5.2678 +#2.5 - 7/25/2003 - MAP - changed to Python license (all contributors agree);
  5.2679 +#  removed unnecessary urllib code -- urllib2 should always be available anyway;
  5.2680 +#  return actual url, status, and full HTTP headers (as result['url'],
  5.2681 +#  result['status'], and result['headers']) if parsing a remote feed over HTTP --
  5.2682 +#  this should pass all the HTTP tests at <http://diveintomark.org/tests/client/http/>;
  5.2683 +#  added the latest namespace-of-the-week for RSS 2.0
  5.2684 +#2.5.1 - 7/26/2003 - RMK - clear opener.addheaders so we only send our custom
  5.2685 +#  User-Agent (otherwise urllib2 sends two, which confuses some servers)
  5.2686 +#2.5.2 - 7/28/2003 - MAP - entity-decode inline xml properly; added support for
  5.2687 +#  inline <xhtml:body> and <xhtml:div> as used in some RSS 2.0 feeds
  5.2688 +#2.5.3 - 8/6/2003 - TvdV - patch to track whether we're inside an image or
  5.2689 +#  textInput, and also to return the character encoding (if specified)
  5.2690 +#2.6 - 1/1/2004 - MAP - dc:author support (MarekK); fixed bug tracking
  5.2691 +#  nested divs within content (JohnD); fixed missing sys import (JohanS);
  5.2692 +#  fixed regular expression to capture XML character encoding (Andrei);
  5.2693 +#  added support for Atom 0.3-style links; fixed bug with textInput tracking;
  5.2694 +#  added support for cloud (MartijnP); added support for multiple
  5.2695 +#  category/dc:subject (MartijnP); normalize content model: 'description' gets
  5.2696 +#  description (which can come from description, summary, or full content if no
  5.2697 +#  description), 'content' gets dict of base/language/type/value (which can come
  5.2698 +#  from content:encoded, xhtml:body, content, or fullitem);
  5.2699 +#  fixed bug matching arbitrary Userland namespaces; added xml:base and xml:lang
  5.2700 +#  tracking; fixed bug tracking unknown tags; fixed bug tracking content when
  5.2701 +#  <content> element is not in default namespace (like Pocketsoap feed);
  5.2702 +#  resolve relative URLs in link, guid, docs, url, comments, wfw:comment,
  5.2703 +#  wfw:commentRSS; resolve relative URLs within embedded HTML markup in
  5.2704 +#  description, xhtml:body, content, content:encoded, title, subtitle,
  5.2705 +#  summary, info, tagline, and copyright; added support for pingback and
  5.2706 +#  trackback namespaces
  5.2707 +#2.7 - 1/5/2004 - MAP - really added support for trackback and pingback
  5.2708 +#  namespaces, as opposed to 2.6 when I said I did but didn't really;
  5.2709 +#  sanitize HTML markup within some elements; added mxTidy support (if
  5.2710 +#  installed) to tidy HTML markup within some elements; fixed indentation
  5.2711 +#  bug in _parse_date (FazalM); use socket.setdefaulttimeout if available
  5.2712 +#  (FazalM); universal date parsing and normalization (FazalM): 'created', 'modified',
  5.2713 +#  'issued' are parsed into 9-tuple date format and stored in 'created_parsed',
  5.2714 +#  'modified_parsed', and 'issued_parsed'; 'date' is duplicated in 'modified'
  5.2715 +#  and vice-versa; 'date_parsed' is duplicated in 'modified_parsed' and vice-versa
  5.2716 +#2.7.1 - 1/9/2004 - MAP - fixed bug handling &quot; and &apos;.  fixed memory
  5.2717 +#  leak not closing url opener (JohnD); added dc:publisher support (MarekK);
  5.2718 +#  added admin:errorReportsTo support (MarekK); Python 2.1 dict support (MarekK)
  5.2719 +#2.7.4 - 1/14/2004 - MAP - added workaround for improperly formed <br/> tags in
  5.2720 +#  encoded HTML (skadz); fixed unicode handling in normalize_attrs (ChrisL);
  5.2721 +#  fixed relative URI processing for guid (skadz); added ICBM support; added
  5.2722 +#  base64 support
  5.2723 +#2.7.5 - 1/15/2004 - MAP - added workaround for malformed DOCTYPE (seen on many
  5.2724 +#  blogspot.com sites); added _debug variable
  5.2725 +#2.7.6 - 1/16/2004 - MAP - fixed bug with StringIO importing
  5.2726 +#3.0b3 - 1/23/2004 - MAP - parse entire feed with real XML parser (if available);
  5.2727 +#  added several new supported namespaces; fixed bug tracking naked markup in
  5.2728 +#  description; added support for enclosure; added support for source; re-added
  5.2729 +#  support for cloud which got dropped somehow; added support for expirationDate
  5.2730 +#3.0b4 - 1/26/2004 - MAP - fixed xml:lang inheritance; fixed multiple bugs tracking
  5.2731 +#  xml:base URI, one for documents that don't define one explicitly and one for
  5.2732 +#  documents that define an outer and an inner xml:base that goes out of scope
  5.2733 +#  before the end of the document
  5.2734 +#3.0b5 - 1/26/2004 - MAP - fixed bug parsing multiple links at feed level
  5.2735 +#3.0b6 - 1/27/2004 - MAP - added feed type and version detection, result['version']
  5.2736 +#  will be one of SUPPORTED_VERSIONS.keys() or empty string if unrecognized;
  5.2737 +#  added support for creativeCommons:license and cc:license; added support for
  5.2738 +#  full Atom content model in title, tagline, info, copyright, summary; fixed bug
  5.2739 +#  with gzip encoding (not always telling server we support it when we do)
  5.2740 +#3.0b7 - 1/28/2004 - MAP - support Atom-style author element in author_detail
  5.2741 +#  (dictionary of 'name', 'url', 'email'); map author to author_detail if author
  5.2742 +#  contains name + email address
  5.2743 +#3.0b8 - 1/28/2004 - MAP - added support for contributor
  5.2744 +#3.0b9 - 1/29/2004 - MAP - fixed check for presence of dict function; added
  5.2745 +#  support for summary
  5.2746 +#3.0b10 - 1/31/2004 - MAP - incorporated ISO-8601 date parsing routines from
  5.2747 +#  xml.util.iso8601
  5.2748 +#3.0b11 - 2/2/2004 - MAP - added 'rights' to list of elements that can contain
  5.2749 +#  dangerous markup; fiddled with decodeEntities (not right); liberalized
  5.2750 +#  date parsing even further
  5.2751 +#3.0b12 - 2/6/2004 - MAP - fiddled with decodeEntities (still not right);
  5.2752 +#  added support to Atom 0.2 subtitle; added support for Atom content model
  5.2753 +#  in copyright; better sanitizing of dangerous HTML elements with end tags
  5.2754 +#  (script, frameset)
  5.2755 +#3.0b13 - 2/8/2004 - MAP - better handling of empty HTML tags (br, hr, img,
  5.2756 +#  etc.) in embedded markup, in either HTML or XHTML form (<br>, <br/>, <br />)
  5.2757 +#3.0b14 - 2/8/2004 - MAP - fixed CDATA handling in non-wellformed feeds under
  5.2758 +#  Python 2.1
  5.2759 +#3.0b15 - 2/11/2004 - MAP - fixed bug resolving relative links in wfw:commentRSS;
  5.2760 +#  fixed bug capturing author and contributor URL; fixed bug resolving relative
  5.2761 +#  links in author and contributor URL; fixed bug resolving relative links in
  5.2762 +#  generator URL; added support for recognizing RSS 1.0; passed Simon Fell's
  5.2763 +#  namespace tests, and included them permanently in the test suite with his
  5.2764 +#  permission; fixed namespace handling under Python 2.1
  5.2765 +#3.0b16 - 2/12/2004 - MAP - fixed support for RSS 0.90 (broken in b15)
  5.2766 +#3.0b17 - 2/13/2004 - MAP - determine character encoding as per RFC 3023
  5.2767 +#3.0b18 - 2/17/2004 - MAP - always map description to summary_detail (Andrei);
  5.2768 +#  use libxml2 (if available)
  5.2769 +#3.0b19 - 3/15/2004 - MAP - fixed bug exploding author information when author
  5.2770 +#  name was in parentheses; removed ultra-problematic mxTidy support; patch to
  5.2771 +#  workaround crash in PyXML/expat when encountering invalid entities
  5.2772 +#  (MarkMoraes); support for textinput/textInput
  5.2773 +#3.0b20 - 4/7/2004 - MAP - added CDF support
  5.2774 +#3.0b21 - 4/14/2004 - MAP - added Hot RSS support
  5.2775 +#3.0b22 - 4/19/2004 - MAP - changed 'channel' to 'feed', 'item' to 'entries' in
  5.2776 +#  results dict; changed results dict to allow getting values with results.key
  5.2777 +#  as well as results[key]; work around embedded illformed HTML with half
  5.2778 +#  a DOCTYPE; work around malformed Content-Type header; if character encoding
  5.2779 +#  is wrong, try several common ones before falling back to regexes (if this
  5.2780 +#  works, bozo_exception is set to CharacterEncodingOverride); fixed character
  5.2781 +#  encoding issues in BaseHTMLProcessor by tracking encoding and converting
  5.2782 +#  from Unicode to raw strings before feeding data to sgmllib.SGMLParser;
  5.2783 +#  convert each value in results to Unicode (if possible), even if using
  5.2784 +#  regex-based parsing
  5.2785 +#3.0b23 - 4/21/2004 - MAP - fixed UnicodeDecodeError for feeds that contain
  5.2786 +#  high-bit characters in attributes in embedded HTML in description (thanks
  5.2787 +#  Thijs van de Vossen); moved guid, date, and date_parsed to mapped keys in
  5.2788 +#  FeedParserDict; tweaked FeedParserDict.has_key to return True if asking
  5.2789 +#  about a mapped key
  5.2790 +#3.0fc1 - 4/23/2004 - MAP - made results.entries[0].links[0] and
  5.2791 +#  results.entries[0].enclosures[0] into FeedParserDict; fixed typo that could
  5.2792 +#  cause the same encoding to be tried twice (even if it failed the first time);
  5.2793 +#  fixed DOCTYPE stripping when DOCTYPE contained entity declarations;
  5.2794 +#  better textinput and image tracking in illformed RSS 1.0 feeds
  5.2795 +#3.0fc2 - 5/10/2004 - MAP - added and passed Sam's amp tests; added and passed
  5.2796 +#  my blink tag tests
  5.2797 +#3.0fc3 - 6/18/2004 - MAP - fixed bug in _changeEncodingDeclaration that
  5.2798 +#  failed to parse utf-16 encoded feeds; made source into a FeedParserDict;
  5.2799 +#  duplicate admin:generatorAgent/@rdf:resource in generator_detail.url;
  5.2800 +#  added support for image; refactored parse() fallback logic to try other
  5.2801 +#  encodings if SAX parsing fails (previously it would only try other encodings
  5.2802 +#  if re-encoding failed); remove unichr madness in normalize_attrs now that
  5.2803 +#  we're properly tracking encoding in and out of BaseHTMLProcessor; set
  5.2804 +#  feed.language from root-level xml:lang; set entry.id from rdf:about;
  5.2805 +#  send Accept header
  5.2806 +#3.0 - 6/21/2004 - MAP - don't try iso-8859-1 (can't distinguish between
  5.2807 +#  iso-8859-1 and windows-1252 anyway, and most incorrectly marked feeds are
  5.2808 +#  windows-1252); fixed regression that could cause the same encoding to be
  5.2809 +#  tried twice (even if it failed the first time)
  5.2810 +#3.0.1 - 6/22/2004 - MAP - default to us-ascii for all text/* content types;
  5.2811 +#  recover from malformed content-type header parameter with no equals sign
  5.2812 +#  ('text/xml; charset:iso-8859-1')
  5.2813 +#3.1 - 6/28/2004 - MAP - added and passed tests for converting HTML entities
  5.2814 +#  to Unicode equivalents in illformed feeds (aaronsw); added and
  5.2815 +#  passed tests for converting character entities to Unicode equivalents
  5.2816 +#  in illformed feeds (aaronsw); test for valid parsers when setting
  5.2817 +#  XML_AVAILABLE; make version and encoding available when server returns
  5.2818 +#  a 304; add handlers parameter to pass arbitrary urllib2 handlers (like
  5.2819 +#  digest auth or proxy support); add code to parse username/password
  5.2820 +#  out of url and send as basic authentication; expose downloading-related
  5.2821 +#  exceptions in bozo_exception (aaronsw); added __contains__ method to
  5.2822 +#  FeedParserDict (aaronsw); added publisher_detail (aaronsw)
  5.2823 +#3.2 - 7/3/2004 - MAP - use cjkcodecs and iconv_codec if available; always
  5.2824 +#  convert feed to UTF-8 before passing to XML parser; completely revamped
  5.2825 +#  logic for determining character encoding and attempting XML parsing
  5.2826 +#  (much faster); increased default timeout to 20 seconds; test for presence
  5.2827 +#  of Location header on redirects; added tests for many alternate character
  5.2828 +#  encodings; support various EBCDIC encodings; support UTF-16BE and
  5.2829 +#  UTF-16LE with or without a BOM; support UTF-8 with a BOM; support
  5.2830 +#  UTF-32BE and UTF-32LE with or without a BOM; fixed crashing bug if no
  5.2831 +#  XML parsers are available; added support for 'Content-encoding: deflate';
  5.2832 +#  send blank 'Accept-encoding: ' header if neither gzip nor zlib modules
  5.2833 +#  are available
  5.2834 +#3.3 - 7/15/2004 - MAP - optimize EBCDIC to ASCII conversion; fix obscure
  5.2835 +#  problem tracking xml:base and xml:lang if element declares it, child
  5.2836 +#  doesn't, first grandchild redeclares it, and second grandchild doesn't;
  5.2837 +#  refactored date parsing; defined public registerDateHandler so callers
  5.2838 +#  can add support for additional date formats at runtime; added support
  5.2839 +#  for OnBlog, Nate, MSSQL, Greek, and Hungarian dates (ytrewq1); added
  5.2840 +#  zopeCompatibilityHack() which turns FeedParserDict into a regular
  5.2841 +#  dictionary, required for Zope compatibility, and also makes command-
  5.2842 +#  line debugging easier because pprint module formats real dictionaries
  5.2843 +#  better than dictionary-like objects; added NonXMLContentType exception,
  5.2844 +#  which is stored in bozo_exception when a feed is served with a non-XML
  5.2845 +#  media type such as 'text/plain'; respect Content-Language as default
  5.2846 +#  language if no xml:lang is present; cloud dict is now FeedParserDict;
  5.2847 +#  generator dict is now FeedParserDict; better tracking of xml:lang,
  5.2848 +#  including support for xml:lang='' to unset the current language;
  5.2849 +#  recognize RSS 1.0 feeds even when RSS 1.0 namespace is not the default
  5.2850 +#  namespace; don't overwrite final status on redirects (scenarios:
  5.2851 +#  redirecting to a URL that returns 304, redirecting to a URL that
  5.2852 +#  redirects to another URL with a different type of redirect); add
  5.2853 +#  support for HTTP 303 redirects
  5.2854 +#4.0 - MAP - support for relative URIs in xml:base attribute; fixed
  5.2855 +#  encoding issue with mxTidy (phopkins); preliminary support for RFC 3229;
  5.2856 +#  support for Atom 1.0; support for iTunes extensions; new 'tags' for
  5.2857 +#  categories/keywords/etc. as array of dict
  5.2858 +#  {'term': term, 'scheme': scheme, 'label': label} to match Atom 1.0
  5.2859 +#  terminology; parse RFC 822-style dates with no time; lots of other
  5.2860 +#  bug fixes
  5.2861 +#4.1 - MAP - removed socket timeout; added support for chardet library
     6.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.2 +++ b/trunk/quahog/plugins/Weather/local/simplejson/__init__.py	Thu Oct 22 10:14:56 2009 -0400
     6.3 @@ -0,0 +1,318 @@
     6.4 +r"""JSON (JavaScript Object Notation) <http://json.org> is a subset of
     6.5 +JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data
     6.6 +interchange format.
     6.7 +
     6.8 +:mod:`simplejson` exposes an API familiar to users of the standard library
     6.9 +:mod:`marshal` and :mod:`pickle` modules. It is the externally maintained
    6.10 +version of the :mod:`json` library contained in Python 2.6, but maintains
    6.11 +compatibility with Python 2.4 and Python 2.5 and (currently) has
    6.12 +significant performance advantages, even without using the optional C
    6.13 +extension for speedups.
    6.14 +
    6.15 +Encoding basic Python object hierarchies::
    6.16 +
    6.17 +    >>> import simplejson as json
    6.18 +    >>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}])
    6.19 +    '["foo", {"bar": ["baz", null, 1.0, 2]}]'
    6.20 +    >>> print json.dumps("\"foo\bar")
    6.21 +    "\"foo\bar"
    6.22 +    >>> print json.dumps(u'\u1234')
    6.23 +    "\u1234"
    6.24 +    >>> print json.dumps('\\')
    6.25 +    "\\"
    6.26 +    >>> print json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True)
    6.27 +    {"a": 0, "b": 0, "c": 0}
    6.28 +    >>> from StringIO import StringIO
    6.29 +    >>> io = StringIO()
    6.30 +    >>> json.dump(['streaming API'], io)
    6.31 +    >>> io.getvalue()
    6.32 +    '["streaming API"]'
    6.33 +
    6.34 +Compact encoding::
    6.35 +
    6.36 +    >>> import simplejson as json
    6.37 +    >>> json.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',',':'))
    6.38 +    '[1,2,3,{"4":5,"6":7}]'
    6.39 +
    6.40 +Pretty printing::
    6.41 +
    6.42 +    >>> import simplejson as json
    6.43 +    >>> s = json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=4)
    6.44 +    >>> print '\n'.join([l.rstrip() for l in  s.splitlines()])
    6.45 +    {
    6.46 +        "4": 5,
    6.47 +        "6": 7
    6.48 +    }
    6.49 +
    6.50 +Decoding JSON::
    6.51 +
    6.52 +    >>> import simplejson as json
    6.53 +    >>> obj = [u'foo', {u'bar': [u'baz', None, 1.0, 2]}]
    6.54 +    >>> json.loads('["foo", {"bar":["baz", null, 1.0, 2]}]') == obj
    6.55 +    True
    6.56 +    >>> json.loads('"\\"foo\\bar"') == u'"foo\x08ar'
    6.57 +    True
    6.58 +    >>> from StringIO import StringIO
    6.59 +    >>> io = StringIO('["streaming API"]')
    6.60 +    >>> json.load(io)[0] == 'streaming API'
    6.61 +    True
    6.62 +
    6.63 +Specializing JSON object decoding::
    6.64 +
    6.65 +    >>> import simplejson as json
    6.66 +    >>> def as_complex(dct):
    6.67 +    ...     if '__complex__' in dct:
    6.68 +    ...         return complex(dct['real'], dct['imag'])
    6.69 +    ...     return dct
    6.70 +    ...
    6.71 +    >>> json.loads('{"__complex__": true, "real": 1, "imag": 2}',
    6.72 +    ...     object_hook=as_complex)
    6.73 +    (1+2j)
    6.74 +    >>> import decimal
    6.75 +    >>> json.loads('1.1', parse_float=decimal.Decimal) == decimal.Decimal('1.1')
    6.76 +    True
    6.77 +
    6.78 +Specializing JSON object encoding::
    6.79 +
    6.80 +    >>> import simplejson as json
    6.81 +    >>> def encode_complex(obj):
    6.82 +    ...     if isinstance(obj, complex):
    6.83 +    ...         return [obj.real, obj.imag]
    6.84 +    ...     raise TypeError(repr(obj) + " is not JSON serializable")
    6.85 +    ...
    6.86 +    >>> json.dumps(2 + 1j, default=encode_complex)
    6.87 +    '[2.0, 1.0]'
    6.88 +    >>> json.JSONEncoder(default=encode_complex).encode(2 + 1j)
    6.89 +    '[2.0, 1.0]'
    6.90 +    >>> ''.join(json.JSONEncoder(default=encode_complex).iterencode(2 + 1j))
    6.91 +    '[2.0, 1.0]'
    6.92 +
    6.93 +
    6.94 +Using simplejson.tool from the shell to validate and pretty-print::
    6.95 +
    6.96 +    $ echo '{"json":"obj"}' | python -m simplejson.tool
    6.97 +    {
    6.98 +        "json": "obj"
    6.99 +    }
   6.100 +    $ echo '{ 1.2:3.4}' | python -m simplejson.tool
   6.101 +    Expecting property name: line 1 column 2 (char 2)
   6.102 +"""
   6.103 +__version__ = '2.0.9'
   6.104 +__all__ = [
   6.105 +    'dump', 'dumps', 'load', 'loads',
   6.106 +    'JSONDecoder', 'JSONEncoder',
   6.107 +]
   6.108 +
   6.109 +__author__ = 'Bob Ippolito <bob@redivi.com>'
   6.110 +
   6.111 +from decoder import JSONDecoder
   6.112 +from encoder import JSONEncoder
   6.113 +
   6.114 +_default_encoder = JSONEncoder(
   6.115 +    skipkeys=False,
   6.116 +    ensure_ascii=True,
   6.117 +    check_circular=True,
   6.118 +    allow_nan=True,
   6.119 +    indent=None,
   6.120 +    separators=None,
   6.121 +    encoding='utf-8',
   6.122 +    default=None,
   6.123 +)
   6.124 +
   6.125 +def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
   6.126 +        allow_nan=True, cls=None, indent=None, separators=None,
   6.127 +        encoding='utf-8', default=None, **kw):
   6.128 +    """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a
   6.129 +    ``.write()``-supporting file-like object).
   6.130 +
   6.131 +    If ``skipkeys`` is true then ``dict`` keys that are not basic types
   6.132 +    (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``)
   6.133 +    will be skipped instead of raising a ``TypeError``.
   6.134 +
   6.135 +    If ``ensure_ascii`` is false, then some chunks written to ``fp``
   6.136 +    may be ``unicode`` instances, subject to normal Python ``str`` to
   6.137 +    ``unicode`` coercion rules. Unless ``fp.write()`` explicitly
   6.138 +    understands ``unicode`` (as in ``codecs.getwriter()``) this is likely
   6.139 +    to cause an error.
   6.140 +
   6.141 +    If ``check_circular`` is false, then the circular reference check
   6.142 +    for container types will be skipped and a circular reference will
   6.143 +    result in an ``OverflowError`` (or worse).
   6.144 +
   6.145 +    If ``allow_nan`` is false, then it will be a ``ValueError`` to
   6.146 +    serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``)
   6.147 +    in strict compliance of the JSON specification, instead of using the
   6.148 +    JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).
   6.149 +
   6.150 +    If ``indent`` is a non-negative integer, then JSON array elements and object
   6.151 +    members will be pretty-printed with that indent level. An indent level
   6.152 +    of 0 will only insert newlines. ``None`` is the most compact representation.
   6.153 +
   6.154 +    If ``separators`` is an ``(item_separator, dict_separator)`` tuple
   6.155 +    then it will be used instead of the default ``(', ', ': ')`` separators.
   6.156 +    ``(',', ':')`` is the most compact JSON representation.
   6.157 +
   6.158 +    ``encoding`` is the character encoding for str instances, default is UTF-8.
   6.159 +
   6.160 +    ``default(obj)`` is a function that should return a serializable version
   6.161 +    of obj or raise TypeError. The default simply raises TypeError.
   6.162 +
   6.163 +    To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
   6.164 +    ``.default()`` method to serialize additional types), specify it with
   6.165 +    the ``cls`` kwarg.
   6.166 +
   6.167 +    """
   6.168 +    # cached encoder
   6.169 +    if (not skipkeys and ensure_ascii and
   6.170 +        check_circular and allow_nan and
   6.171 +        cls is None and indent is None and separators is None and
   6.172 +        encoding == 'utf-8' and default is None and not kw):
   6.173 +        iterable = _default_encoder.iterencode(obj)
   6.174 +    else:
   6.175 +        if cls is None:
   6.176 +            cls = JSONEncoder
   6.177 +        iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii,
   6.178 +            check_circular=check_circular, allow_nan=allow_nan, indent=indent,
   6.179 +            separators=separators, encoding=encoding,
   6.180 +            default=default, **kw).iterencode(obj)
   6.181 +    # could accelerate with writelines in some versions of Python, at
   6.182 +    # a debuggability cost
   6.183 +    for chunk in iterable:
   6.184 +        fp.write(chunk)
   6.185 +
   6.186 +
   6.187 +def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
   6.188 +        allow_nan=True, cls=None, indent=None, separators=None,
   6.189 +        encoding='utf-8', default=None, **kw):
   6.190 +    """Serialize ``obj`` to a JSON formatted ``str``.
   6.191 +
   6.192 +    If ``skipkeys`` is true then ``dict`` keys that are not basic types
   6.193 +    (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``)
   6.194 +    will be skipped instead of raising a ``TypeError``.
   6.195 +
   6.196 +    If ``ensure_ascii`` is false, then the return value will be a
   6.197 +    ``unicode`` instance subject to normal Python ``str`` to ``unicode``
   6.198 +    coercion rules instead of being escaped to an ASCII ``str``.
   6.199 +
   6.200 +    If ``check_circular`` is false, then the circular reference check
   6.201 +    for container types will be skipped and a circular reference will
   6.202 +    result in an ``OverflowError`` (or worse).
   6.203 +
   6.204 +    If ``allow_nan`` is false, then it will be a ``ValueError`` to
   6.205 +    serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in
   6.206 +    strict compliance of the JSON specification, instead of using the
   6.207 +    JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).
   6.208 +
   6.209 +    If ``indent`` is a non-negative integer, then JSON array elements and
   6.210 +    object members will be pretty-printed with that indent level. An indent
   6.211 +    level of 0 will only insert newlines. ``None`` is the most compact
   6.212 +    representation.
   6.213 +
   6.214 +    If ``separators`` is an ``(item_separator, dict_separator)`` tuple
   6.215 +    then it will be used instead of the default ``(', ', ': ')`` separators.
   6.216 +    ``(',', ':')`` is the most compact JSON representation.
   6.217 +
   6.218 +    ``encoding`` is the character encoding for str instances, default is UTF-8.
   6.219 +
   6.220 +    ``default(obj)`` is a function that should return a serializable version
   6.221 +    of obj or raise TypeError. The default simply raises TypeError.
   6.222 +
   6.223 +    To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
   6.224 +    ``.default()`` method to serialize additional types), specify it with
   6.225 +    the ``cls`` kwarg.
   6.226 +
   6.227 +    """
   6.228 +    # cached encoder
   6.229 +    if (not skipkeys and ensure_ascii and
   6.230 +        check_circular and allow_nan and
   6.231 +        cls is None and indent is None and separators is None and
   6.232 +        encoding == 'utf-8' and default is None and not kw):
   6.233 +        return _default_encoder.encode(obj)
   6.234 +    if cls is None:
   6.235 +        cls = JSONEncoder
   6.236 +    return cls(
   6.237 +        skipkeys=skipkeys, ensure_ascii=ensure_ascii,
   6.238 +        check_circular=check_circular, allow_nan=allow_nan, indent=indent,
   6.239 +        separators=separators, encoding=encoding, default=default,
   6.240 +        **kw).encode(obj)
   6.241 +
   6.242 +
   6.243 +_default_decoder = JSONDecoder(encoding=None, object_hook=None)
   6.244 +
   6.245 +
   6.246 +def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None,
   6.247 +        parse_int=None, parse_constant=None, **kw):
   6.248 +    """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing
   6.249 +    a JSON document) to a Python object.
   6.250 +
   6.251 +    If the contents of ``fp`` are encoded with an ASCII based encoding other
   6.252 +    than utf-8 (e.g. latin-1), then an appropriate ``encoding`` name must
   6.253 +    be specified. Encodings that are not ASCII based (such as UCS-2) are
   6.254 +    not allowed, and should be wrapped with
   6.255 +    ``codecs.getreader(encoding)(fp)``, or simply decoded to a ``unicode``
   6.256 +    object and passed to ``loads()``.
   6.257 +
   6.258 +    ``object_hook`` is an optional function that will be called with the
   6.259 +    result of any object literal decode (a ``dict``). The return value of
   6.260 +    ``object_hook`` will be used instead of the ``dict``. This feature
   6.261 +    can be used to implement custom decoders (e.g. JSON-RPC class hinting).
   6.262 +
   6.263 +    To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
   6.264 +    kwarg. All other arguments are forwarded unchanged to ``loads()``.
   6.265 +
   6.266 +    """
   6.267 +    return loads(fp.read(),
   6.268 +        encoding=encoding, cls=cls, object_hook=object_hook,
   6.269 +        parse_float=parse_float, parse_int=parse_int,
   6.270 +        parse_constant=parse_constant, **kw)
   6.271 +
   6.272 +
   6.273 +def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
   6.274 +        parse_int=None, parse_constant=None, **kw):
   6.275 +    """Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON
   6.276 +    document) to a Python object.
   6.277 +
   6.278 +    If ``s`` is a ``str`` instance and is encoded with an ASCII based encoding
   6.279 +    other than utf-8 (e.g. latin-1) then an appropriate ``encoding`` name
   6.280 +    must be specified. Encodings that are not ASCII based (such as UCS-2)
   6.281 +    are not allowed and should be decoded to ``unicode`` first.
   6.282 +
   6.283 +    ``object_hook`` is an optional function that will be called with the
   6.284 +    result of any object literal decode (a ``dict``). The return value of
   6.285 +    ``object_hook`` will be used instead of the ``dict``. This feature
   6.286 +    can be used to implement custom decoders (e.g. JSON-RPC class hinting).
   6.287 +
   6.288 +    ``parse_float``, if specified, will be called with the string
   6.289 +    of every JSON float to be decoded. By default this is equivalent to
   6.290 +    float(num_str). This can be used to use another datatype or parser
   6.291 +    for JSON floats (e.g. decimal.Decimal).
   6.292 +
   6.293 +    ``parse_int``, if specified, will be called with the string
   6.294 +    of every JSON int to be decoded. By default this is equivalent to
   6.295 +    int(num_str). This can be used to use another datatype or parser
   6.296 +    for JSON integers (e.g. float).
   6.297 +
   6.298 +    ``parse_constant``, if specified, will be called with one of the
   6.299 +    following strings: -Infinity, Infinity, NaN.
   6.300 +    This can be used to raise an exception if invalid JSON numbers
   6.301 +    are encountered.
   6.302 +
   6.303 +    To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
   6.304 +    kwarg.
   6.305 +
   6.306 +    """
   6.307 +    if (cls is None and encoding is None and object_hook is None and
   6.308 +            parse_int is None and parse_float is None and
   6.309 +            parse_constant is None and not kw):
   6.310 +        return _default_decoder.decode(s)  # all defaults: reuse the module-level decoder
   6.311 +    if cls is None:
   6.312 +        cls = JSONDecoder
   6.313 +    if object_hook is not None:  # only explicitly given options are passed to cls
   6.314 +        kw['object_hook'] = object_hook
   6.315 +    if parse_float is not None:
   6.316 +        kw['parse_float'] = parse_float
   6.317 +    if parse_int is not None:
   6.318 +        kw['parse_int'] = parse_int
   6.319 +    if parse_constant is not None:
   6.320 +        kw['parse_constant'] = parse_constant
   6.321 +    return cls(encoding=encoding, **kw).decode(s)
     7.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.2 +++ b/trunk/quahog/plugins/Weather/local/simplejson/decoder.py	Thu Oct 22 10:14:56 2009 -0400
     7.3 @@ -0,0 +1,354 @@
     7.4 +"""Implementation of JSONDecoder
     7.5 +"""
     7.6 +import re
     7.7 +import sys
     7.8 +import struct
     7.9 +
    7.10 +from scanner import make_scanner
    7.11 +try:
    7.12 +    from _speedups import scanstring as c_scanstring
    7.13 +except ImportError:
    7.14 +    c_scanstring = None
    7.15 +
    7.16 +__all__ = ['JSONDecoder']
    7.17 +
    7.18 +FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
    7.19 +
    7.20 +def _floatconstants():  # returns (nan, inf, -inf) unpacked from raw 8-byte patterns
    7.21 +    _BYTES = '7FF80000000000007FF0000000000000'.decode('hex')  # two 8-byte values, big-endian order
    7.22 +    if sys.byteorder != 'big':
    7.23 +        _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1]  # reverse each 8-byte half for this host
    7.24 +    nan, inf = struct.unpack('dd', _BYTES)
    7.25 +    return nan, inf, -inf
    7.26 +
    7.27 +NaN, PosInf, NegInf = _floatconstants()
    7.28 +
    7.29 +
    7.30 +def linecol(doc, pos):
    7.31 +    lineno = doc.count('\n', 0, pos) + 1
    7.32 +    if lineno == 1:
    7.33 +        colno = pos
    7.34 +    else:
    7.35 +        colno = pos - doc.rindex('\n', 0, pos)
    7.36 +    return lineno, colno
    7.37 +
    7.38 +
    7.39 +def errmsg(msg, doc, pos, end=None):
    7.40 +    # Note that this function is called from _speedups
    7.41 +    lineno, colno = linecol(doc, pos)
    7.42 +    if end is None:
    7.43 +        # Only a start offset was given: report a single location, as
    7.44 +        # "<msg>: line <n> column <n> (char <pos>)".
    7.45 +        fmt = '%s: line %d column %d (char %d)'
    7.46 +        return fmt % (msg, lineno, colno, pos)
    7.47 +    endlineno, endcolno = linecol(doc, end)
    7.48 +    # A range was given: report both ends, as
    7.49 +    # "<msg>: line <n> column <n> - line <n> column <n> (char <pos> - <end>)".
    7.50 +    fmt = '%s: line %d column %d - line %d column %d (char %d - %d)'
    7.51 +    return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end)
    7.52 +
    7.53 +
    7.54 +_CONSTANTS = {  # non-standard JSON number literals -> float values
    7.55 +    '-Infinity': NegInf,
    7.56 +    'Infinity': PosInf,
    7.57 +    'NaN': NaN,
    7.58 +}
    7.59 +
    7.60 +STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)  # (plain run, terminator: quote, backslash or control char)
    7.61 +BACKSLASH = {  # character after a backslash -> the character it stands for
    7.62 +    '"': u'"', '\\': u'\\', '/': u'/',
    7.63 +    'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
    7.64 +}
    7.65 +
    7.66 +DEFAULT_ENCODING = "utf-8"  # used by py_scanstring when encoding is None
    7.67 +
    7.68 +def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match):
    7.69 +    """Scan the string s for a JSON string. End is the index of the
    7.70 +    character in s after the quote that started the JSON string.
    7.71 +    Unescapes all valid JSON string escape sequences and raises ValueError
    7.72 +    on attempt to decode an invalid string. If strict is False then literal
    7.73 +    control characters are allowed in the string.
    7.74 +    
    7.75 +    Returns a tuple of the decoded string and the index of the character in s
    7.76 +    after the end quote."""
    7.77 +    if encoding is None:
    7.78 +        encoding = DEFAULT_ENCODING
    7.79 +    chunks = []
    7.80 +    _append = chunks.append
    7.81 +    begin = end - 1
    7.82 +    while 1:
    7.83 +        chunk = _m(s, end)
    7.84 +        if chunk is None:
    7.85 +            raise ValueError(
    7.86 +                errmsg("Unterminated string starting at", s, begin))
    7.87 +        end = chunk.end()
    7.88 +        content, terminator = chunk.groups()
    7.89 +        # Content is contains zero or more unescaped string characters
    7.90 +        if content:
    7.91 +            if not isinstance(content, unicode):
    7.92 +                content = unicode(content, encoding)
    7.93 +            _append(content)
    7.94 +        # Terminator is the end of string, a literal control character,
    7.95 +        # or a backslash denoting that an escape sequence follows
    7.96 +        if terminator == '"':
    7.97 +            break
    7.98 +        elif terminator != '\\':
    7.99 +            if strict:
   7.100 +                msg = "Invalid control character %r at" % (terminator,)
   7.101 +                #msg = "Invalid control character {0!r} at".format(terminator)
   7.102 +                raise ValueError(errmsg(msg, s, end))
   7.103 +            else:
   7.104 +                _append(terminator)
   7.105 +                continue
   7.106 +        try:
   7.107 +            esc = s[end]
   7.108 +        except IndexError:
   7.109 +            raise ValueError(
   7.110 +                errmsg("Unterminated string starting at", s, begin))
   7.111 +        # If not a unicode escape sequence, must be in the lookup table
   7.112 +        if esc != 'u':
   7.113 +            try:
   7.114 +                char = _b[esc]
   7.115 +            except KeyError:
   7.116 +                msg = "Invalid \\escape: " + repr(esc)
   7.117 +                raise ValueError(errmsg(msg, s, end))
   7.118 +            end += 1
   7.119 +        else:
   7.120 +            # Unicode escape sequence
   7.121 +            esc = s[end + 1:end + 5]
   7.122 +            next_end = end + 5
   7.123 +            if len(esc) != 4:
   7.124 +                msg = "Invalid \\uXXXX escape"
   7.125 +                raise ValueError(errmsg(msg, s, end))
   7.126 +            uni = int(esc, 16)
   7.127 +            # Check for surrogate pair on UCS-4 systems
   7.128 +            if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
   7.129 +                msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
   7.130 +                if not s[end + 5:end + 7] == '\\u':
   7.131 +                    raise ValueError(errmsg(msg, s, end))
   7.132 +                esc2 = s[end + 7:end + 11]
   7.133 +                if len(esc2) != 4:
   7.134 +                    raise ValueError(errmsg(msg, s, end))
   7.135 +                uni2 = int(esc2, 16)
   7.136 +                uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
   7.137 +                next_end += 6
   7.138 +            char = unichr(uni)
   7.139 +            end = next_end
   7.140 +        # Append the unescaped character
   7.141 +        _append(char)
   7.142 +    return u''.join(chunks), end
   7.143 +
   7.144 +
   7.145 +# Use speedup if available
   7.146 +scanstring = c_scanstring or py_scanstring
   7.147 +
   7.148 +WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
   7.149 +WHITESPACE_STR = ' \t\n\r'
   7.150 +
   7.151 +def JSONObject((s, end), encoding, strict, scan_once, object_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
   7.152 +    pairs = {}
   7.153 +    # Use a slice to prevent IndexError from being raised, the following
   7.154 +    # check will raise a more specific ValueError if the string is empty
   7.155 +    nextchar = s[end:end + 1]
   7.156 +    # Normally we expect nextchar == '"'
   7.157 +    if nextchar != '"':
   7.158 +        if nextchar in _ws:
   7.159 +            end = _w(s, end).end()
   7.160 +            nextchar = s[end:end + 1]
   7.161 +        # Trivial empty object
   7.162 +        if nextchar == '}':
   7.163 +            return pairs, end + 1
   7.164 +        elif nextchar != '"':
   7.165 +            raise ValueError(errmsg("Expecting property name", s, end))
   7.166 +    end += 1
   7.167 +    while True:
   7.168 +        key, end = scanstring(s, end, encoding, strict)
   7.169 +
   7.170 +        # To skip some function call overhead we optimize the fast paths where
   7.171 +        # the JSON key separator is ": " or just ":".
   7.172 +        if s[end:end + 1] != ':':
   7.173 +            end = _w(s, end).end()
   7.174 +            if s[end:end + 1] != ':':
   7.175 +                raise ValueError(errmsg("Expecting : delimiter", s, end))
   7.176 +
   7.177 +        end += 1
   7.178 +
   7.179 +        try:
   7.180 +            if s[end] in _ws:
   7.181 +                end += 1
   7.182 +                if s[end] in _ws:
   7.183 +                    end = _w(s, end + 1).end()
   7.184 +        except IndexError:
   7.185 +            pass
   7.186 +
   7.187 +        try:
   7.188 +            value, end = scan_once(s, end)
   7.189 +        except StopIteration:
   7.190 +            raise ValueError(errmsg("Expecting object", s, end))
   7.191 +        pairs[key] = value
   7.192 +
   7.193 +        try:
   7.194 +            nextchar = s[end]
   7.195 +            if nextchar in _ws:
   7.196 +                end = _w(s, end + 1).end()
   7.197 +                nextchar = s[end]
   7.198 +        except IndexError:
   7.199 +            nextchar = ''
   7.200 +        end += 1
   7.201 +
   7.202 +        if nextchar == '}':
   7.203 +            break
   7.204 +        elif nextchar != ',':
   7.205 +            raise ValueError(errmsg("Expecting , delimiter", s, end - 1))
   7.206 +
   7.207 +        try:
   7.208 +            nextchar = s[end]
   7.209 +            if nextchar in _ws:
   7.210 +                end += 1
   7.211 +                nextchar = s[end]
   7.212 +                if nextchar in _ws:
   7.213 +                    end = _w(s, end + 1).end()
   7.214 +                    nextchar = s[end]
   7.215 +        except IndexError:
   7.216 +            nextchar = ''
   7.217 +
   7.218 +        end += 1
   7.219 +        if nextchar != '"':
   7.220 +            raise ValueError(errmsg("Expecting property name", s, end - 1))
   7.221 +
   7.222 +    if object_hook is not None:
   7.223 +        pairs = object_hook(pairs)
   7.224 +    return pairs, end
   7.225 +
   7.226 +def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
   7.227 +    values = []
   7.228 +    nextchar = s[end:end + 1]
   7.229 +    if nextchar in _ws:
   7.230 +        end = _w(s, end + 1).end()
   7.231 +        nextchar = s[end:end + 1]
   7.232 +    # Look-ahead for trivial empty array
   7.233 +    if nextchar == ']':
   7.234 +        return values, end + 1
   7.235 +    _append = values.append
   7.236 +    while True:
   7.237 +        try:
   7.238 +            value, end = scan_once(s, end)
   7.239 +        except StopIteration:
   7.240 +            raise ValueError(errmsg("Expecting object", s, end))
   7.241 +        _append(value)
   7.242 +        nextchar = s[end:end + 1]
   7.243 +        if nextchar in _ws:
   7.244 +            end = _w(s, end + 1).end()
   7.245 +            nextchar = s[end:end + 1]
   7.246 +        end += 1
   7.247 +        if nextchar == ']':
   7.248 +            break
   7.249 +        elif nextchar != ',':
   7.250 +            raise ValueError(errmsg("Expecting , delimiter", s, end))
   7.251 +
   7.252 +        try:
   7.253 +            if s[end] in _ws:
   7.254 +                end += 1
   7.255 +                if s[end] in _ws:
   7.256 +                    end = _w(s, end + 1).end()
   7.257 +        except IndexError:
   7.258 +            pass
   7.259 +
   7.260 +    return values, end
   7.261 +
   7.262 +class JSONDecoder(object):
   7.263 +    """Simple JSON <http://json.org> decoder
   7.264 +
   7.265 +    Performs the following translations in decoding by default:
   7.266 +
   7.267 +    +---------------+-------------------+
   7.268 +    | JSON          | Python            |
   7.269 +    +===============+===================+
   7.270 +    | object        | dict              |
   7.271 +    +---------------+-------------------+
   7.272 +    | array         | list              |
   7.273 +    +---------------+-------------------+
   7.274 +    | string        | unicode           |
   7.275 +    +---------------+-------------------+
   7.276 +    | number (int)  | int, long         |
   7.277 +    +---------------+-------------------+
   7.278 +    | number (real) | float             |
   7.279 +    +---------------+-------------------+
   7.280 +    | true          | True              |
   7.281 +    +---------------+-------------------+
   7.282 +    | false         | False             |
   7.283 +    +---------------+-------------------+
   7.284 +    | null          | None              |
   7.285 +    +---------------+-------------------+
   7.286 +
   7.287 +    It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
   7.288 +    their corresponding ``float`` values, which is outside the JSON spec.
   7.289 +
   7.290 +    """
   7.291 +
   7.292 +    def __init__(self, encoding=None, object_hook=None, parse_float=None,
   7.293 +            parse_int=None, parse_constant=None, strict=True):
   7.294 +        """``encoding`` determines the encoding used to interpret any ``str``
   7.295 +        objects decoded by this instance (utf-8 by default).  It has no
   7.296 +        effect when decoding ``unicode`` objects.
   7.297 +
   7.298 +        Note that currently only encodings that are a superset of ASCII work,
   7.299 +        strings of other encodings should be passed in as ``unicode``.
   7.300 +
   7.301 +        ``object_hook``, if specified, will be called with the result
   7.302 +        of every JSON object decoded and its return value will be used in
   7.303 +        place of the given ``dict``.  This can be used to provide custom
   7.304 +        deserializations (e.g. to support JSON-RPC class hinting).
   7.305 +
   7.306 +        ``parse_float``, if specified, will be called with the string
   7.307 +        of every JSON float to be decoded. By default this is equivalent to
   7.308 +        float(num_str). This can be used to use another datatype or parser
   7.309 +        for JSON floats (e.g. decimal.Decimal).
   7.310 +
   7.311 +        ``parse_int``, if specified, will be called with the string
   7.312 +        of every JSON int to be decoded. By default this is equivalent to
   7.313 +        int(num_str). This can be used to use another datatype or parser
   7.314 +        for JSON integers (e.g. float).
   7.315 +
   7.316 +        ``parse_constant``, if specified, will be called with one of the
   7.317 +        following strings: -Infinity, Infinity, NaN. This can be used to
   7.318 +        raise an exception if invalid JSON numbers are encountered.
   7.319 +
   7.320 +        If ``strict`` is false, control characters are allowed inside strings.
   7.321 +        """
   7.322 +        self.encoding = encoding
   7.323 +        self.object_hook = object_hook
   7.324 +        self.parse_float = parse_float or float  # any falsy value selects the builtin
   7.325 +        self.parse_int = parse_int or int
   7.326 +        self.parse_constant = parse_constant or _CONSTANTS.__getitem__
   7.327 +        self.strict = strict
   7.328 +        self.parse_object = JSONObject
   7.329 +        self.parse_array = JSONArray
   7.330 +        self.parse_string = scanstring
   7.331 +        self.scan_once = make_scanner(self)  # built last; presumably reads the attributes set above (see scanner.py)
   7.332 +
   7.333 +    def decode(self, s, _w=WHITESPACE.match):
   7.334 +        """Return the Python representation of ``s`` (a ``str`` or ``unicode``
   7.335 +        instance containing a JSON document).
   7.336 +
   7.337 +        """
   7.338 +        obj, end = self.raw_decode(s, idx=_w(s, 0).end())  # skip leading whitespace
   7.339 +        end = _w(s, end).end()  # skip trailing whitespace
   7.340 +        if end != len(s):
   7.341 +            raise ValueError(errmsg("Extra data", s, end, len(s)))
   7.342 +        return obj
   7.343 +
   7.344 +    def raw_decode(self, s, idx=0):
   7.345 +        """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning
   7.346 +        with a JSON document) and return a 2-tuple of the Python
   7.347 +        representation and the index in ``s`` where the document ended.
   7.348 +
   7.349 +        This can be used to decode a JSON document from a string that may
   7.350 +        have extraneous data at the end.
   7.351 +
   7.352 +        """
   7.353 +        try:
   7.354 +            obj, end = self.scan_once(s, idx)
   7.355 +        except StopIteration:  # translated into the ValueError callers expect
   7.356 +            raise ValueError("No JSON object could be decoded")
   7.357 +        return obj, end
     8.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.2 +++ b/trunk/quahog/plugins/Weather/local/simplejson/encoder.py	Thu Oct 22 10:14:56 2009 -0400
     8.3 @@ -0,0 +1,440 @@
     8.4 +"""Implementation of JSONEncoder
     8.5 +"""
     8.6 +import re
     8.7 +
     8.8 +try:
     8.9 +    from _speedups import encode_basestring_ascii as c_encode_basestring_ascii
    8.10 +except ImportError:
    8.11 +    c_encode_basestring_ascii = None
    8.12 +try:
    8.13 +    from _speedups import make_encoder as c_make_encoder
    8.14 +except ImportError:
    8.15 +    c_make_encoder = None
    8.16 +
    8.17 +ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')  # control chars, backslash and double quote
    8.18 +ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')  # backslash, quote, or anything outside space..tilde
    8.19 +HAS_UTF8 = re.compile(r'[\x80-\xff]')  # any character in the 0x80-0xff range
    8.20 +ESCAPE_DCT = {
    8.21 +    '\\': '\\\\',
    8.22 +    '"': '\\"',
    8.23 +    '\b': '\\b',
    8.24 +    '\f': '\\f',
    8.25 +    '\n': '\\n',
    8.26 +    '\r': '\\r',
    8.27 +    '\t': '\\t',
    8.28 +}
    8.29 +for i in range(0x20):
    8.30 +    # Control characters without a short escape above get a \uXXXX escape
    8.31 +    ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
    8.32 +
    8.33 +# Assume this produces an infinity on all machines (probably not guaranteed)
    8.34 +INFINITY = float('1e66666')
    8.35 +FLOAT_REPR = repr
    8.36 +
    8.37 +def encode_basestring(s):
    8.38 +    """Return ``s`` as a double-quoted JSON string, replacing each character
    8.39 +    matched by ``ESCAPE`` with its ``ESCAPE_DCT`` escape sequence.
    8.40 +    """
    8.41 +    def replace(match):
    8.42 +        return ESCAPE_DCT[match.group(0)]
    8.43 +    return '"' + ESCAPE.sub(replace, s) + '"'
    8.44 +
    8.45 +
    8.46 +def py_encode_basestring_ascii(s):
    8.47 +    """Return an ASCII-only JSON representation of a Python string; ``str``
    8.48 +    input containing bytes >= 0x80 is decoded as UTF-8 first.
    8.49 +    """
    8.50 +    if isinstance(s, str) and HAS_UTF8.search(s) is not None:
    8.51 +        s = s.decode('utf-8')
    8.52 +    def replace(match):
    8.53 +        s = match.group(0)
    8.54 +        try:
    8.55 +            return ESCAPE_DCT[s]
    8.56 +        except KeyError:
    8.57 +            n = ord(s)
    8.58 +            if n < 0x10000:
    8.59 +                # fits in a single \uXXXX escape
    8.60 +                return '\\u%04x' % (n,)
    8.61 +            else:
    8.62 +                # surrogate pair
    8.63 +                n -= 0x10000
    8.64 +                s1 = 0xd800 | ((n >> 10) & 0x3ff)
    8.65 +                s2 = 0xdc00 | (n & 0x3ff)
    8.66 +                # high surrogate (top 10 bits) then low surrogate (bottom 10 bits)
    8.67 +                return '\\u%04x\\u%04x' % (s1, s2)
    8.68 +    return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
    8.69 +
    8.70 +
    8.71 +encode_basestring_ascii = c_encode_basestring_ascii or py_encode_basestring_ascii
    8.72 +
    8.73 +class JSONEncoder(object):
    8.74 +    """Extensible JSON <http://json.org> encoder for Python data structures.
    8.75 +
    8.76 +    Supports the following objects and types by default:
    8.77 +
    8.78 +    +-------------------+---------------+
    8.79 +    | Python            | JSON          |
    8.80 +    +===================+===============+
    8.81 +    | dict              | object        |
    8.82 +    +-------------------+---------------+
    8.83 +    | list, tuple       | array         |
    8.84 +    +-------------------+---------------+
    8.85 +    | str, unicode      | string        |
    8.86 +    +-------------------+---------------+
    8.87 +    | int, long, float  | number        |
    8.88 +    +-------------------+---------------+
    8.89 +    | True              | true          |
    8.90 +    +-------------------+---------------+
    8.91 +    | False             | false         |
    8.92 +    +-------------------+---------------+
    8.93 +    | None              | null          |
    8.94 +    +-------------------+---------------+
    8.95 +
    8.96 +    To extend this to recognize other objects, subclass and implement a
    8.97 +    ``.default()`` method with another method that returns a serializable
    8.98 +    object for ``o`` if possible, otherwise it should call the superclass
    8.99 +    implementation (to raise ``TypeError``).
   8.100 +
   8.101 +    """
   8.102 +    item_separator = ', '
   8.103 +    key_separator = ': '
   8.104 +    def __init__(self, skipkeys=False, ensure_ascii=True,
   8.105 +            check_circular=True, allow_nan=True, sort_keys=False,
   8.106 +            indent=None, separators=None, encoding='utf-8', default=None):
   8.107 +        """Constructor for JSONEncoder, with sensible defaults.
   8.108 +
   8.109 +        If skipkeys is false, then it is a TypeError to attempt
   8.110 +        encoding of keys that are not str, int, long, float or None.  If
   8.111 +        skipkeys is True, such items are simply skipped.
   8.112 +
   8.113 +        If ensure_ascii is true, the output is guaranteed to be str
   8.114 +        objects with all incoming unicode characters escaped.  If
   8.115 +        ensure_ascii is false, the output will be unicode object.
   8.116 +
   8.117 +        If check_circular is true, then lists, dicts, and custom encoded
   8.118 +        objects will be checked for circular references during encoding to
   8.119 +        prevent an infinite recursion (which would cause an OverflowError).
   8.120 +        Otherwise, no such check takes place.
   8.121 +
   8.122 +        If allow_nan is true, then NaN, Infinity, and -Infinity will be
   8.123 +        encoded as such.  This behavior is not JSON specification compliant,
   8.124 +        but is consistent with most JavaScript based encoders and decoders.
   8.125 +        Otherwise, it will be a ValueError to encode such floats.
   8.126 +
   8.127 +        If sort_keys is true, then the output of dictionaries will be
   8.128 +        sorted by key; this is useful for regression tests to ensure
   8.129 +        that JSON serializations can be compared on a day-to-day basis.
   8.130 +
   8.131 +        If indent is a non-negative integer, then JSON array
   8.132 +        elements and object members will be pretty-printed with that
   8.133 +        indent level.  An indent level of 0 will only insert newlines.
   8.134 +        None is the most compact representation.
   8.135 +
   8.136 +        If specified, separators should be a (item_separator, key_separator)
   8.137 +        tuple.  The default is (', ', ': ').  To get the most compact JSON
   8.138 +        representation you should specify (',', ':') to eliminate whitespace.
   8.139 +
   8.140 +        If specified, default is a function that gets called for objects
   8.141 +        that can't otherwise be serialized.  It should return a JSON encodable
   8.142 +        version of the object or raise a ``TypeError``.
   8.143 +
   8.144 +        If encoding is not None, then all input strings will be
   8.145 +        transformed into unicode using that encoding prior to JSON-encoding.
   8.146 +        The default is UTF-8.
   8.147 +
   8.148 +        """
   8.149 +
   8.150 +        self.skipkeys = skipkeys
   8.151 +        self.ensure_ascii = ensure_ascii
   8.152 +        self.check_circular = check_circular
   8.153 +        self.allow_nan = allow_nan
   8.154 +        self.sort_keys = sort_keys
   8.155 +        self.indent = indent
   8.156 +        if separators is not None:
   8.157 +            self.item_separator, self.key_separator = separators
   8.158 +        if default is not None:
   8.159 +            self.default = default
   8.160 +        self.encoding = encoding
   8.161 +
   8.162 +    def default(self, o):
   8.163 +        """Implement this method in a subclass such that it returns
   8.164 +        a serializable object for ``o``, or calls the base implementation
   8.165 +        (to raise a ``TypeError``).
   8.166 +
   8.167 +        For example, to support arbitrary iterators, you could
   8.168 +        implement default like this::
   8.169 +
   8.170 +            def default(self, o):
   8.171 +                try:
   8.172 +                    iterable = iter(o)
   8.173 +                except TypeError:
   8.174 +                    pass
   8.175 +                else:
   8.176 +                    return list(iterable)
   8.177 +                return JSONEncoder.default(self, o)
   8.178 +
   8.179 +        """
   8.180 +        raise TypeError(repr(o) + " is not JSON serializable")
   8.181 +
   8.182 +    def encode(self, o):
   8.183 +        """Return a JSON string representation of a Python data structure.
   8.184 +
   8.185 +        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
   8.186 +        '{"foo": ["bar", "baz"]}'
   8.187 +
   8.188 +        """
   8.189 +        # This is for extremely simple cases and benchmarks.
   8.190 +        if isinstance(o, basestring):
   8.191 +            if isinstance(o, str):
   8.192 +                _encoding = self.encoding
   8.193 +                if (_encoding is not None
   8.194 +                        and not (_encoding == 'utf-8')):
   8.195 +                    o = o.decode(_encoding)
   8.196 +            if self.ensure_ascii:
   8.197 +                return encode_basestring_ascii(o)
   8.198 +            else:
   8.199 +                return encode_basestring(o)
   8.200 +        # This doesn't pass the iterator directly to ''.join() because the
   8.201 +        # exceptions aren't as detailed.  The list call should be roughly
   8.202 +        # equivalent to the PySequence_Fast that ''.join() would do.
   8.203 +        chunks = self.iterencode(o, _one_shot=True)
   8.204 +        if not isinstance(chunks, (list, tuple)):
   8.205 +            chunks = list(chunks)
   8.206 +        return ''.join(chunks)
   8.207 +
   8.208 +    def iterencode(self, o, _one_shot=False):
   8.209 +        """Encode the given object and yield each string
   8.210 +        representation as available.
   8.211 +
   8.212 +        For example::
   8.213 +
   8.214 +            for chunk in JSONEncoder().iterencode(bigobject):
   8.215 +                mysocket.write(chunk)
   8.216 +
   8.217 +        """
   8.218 +        if self.check_circular:
   8.219 +            markers = {}  # id(container) -> container while it is being encoded
   8.220 +        else:
   8.221 +            markers = None  # None disables the circular-reference check
   8.222 +        if self.ensure_ascii:
   8.223 +            _encoder = encode_basestring_ascii
   8.224 +        else:
   8.225 +            _encoder = encode_basestring
   8.226 +        if self.encoding != 'utf-8':
   8.227 +            def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
   8.228 +                if isinstance(o, str):
   8.229 +                    o = o.decode(_encoding)  # byte string -> unicode before escaping
   8.230 +                return _orig_encoder(o)
   8.231 +
   8.232 +        def floatstr(o, allow_nan=self.allow_nan, _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY):
   8.233 +            # Check for specials.  Note that this type of test is processor- and/or
   8.234 +            # platform-specific, so do tests which don't depend on the internals.
   8.235 +
   8.236 +            if o != o:  # only NaN compares unequal to itself
   8.237 +                text = 'NaN'
   8.238 +            elif o == _inf:
   8.239 +                text = 'Infinity'
   8.240 +            elif o == _neginf:
   8.241 +                text = '-Infinity'
   8.242 +            else:
   8.243 +                return _repr(o)  # ordinary finite float
   8.244 +
   8.245 +            if not allow_nan:  # only NaN/Infinity/-Infinity reach this point
   8.246 +                raise ValueError(
   8.247 +                    "Out of range float values are not JSON compliant: " +
   8.248 +                    repr(o))
   8.249 +
   8.250 +            return text
   8.251 +
   8.252 +        # Use the C encoder only for one-shot calls with no indent and no key sorting; otherwise build the Python generator.
   8.253 +        if _one_shot and c_make_encoder is not None and not self.indent and not self.sort_keys:
   8.254 +            _iterencode = c_make_encoder(
   8.255 +                markers, self.default, _encoder, self.indent,
   8.256 +                self.key_separator, self.item_separator, self.sort_keys,
   8.257 +                self.skipkeys, self.allow_nan)
   8.258 +        else:
   8.259 +            _iterencode = _make_iterencode(
   8.260 +                markers, self.default, _encoder, self.indent, floatstr,
   8.261 +                self.key_separator, self.item_separator, self.sort_keys,
   8.262 +                self.skipkeys, _one_shot)
   8.263 +        return _iterencode(o, 0)  # 0 is the starting indent level
   8.264 +
   8.265 +def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
   8.266 +        ## HACK: hand-optimized bytecode; turn globals into locals
   8.267 +        False=False,
   8.268 +        True=True,
   8.269 +        ValueError=ValueError,
   8.270 +        basestring=basestring,
   8.271 +        dict=dict,
   8.272 +        float=float,
   8.273 +        id=id,
   8.274 +        int=int,
   8.275 +        isinstance=isinstance,
   8.276 +        list=list,
   8.277 +        long=long,
   8.278 +        str=str,
   8.279 +        tuple=tuple,
   8.280 +    ):
   8.281 +    """Return a generator function ``_iterencode(o, indent_level)`` that yields JSON text chunks for ``o``."""
   8.282 +    def _iterencode_list(lst, _current_indent_level):
   8.283 +        if not lst:
   8.284 +            yield '[]'
   8.285 +            return
   8.286 +        if markers is not None:
   8.287 +            markerid = id(lst)
   8.288 +            if markerid in markers:
   8.289 +                raise ValueError("Circular reference detected")
   8.290 +            markers[markerid] = lst  # remembered until this list is fully emitted
   8.291 +        buf = '['  # pending prefix, emitted together with the next item
   8.292 +        if _indent is not None:
   8.293 +            _current_indent_level += 1
   8.294 +            newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
   8.295 +            separator = _item_separator + newline_indent
   8.296 +            buf += newline_indent
   8.297 +        else:
   8.298 +            newline_indent = None
   8.299 +            separator = _item_separator
   8.300 +        first = True
   8.301 +        for value in lst:
   8.302 +            if first:
   8.303 +                first = False
   8.304 +            else:
   8.305 +                buf = separator  # every item after the first is prefixed by the separator
   8.306 +            if isinstance(value, basestring):
   8.307 +                yield buf + _encoder(value)
   8.308 +            elif value is None:
   8.309 +                yield buf + 'null'
   8.310 +            elif value is True:  # bools are tested before ints: bool subclasses int
   8.311 +                yield buf + 'true'
   8.312 +            elif value is False:
   8.313 +                yield buf + 'false'
   8.314 +            elif isinstance(value, (int, long)):
   8.315 +                yield buf + str(value)
   8.316 +            elif isinstance(value, float):
   8.317 +                yield buf + _floatstr(value)
   8.318 +            else:
   8.319 +                yield buf  # flush the pending prefix before delegating to a sub-generator
   8.320 +                if isinstance(value, (list, tuple)):
   8.321 +                    chunks = _iterencode_list(value, _current_indent_level)
   8.322 +                elif isinstance(value, dict):
   8.323 +                    chunks = _iterencode_dict(value, _current_indent_level)
   8.324 +                else:
   8.325 +                    chunks = _iterencode(value, _current_indent_level)
   8.326 +                for chunk in chunks:
   8.327 +                    yield chunk
   8.328 +        if newline_indent is not None:
   8.329 +            _current_indent_level -= 1
   8.330 +            yield '\n' + (' ' * (_indent * _current_indent_level))
   8.331 +        yield ']'
   8.332 +        if markers is not None:
   8.333 +            del markers[markerid]  # finished: the same list may appear again elsewhere
   8.334 +
   8.335 +    def _iterencode_dict(dct, _current_indent_level):
   8.336 +        if not dct:
   8.337 +            yield '{}'
   8.338 +            return
   8.339 +        if markers is not None:
   8.340 +            markerid = id(dct)
   8.341 +            if markerid in markers:
   8.342 +                raise ValueError("Circular reference detected")
   8.343 +            markers[markerid] = dct  # remembered until this dict is fully emitted
   8.344 +        yield '{'
   8.345 +        if _indent is not None:
   8.346 +            _current_indent_level += 1
   8.347 +            newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
   8.348 +            item_separator = _item_separator + newline_indent
   8.349 +            yield newline_indent
   8.350 +        else:
   8.351 +            newline_indent = None
   8.352 +            item_separator = _item_separator
   8.353 +        first = True
   8.354 +        if _sort_keys:
   8.355 +            items = dct.items()
   8.356 +            items.sort(key=lambda kv: kv[0])  # order by the original (unconverted) key
   8.357 +        else:
   8.358 +            items = dct.iteritems()
   8.359 +        for key, value in items:
   8.360 +            if isinstance(key, basestring):
   8.361 +                pass
   8.362 +            # JavaScript is weakly typed for these, so it makes sense to
   8.363 +            # also allow them.  Many encoders seem to do something like this.
   8.364 +            elif isinstance(key, float):
   8.365 +                key = _floatstr(key)
   8.366 +            elif key is True:
   8.367 +                key = 'true'
   8.368 +            elif key is False:
   8.369 +                key = 'false'
   8.370 +            elif key is None:
   8.371 +                key = 'null'
   8.372 +            elif isinstance(key, (int, long)):
   8.373 +                key = str(key)
   8.374 +            elif _skipkeys:
   8.375 +                continue  # silently drop the item when its key type is unsupported
   8.376 +            else:
   8.377 +                raise TypeError("key " + repr(key) + " is not a string")
   8.378 +            if first:
   8.379 +                first = False
   8.380 +            else:
   8.381 +                yield item_separator
   8.382 +            yield _encoder(key)  # keys are always emitted as JSON strings
   8.383 +            yield _key_separator
   8.384 +            if isinstance(value, basestring):
   8.385 +                yield _encoder(value)
   8.386 +            elif value is None:
   8.387 +                yield 'null'
   8.388 +            elif value is True:
   8.389 +                yield 'true'
   8.390 +            elif value is False:
   8.391 +                yield 'false'
   8.392 +            elif isinstance(value, (int, long)):
   8.393 +                yield str(value)
   8.394 +            elif isinstance(value, float):
   8.395 +                yield _floatstr(value)
   8.396 +            else:
   8.397 +                if isinstance(value, (list, tuple)):
   8.398 +                    chunks = _iterencode_list(value, _current_indent_level)
   8.399 +                elif isinstance(value, dict):
   8.400 +                    chunks = _iterencode_dict(value, _current_indent_level)
   8.401 +                else:
   8.402 +                    chunks = _iterencode(value, _current_indent_level)
   8.403 +                for chunk in chunks:
   8.404 +                    yield chunk
   8.405 +        if newline_indent is not None:
   8.406 +            _current_indent_level -= 1
   8.407 +            yield '\n' + (' ' * (_indent * _current_indent_level))
   8.408 +        yield '}'
   8.409 +        if markers is not None:
   8.410 +            del markers[markerid]
   8.411 +
   8.412 +    def _iterencode(o, _current_indent_level):
   8.413 +        if isinstance(o, basestring):
   8.414 +            yield _encoder(o)
   8.415 +        elif o is None:
   8.416 +            yield 'null'
   8.417 +        elif o is True:
   8.418 +            yield 'true'
   8.419 +        elif o is False:
   8.420 +            yield 'false'
   8.421 +        elif isinstance(o, (int, long)):
   8.422 +            yield str(o)
   8.423 +        elif isinstance(o, float):
   8.424 +            yield _floatstr(o)
   8.425 +        elif isinstance(o, (list, tuple)):
   8.426 +            for chunk in _iterencode_list(o, _current_indent_level):
   8.427 +                yield chunk
   8.428 +        elif isinstance(o, dict):
   8.429 +            for chunk in _iterencode_dict(o, _current_indent_level):
   8.430 +                yield chunk
   8.431 +        else:
   8.432 +            if markers is not None:
   8.433 +                markerid = id(o)
   8.434 +                if markerid in markers:
   8.435 +                    raise ValueError("Circular reference detected")
   8.436 +                markers[markerid] = o
   8.437 +            o = _default(o)  # let the caller-supplied hook convert o, then encode the result
   8.438 +            for chunk in _iterencode(o, _current_indent_level):
   8.439 +                yield chunk
   8.440 +            if markers is not None:
   8.441 +                del markers[markerid]
   8.442 +
   8.443 +    return _iterencode
     9.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.2 +++ b/trunk/quahog/plugins/Weather/local/simplejson/scanner.py	Thu Oct 22 10:14:56 2009 -0400
     9.3 @@ -0,0 +1,65 @@
     9.4 +"""JSON token scanner
     9.5 +"""
     9.6 +import re
     9.7 +try:
     9.8 +    from _speedups import make_scanner as c_make_scanner
     9.9 +except ImportError:
    9.10 +    c_make_scanner = None
    9.11 +
    9.12 +__all__ = ['make_scanner']
    9.13 +
    9.14 +NUMBER_RE = re.compile(
    9.15 +    r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?',
    9.16 +    (re.VERBOSE | re.MULTILINE | re.DOTALL))
    9.17 +
    9.18 +def py_make_scanner(context):  # pure-Python scanner; used when the C make_scanner is unavailable
    9.19 +    parse_object = context.parse_object
    9.20 +    parse_array = context.parse_array
    9.21 +    parse_string = context.parse_string
    9.22 +    match_number = NUMBER_RE.match
    9.23 +    encoding = context.encoding
    9.24 +    strict = context.strict
    9.25 +    parse_float = context.parse_float
    9.26 +    parse_int = context.parse_int
    9.27 +    parse_constant = context.parse_constant
    9.28 +    object_hook = context.object_hook
    9.29 +    # _scan_once returns a (value, end_index) pair for the value at idx, or raises StopIteration if there is none.
    9.30 +    def _scan_once(string, idx):
    9.31 +        try:
    9.32 +            nextchar = string[idx]
    9.33 +        except IndexError:
    9.34 +            raise StopIteration  # idx is past the end of the input
    9.35 +
    9.36 +        if nextchar == '"':
    9.37 +            return parse_string(string, idx + 1, encoding, strict)  # idx + 1 skips the opening quote
    9.38 +        elif nextchar == '{':
    9.39 +            return parse_object((string, idx + 1), encoding, strict, _scan_once, object_hook)
    9.40 +        elif nextchar == '[':
    9.41 +            return parse_array((string, idx + 1), _scan_once)
    9.42 +        elif nextchar == 'n' and string[idx:idx + 4] == 'null':
    9.43 +            return None, idx + 4
    9.44 +        elif nextchar == 't' and string[idx:idx + 4] == 'true':
    9.45 +            return True, idx + 4
    9.46 +        elif nextchar == 'f' and string[idx:idx + 5] == 'false':
    9.47 +            return False, idx + 5
    9.48 +        # Not a string, container or keyword literal: try a number, then the NaN/Infinity constants.
    9.49 +        m = match_number(string, idx)
    9.50 +        if m is not None:
    9.51 +            integer, frac, exp = m.groups()
    9.52 +            if frac or exp:  # a fraction or exponent part makes it a float
    9.53 +                res = parse_float(integer + (frac or '') + (exp or ''))
    9.54 +            else:
    9.55 +                res = parse_int(integer)
    9.56 +            return res, m.end()
    9.57 +        elif nextchar == 'N' and string[idx:idx + 3] == 'NaN':
    9.58 +            return parse_constant('NaN'), idx + 3
    9.59 +        elif nextchar == 'I' and string[idx:idx + 8] == 'Infinity':
    9.60 +            return parse_constant('Infinity'), idx + 8
    9.61 +        elif nextchar == '-' and string[idx:idx + 9] == '-Infinity':
    9.62 +            return parse_constant('-Infinity'), idx + 9
    9.63 +        else:
    9.64 +            raise StopIteration  # nothing recognisable starts at idx
    9.65 +
    9.66 +    return _scan_once
    9.67 +
    9.68 +make_scanner = c_make_scanner or py_make_scanner
    10.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    10.2 +++ b/trunk/quahog/plugins/Weather/local/simplejson/tool.py	Thu Oct 22 10:14:56 2009 -0400
    10.3 @@ -0,0 +1,37 @@
    10.4 +r"""Command-line tool to validate and pretty-print JSON
    10.5 +
    10.6 +Usage::
    10.7 +
    10.8 +    $ echo '{"json":"obj"}' | python -m simplejson.tool
    10.9 +    {
   10.10 +        "json": "obj"
   10.11 +    }
   10.12 +    $ echo '{ 1.2:3.4}' | python -m simplejson.tool
   10.13 +    Expecting property name: line 1 column 2 (char 2)
   10.14 +
   10.15 +"""
   10.16 +import sys
   10.17 +import simplejson
   10.18 +
   10.19 +def main():  # usage: [infile [outfile]]; stdin/stdout are used for omitted arguments
   10.20 +    if len(sys.argv) == 1:
   10.21 +        infile = sys.stdin
   10.22 +        outfile = sys.stdout
   10.23 +    elif len(sys.argv) == 2:
   10.24 +        infile = open(sys.argv[1], 'rb')
   10.25 +        outfile = sys.stdout
   10.26 +    elif len(sys.argv) == 3:
   10.27 +        infile = open(sys.argv[1], 'rb')
   10.28 +        outfile = open(sys.argv[2], 'wb')  # NOTE(review): truncates outfile before the input has been parsed
   10.29 +    else:
   10.30 +        raise SystemExit(sys.argv[0] + " [infile [outfile]]")
   10.31 +    try:
   10.32 +        obj = simplejson.load(infile)
   10.33 +    except ValueError, e:
   10.34 +        raise SystemExit(e)  # exit with the parse error as the message
   10.35 +    simplejson.dump(obj, outfile, sort_keys=True, indent=4)  # pretty-print: sorted keys, 4-space indent
   10.36 +    outfile.write('\n')  # NOTE(review): neither file handle is closed explicitly
   10.37 +
   10.38 +
   10.39 +if __name__ == '__main__':
   10.40 +    main()
    11.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.2 +++ b/trunk/quahog/plugins/Weather/plugin.py	Thu Oct 22 10:14:56 2009 -0400
    11.3 @@ -0,0 +1,487 @@
    11.4 +###
    11.5 +# Copyright (c) 2005,2009, James Vega
    11.6 +# All rights reserved.
    11.7 +#
    11.8 +# Redistribution and use in source and binary forms, with or without
    11.9 +# modification, are permitted provided that the following conditions are met:
   11.10 +#
   11.11 +#   * Redistributions of source code must retain the above copyright notice,
   11.12 +#     this list of conditions, and the following disclaimer.
   11.13 +#   * Redistributions in binary form must reproduce the above copyright notice,
   11.14 +#     this list of conditions, and the following disclaimer in the
   11.15 +#     documentation and/or other materials provided with the distribution.
   11.16 +#   * Neither the name of the author of this software nor the name of
   11.17 +#     contributors to this software may be used to endorse or promote products
   11.18 +#     derived from this software without specific prior written consent.
   11.19 +#
   11.20 +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   11.21 +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   11.22 +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   11.23 +# ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
   11.24 +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   11.25 +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   11.26 +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   11.27 +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   11.28 +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   11.29 +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   11.30 +# POSSIBILITY OF SUCH DAMAGE.
   11.31 +###
   11.32 +
   11.33 +import re
   11.34 +
   11.35 +# Specifically use our local copy since later versions changed their interface
   11.36 +# and (depending on the version) don't work as well
   11.37 +from local import BeautifulSoup
   11.38 +
   11.39 +import supybot.conf as conf
   11.40 +import supybot.utils as utils
   11.41 +from supybot.commands import *
   11.42 +import supybot.ircutils as ircutils
   11.43 +import supybot.callbacks as callbacks
   11.44 +
   11.45 +try:
   11.46 +    feedparser = utils.python.universalImport('feedparser', 'local.feedparser')
   11.47 +except ImportError:
   11.48 +    raise callbacks.Error, \
   11.49 +            'You need the feedparser module installed to use this plugin.  ' \
   11.50 +            'Download the module at <http://www.feedparser.org/>.'
   11.51 +
   11.52 +simplejson = None
   11.53 +
   11.54 +try:
   11.55 +    import json as simplejson
   11.56 +except ImportError:
   11.57 +    pass
   11.58 +
   11.59 +try:
   11.60 +    # The 3rd party simplejson module was included in Python 2.6 and renamed to
   11.61 +    # json.  Unfortunately, this conflicts with the 3rd party json module.
   11.62 +    # Luckily, the 3rd party json module has a different interface so we test
   11.63 +    # to make sure we aren't using it.
   11.64 +    if simplejson is None or hasattr(simplejson, 'read'):
   11.65 +        simplejson = utils.python.universalImport('simplejson',
   11.66 +                                                  'local.simplejson')
   11.67 +except ImportError:
   11.68 +    raise callbacks.Error, \
   11.69 +            'You need Python2.6 or the simplejson module installed to use ' \
   11.70 +            'this plugin.  Download the module at ' \
   11.71 +            '<http://undefined.org/python/#simplejson>.'
   11.72 +
   11.73 +unitAbbrevs = utils.abbrev(['Fahrenheit', 'Celsius', 'Centigrade', 'Kelvin'])
   11.74 +unitAbbrevs['C'] = 'Celsius'
   11.75 +unitAbbrevs['Ce'] = 'Celsius'
   11.76 +
   11.77 +noLocationError = 'No such location could be found.'
   11.78 +class NoLocation(callbacks.Error):  # raised by Weather._noLocation() with noLocationError as the message
   11.79 +    pass
   11.80 +
   11.81 +class Weather(callbacks.Plugin):
   11.82 +    weatherCommands = ('wunder', 'wunder rss', 'cnn', 'ham')
   11.83 +    threaded = True
   11.84 +    def callCommand(self, method, irc, msg, *args, **kwargs):  # wraps the base implementation
   11.85 +        try:
   11.86 +            super(Weather, self).callCommand(method, irc, msg, *args, **kwargs)
   11.87 +        except utils.web.Error, e:
   11.88 +            irc.error(str(e))  # report web fetch failures via irc.error instead of letting them propagate
   11.89 +
   11.90 +    def _noLocation():  # shared helper: always raises NoLocation
   11.91 +        raise NoLocation, noLocationError
   11.92 +    _noLocation = staticmethod(_noLocation)  # callable as self._noLocation() or Weather._noLocation()
   11.93 +
   11.94 +    def weather(self, irc, msg, args, location):
   11.95 +        """<US zip code | US/Canada city, state | Foreign city, country>
   11.96 +
   11.97 +        Returns the approximate weather conditions for a given city.
   11.98 +        """
   11.99 +        channel = None
  11.100 +        if irc.isChannel(msg.args[0]):
  11.101 +            channel = msg.args[0]
  11.102 +        if not location:  # no argument given: fall back to this user's stored location
  11.103 +            location = self.userValue('lastLocation', msg.prefix)
  11.104 +        if not location:  # still nothing to look up
  11.105 +            raise callbacks.ArgumentError
  11.106 +        self.setUserValue('lastLocation', msg.prefix,
  11.107 +                          location, ignoreNoUser=True)
  11.108 +        args = [location]
  11.109 +        commandName = self.registryValue('command', channel)  # configured site command
  11.110 +        firstCommand = commandName
  11.111 +        command = self.getCommandMethod(commandName.split())
  11.112 +        try:
  11.113 +            command(irc, msg, args[:])  # each attempt gets its own copy of args
  11.114 +        except (NoLocation, utils.web.Error):
  11.115 +            self.log.info('%s lookup failed, Trying others.', firstCommand)
  11.116 +            for commandName in self.weatherCommands:
  11.117 +                if commandName != firstCommand:
  11.118 +                    self.log.info('Trying %s.', commandName)
  11.119 +                    try:
  11.120 +                        command = self.getCommandMethod(commandName.split())
  11.121 +                        command(irc, msg, args[:])
  11.122 +                        self.log.info('%s lookup succeeded.', commandName)
  11.123 +                        return
  11.124 +                    except (NoLocation, utils.web.Error):  # a failing backup must not stop the others
  11.125 +                        self.log.info('%s lookup failed as backup.',
  11.126 +                                      commandName)
  11.127 +            irc.error(format('Could not retrieve weather for %q.', location))  # every site failed
  11.128 +    weather = wrap(weather, [additional('text')])
  11.129 +
  11.130 +    def _toCelsius(temp, unit):  # unit is 'K', 'F' or anything else (returned unchanged, i.e. treated as Celsius)
  11.131 +        if unit == 'K':
  11.132 +            return temp - 273.15
  11.133 +        elif unit == 'F':
  11.134 +            return (temp - 32) * 5.0 / 9  # 5.0 forces true division even for int input on Python 2
  11.135 +        else:
  11.136 +            return temp
  11.137 +    _toCelsius = staticmethod(_toCelsius)
  11.138 +
  11.139 +    _temp = re.compile(r'(-?\d+)(.*?)(F|C)')
  11.140 +    def _getTemp(temp, deg, unit, chan):  # returns the temperature formatted as '<value><deg><unit>'
  11.141 +        assert unit == unit.upper()
  11.142 +        temp = float(temp)  # coerce: int input would otherwise hit Python 2 integer division below
  11.143 +        default = conf.get(conf.supybot.plugins.Weather.temperatureUnit, chan)
  11.144 +        convert = conf.get(conf.supybot.plugins.Weather.convert, chan)
  11.145 +        # Short circuit if we're the same unit as the default or no conversion
  11.146 +        # has been requested
  11.147 +        if unitAbbrevs[unit] == default or not convert:
  11.148 +            return format('%0.1f%s%s', temp, deg, unit)
  11.149 +        temp = Weather._toCelsius(temp, unit)  # normalise to Celsius, then convert to the default unit
  11.150 +        unit = 'C'
  11.151 +        if default == 'Kelvin':
  11.152 +            temp = temp + 273.15
  11.153 +            unit = 'K'
  11.154 +            deg = ' '  # Kelvin is written without a degree sign
  11.155 +        elif default == 'Fahrenheit':
  11.156 +            temp = temp * 9 / 5 + 32
  11.157 +            unit = 'F'
  11.158 +        return '%0.1f%s%s' % (temp, deg, unit)
  11.159 +    _getTemp = staticmethod(_getTemp)
  11.160 +
  11.161 +    _hamLoc = re.compile(
  11.162 +        r'<span class="Place">([^,]+), ([^,\n]+),(.*?)</span>', re.I)
  11.163 +    _interregex = re.compile(
  11.164 +        r'<span class="Place">([^,]+), ([^,\n]+?)</span>', re.I)
  11.165 +    _hamCond = re.compile(
  11.166 +        r'<td width="100%" colspan="2" align="center" class="Wx">([^<]+)</td>',
  11.167 +        re.I)
  11.168 +    _hamTemp = re.compile(
  11.169 +        r'<td valign="top" align="right" class="Temp">(-?\d+)(.*?)(F|C)</td>',
  11.170 +        re.I)
  11.171 +    _hamChill = re.compile(
  11.172 +        r'Wind Chill:</td>\s+<td align="right" class="Value">([^N][^<]+)</td>',
  11.173 +        re.I | re.S)
  11.174 +    _hamHeat = re.compile(
  11.175 +        r'Heat Index:</td>\s+<td align="right" class="Value">([^N][^<]+)</td>',
  11.176 +        re.I | re.S)
  11.177 +    _hamMultiLoc = re.compile(
  11.178 +        r'Select from one of[^<]+</b></font></td></tr>\s*<tr><td><font[^>]+>'
  11.179 +        r'\s*<a href="(/cgi-bin/hw3[^"]+)">', re.I | re.S)
  11.180 +    def ham(self, irc, msg, args, loc):
  11.181 +        """<US zip code | US/Canada city, state | Foreign city, country>
  11.182 +
  11.183 +        Returns the approximate weather conditions for a given city.
  11.184 +        """
  11.185 +        url = 'http://www.hamweather.net/cgi-bin/hw3/hw3.cgi?' \
  11.186 +              'config=&forecast=zandh&pands=%s&Submit=GO' % \
  11.187 +              utils.web.urlquote(loc.lower())
  11.188 +        html = utils.web.getUrl(url)
  11.189 +        if 'was not found' in html:
  11.190 +            self._noLocation()
  11.191 +
  11.192 +        # ham seems to automatically return a location for duplicate names with
  11.193 +        # no list of other possibilities anymore, so this code may not be
  11.194 +        # needed
  11.195 +        if 'Multiple Locations for' in html:
  11.196 +            m = self._hamMultiLoc.search(html)
  11.197 +            if m:
  11.198 +                url = 'http://www.hamweather.net/%s' % m.group(1)  # follow the first listed location
  11.199 +                html = utils.web.getUrl(url)
  11.200 +            else:
  11.201 +                self._noLocation()
  11.202 +        headData = self._hamLoc.search(html)
  11.203 +        if headData is not None:
  11.204 +            (city, state, country) = headData.groups()
  11.205 +        else:
  11.206 +            headData = self._interregex.search(html)  # two-part "city, region" heading
  11.207 +            if headData:
  11.208 +                (city, state) = headData.groups()
  11.209 +            else:
  11.210 +                self._noLocation()
  11.211 +        city = utils.web.htmlToText(city.strip())
  11.212 +        state = utils.web.htmlToText(state.strip())
  11.213 +        temp = self._hamTemp.search(html)  # stays None when the page has no temperature cell
  11.214 +        if temp is not None:
  11.215 +            (temp, deg, unit) = temp.groups()
  11.216 +            deg = utils.web.htmlToText(deg)
  11.217 +            temp = self._getTemp(float(temp), deg, unit, msg.args[0])
  11.218 +        conds = self._hamCond.search(html)
  11.219 +        if conds is not None:
  11.220 +            conds = conds.group(1)
  11.221 +        index = ''
  11.222 +        chill = self._hamChill.search(html)
  11.223 +        if chill is not None:
  11.224 +            chill = chill.group(1)
  11.225 +            chill = utils.web.htmlToText(chill)
  11.226 +            tempsplit = self._temp.search(chill)
  11.227 +            if tempsplit and temp:  # compare only when both values were actually parsed
  11.228 +                (chill, deg, unit) = tempsplit.groups()
  11.229 +                chill = self._getTemp(float(chill), deg, unit,msg.args[0])
  11.230 +                if float(chill[:-2]) < float(temp[:-2]):  # [:-2] drops degree sign + unit (assumes a 1-char sign)
  11.231 +                    index = format(' (Wind Chill: %s)', chill)
  11.232 +        heat = self._hamHeat.search(html)
  11.233 +        if heat is not None:
  11.234 +            heat = heat.group(1)
  11.235 +            heat = utils.web.htmlToText(heat)
  11.236 +            tempsplit = self._temp.search(heat)
  11.237 +            if tempsplit and temp:  # same guard as for the wind chill
  11.238 +                (heat, deg, unit) = tempsplit.groups()
  11.239 +                heat= self._getTemp(float(heat), deg, unit,msg.args[0])
  11.240 +                if float(heat[:-2]) > float(temp[:-2]):
  11.241 +                    index = format(' (Heat Index: %s)', heat)
  11.242 +        if temp and conds and city and state:
  11.243 +            conds = conds.replace('Tsra', 'Thunderstorms')
  11.244 +            conds = conds.replace('Ts', 'Thunderstorms')
  11.245 +            s = format('The current temperature in %s, %s is %s%s. '
  11.246 +                       'Conditions: %s.',
  11.247 +                       city, state, temp, index, conds)
  11.248 +            irc.reply(s.decode('latin1').encode('utf-8'))
  11.249 +        else:
  11.250 +            irc.errorPossibleBug('The format of the page was odd.')  # now also reached when temp is missing
  11.251 +    ham = wrap(ham, ['text'])
  11.252 +
  11.253 +    _cnnSearchUrl = 'http://weather.cnn.com/weather/citySearch?' \
  11.254 +                    'search_term=%s&mode=json&filter=true'
  11.255 +    _cnnUrl='http://weather.cnn.com/weather/forecast.jsp?locCode=%s&zipCode=%s'
  11.256 +    _cnnFTemp = re.compile(r'<div class="cnnWeatherTempCurrent">' \
  11.257 +                           r'(-?\d+)(&deg;)</div>',
  11.258 +                           re.I | re.S)
  11.259 +    _cnnCond = re.compile(r'<span class="cnnWeatherConditionCurrent">' \
  11.260 +                          r'([^<]+)</span>',
  11.261 +                          re.I | re.S)
  11.262 +    _cnnHumid = re.compile(r'Humidity: </b>(\d+%)', re.I | re.S)
  11.263 +    _cnnWind = re.compile(r'Wind: </b>([^<\n\r]+)', re.I | re.S)
  11.264 +    # Certain countries are expected to use a standard abbreviation
  11.265 +    # The weather we pull uses weird codes.  Map obvious ones here.
  11.266 +    _cnnCountryMap = {'uk': 'en', 'de': 'ge'}
  11.267 +    def cnn(self, irc, msg, args, loc):
  11.268 +        """<US zip code | US/Canada city, state | Foreign city, country>
  11.269 +
  11.270 +        Returns the approximate weather conditions for a given city.
  11.271 +        """
  11.272 +        if ' ' in loc:
  11.273 +            # More than one word: the last word is taken as the state or
  11.274 +            # country and everything before it as the (possibly multi-word)
  11.275 +            # city, e.g. 'Garden City, KS' -> city 'garden city', state 'ks'.
  11.276 +            loc = utils.str.rsplit(loc, None, 1)
  11.277 +            state = loc.pop().lower()
  11.278 +            city = ' '.join(loc)
  11.279 +            city = city.rstrip(',').lower()
  11.280 +            if state in self._cnnCountryMap:
  11.281 +                state = self._cnnCountryMap[state]
  11.282 +            loc = ' '.join([city, state])
  11.283 +        else:
  11.284 +            # A single word (presumably a zip code or station id); commas are dropped.
  11.285 +            loc = loc.replace(',', '')
  11.286 +        url = self._cnnSearchUrl % (utils.web.urlquote(loc))
  11.287 +        json = simplejson.loads(utils.web.getUrl(url))
  11.288 +        if not json:
  11.289 +            self._noLocation()
  11.290 +        json = json[0]  # use the first search hit
  11.291 +        url = self._cnnUrl % (json['locCode'], json['zip'])
  11.292 +        text = utils.web.getUrl(url)
  11.293 +        location = ', '.join([json['city'], json['stateOrCountry']])
  11.294 +        temp = self._cnnFTemp.search(text)
  11.295 +        conds = self._cnnCond.search(text)
  11.296 +        humidity = self._cnnHumid.search(text)
  11.297 +        wind = self._cnnWind.search(text)
  11.298 +        if location and temp:
  11.299 +            (temp, deg) = temp.groups()
  11.300 +            unit = 'F'  # the regex captures no unit letter; Fahrenheit is assumed here
  11.301 +            temp = self._getTemp(float(temp), deg, unit, msg.args[0])
  11.302 +            resp = [format('The current temperature in %s is %s.',
  11.303 +                           location, temp)]
  11.304 +            if conds is not None:
  11.305 +                resp.append(format('Conditions: %s.', conds.group(1)))
  11.306 +            if humidity is not None:
  11.307 +                resp.append(format('Humidity: %s.', humidity.group(1)))
  11.308 +            if wind is not None:
  11.309 +                resp.append(format('Wind: %s.', wind.group(1)))
  11.310 +            resp = map(utils.web.htmlToText, resp)  # decode HTML entities such as &deg; in every sentence
  11.311 +            irc.reply(' '.join(resp))
  11.312 +        else:
  11.313 +            irc.errorPossibleBug('Could not find weather information.')
  11.314 +    cnn = wrap(cnn, ['text'])
  11.315 +
  11.316 +    class wunder(callbacks.Commands):
  11.317 +        _backupUrl = re.compile(r'<a href="(/global/stations[^"]+)">')
  11.318 +
  11.319 +        _wunderUrl = 'http://mobile.wunderground.com/cgi-bin/' \
  11.320 +                     'findweather/getForecast?query='
  11.321 +        _wunderSevere = re.compile(r'font color="?#ff0000"?>([^<]+)<', re.I)
  11.322 +        _wunderMultiLoc = re.compile(r'<a href="([^"]+)', re.I | re.S)
  11.323 +        def wunder(self, irc, msg, args, loc):
  11.324 +            """<US zip code | US/Canada city, state | Foreign city, country>
  11.325 +
  11.326 +            Returns the approximate weather conditions for a given city.
  11.327 +            """
  11.328 +            url = '%s%s' % (self._wunderUrl, utils.web.urlquote(loc))
  11.329 +            text = utils.web.getUrl(url)
  11.330 +            if 'Search not found' in text or \
  11.331 +               re.search(r'size="2"> Place </font>', text, re.I):
  11.332 +                Weather._noLocation()
  11.333 +            if 'Place: Temperature' in text:
  11.334 +                m = self._backupUrl.search(text)
  11.335 +                if m is not None:
  11.336 +                    url = 'http://mobile.wunderground.com' + m.group(1)
  11.337 +                    text = utils.web.getUrl(url)
  11.338 +            severe = ''
  11.339 +            m = self._wunderSevere.search(text)
  11.340 +            if m:
  11.341 +                severe = ircutils.bold(format('  %s', m.group(1)))
  11.342 +            text = self._formatSymbols(text)
  11.343 +            soup = BeautifulSoup.BeautifulSoup()
  11.344 +            soup.feed(text)
  11.345 +            # Get the table with all the weather info
  11.346 +            table = soup.first('table', {'border':'1'})
  11.347 +            if not table:
  11.348 +                Weather._noLocation()
  11.349 +            trs = table.fetch('tr')
  11.350 +            (time, location) = trs.pop(0).fetch('b')
  11.351 +            time = time.string
  11.352 +            location = location.string
  11.353 +            info = {}
  11.354 +            def isText(t):
  11.355 +                return not isinstance(t, BeautifulSoup.NavigableText) \
  11.356 +                       and t.contents
  11.357 +            def getText(t):
  11.358 +                s = t.string
  11.359 +                if s is BeautifulSoup.Null:
  11.360 +                    t = t.contents
  11.361 +                    num = t[0].string
  11.362 +                    units = t[1].string
  11.363 +                    # htmlToText strips leading whitespace, so we have to
  11.364 +                    # handle strings with &nbsp; differently.
  11.365 +                    if units.startswith('&nbsp;'):
  11.366 +                        units = utils.web.htmlToText(units)
  11.367 +                        s = ' '.join((num, units))
  11.368 +                    else:
  11.369 +                        units = utils.web.htmlToText(units)
  11.370 +                        s = ' '.join((num, units[0], units[1:]))
  11.371 +                return s
  11.372 +            for tr in trs:
  11.373 +                k = tr.td.string
  11.374 +                v = filter(isText, tr.fetch('td')[1].contents)
  11.375 +                value = map(getText, v)
  11.376 +                info[k] = ' '.join(value)
  11.377 +            temp = info['Temperature']
  11.378 +            if location and temp:
  11.379 +                (temp, deg, unit) = temp.split()[3:] # We only want temp format
  11.380 +                temp = Weather._getTemp(float(temp), deg, unit, msg.args[0])
  11.381 +                resp = ['The current temperature in %s is %s (%s).' %\
  11.382 +                        (location, temp, time)]
  11.383 +                conds = info['Conditions']
  11.384 +                resp.append('Conditions: %s.' % info['Conditions'])
  11.385 +                humidity = info['Humidity']
  11.386 +                resp.append('Humidity: %s.' % info['Humidity'])
  11.387 +                # Apparently, the "Dew Point" and "Wind" categories are
  11.388 +                # occasionally set to "-" instead of an actual reading. So,
  11.389 +                # we'll just catch the ValueError from trying to unpack a tuple
  11.390 +                # of the wrong size.
  11.391 +                try:
  11.392 +                    (dew, deg, unit) = info['Dew Point'].split()[3:]
  11.393 +                    dew = Weather._getTemp(float(dew), deg, unit, msg.args[0])
  11.394 +                    resp.append('Dew Point: %s.' % dew)
  11.395 +                except (ValueError, KeyError):
  11.396 +                    pass
  11.397 +                try:
  11.398 +                    wind = 'Wind: %s at %s %s.' % tuple(info['Wind'].split())
  11.399 +                    resp.append(wind)
  11.400 +                except (ValueError, TypeError):
  11.401 +                    pass
  11.402 +                try:
  11.403 +                    (chill, deg, unit) = info['Windchill'].split()[3:]
  11.404 +                    chill = Weather._getTemp(float(chill), deg,
  11.405 +                                             unit, msg.args[0])
  11.406 +                    resp.append('Windchill: %s.' % chill)
  11.407 +                except (ValueError, KeyError):
  11.408 +                    pass
  11.409 +                if info['Pressure']:
  11.410 +                    resp.append('Pressure: %s.' % info['Pressure'])
  11.411 +                resp.append(severe)
  11.412 +                resp = map(utils.web.htmlToText, resp)
  11.413 +                irc.reply(' '.join(resp).decode('latin1').encode('utf-8'))
  11.414 +            else:
  11.415 +                Weather._noLocation()
  11.416 +        wunder = wrap(wunder, ['text'])
  11.417 +
  11.418 +        _rsswunderUrl = 'http://www.wunderground.com/cgi-bin/findweather/' \
  11.419 +                        'getForecast?query=%s'
  11.420 +        _rsswunderfeed = re.compile(
  11.421 +            r'<link rel="alternate".*href="([^"]+)" */?>', re.I)
  11.422 +        _rsswunderSevere = re.compile(
  11.423 +            r'font color="?#ff0000"?><b>([^<]+)<', re.I)
  11.424 +        _rsswunderLocation = re.compile(
  11.425 +            r'<title>(?:(.*) Weather from Weather Underground|'
  11.426 +            r'Weather Underground - (.*))</title>', re.I)
  11.427 +        _rsswunderForecastDate = re.compile(r'Forecast for (.*) as of', re.I)
  11.428 +        def rss(self, irc, msg, args, loc):
  11.429 +            """<US zip code | US/Canada city, state | Foreign city, country>
  11.430 +
  11.431 +            Returns the approximate weather conditions for a given city.
  11.432 +            """
  11.433 +            url = self._rsswunderUrl % utils.web.urlquote(loc)
  11.434 +            url = url.replace('%20', '+')
  11.435 +            text = utils.web.getUrl(url)
  11.436 +            if 'Search not found' in text or \
  11.437 +               re.search(r'size="2"> Place </font>', text, re.I):
  11.438 +                Weather._noLocation()
  11.439 +            if 'Search Results' in text:
  11.440 +                m = self._backupUrl.search(text)
  11.441 +                if m is not None:
  11.442 +                    url = 'http://www.wunderground.com' + m.group(1)
  11.443 +                    text = utils.web.getUrl(url)
  11.444 +                else:
  11.445 +                    Weather._noLocation()
  11.446 +            self._rss(irc, text)
  11.447 +        rss = wrap(rss, ['text'])
  11.448 +
  11.449 +        def _rss(self, irc, text):
  11.450 +            severe = None
  11.451 +            m = self._rsswunderSevere.search(text)
  11.452 +            if m:
  11.453 +                severe = ircutils.bold(m.group(1))
  11.454 +            feed = self._rsswunderfeed.search(text)
  11.455 +            if not feed:
  11.456 +                Weather._noLocation()
  11.457 +            feed = feed.group(1)
  11.458 +            rss = utils.web.getUrl(feed)
  11.459 +            rss = self._formatSymbols(rss)
  11.460 +            rss = rss.replace(":", ": ")
  11.461 +            rss = rss.replace(":  ", ": ")
  11.462 +            resp = []
  11.463 +            location = self._rsswunderLocation.search(rss)
  11.464 +            if location is not None:
  11.465 +                title = filter(None, location.groups())
  11.466 +                if title:
  11.467 +                    resp.append('Weather for %s' % title[0])
  11.468 +            info = feedparser.parse(rss)
  11.469 +            for e in info['entries']:
  11.470 +                d = self._rsswunderForecastDate.search(e['title'])
  11.471 +                if d is not None:
  11.472 +                    resp.append(d.group(1) + ' - Conditions: ' + e['summary'])
  11.473 +                else:
  11.474 +                    resp.append(e['summary'])
  11.475 +            resp = [s.encode('utf-8').rtrim('.') for s in resp]
  11.476 +            if severe is not None:
  11.477 +                resp.append(severe)
  11.478 +            irc.reply(utils.web.htmlToText('; '.join(resp)))
  11.479 +
  11.480 +        def _formatSymbols(self, text):
  11.481 +            text = text.replace("&amp;", "&")
  11.482 +            text = text.replace("&#176;", "&deg;")
  11.483 +            text = text.replace(" &deg; ", "&deg;")
  11.484 +            text = text.replace("&deg;", "\xb0")
  11.485 +            return text
  11.486 +
  11.487 +Class = Weather  # presumably looked up by the plugin loader -- confirm
  11.488 +
  11.489 +
  11.490 +# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79:
    12.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    12.2 +++ b/trunk/quahog/plugins/Weather/test.py	Thu Oct 22 10:14:56 2009 -0400
    12.3 @@ -0,0 +1,110 @@
    12.4 +###
    12.5 +# Copyright (c) 2005,2009, James Vega
    12.6 +# All rights reserved.
    12.7 +#
    12.8 +# Redistribution and use in source and binary forms, with or without
    12.9 +# modification, are permitted provided that the following conditions are met:
   12.10 +#
   12.11 +#   * Redistributions of source code must retain the above copyright notice,
   12.12 +#     this list of conditions, and the following disclaimer.
   12.13 +#   * Redistributions in binary form must reproduce the above copyright notice,
   12.14 +#     this list of conditions, and the following disclaimer in the
   12.15 +#     documentation and/or other materials provided with the distribution.
   12.16 +#   * Neither the name of the author of this software nor the name of
   12.17 +#     contributors to this software may be used to endorse or promote products
   12.18 +#     derived from this software without specific prior written consent.
   12.19 +#
   12.20 +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   12.21 +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   12.22 +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   12.23 +# ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
   12.24 +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   12.25 +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   12.26 +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   12.27 +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   12.28 +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   12.29 +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   12.30 +# POSSIBILITY OF SUCH DAMAGE.
   12.31 +###
   12.32 +
   12.33 +from supybot.test import *
   12.34 +
   12.35 +class WeatherTestCase(PluginTestCase):
   12.36 +    plugins = ('Weather',)
   12.37 +    if network:
   12.38 +        def testHam(self):
   12.39 +            self.assertNotError('ham Columbus, OH')
   12.40 +            self.assertNotError('ham 43221')
   12.41 +            self.assertNotRegexp('ham Paris, FR', 'Virginia')
   12.42 +            self.assertError('ham alsdkfjasdl, asdlfkjsadlfkj')
   12.43 +            self.assertNotError('ham London, gb')
   12.44 +            self.assertNotError('ham London, GB')
   12.45 +            self.assertNotError('ham Munich, germany')
   12.46 +            self.assertNotError('ham Tucson, AZ')
   12.47 +            # "Multiple locations found" test
   12.48 +            self.assertNotError('ham sandwich')
   12.49 +
   12.50 +        def testCnn(self):
   12.51 +            self.assertNotError('cnn Columbus, OH')
   12.52 +            self.assertNotError('cnn 43221')
   12.53 +            self.assertNotRegexp('cnn Paris, FR', 'Virginia')
   12.54 +            self.assertError('cnn alsdkfjasdl, asdlfkjsadlfkj')
   12.55 +            self.assertNotError('cnn London, uk')
   12.56 +            self.assertNotError('cnn London, UK')
   12.57 +            self.assertNotError('cnn Nurnberg, de')
   12.58 +            self.assertNotError('cnn Tucson, AZ')
   12.59 +
   12.60 +        def testWunder(self):
   12.61 +            self.assertNotError('wunder Columbus, OH')
   12.62 +            self.assertNotError('wunder 43221')
   12.63 +            self.assertNotRegexp('wunder Paris, FR', 'Virginia')
   12.64 +            self.assertError('wunder alsdkfjasdl, asdlfkjsadlfkj')
   12.65 +            self.assertNotError('wunder London, england')
   12.66 +            self.assertNotError('wunder Munich, germany')
   12.67 +            self.assertNotError('wunder Tucson, AZ')
   12.68 +
   12.69 +        def testTemperatureUnit(self):
   12.70 +            try:
   12.71 +                orig = conf.supybot.plugins.Weather.temperatureUnit()
   12.72 +                conf.supybot.plugins.Weather.temperatureUnit.setValue('F')
   12.73 +                self.assertRegexp('cnn Columbus, OH', r'is -?\d+\.\d[^F]*F')
   12.74 +                self.assertRegexp('ham Columbus, OH', r'is -?\d+\.\d[^F]*F')
   12.75 +                conf.supybot.plugins.Weather.temperatureUnit.setValue('C')
   12.76 +                self.assertRegexp('cnn Columbus, OH', r'is -?\d+\.\d[^C]*C')
   12.77 +                self.assertRegexp('ham Columbus, OH', r'is -?\d+\.\d[^C]*C')
   12.78 +                conf.supybot.plugins.Weather.temperatureUnit.setValue('K')
   12.79 +                self.assertRegexp('cnn Columbus, OH', r'is -?\d+\.\d K')
   12.80 +                self.assertRegexp('ham Columbus, OH', r'is -?\d+\.\d K')
   12.81 +            finally:
   12.82 +                conf.supybot.plugins.Weather.temperatureUnit.setValue(orig)
   12.83 +
   12.84 +        def testNoEscapingWebError(self):
   12.85 +            self.assertNotRegexp('ham "buenos aires"', 'WebError')
   12.86 +
   12.87 +        def testWeatherRepliesWithBogusLocation(self):
   12.88 +            self.assertRegexp('weather some place that doesn\'t exist', r'.')
   12.89 +
   12.90 +        def testConvertConfig(self):
   12.91 +            try:
   12.92 +                convert = conf.supybot.plugins.Weather.convert()
   12.93 +                unit = conf.supybot.plugins.Weather.temperatureUnit()
   12.94 +                conf.supybot.plugins.Weather.convert.setValue(False)
   12.95 +                conf.supybot.plugins.Weather.temperatureUnit.setValue('C')
   12.96 +                self.assertRegexp('ham london, gb', r'-?\d+\.\d[^C]*C')
   12.97 +                self.assertRegexp('ham 02115', r'-?\d+\.\d[^F]*F')
   12.98 +                conf.supybot.plugins.Weather.temperatureUnit.setValue('F')
   12.99 +                self.assertRegexp('ham london, gb', r'-?\d+\.\d[^C]*C')
  12.100 +                self.assertRegexp('ham 02115', r'-?\d+\.\d[^F]*F')
  12.101 +                conf.supybot.plugins.Weather.convert.setValue(True)
  12.102 +                conf.supybot.plugins.Weather.temperatureUnit.setValue('C')
  12.103 +                self.assertRegexp('ham london, gb', r'-?\d+\.\d[^C]*C')
  12.104 +                self.assertRegexp('ham 02115', r'-?\d+\.\d[^C]*C')
  12.105 +                conf.supybot.plugins.Weather.temperatureUnit.setValue('F')
  12.106 +                self.assertRegexp('ham london, gb', r'-?\d+\.\d[^F]*F')
  12.107 +                self.assertRegexp('ham 02115', r'-?\d+\.\d[^F]*F')
  12.108 +            finally:
  12.109 +                conf.supybot.plugins.Weather.convert.setValue(convert)
  12.110 +                conf.supybot.plugins.Weather.temperatureUnit.setValue(unit)
  12.111 +
  12.112 +
  12.113 +# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79:
    13.1 --- a/trunk/quahog/plugins/Weather	Thu Oct 22 10:12:03 2009 -0400
    13.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    13.3 @@ -1,1 +0,0 @@
    13.4 -/home/schultmc/.supybot/Supybot-Weather
    13.5 \ No newline at end of file