-rw-r--r--  lib/APIClients.py  104
-rw-r--r--  lib/contact_form/__init__.py  0
-rw-r--r--  lib/contact_form/forms.py  233
-rw-r--r--  lib/contact_form/urls.py  28
-rw-r--r--  lib/contact_form/views.py  66
-rw-r--r--  lib/context-processors/__init__.py  0
-rw-r--r--  lib/context-processors/context_processors.py  3
-rw-r--r--  lib/fdigg/__init__.py  0
-rw-r--r--  lib/fdigg/middleware.py  11
-rwxr-xr-x  lib/markdown2.py  1877
-rw-r--r--  lib/pagination/__init__.py  1
-rw-r--r--  lib/pagination/middleware.py  25
-rw-r--r--  lib/pagination/models.py  1
-rw-r--r--  lib/pagination/templates/pagination/pagination.html  30
-rw-r--r--  lib/pagination/templatetags/__init__.py  1
-rw-r--r--  lib/pagination/templatetags/pagination_tags.py  235
-rw-r--r--  lib/pagination/tests.py  52
-rw-r--r--  lib/pydelicious.py  817
-rw-r--r--  lib/strutils.py  50
-rw-r--r--  lib/templatetags/__init__.py  0
-rw-r--r--  lib/templatetags/templatetags/__init__.py  0
-rw-r--r--  lib/templatetags/templatetags/get_latest.py  1
-rw-r--r--  lib/templatetags/templatetags/get_latest_pub.py  1
-rw-r--r--  lib/templatetags/templatetags/markdown.py  9
-rw-r--r--  lib/templatetags/templatetags/slugify_under.py  15
-rw-r--r--  lib/templatetags/templatetags/smartypants.py  878
-rw-r--r--  lib/templatetags/templatetags/truncateletters.py  24
-rw-r--r--  lib/templatetags/templatetags/typogrify.py  216
-rw-r--r--  lib/utils/GeoClient.py  292
-rw-r--r--  lib/utils/__init__.py  0
-rw-r--r--  lib/utils/email_multipart.py  80
-rw-r--r--  lib/utils/pydelicious.py  817
32 files changed, 5867 insertions(+), 0 deletions(-)
diff --git a/lib/APIClients.py b/lib/APIClients.py
new file mode 100644
index 0000000..24ab97b
--- /dev/null
+++ b/lib/APIClients.py
@@ -0,0 +1,104 @@
+# APIClients for grabbing data from popular web services
+# By Scott Gilbertson
+# Copyright is lame, take what you want, except for those portions noted
+
+# Dependencies:
+import sys, urllib
+import xml.etree.cElementTree as xml_parser
+
+
+DEBUG = 0
+
+"""
+Base class -- written against GoodReads.com, but it works for any RSS feed; just send an empty string for anything you don't need.
+"""
+class APIClient:
+ def __init__(self, base_path, api_key):
+ self.api_key = api_key
+ self.base_path = base_path
+
+    def __getattr__(self, method):
+        # Dynamically build an API call for the requested attribute name;
+        # the default arguments bind `self` and `method` at definition time.
+        def api_call(_self=self, _method=method, **params):
+            url = "%s%s?%s&" % (self.base_path, self.api_key, urllib.urlencode(params))
+            if DEBUG: print url
+            data = self.fetch(url)
+            return data
+
+        return api_call
+
+ def fetch(self, url):
+ u = urllib.FancyURLopener(None)
+ usock = u.open(url)
+ rawdata = usock.read()
+ if DEBUG: print rawdata
+ usock.close()
+ return xml_parser.fromstring(rawdata)
+
+"""
+ Extend APIClient to work with the ma.gnolia.com API
+ (http://wiki.ma.gnolia.com/Ma.gnolia_API)
+ Adds some error handling as well
+"""
+class MagnoliaError(Exception):
+ def __init__(self, code, message):
+ self.code = code
+ self.message = message
+
+ def __str__(self):
+ return 'Magnolia Error %s: %s' % (self.code, self.message)
+
+
+class MagnoliaClient(APIClient):
+    def __getattr__(self, method):
+        def api_call(_self=self, _method=method, **params):
+            url = "%s%s?%s&api_key=%s" % (self.base_path, _method, urllib.urlencode(params), self.api_key)
+            if DEBUG: print url
+            data = APIClient.fetch(self, url)
+            return data
+        return api_call
+
+
+"""
+ Extend APIClient to work with the Flickr API
+ (http://www.flickr.com/services/api/)
+ Adds error handling as well
+"""
+
+class FlickrError(Exception):
+ def __init__(self, code, message):
+ self.code = code
+ self.message = message
+
+ def __str__(self):
+ return 'Flickr Error %s: %s' % (self.code, self.message)
+
+class FlickrClient(APIClient):
+    def __getattr__(self, method):
+        def api_call(_self=self, _method=method, **params):
+            _method = _method.replace("_", ".")
+            url = "%s?method=%s&%s&api_key=%s" % (self.base_path, _method, urllib.urlencode(params), self.api_key)
+            if DEBUG: print url
+            data = APIClient.fetch(self, url)
+            return data
+        return api_call
+
+class TumblrClient:
+ def __init__(self, base_path):
+ self.base_path = base_path
+
+    def __getattr__(self, method):
+        def api_call(_self=self, _method=method, **params):
+            url = "%s" % (self.base_path)
+            if DEBUG: print url
+            data = self.fetch(url)
+            return data
+
+        return api_call
+
+ def fetch(self, url):
+ u = urllib.FancyURLopener(None)
+ usock = u.open(url)
+ rawdata = usock.read()
+ if DEBUG: print rawdata
+ usock.close()
+ return xml_parser.fromstring(rawdata)
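
For context, these clients are driven entirely through dynamic attribute access: any attribute lookup becomes an API call. A minimal usage sketch for FlickrClient (the endpoint, key, and user id below are placeholders, not values from this commit):

    flickr = FlickrClient("http://api.flickr.com/services/rest/", "YOUR_API_KEY")
    # Underscores become dots, so this requests flickr.people.getPublicPhotos:
    photos = flickr.flickr_people_getPublicPhotos(user_id="00000000@N00")
    for photo in photos.findall(".//photo"):
        print photo.get("title")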
diff --git a/lib/contact_form/__init__.py b/lib/contact_form/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/lib/contact_form/__init__.py
diff --git a/lib/contact_form/forms.py b/lib/contact_form/forms.py
new file mode 100644
index 0000000..921d8ee
--- /dev/null
+++ b/lib/contact_form/forms.py
@@ -0,0 +1,233 @@
+"""
+A base contact form for allowing users to send email messages through
+a web interface, and a subclass demonstrating useful functionality.
+
+"""
+
+
+from django import forms
+from django.conf import settings
+from django.core.mail import send_mail
+from django.template import loader, RequestContext
+from django.contrib.sites.models import Site
+
+
+# I put this on all required fields, because it's easier to pick up
+# on them with CSS or JavaScript if they have a class of "required"
+# in the HTML. Your mileage may vary.
+attrs_dict = { 'class': 'required' }
+
+
+class ContactForm(forms.Form):
+ """
+ Base contact form class from which all contact form classes should
+ inherit.
+
+ If you don't need any custom functionality, you can simply use
+ this form to provide basic contact functionality; it will collect
+ name, email address and message.
+
+ The ``contact_form`` view included in this application knows how
+ to work with this form and can handle many types of subclasses as
+ well (see below for a discussion of the important points), so in
+ many cases it will be all that you need. If you'd like to use this
+ form or a subclass of it from one of your own views, just do the
+ following:
+
+ 1. When you instantiate the form, pass the current
+ ``HttpRequest`` object to the constructor as the keyword
+ argument ``request``; this is used internally by the base
+ implementation, and also made available so that subclasses
+ can add functionality which relies on inspecting the
+ request.
+
+ 2. To send the message, call the form's ``save`` method, which
+ accepts the keyword argument ``fail_silently`` and defaults
+ it to ``False``. This argument is passed directly to
+ ``send_mail``, and allows you to suppress or raise
+ exceptions as needed for debugging. The ``save`` method has
+ no return value.
+
+ Other than that, treat it like any other form; validity checks and
+ validated data are handled normally, through the ``is_valid``
+ method and the ``cleaned_data`` dictionary.
+
+
+ Base implementation
+ -------------------
+
+ Under the hood, this form uses a somewhat abstracted interface in
+ order to make it easier to subclass and add functionality. There
+ are several important attributes subclasses may want to look at
+ overriding, all of which will work (in the base implementation) as
+ either plain attributes or as callable methods:
+
+    * ``from_email`` -- used to get the address to use in the
+      ``From:`` header of the message. The implementation here
+      returns the address submitted in the form's ``email`` field.
+
+ * ``message`` -- used to get the message body as a string. The
+ base implementation renders a template using the form's
+ ``cleaned_data`` dictionary as context.
+
+ * ``recipient_list`` -- used to generate the list of
+ recipients for the message. The base implementation returns
+ the email addresses specified in the ``MANAGERS`` setting.
+
+    * ``subject`` -- used to generate the subject line for the
+      message. The implementation here renders the template named
+      by ``subject_template_name`` and collapses the result to a
+      single line.
+
+ * ``template_name`` -- used by the base ``message`` method to
+ determine which template to use for rendering the
+ message. Default is ``contact_form/contact_form.txt``.
+
+    Internally, the base implementation's ``get_message_dict`` method
+ collects ``from_email``, ``message``, ``recipient_list`` and
+ ``subject`` into a dictionary, which the ``save`` method then
+ passes directly to ``send_mail`` as keyword arguments.
+
+ Particularly important is the ``message`` attribute, with its base
+ implementation as a method which renders a template; because it
+ passes ``cleaned_data`` as the template context, any additional
+ fields added by a subclass will automatically be available in the
+ template. This means that many useful subclasses can get by with
+ just adding a few fields and possibly overriding
+ ``template_name``.
+
+ Much useful functionality can be achieved in subclasses without
+ having to override much of the above; adding additional validation
+ methods works the same as any other form, and typically only a few
+    items -- ``recipient_list`` and ``subject``, for example --
+ need to be overridden to achieve customized behavior.
+
+
+ Other notes for subclassing
+ ---------------------------
+
+ Subclasses which want to inspect the current ``HttpRequest`` to
+ add functionality can access it via the attribute ``request``; the
+ base ``message`` takes advantage of this to use ``RequestContext``
+ when rendering its template. See the ``AkismetContactForm``
+ subclass in this file for an example of using the request to
+ perform additional validation.
+
+ Subclasses which override ``__init__`` need to accept ``*args``
+ and ``**kwargs``, and pass them via ``super`` in order to ensure
+ proper behavior.
+
+    Subclasses should be careful if overriding ``get_message_dict``,
+ since that method **must** return a dictionary suitable for
+ passing directly to ``send_mail`` (unless ``save`` is overridden
+ as well).
+
+ Overriding ``save`` is relatively safe, though remember that code
+ which uses your form will expect ``save`` to accept the
+ ``fail_silently`` keyword argument. In the base implementation,
+ that argument defaults to ``False``, on the assumption that it's
+ far better to notice errors than to silently not send mail from
+ the contact form (see also the Zen of Python: "Errors should never
+ pass silently, unless explicitly silenced").
+
+ """
+ def __init__(self, data=None, files=None, request=None, *args, **kwargs):
+ if request is None:
+ raise TypeError("Keyword argument 'request' must be supplied")
+ super(ContactForm, self).__init__(data=data, files=files, *args, **kwargs)
+ self.request = request
+
+ name = forms.CharField(max_length=100,
+ widget=forms.TextInput(attrs=attrs_dict),
+ label=u'Your name')
+ email = forms.EmailField(widget=forms.TextInput(attrs=dict(attrs_dict,
+ maxlength=200)),
+ label=u'Your email address')
+ body = forms.CharField(widget=forms.Textarea(attrs=attrs_dict),
+ label=u'Your message')
+
+ #from_email = settings.DEFAULT_FROM_EMAIL
+
+ recipient_list = [mail_tuple[1] for mail_tuple in settings.MANAGERS]
+
+ subject_template_name = "contact_form/contact_form_subject.txt"
+
+ template_name = 'contact_form/contact_form.txt'
+
+ _context = None
+
+ def message(self):
+ """
+ Renders the body of the message to a string.
+
+ """
+ if callable(self.template_name):
+ template_name = self.template_name()
+ else:
+ template_name = self.template_name
+ return loader.render_to_string(template_name,
+ self.get_context())
+
+ def subject(self):
+ """
+ Renders the subject of the message to a string.
+
+ """
+ subject = loader.render_to_string(self.subject_template_name,
+ self.get_context())
+ return ''.join(subject.splitlines())
+
+ def get_context(self):
+ if not self.is_valid():
+ raise ValueError("Cannot generate Context from invalid contact form")
+ if self._context is None:
+ self.cleaned_data['site_name'] = settings.SITE_NAME
+ self._context = RequestContext(self.request,
+ dict(self.cleaned_data,
+ site=Site.objects.get_current()))
+ return self._context
+
+ def get_message_dict(self):
+ if not self.is_valid():
+ raise ValueError("Message cannot be sent from invalid contact form")
+ message_dict = {}
+ for message_part in ('from_email', 'message', 'recipient_list', 'subject'):
+ attr = getattr(self, message_part)
+            message_dict[message_part] = attr() if callable(attr) else attr
+ return message_dict
+
+ def from_email(self):
+ addy = self.cleaned_data['email']
+ return addy
+
+ def save(self, fail_silently=False):
+ """
+ Builds and sends the email message.
+
+ """
+ send_mail(fail_silently=fail_silently, **self.get_message_dict())
+
+
+class AkismetContactForm(ContactForm):
+ """
+ Contact form which doesn't add any extra fields, but does add an
+ Akismet spam check to the validation routine.
+
+ Requires the setting ``AKISMET_API_KEY``, which should be a valid
+ Akismet API key.
+
+ """
+ def clean_body(self):
+ if 'body' in self.cleaned_data and getattr(settings, 'AKISMET_API_KEY', ''):
+ from akismet import Akismet
+ from django.utils.encoding import smart_str
+ akismet_api = Akismet(key=settings.AKISMET_API_KEY,
+ blog_url='http://%s/' % Site.objects.get_current().domain)
+ if akismet_api.verify_key():
+ akismet_data = { 'comment_type': 'comment',
+ 'referer': self.request.META.get('HTTP_REFERER', ''),
+ 'user_ip': self.request.META.get('REMOTE_ADDR', ''),
+ 'user_agent': self.request.META.get('HTTP_USER_AGENT', '') }
+ if akismet_api.comment_check(smart_str(self.cleaned_data['body']), data=akismet_data, build_data=True):
+ raise forms.ValidationError(u"Akismet thinks this message is spam")
+ return self.cleaned_data['body']
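
As the docstring suggests, most subclasses only add fields or swap templates. A hypothetical sketch (the field and template names are illustrative, not part of this commit):

    class PhoneContactForm(ContactForm):
        phone = forms.CharField(max_length=30,
                                widget=forms.TextInput(attrs=attrs_dict),
                                label=u'Your phone number')
        template_name = 'contact_form/phone_contact_form.txt'

Because ``message`` renders ``cleaned_data`` as the template context, the extra ``phone`` field shows up in the template with no further code.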
diff --git a/lib/contact_form/urls.py b/lib/contact_form/urls.py
new file mode 100644
index 0000000..f80c27f
--- /dev/null
+++ b/lib/contact_form/urls.py
@@ -0,0 +1,28 @@
+"""
+Example URLConf for a contact form.
+
+Because the ``contact_form`` view takes configurable arguments, it's
+recommended that you manually place it somewhere in your URL
+configuration with the arguments you want. If you just prefer the
+default, however, you can hang this URLConf somewhere in your URL
+hierarchy (for best results with the defaults, include it under
+``/contact/``).
+
+"""
+
+
+from django.conf.urls.defaults import *
+from django.views.generic.simple import direct_to_template
+
+from contact_form.views import contact_form
+
+
+urlpatterns = patterns('',
+ url(r'^$',
+ contact_form,
+ name='contact_form'),
+ url(r'^sent/$',
+ direct_to_template,
+ { 'template': 'contact_form/contact_form_sent.html' },
+ name='contact_form_sent'),
+ )
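
As the docstring above recommends, hanging this URLConf under ``/contact/`` is one include away; a sketch for a project-level urls.py:

    from django.conf.urls.defaults import *

    urlpatterns = patterns('',
        (r'^contact/', include('contact_form.urls')),
    )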
diff --git a/lib/contact_form/views.py b/lib/contact_form/views.py
new file mode 100644
index 0000000..fc33f4a
--- /dev/null
+++ b/lib/contact_form/views.py
@@ -0,0 +1,66 @@
+"""
+View which can render and send email from a contact form.
+
+"""
+
+
+from django.http import HttpResponseRedirect
+from django.shortcuts import render_to_response
+from django.template import RequestContext
+from django.contrib.auth.views import redirect_to_login
+
+from contact_form.forms import ContactForm
+
+
+def contact_form(request, form_class=ContactForm,
+ template_name='contact_form/contact_form.html',
+ success_url='/contact/sent/', login_required=False,
+ fail_silently=False):
+ """
+ Renders a contact form, validates its input and sends an email
+ from it.
+
+ To specify the form class to use, pass the ``form_class`` keyword
+ argument; if no ``form_class`` is specified, the base
+ ``ContactForm`` class will be used.
+
+ To specify the template to use for rendering the form (*not* the
+ template used to render the email message sent from the form,
+ which is handled by the form class), pass the ``template_name``
+ keyword argument; if not supplied, this will default to
+ ``contact_form/contact_form.html``.
+
+ To specify a URL to redirect to after a successfully-sent message,
+ pass the ``success_url`` keyword argument; if not supplied, this
+ will default to ``/contact/sent/``.
+
+ To allow only registered users to use the form, pass a ``True``
+ value for the ``login_required`` keyword argument.
+
+ To suppress exceptions raised during sending of the email, pass a
+ ``True`` value for the ``fail_silently`` keyword argument. This is
+ **not** recommended.
+
+ Template::
+
+ Passed in the ``template_name`` argument.
+
+ Context::
+
+ form
+ The form instance.
+
+ """
+ if login_required and not request.user.is_authenticated():
+ return redirect_to_login(request.path)
+
+ if request.method == 'POST':
+ form = form_class(data=request.POST, request=request)
+ if form.is_valid():
+ form.save(fail_silently=fail_silently)
+ return HttpResponseRedirect(success_url)
+ else:
+ form = form_class(request=request)
+ return render_to_response(template_name,
+ { 'form': form },
+ context_instance=RequestContext(request))
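
For non-default behavior, the view can be wired directly with keyword arguments; a hypothetical pattern using the Akismet form and requiring login:

    from django.conf.urls.defaults import *
    from contact_form.forms import AkismetContactForm
    from contact_form.views import contact_form

    urlpatterns = patterns('',
        url(r'^contact/$',
            contact_form,
            {'form_class': AkismetContactForm, 'login_required': True},
            name='contact_form'),
    )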
diff --git a/lib/context-processors/__init__.py b/lib/context-processors/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/lib/context-processors/__init__.py
diff --git a/lib/context-processors/context_processors.py b/lib/context-processors/context_processors.py
new file mode 100644
index 0000000..5691102
--- /dev/null
+++ b/lib/context-processors/context_processors.py
@@ -0,0 +1,3 @@
+def media_url(request):
+ from django.conf import settings
+    return {'media_url': settings.MEDIA_URL, 'map_key': settings.MAP_API_KEY}
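
A context processor only runs once it is registered in settings; a sketch (the dotted path assumes ``lib/context-processors`` ends up on ``sys.path``, and that ``MEDIA_URL`` and ``MAP_API_KEY`` are defined):

    TEMPLATE_CONTEXT_PROCESSORS = (
        'django.core.context_processors.auth',
        'context_processors.media_url',
    )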
diff --git a/lib/fdigg/__init__.py b/lib/fdigg/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/lib/fdigg/__init__.py
diff --git a/lib/fdigg/middleware.py b/lib/fdigg/middleware.py
new file mode 100644
index 0000000..c21ab7f
--- /dev/null
+++ b/lib/fdigg/middleware.py
@@ -0,0 +1,11 @@
+from django.shortcuts import render_to_response
+import re
+import logging
+digg_re = re.compile(r'http://digg\.com/\w{1,8}/*(\?.*)?$')
+class FckDiggMiddleware(object):
+    def process_request(self, request):
+
+        if 'HTTP_REFERER' in request.META:
+            logging.info(request.META['HTTP_REFERER'])
+            if digg_re.search(request.META['HTTP_REFERER']):
+                return render_to_response('details/fck_digg.html')
\ No newline at end of file
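
Like any middleware, this takes effect only when registered; a sketch of the settings entry (the dotted path assumes ``lib`` is on the Python path):

    MIDDLEWARE_CLASSES = (
        'django.middleware.common.CommonMiddleware',
        'fdigg.middleware.FckDiggMiddleware',
    )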
diff --git a/lib/markdown2.py b/lib/markdown2.py
new file mode 100755
index 0000000..d72f414
--- /dev/null
+++ b/lib/markdown2.py
@@ -0,0 +1,1877 @@
+#!/usr/bin/env python
+# Copyright (c) 2007-2008 ActiveState Corp.
+# License: MIT (http://www.opensource.org/licenses/mit-license.php)
+
+r"""A fast and complete Python implementation of Markdown.
+
+[from http://daringfireball.net/projects/markdown/]
+> Markdown is a text-to-HTML filter; it translates an easy-to-read /
+> easy-to-write structured text format into HTML. Markdown's text
+> format is most similar to that of plain text email, and supports
+> features such as headers, *emphasis*, code blocks, blockquotes, and
+> links.
+>
+> Markdown's syntax is designed not as a generic markup language, but
+> specifically to serve as a front-end to (X)HTML. You can use span-level
+> HTML tags anywhere in a Markdown document, and you can use block level
+> HTML tags (like <div> and <table> as well).
+
+Module usage:
+
+ >>> import markdown2
+ >>> markdown2.markdown("*boo!*") # or use `html = markdown_path(PATH)`
+ u'<p><em>boo!</em></p>\n'
+
+ >>> markdowner = Markdown()
+ >>> markdowner.convert("*boo!*")
+ u'<p><em>boo!</em></p>\n'
+ >>> markdowner.convert("**boom!**")
+ u'<p><strong>boom!</strong></p>\n'
+
+This implementation of Markdown implements the full "core" syntax plus a
+number of extras (e.g., code syntax coloring, footnotes) as described on
+<http://code.google.com/p/python-markdown2/wiki/Extras>.
+"""
+
+cmdln_desc = """A fast and complete Python implementation of Markdown, a
+text-to-HTML conversion tool for web writers.
+"""
+
+# Dev Notes:
+# - There is already a Python markdown processor
+# (http://www.freewisdom.org/projects/python-markdown/).
+# - Python's regex syntax doesn't have '\z', so I'm using '\Z'. I'm
+#   not yet sure if there are implications with this. Compare 'pydoc sre'
+# and 'perldoc perlre'.
+
+__version_info__ = (1, 0, 1, 13) # first three nums match Markdown.pl
+__version__ = '1.0.1.13'
+__author__ = "Trent Mick"
+
+import os
+import sys
+from pprint import pprint
+import re
+import logging
+try:
+ from hashlib import md5
+except ImportError:
+ from md5 import md5
+import optparse
+from random import random
+import codecs
+
+
+
+#---- Python version compat
+
+if sys.version_info[:2] < (2,4):
+ from sets import Set as set
+ def reversed(sequence):
+ for i in sequence[::-1]:
+ yield i
+ def _unicode_decode(s, encoding, errors='xmlcharrefreplace'):
+ return unicode(s, encoding, errors)
+else:
+ def _unicode_decode(s, encoding, errors='strict'):
+ return s.decode(encoding, errors)
+
+
+#---- globals
+
+DEBUG = False
+log = logging.getLogger("markdown")
+
+DEFAULT_TAB_WIDTH = 4
+
+# Table of hash values for escaped characters:
+def _escape_hash(s):
+ # Lame attempt to avoid possible collision with someone actually
+    # using the MD5 hexdigest of one of these chars in their text.
+ # Other ideas: random.random(), uuid.uuid()
+ #return md5(s).hexdigest() # Markdown.pl effectively does this.
+ return 'md5-'+md5(s).hexdigest()
+g_escape_table = dict([(ch, _escape_hash(ch))
+ for ch in '\\`*_{}[]()>#+-.!'])
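+# e.g. g_escape_table['*'] == 'md5-' + md5('*').hexdigest()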
+
+
+
+#---- exceptions
+
+class MarkdownError(Exception):
+ pass
+
+
+
+#---- public api
+
+def markdown_path(path, encoding="utf-8",
+ html4tags=False, tab_width=DEFAULT_TAB_WIDTH,
+ safe_mode=None, extras=None, link_patterns=None,
+ use_file_vars=False):
+ text = codecs.open(path, 'r', encoding).read()
+ return Markdown(html4tags=html4tags, tab_width=tab_width,
+ safe_mode=safe_mode, extras=extras,
+ link_patterns=link_patterns,
+ use_file_vars=use_file_vars).convert(text)
+
+def markdown(text, html4tags=False, tab_width=DEFAULT_TAB_WIDTH,
+ safe_mode=None, extras=None, link_patterns=None,
+ use_file_vars=False):
+ return Markdown(html4tags=html4tags, tab_width=tab_width,
+ safe_mode=safe_mode, extras=extras,
+ link_patterns=link_patterns,
+ use_file_vars=use_file_vars).convert(text)
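+
+# A doctest-style check of the helper above, mirroring the module docstring:
+#     >>> markdown("*boo!*")
+#     u'<p><em>boo!</em></p>\n'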
+
+class Markdown(object):
+ # The dict of "extras" to enable in processing -- a mapping of
+ # extra name to argument for the extra. Most extras do not have an
+ # argument, in which case the value is None.
+ #
+ # This can be set via (a) subclassing and (b) the constructor
+ # "extras" argument.
+ extras = None
+
+ urls = None
+ titles = None
+ html_blocks = None
+ html_spans = None
+ html_removed_text = "[HTML_REMOVED]" # for compat with markdown.py
+
+ # Used to track when we're inside an ordered or unordered list
+ # (see _ProcessListItems() for details):
+ list_level = 0
+
+ _ws_only_line_re = re.compile(r"^[ \t]+$", re.M)
+
+ def __init__(self, html4tags=False, tab_width=4, safe_mode=None,
+ extras=None, link_patterns=None, use_file_vars=False):
+ if html4tags:
+ self.empty_element_suffix = ">"
+ else:
+ self.empty_element_suffix = " />"
+ self.tab_width = tab_width
+
+ # For compatibility with earlier markdown2.py and with
+ # markdown.py's safe_mode being a boolean,
+ # safe_mode == True -> "replace"
+ if safe_mode is True:
+ self.safe_mode = "replace"
+ else:
+ self.safe_mode = safe_mode
+
+ if self.extras is None:
+ self.extras = {}
+ elif not isinstance(self.extras, dict):
+ self.extras = dict([(e, None) for e in self.extras])
+ if extras:
+ if not isinstance(extras, dict):
+ extras = dict([(e, None) for e in extras])
+ self.extras.update(extras)
+ assert isinstance(self.extras, dict)
+ self._instance_extras = self.extras.copy()
+ self.link_patterns = link_patterns
+ self.use_file_vars = use_file_vars
+ self._outdent_re = re.compile(r'^(\t|[ ]{1,%d})' % tab_width, re.M)
+
+ def reset(self):
+ self.urls = {}
+ self.titles = {}
+ self.html_blocks = {}
+ self.html_spans = {}
+ self.list_level = 0
+ self.extras = self._instance_extras.copy()
+ if "footnotes" in self.extras:
+ self.footnotes = {}
+ self.footnote_ids = []
+
+ def convert(self, text):
+ """Convert the given text."""
+ # Main function. The order in which other subs are called here is
+ # essential. Link and image substitutions need to happen before
+ # _EscapeSpecialChars(), so that any *'s or _'s in the <a>
+ # and <img> tags get encoded.
+
+ # Clear the global hashes. If we don't clear these, you get conflicts
+ # from other articles when generating a page which contains more than
+ # one article (e.g. an index page that shows the N most recent
+ # articles):
+ self.reset()
+
+ if not isinstance(text, unicode):
+ #TODO: perhaps shouldn't presume UTF-8 for string input?
+ text = unicode(text, 'utf-8')
+
+ if self.use_file_vars:
+ # Look for emacs-style file variable hints.
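+            # For example, a document whose head contains a (hypothetical)
+            # hint like:
+            #   <!-- -*- markdown-extras: footnotes,code-friendly -*- -->
+            # would enable those two extras here.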
+ emacs_vars = self._get_emacs_vars(text)
+ if "markdown-extras" in emacs_vars:
+ splitter = re.compile("[ ,]+")
+ for e in splitter.split(emacs_vars["markdown-extras"]):
+ if '=' in e:
+ ename, earg = e.split('=', 1)
+ try:
+ earg = int(earg)
+ except ValueError:
+ pass
+ else:
+ ename, earg = e, None
+ self.extras[ename] = earg
+
+ # Standardize line endings:
+ text = re.sub("\r\n|\r", "\n", text)
+
+ # Make sure $text ends with a couple of newlines:
+ text += "\n\n"
+
+ # Convert all tabs to spaces.
+ text = self._detab(text)
+
+ # Strip any lines consisting only of spaces and tabs.
+ # This makes subsequent regexen easier to write, because we can
+ # match consecutive blank lines with /\n+/ instead of something
+ # contorted like /[ \t]*\n+/ .
+ text = self._ws_only_line_re.sub("", text)
+
+ if self.safe_mode:
+ text = self._hash_html_spans(text)
+
+ # Turn block-level HTML blocks into hash entries
+ text = self._hash_html_blocks(text, raw=True)
+
+ # Strip link definitions, store in hashes.
+ if "footnotes" in self.extras:
+ # Must do footnotes first because an unlucky footnote defn
+ # looks like a link defn:
+ # [^4]: this "looks like a link defn"
+ text = self._strip_footnote_definitions(text)
+ text = self._strip_link_definitions(text)
+
+ text = self._run_block_gamut(text)
+
+ text = self._unescape_special_chars(text)
+
+ if "footnotes" in self.extras:
+ text = self._add_footnotes(text)
+
+ if self.safe_mode:
+ text = self._unhash_html_spans(text)
+
+ text += "\n"
+ return text
+
+ _emacs_oneliner_vars_pat = re.compile(r"-\*-\s*([^\r\n]*?)\s*-\*-", re.UNICODE)
+ # This regular expression is intended to match blocks like this:
+ # PREFIX Local Variables: SUFFIX
+ # PREFIX mode: Tcl SUFFIX
+ # PREFIX End: SUFFIX
+ # Some notes:
+ # - "[ \t]" is used instead of "\s" to specifically exclude newlines
+ # - "(\r\n|\n|\r)" is used instead of "$" because the sre engine does
+ # not like anything other than Unix-style line terminators.
+ _emacs_local_vars_pat = re.compile(r"""^
+        (?P<prefix>(?:[^\r\n])*?)
+ [\ \t]*Local\ Variables:[\ \t]*
+ (?P<suffix>.*?)(?:\r\n|\n|\r)
+ (?P<content>.*?\1End:)
+ """, re.IGNORECASE | re.MULTILINE | re.DOTALL | re.VERBOSE)
+
+ def _get_emacs_vars(self, text):
+ """Return a dictionary of emacs-style local variables.
+
+ Parsing is done loosely according to this spec (and according to
+ some in-practice deviations from this):
+ http://www.gnu.org/software/emacs/manual/html_node/emacs/Specifying-File-Variables.html#Specifying-File-Variables
+ """
+ emacs_vars = {}
+ SIZE = pow(2, 13) # 8kB
+
+ # Search near the start for a '-*-'-style one-liner of variables.
+ head = text[:SIZE]
+ if "-*-" in head:
+ match = self._emacs_oneliner_vars_pat.search(head)
+ if match:
+ emacs_vars_str = match.group(1)
+ assert '\n' not in emacs_vars_str
+ emacs_var_strs = [s.strip() for s in emacs_vars_str.split(';')
+ if s.strip()]
+ if len(emacs_var_strs) == 1 and ':' not in emacs_var_strs[0]:
+ # While not in the spec, this form is allowed by emacs:
+ # -*- Tcl -*-
+ # where the implied "variable" is "mode". This form
+ # is only allowed if there are no other variables.
+ emacs_vars["mode"] = emacs_var_strs[0].strip()
+ else:
+ for emacs_var_str in emacs_var_strs:
+ try:
+ variable, value = emacs_var_str.strip().split(':', 1)
+ except ValueError:
+ log.debug("emacs variables error: malformed -*- "
+ "line: %r", emacs_var_str)
+ continue
+ # Lowercase the variable name because Emacs allows "Mode"
+ # or "mode" or "MoDe", etc.
+ emacs_vars[variable.lower()] = value.strip()
+
+ tail = text[-SIZE:]
+ if "Local Variables" in tail:
+ match = self._emacs_local_vars_pat.search(tail)
+ if match:
+ prefix = match.group("prefix")
+ suffix = match.group("suffix")
+ lines = match.group("content").splitlines(0)
+ #print "prefix=%r, suffix=%r, content=%r, lines: %s"\
+ # % (prefix, suffix, match.group("content"), lines)
+
+ # Validate the Local Variables block: proper prefix and suffix
+ # usage.
+ for i, line in enumerate(lines):
+ if not line.startswith(prefix):
+ log.debug("emacs variables error: line '%s' "
+ "does not use proper prefix '%s'"
+ % (line, prefix))
+ return {}
+ # Don't validate suffix on last line. Emacs doesn't care,
+ # neither should we.
+ if i != len(lines)-1 and not line.endswith(suffix):
+ log.debug("emacs variables error: line '%s' "
+ "does not use proper suffix '%s'"
+ % (line, suffix))
+ return {}
+
+ # Parse out one emacs var per line.
+ continued_for = None
+ for line in lines[:-1]: # no var on the last line ("PREFIX End:")
+ if prefix: line = line[len(prefix):] # strip prefix
+ if suffix: line = line[:-len(suffix)] # strip suffix
+ line = line.strip()
+ if continued_for:
+ variable = continued_for
+ if line.endswith('\\'):
+ line = line[:-1].rstrip()
+ else:
+ continued_for = None
+ emacs_vars[variable] += ' ' + line
+ else:
+ try:
+ variable, value = line.split(':', 1)
+ except ValueError:
+ log.debug("local variables error: missing colon "
+ "in local variables entry: '%s'" % line)
+ continue
+ # Do NOT lowercase the variable name, because Emacs only
+ # allows "mode" (and not "Mode", "MoDe", etc.) in this block.
+ value = value.strip()
+ if value.endswith('\\'):
+ value = value[:-1].rstrip()
+ continued_for = variable
+ else:
+ continued_for = None
+ emacs_vars[variable] = value
+
+ # Unquote values.
+ for var, val in emacs_vars.items():
+        if len(val) > 1 and (val.startswith('"') and val.endswith('"')
+            or val.startswith("'") and val.endswith("'")):
+ emacs_vars[var] = val[1:-1]
+
+ return emacs_vars
+
+ # Cribbed from a post by Bart Lateur:
+ # <http://www.nntp.perl.org/group/perl.macperl.anyperl/154>
+ _detab_re = re.compile(r'(.*?)\t', re.M)
+ def _detab_sub(self, match):
+ g1 = match.group(1)
+ return g1 + (' ' * (self.tab_width - len(g1) % self.tab_width))
+ def _detab(self, text):
+ r"""Remove (leading?) tabs from a file.
+
+ >>> m = Markdown()
+ >>> m._detab("\tfoo")
+ ' foo'
+ >>> m._detab(" \tfoo")
+ ' foo'
+ >>> m._detab("\t foo")
+ ' foo'
+ >>> m._detab(" foo")
+ ' foo'
+ >>> m._detab(" foo\n\tbar\tblam")
+ ' foo\n bar blam'
+ """
+ if '\t' not in text:
+ return text
+ return self._detab_re.subn(self._detab_sub, text)[0]
+
+ _block_tags_a = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del'
+ _strict_tag_block_re = re.compile(r"""
+ ( # save in \1
+ ^ # start of line (with re.M)
+ <(%s) # start tag = \2
+ \b # word break
+ (.*\n)*? # any number of lines, minimally matching
+ </\2> # the matching end tag
+ [ \t]* # trailing spaces/tabs
+ (?=\n+|\Z) # followed by a newline or end of document
+ )
+ """ % _block_tags_a,
+ re.X | re.M)
+
+ _block_tags_b = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math'
+ _liberal_tag_block_re = re.compile(r"""
+ ( # save in \1
+ ^ # start of line (with re.M)
+ <(%s) # start tag = \2
+ \b # word break
+ (.*\n)*? # any number of lines, minimally matching
+ .*</\2> # the matching end tag
+ [ \t]* # trailing spaces/tabs
+ (?=\n+|\Z) # followed by a newline or end of document
+ )
+ """ % _block_tags_b,
+ re.X | re.M)
+
+ def _hash_html_block_sub(self, match, raw=False):
+ html = match.group(1)
+ if raw and self.safe_mode:
+ html = self._sanitize_html(html)
+ key = _hash_text(html)
+ self.html_blocks[key] = html
+ return "\n\n" + key + "\n\n"
+
+ def _hash_html_blocks(self, text, raw=False):
+ """Hashify HTML blocks
+
+ We only want to do this for block-level HTML tags, such as headers,
+ lists, and tables. That's because we still want to wrap <p>s around
+ "paragraphs" that are wrapped in non-block-level tags, such as anchors,
+ phrase emphasis, and spans. The list of tags we're looking for is
+ hard-coded.
+
+ @param raw {boolean} indicates if these are raw HTML blocks in
+ the original source. It makes a difference in "safe" mode.
+ """
+ if '<' not in text:
+ return text
+
+ # Pass `raw` value into our calls to self._hash_html_block_sub.
+ hash_html_block_sub = _curry(self._hash_html_block_sub, raw=raw)
+
+ # First, look for nested blocks, e.g.:
+ # <div>
+ # <div>
+ # tags for inner block must be indented.
+ # </div>
+ # </div>
+ #
+ # The outermost tags must start at the left margin for this to match, and
+ # the inner nested divs must be indented.
+ # We need to do this before the next, more liberal match, because the next
+ # match will start at the first `<div>` and stop at the first `</div>`.
+ text = self._strict_tag_block_re.sub(hash_html_block_sub, text)
+
+ # Now match more liberally, simply from `\n<tag>` to `</tag>\n`
+ text = self._liberal_tag_block_re.sub(hash_html_block_sub, text)
+
+ # Special case just for <hr />. It was easier to make a special
+ # case than to make the other regex more complicated.
+ if "<hr" in text:
+ _hr_tag_re = _hr_tag_re_from_tab_width(self.tab_width)
+ text = _hr_tag_re.sub(hash_html_block_sub, text)
+
+ # Special case for standalone HTML comments:
+ if "<!--" in text:
+ start = 0
+ while True:
+ # Delimiters for next comment block.
+ try:
+ start_idx = text.index("<!--", start)
+                except ValueError:
+ break
+ try:
+ end_idx = text.index("-->", start_idx) + 3
+                except ValueError:
+ break
+
+ # Start position for next comment block search.
+ start = end_idx
+
+ # Validate whitespace before comment.
+ if start_idx:
+ # - Up to `tab_width - 1` spaces before start_idx.
+ for i in range(self.tab_width - 1):
+ if text[start_idx - 1] != ' ':
+ break
+ start_idx -= 1
+ if start_idx == 0:
+ break
+ # - Must be preceded by 2 newlines or hit the start of
+ # the document.
+ if start_idx == 0:
+ pass
+ elif start_idx == 1 and text[0] == '\n':
+ start_idx = 0 # to match minute detail of Markdown.pl regex
+ elif text[start_idx-2:start_idx] == '\n\n':
+ pass
+ else:
+ break
+
+ # Validate whitespace after comment.
+ # - Any number of spaces and tabs.
+ while end_idx < len(text):
+ if text[end_idx] not in ' \t':
+ break
+ end_idx += 1
+            # - Must be followed by 2 newlines or hit end of text.
+ if text[end_idx:end_idx+2] not in ('', '\n', '\n\n'):
+ continue
+
+ # Escape and hash (must match `_hash_html_block_sub`).
+ html = text[start_idx:end_idx]
+ if raw and self.safe_mode:
+ html = self._sanitize_html(html)
+ key = _hash_text(html)
+ self.html_blocks[key] = html
+ text = text[:start_idx] + "\n\n" + key + "\n\n" + text[end_idx:]
+
+ if "xml" in self.extras:
+ # Treat XML processing instructions and namespaced one-liner
+ # tags as if they were block HTML tags. E.g., if standalone
+ # (i.e. are their own paragraph), the following do not get
+ # wrapped in a <p> tag:
+ # <?foo bar?>
+ #
+ # <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="chapter_1.md"/>
+ _xml_oneliner_re = _xml_oneliner_re_from_tab_width(self.tab_width)
+ text = _xml_oneliner_re.sub(hash_html_block_sub, text)
+
+ return text
+
+ def _strip_link_definitions(self, text):
+ # Strips link definitions from text, stores the URLs and titles in
+ # hash references.
+ less_than_tab = self.tab_width - 1
+
+ # Link defs are in the form:
+ # [id]: url "optional title"
+ _link_def_re = re.compile(r"""
+ ^[ ]{0,%d}\[(.+)\]: # id = \1
+ [ \t]*
+ \n? # maybe *one* newline
+ [ \t]*
+ <?(.+?)>? # url = \2
+ [ \t]*
+ (?:
+ \n? # maybe one newline
+ [ \t]*
+ (?<=\s) # lookbehind for whitespace
+ ['"(]
+ ([^\n]*) # title = \3
+ ['")]
+ [ \t]*
+ )? # title is optional
+ (?:\n+|\Z)
+ """ % less_than_tab, re.X | re.M | re.U)
+ return _link_def_re.sub(self._extract_link_def_sub, text)
+
+ def _extract_link_def_sub(self, match):
+ id, url, title = match.groups()
+ key = id.lower() # Link IDs are case-insensitive
+ self.urls[key] = self._encode_amps_and_angles(url)
+ if title:
+ self.titles[key] = title.replace('"', '&quot;')
+ return ""
+
+ def _extract_footnote_def_sub(self, match):
+ id, text = match.groups()
+ text = _dedent(text, skip_first_line=not text.startswith('\n')).strip()
+ normed_id = re.sub(r'\W', '-', id)
+ # Ensure footnote text ends with a couple newlines (for some
+ # block gamut matches).
+ self.footnotes[normed_id] = text + "\n\n"
+ return ""
+
+ def _strip_footnote_definitions(self, text):
+ """A footnote definition looks like this:
+
+ [^note-id]: Text of the note.
+
+ May include one or more indented paragraphs.
+
+ Where,
+ - The 'note-id' can be pretty much anything, though typically it
+ is the number of the footnote.
+ - The first paragraph may start on the next line, like so:
+
+ [^note-id]:
+ Text of the note.
+ """
+ less_than_tab = self.tab_width - 1
+ footnote_def_re = re.compile(r'''
+ ^[ ]{0,%d}\[\^(.+)\]: # id = \1
+ [ \t]*
+ ( # footnote text = \2
+ # First line need not start with the spaces.
+ (?:\s*.*\n+)
+ (?:
+ (?:[ ]{%d} | \t) # Subsequent lines must be indented.
+ .*\n+
+ )*
+ )
+ # Lookahead for non-space at line-start, or end of doc.
+ (?:(?=^[ ]{0,%d}\S)|\Z)
+ ''' % (less_than_tab, self.tab_width, self.tab_width),
+ re.X | re.M)
+ return footnote_def_re.sub(self._extract_footnote_def_sub, text)
+
+
+ _hr_res = [
+ re.compile(r"^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$", re.M),
+ re.compile(r"^[ ]{0,2}([ ]?\-[ ]?){3,}[ \t]*$", re.M),
+ re.compile(r"^[ ]{0,2}([ ]?\_[ ]?){3,}[ \t]*$", re.M),
+ ]
+
+ def _run_block_gamut(self, text):
+ # These are all the transformations that form block-level
+ # tags like paragraphs, headers, and list items.
+
+ text = self._do_headers(text)
+
+ # Do Horizontal Rules:
+ hr = "\n<hr"+self.empty_element_suffix+"\n"
+ for hr_re in self._hr_res:
+ text = hr_re.sub(hr, text)
+
+ text = self._do_lists(text)
+
+ if "pyshell" in self.extras:
+ text = self._prepare_pyshell_blocks(text)
+
+ text = self._do_code_blocks(text)
+
+ text = self._do_block_quotes(text)
+
+ # We already ran _HashHTMLBlocks() before, in Markdown(), but that
+ # was to escape raw HTML in the original Markdown source. This time,
+ # we're escaping the markup we've just created, so that we don't wrap
+ # <p> tags around block-level tags.
+ text = self._hash_html_blocks(text)
+
+ text = self._form_paragraphs(text)
+
+ return text
+
+ def _pyshell_block_sub(self, match):
+ lines = match.group(0).splitlines(0)
+ _dedentlines(lines)
+ indent = ' ' * self.tab_width
+ s = ('\n' # separate from possible cuddled paragraph
+ + indent + ('\n'+indent).join(lines)
+ + '\n\n')
+ return s
+
+ def _prepare_pyshell_blocks(self, text):
+ """Ensure that Python interactive shell sessions are put in
+ code blocks -- even if not properly indented.
+ """
+ if ">>>" not in text:
+ return text
+
+ less_than_tab = self.tab_width - 1
+ _pyshell_block_re = re.compile(r"""
+ ^([ ]{0,%d})>>>[ ].*\n # first line
+ ^(\1.*\S+.*\n)* # any number of subsequent lines
+ ^\n # ends with a blank line
+ """ % less_than_tab, re.M | re.X)
+
+ return _pyshell_block_re.sub(self._pyshell_block_sub, text)
+
+ def _run_span_gamut(self, text):
+ # These are all the transformations that occur *within* block-level
+ # tags like paragraphs, headers, and list items.
+
+ text = self._do_code_spans(text)
+
+ text = self._escape_special_chars(text)
+
+ # Process anchor and image tags.
+ text = self._do_links(text)
+
+ # Make links out of things like `<http://example.com/>`
+ # Must come after _do_links(), because you can use < and >
+ # delimiters in inline links like [this](<url>).
+ text = self._do_auto_links(text)
+
+ if "link-patterns" in self.extras:
+ text = self._do_link_patterns(text)
+
+ text = self._encode_amps_and_angles(text)
+
+ text = self._do_italics_and_bold(text)
+
+ # Do hard breaks:
+ text = re.sub(r" {2,}\n", " <br%s\n" % self.empty_element_suffix, text)
+
+ return text
+
+ # "Sorta" because auto-links are identified as "tag" tokens.
+ _sorta_html_tokenize_re = re.compile(r"""
+ (
+ # tag
+ </?
+ (?:\w+) # tag name
+ (?:\s+(?:[\w-]+:)?[\w-]+=(?:".*?"|'.*?'))* # attributes
+ \s*/?>
+ |
+ # auto-link (e.g., <http://www.activestate.com/>)
+ <\w+[^>]*>
+ |
+ <!--.*?--> # comment
+ |
+ <\?.*?\?> # processing instruction
+ )
+ """, re.X)
+
+ def _escape_special_chars(self, text):
+ # Python markdown note: the HTML tokenization here differs from
+ # that in Markdown.pl, hence the behaviour for subtle cases can
+ # differ (I believe the tokenizer here does a better job because
+ # it isn't susceptible to unmatched '<' and '>' in HTML tags).
+ # Note, however, that '>' is not allowed in an auto-link URL
+ # here.
+ escaped = []
+ is_html_markup = False
+ for token in self._sorta_html_tokenize_re.split(text):
+ if is_html_markup:
+ # Within tags/HTML-comments/auto-links, encode * and _
+ # so they don't conflict with their use in Markdown for
+ # italics and strong. We're replacing each such
+ # character with its corresponding MD5 checksum value;
+ # this is likely overkill, but it should prevent us from
+ # colliding with the escape values by accident.
+ escaped.append(token.replace('*', g_escape_table['*'])
+ .replace('_', g_escape_table['_']))
+ else:
+ escaped.append(self._encode_backslash_escapes(token))
+ is_html_markup = not is_html_markup
+ return ''.join(escaped)
+
+ def _hash_html_spans(self, text):
+ # Used for safe_mode.
+
+ def _is_auto_link(s):
+ if ':' in s and self._auto_link_re.match(s):
+ return True
+ elif '@' in s and self._auto_email_link_re.match(s):
+ return True
+ return False
+
+ tokens = []
+ is_html_markup = False
+ for token in self._sorta_html_tokenize_re.split(text):
+ if is_html_markup and not _is_auto_link(token):
+ sanitized = self._sanitize_html(token)
+ key = _hash_text(sanitized)
+ self.html_spans[key] = sanitized
+ tokens.append(key)
+ else:
+ tokens.append(token)
+ is_html_markup = not is_html_markup
+ return ''.join(tokens)
+
+ def _unhash_html_spans(self, text):
+ for key, sanitized in self.html_spans.items():
+ text = text.replace(key, sanitized)
+ return text
+
+ def _sanitize_html(self, s):
+ if self.safe_mode == "replace":
+ return self.html_removed_text
+ elif self.safe_mode == "escape":
+ replacements = [
+ ('&', '&amp;'),
+ ('<', '&lt;'),
+ ('>', '&gt;'),
+ ]
+ for before, after in replacements:
+ s = s.replace(before, after)
+ return s
+ else:
+ raise MarkdownError("invalid value for 'safe_mode': %r (must be "
+ "'escape' or 'replace')" % self.safe_mode)
+
+ _tail_of_inline_link_re = re.compile(r'''
+ # Match tail of: [text](/url/) or [text](/url/ "title")
+ \( # literal paren
+ [ \t]*
+ (?P<url> # \1
+ <.*?>
+ |
+ .*?
+ )
+ [ \t]*
+ ( # \2
+ (['"]) # quote char = \3
+ (?P<title>.*?)
+ \3 # matching quote
+ )? # title is optional
+ \)
+ ''', re.X | re.S)
+ _tail_of_reference_link_re = re.compile(r'''
+ # Match tail of: [text][id]
+ [ ]? # one optional space
+ (?:\n[ ]*)? # one optional newline followed by spaces
+ \[
+ (?P<id>.*?)
+ \]
+ ''', re.X | re.S)
+
+ def _do_links(self, text):
+ """Turn Markdown link shortcuts into XHTML <a> and <img> tags.
+
+ This is a combination of Markdown.pl's _DoAnchors() and
+ _DoImages(). They are done together because that simplified the
+ approach. It was necessary to use a different approach than
+ Markdown.pl because of the lack of atomic matching support in
+ Python's regex engine used in $g_nested_brackets.
+ """
+ MAX_LINK_TEXT_SENTINEL = 3000 # markdown2 issue 24
+
+ # `anchor_allowed_pos` is used to support img links inside
+ # anchors, but not anchors inside anchors. An anchor's start
+ # pos must be `>= anchor_allowed_pos`.
+ anchor_allowed_pos = 0
+
+ curr_pos = 0
+ while True: # Handle the next link.
+ # The next '[' is the start of:
+ # - an inline anchor: [text](url "title")
+ # - a reference anchor: [text][id]
+ # - an inline img: ![text](url "title")
+ # - a reference img: ![text][id]
+ # - a footnote ref: [^id]
+ # (Only if 'footnotes' extra enabled)
+ # - a footnote defn: [^id]: ...
+ # (Only if 'footnotes' extra enabled) These have already
+ # been stripped in _strip_footnote_definitions() so no
+ # need to watch for them.
+ # - a link definition: [id]: url "title"
+ # These have already been stripped in
+ # _strip_link_definitions() so no need to watch for them.
+ # - not markup: [...anything else...
+ try:
+ start_idx = text.index('[', curr_pos)
+ except ValueError:
+ break
+ text_length = len(text)
+
+ # Find the matching closing ']'.
+ # Markdown.pl allows *matching* brackets in link text so we
+ # will here too. Markdown.pl *doesn't* currently allow
+ # matching brackets in img alt text -- we'll differ in that
+ # regard.
+ bracket_depth = 0
+ for p in range(start_idx+1, min(start_idx+MAX_LINK_TEXT_SENTINEL,
+ text_length)):
+ ch = text[p]
+ if ch == ']':
+ bracket_depth -= 1
+ if bracket_depth < 0:
+ break
+ elif ch == '[':
+ bracket_depth += 1
+ else:
+ # Closing bracket not found within sentinel length.
+ # This isn't markup.
+ curr_pos = start_idx + 1
+ continue
+ link_text = text[start_idx+1:p]
+
+ # Possibly a footnote ref?
+ if "footnotes" in self.extras and link_text.startswith("^"):
+ normed_id = re.sub(r'\W', '-', link_text[1:])
+ if normed_id in self.footnotes:
+ self.footnote_ids.append(normed_id)
+ result = '<sup class="footnote-ref" id="fnref-%s">' \
+ '<a href="#fn-%s">%s</a></sup>' \
+ % (normed_id, normed_id, len(self.footnote_ids))
+ text = text[:start_idx] + result + text[p+1:]
+ else:
+ # This id isn't defined, leave the markup alone.
+ curr_pos = p+1
+ continue
+
+ # Now determine what this is by the remainder.
+ p += 1
+ if p == text_length:
+ return text
+
+ # Inline anchor or img?
+ if text[p] == '(': # attempt at perf improvement
+ match = self._tail_of_inline_link_re.match(text, p)
+ if match:
+ # Handle an inline anchor or img.
+ is_img = start_idx > 0 and text[start_idx-1] == "!"
+ if is_img:
+ start_idx -= 1
+
+ url, title = match.group("url"), match.group("title")
+ if url and url[0] == '<':
+ url = url[1:-1] # '<url>' -> 'url'
+ # We've got to encode these to avoid conflicting
+ # with italics/bold.
+ url = url.replace('*', g_escape_table['*']) \
+ .replace('_', g_escape_table['_'])
+ if title:
+ title_str = ' title="%s"' \
+ % title.replace('*', g_escape_table['*']) \
+ .replace('_', g_escape_table['_']) \
+ .replace('"', '&quot;')
+ else:
+ title_str = ''
+ if is_img:
+ result = '<img src="%s" alt="%s"%s%s' \
+ % (url, link_text.replace('"', '&quot;'),
+ title_str, self.empty_element_suffix)
+ curr_pos = start_idx + len(result)
+ text = text[:start_idx] + result + text[match.end():]
+ elif start_idx >= anchor_allowed_pos:
+ result_head = '<a href="%s"%s>' % (url, title_str)
+ result = '%s%s</a>' % (result_head, link_text)
+ # <img> allowed from curr_pos on, <a> from
+ # anchor_allowed_pos on.
+ curr_pos = start_idx + len(result_head)
+ anchor_allowed_pos = start_idx + len(result)
+ text = text[:start_idx] + result + text[match.end():]
+ else:
+ # Anchor not allowed here.
+ curr_pos = start_idx + 1
+ continue
+
+ # Reference anchor or img?
+ else:
+ match = self._tail_of_reference_link_re.match(text, p)
+ if match:
+ # Handle a reference-style anchor or img.
+ is_img = start_idx > 0 and text[start_idx-1] == "!"
+ if is_img:
+ start_idx -= 1
+ link_id = match.group("id").lower()
+ if not link_id:
+ link_id = link_text.lower() # for links like [this][]
+ if link_id in self.urls:
+ url = self.urls[link_id]
+ # We've got to encode these to avoid conflicting
+ # with italics/bold.
+ url = url.replace('*', g_escape_table['*']) \
+ .replace('_', g_escape_table['_'])
+ title = self.titles.get(link_id)
+ if title:
+ title = title.replace('*', g_escape_table['*']) \
+ .replace('_', g_escape_table['_'])
+ title_str = ' title="%s"' % title
+ else:
+ title_str = ''
+ if is_img:
+ result = '<img src="%s" alt="%s"%s%s' \
+ % (url, link_text.replace('"', '&quot;'),
+ title_str, self.empty_element_suffix)
+ curr_pos = start_idx + len(result)
+ text = text[:start_idx] + result + text[match.end():]
+ elif start_idx >= anchor_allowed_pos:
+ result_head = '<a href="%s"%s>' % (url, title_str)
+ result = '%s%s</a>' % (result_head, link_text)
+ # <img> allowed from curr_pos on, <a> from
+ # anchor_allowed_pos on.
+ curr_pos = start_idx + len(result_head)
+ anchor_allowed_pos = start_idx + len(result)
+ text = text[:start_idx] + result + text[match.end():]
+ else:
+ # Anchor not allowed here.
+ curr_pos = start_idx + 1
+ else:
+ # This id isn't defined, leave the markup alone.
+ curr_pos = match.end()
+ continue
+
+ # Otherwise, it isn't markup.
+ curr_pos = start_idx + 1
+
+ return text
+
+
+ _setext_h_re = re.compile(r'^(.+)[ \t]*\n(=+|-+)[ \t]*\n+', re.M)
+ def _setext_h_sub(self, match):
+ n = {"=": 1, "-": 2}[match.group(2)[0]]
+ demote_headers = self.extras.get("demote-headers")
+ if demote_headers:
+ n = min(n + demote_headers, 6)
+ return "<h%d>%s</h%d>\n\n" \
+ % (n, self._run_span_gamut(match.group(1)), n)
+
+ _atx_h_re = re.compile(r'''
+ ^(\#{1,6}) # \1 = string of #'s
+ [ \t]*
+ (.+?) # \2 = Header text
+ [ \t]*
+ (?<!\\) # ensure not an escaped trailing '#'
+ \#* # optional closing #'s (not counted)
+ \n+
+ ''', re.X | re.M)
+ def _atx_h_sub(self, match):
+ n = len(match.group(1))
+ demote_headers = self.extras.get("demote-headers")
+ if demote_headers:
+ n = min(n + demote_headers, 6)
+ return "<h%d>%s</h%d>\n\n" \
+ % (n, self._run_span_gamut(match.group(2)), n)
+
+ def _do_headers(self, text):
+ # Setext-style headers:
+ # Header 1
+ # ========
+ #
+ # Header 2
+ # --------
+ text = self._setext_h_re.sub(self._setext_h_sub, text)
+
+ # atx-style headers:
+ # # Header 1
+ # ## Header 2
+ # ## Header 2 with closing hashes ##
+ # ...
+ # ###### Header 6
+ text = self._atx_h_re.sub(self._atx_h_sub, text)
+
+ return text
+
+
+ _marker_ul_chars = '*+-'
+ _marker_any = r'(?:[%s]|\d+\.)' % _marker_ul_chars
+ _marker_ul = '(?:[%s])' % _marker_ul_chars
+ _marker_ol = r'(?:\d+\.)'
+
+ def _list_sub(self, match):
+ lst = match.group(1)
+ lst_type = match.group(3) in self._marker_ul_chars and "ul" or "ol"
+ result = self._process_list_items(lst)
+ if self.list_level:
+ return "<%s>\n%s</%s>\n" % (lst_type, result, lst_type)
+ else:
+ return "<%s>\n%s</%s>\n\n" % (lst_type, result, lst_type)
+
+ def _do_lists(self, text):
+ # Form HTML ordered (numbered) and unordered (bulleted) lists.
+
+ for marker_pat in (self._marker_ul, self._marker_ol):
+ # Re-usable pattern to match any entire ul or ol list:
+ less_than_tab = self.tab_width - 1
+ whole_list = r'''
+ ( # \1 = whole list
+ ( # \2
+ [ ]{0,%d}
+ (%s) # \3 = first list item marker
+ [ \t]+
+ )
+ (?:.+?)
+ ( # \4
+ \Z
+ |
+ \n{2,}
+ (?=\S)
+ (?! # Negative lookahead for another list item marker
+ [ \t]*
+ %s[ \t]+
+ )
+ )
+ )
+ ''' % (less_than_tab, marker_pat, marker_pat)
+
+ # We use a different prefix before nested lists than top-level lists.
+ # See extended comment in _process_list_items().
+ #
+ # Note: There's a bit of duplication here. My original implementation
+ # created a scalar regex pattern as the conditional result of the test on
+ # $g_list_level, and then only ran the $text =~ s{...}{...}egmx
+ # substitution once, using the scalar as the pattern. This worked,
+ # everywhere except when running under MT on my hosting account at Pair
+ # Networks. There, this caused all rebuilds to be killed by the reaper (or
+ # perhaps they crashed, but that seems incredibly unlikely given that the
+ # same script on the same server ran fine *except* under MT. I've spent
+ # more time trying to figure out why this is happening than I'd like to
+ # admit. My only guess, backed up by the fact that this workaround works,
+    # is that Perl optimizes the substitution when it can figure out that the
+ # pattern will never change, and when this optimization isn't on, we run
+    # afoul of the reaper. Thus, the slightly redundant code that uses two
+ # static s/// patterns rather than one conditional pattern.
+
+ if self.list_level:
+ sub_list_re = re.compile("^"+whole_list, re.X | re.M | re.S)
+ text = sub_list_re.sub(self._list_sub, text)
+ else:
+ list_re = re.compile(r"(?:(?<=\n\n)|\A\n?)"+whole_list,
+ re.X | re.M | re.S)
+ text = list_re.sub(self._list_sub, text)
+
+ return text
+
+ _list_item_re = re.compile(r'''
+ (\n)? # leading line = \1
+ (^[ \t]*) # leading whitespace = \2
+ (%s) [ \t]+ # list marker = \3
+ ((?:.+?) # list item text = \4
+ (\n{1,2})) # eols = \5
+ (?= \n* (\Z | \2 (%s) [ \t]+))
+ ''' % (_marker_any, _marker_any),
+ re.M | re.X | re.S)
+
+ _last_li_endswith_two_eols = False
+ def _list_item_sub(self, match):
+ item = match.group(4)
+ leading_line = match.group(1)
+ leading_space = match.group(2)
+ if leading_line or "\n\n" in item or self._last_li_endswith_two_eols:
+ item = self._run_block_gamut(self._outdent(item))
+ else:
+ # Recursion for sub-lists:
+ item = self._do_lists(self._outdent(item))
+ if item.endswith('\n'):
+ item = item[:-1]
+ item = self._run_span_gamut(item)
+ self._last_li_endswith_two_eols = (len(match.group(5)) == 2)
+ return "<li>%s</li>\n" % item
+
+ def _process_list_items(self, list_str):
+ # Process the contents of a single ordered or unordered list,
+ # splitting it into individual list items.
+
+ # The $g_list_level global keeps track of when we're inside a list.
+ # Each time we enter a list, we increment it; when we leave a list,
+ # we decrement. If it's zero, we're not in a list anymore.
+ #
+ # We do this because when we're not inside a list, we want to treat
+ # something like this:
+ #
+ # I recommend upgrading to version
+ # 8. Oops, now this line is treated
+ # as a sub-list.
+ #
+ # As a single paragraph, despite the fact that the second line starts
+ # with a digit-period-space sequence.
+ #
+ # Whereas when we're inside a list (or sub-list), that line will be
+ # treated as the start of a sub-list. What a kludge, huh? This is
+ # an aspect of Markdown's syntax that's hard to parse perfectly
+ # without resorting to mind-reading. Perhaps the solution is to
+ # change the syntax rules such that sub-lists must start with a
+ # starting cardinal number; e.g. "1." or "a.".
+ self.list_level += 1
+ self._last_li_endswith_two_eols = False
+ list_str = list_str.rstrip('\n') + '\n'
+ list_str = self._list_item_re.sub(self._list_item_sub, list_str)
+ self.list_level -= 1
+ return list_str
+
+ def _get_pygments_lexer(self, lexer_name):
+ try:
+ from pygments import lexers, util
+ except ImportError:
+ return None
+ try:
+ return lexers.get_lexer_by_name(lexer_name)
+ except util.ClassNotFound:
+ return None
+
+ def _color_with_pygments(self, codeblock, lexer, **formatter_opts):
+ import pygments
+ import pygments.formatters
+
+ class HtmlCodeFormatter(pygments.formatters.HtmlFormatter):
+ def _wrap_code(self, inner):
+ """A function for use in a Pygments Formatter which
+ wraps in <code> tags.
+ """
+ yield 0, "<code>"
+ for tup in inner:
+ yield tup
+ yield 0, "</code>"
+
+ def wrap(self, source, outfile):
+ """Return the source with a code, pre, and div."""
+ return self._wrap_div(self._wrap_pre(self._wrap_code(source)))
+
+ formatter = HtmlCodeFormatter(cssclass="codehilite", **formatter_opts)
+ return pygments.highlight(codeblock, lexer, formatter)
+
+ def _code_block_sub(self, match):
+ codeblock = match.group(1)
+ codeblock = self._outdent(codeblock)
+ codeblock = self._detab(codeblock)
+ codeblock = codeblock.lstrip('\n') # trim leading newlines
+ codeblock = codeblock.rstrip() # trim trailing whitespace
+
+ if "code-color" in self.extras and codeblock.startswith(":::"):
+ lexer_name, rest = codeblock.split('\n', 1)
+ lexer_name = lexer_name[3:].strip()
+ lexer = self._get_pygments_lexer(lexer_name)
+ codeblock = rest.lstrip("\n") # Remove lexer declaration line.
+ if lexer:
+ formatter_opts = self.extras['code-color'] or {}
+ colored = self._color_with_pygments(codeblock, lexer,
+ **formatter_opts)
+ return "\n\n%s\n\n" % colored
+
+ codeblock = self._encode_code(codeblock)
+ return "\n\n<pre><code>%s\n</code></pre>\n\n" % codeblock
+
+ def _do_code_blocks(self, text):
+ """Process Markdown `<pre><code>` blocks."""
+ code_block_re = re.compile(r'''
+ (?:\n\n|\A)
+ ( # $1 = the code block -- one or more lines, starting with a space/tab
+ (?:
+ (?:[ ]{%d} | \t) # Lines must start with a tab or a tab-width of spaces
+ .*\n+
+ )+
+ )
+ ((?=^[ ]{0,%d}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
+ ''' % (self.tab_width, self.tab_width),
+ re.M | re.X)
+
+ return code_block_re.sub(self._code_block_sub, text)
+
+
+ # Rules for a code span:
+ # - backslash escapes are not interpreted in a code span
+    # - to include one backtick or a run of backticks, the delimiters must
+ # be a longer run of backticks
+ # - cannot start or end a code span with a backtick; pad with a
+ # space and that space will be removed in the emitted HTML
+ # See `test/tm-cases/escapes.text` for a number of edge-case
+ # examples.
+ _code_span_re = re.compile(r'''
+ (?<!\\)
+ (`+) # \1 = Opening run of `
+ (?!`) # See Note A test/tm-cases/escapes.text
+ (.+?) # \2 = The code block
+ (?<!`)
+ \1 # Matching closer
+ (?!`)
+ ''', re.X | re.S)
+
+ def _code_span_sub(self, match):
+ c = match.group(2).strip(" \t")
+ c = self._encode_code(c)
+ return "<code>%s</code>" % c
+
+ def _do_code_spans(self, text):
+ # * Backtick quotes are used for <code></code> spans.
+ #
+ # * You can use multiple backticks as the delimiters if you want to
+ # include literal backticks in the code span. So, this input:
+ #
+ # Just type ``foo `bar` baz`` at the prompt.
+ #
+ # Will translate to:
+ #
+ # <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
+ #
+ # There's no arbitrary limit to the number of backticks you
+    #   can use as delimiters. If you need three consecutive backticks
+ # in your code, use four for delimiters, etc.
+ #
+ # * You can use spaces to get literal backticks at the edges:
+ #
+ # ... type `` `bar` `` ...
+ #
+ # Turns to:
+ #
+ # ... type <code>`bar`</code> ...
+ return self._code_span_re.sub(self._code_span_sub, text)
+
+ def _encode_code(self, text):
+ """Encode/escape certain characters inside Markdown code runs.
+ The point is that in code, these characters are literals,
+ and lose their special Markdown meanings.
+ """
+ replacements = [
+ # Encode all ampersands; HTML entities are not
+ # entities within a Markdown code span.
+ ('&', '&amp;'),
+ # Do the angle bracket song and dance:
+ ('<', '&lt;'),
+ ('>', '&gt;'),
+ # Now, escape characters that are magic in Markdown:
+ ('*', g_escape_table['*']),
+ ('_', g_escape_table['_']),
+ ('{', g_escape_table['{']),
+ ('}', g_escape_table['}']),
+ ('[', g_escape_table['[']),
+ (']', g_escape_table[']']),
+ ('\\', g_escape_table['\\']),
+ ]
+ for before, after in replacements:
+ text = text.replace(before, after)
+ return text
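+
+    # For example (a sketch; '*' is actually replaced by its md5 hash
+    # from g_escape_table, shown symbolically here):
+    #
+    #     _encode_code("a <b> & *c*")
+    #     # -> "a &lt;b&gt; &amp; HASH(*)cHASH(*)"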
+
+ _strong_re = re.compile(r"(\*\*|__)(?=\S)(.+?[*_]*)(?<=\S)\1", re.S)
+ _em_re = re.compile(r"(\*|_)(?=\S)(.+?)(?<=\S)\1", re.S)
+ _code_friendly_strong_re = re.compile(r"\*\*(?=\S)(.+?[*_]*)(?<=\S)\*\*", re.S)
+ _code_friendly_em_re = re.compile(r"\*(?=\S)(.+?)(?<=\S)\*", re.S)
+ def _do_italics_and_bold(self, text):
+ # <strong> must go first:
+ if "code-friendly" in self.extras:
+ text = self._code_friendly_strong_re.sub(r"<strong>\1</strong>", text)
+ text = self._code_friendly_em_re.sub(r"<em>\1</em>", text)
+ else:
+ text = self._strong_re.sub(r"<strong>\2</strong>", text)
+ text = self._em_re.sub(r"<em>\2</em>", text)
+ return text
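+
+    # For example:
+    #
+    #     _do_italics_and_bold("**bold** and _em_")
+    #     # -> "<strong>bold</strong> and <em>em</em>"
+    #
+    # With the "code-friendly" extra only '*' is significant, so "_em_"
+    # above would be left untouched.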
+
+
+ _block_quote_re = re.compile(r'''
+ ( # Wrap whole match in \1
+ (
+ ^[ \t]*>[ \t]? # '>' at the start of a line
+ .+\n # rest of the first line
+ (.+\n)* # subsequent consecutive lines
+ \n* # blanks
+ )+
+ )
+ ''', re.M | re.X)
+    _bq_one_level_re = re.compile('^[ \t]*>[ \t]?', re.M)
+
+ _html_pre_block_re = re.compile(r'(\s*<pre>.+?</pre>)', re.S)
+ def _dedent_two_spaces_sub(self, match):
+ return re.sub(r'(?m)^ ', '', match.group(1))
+
+ def _block_quote_sub(self, match):
+ bq = match.group(1)
+ bq = self._bq_one_level_re.sub('', bq) # trim one level of quoting
+ bq = self._ws_only_line_re.sub('', bq) # trim whitespace-only lines
+ bq = self._run_block_gamut(bq) # recurse
+
+ bq = re.sub('(?m)^', ' ', bq)
+ # These leading spaces screw with <pre> content, so we need to fix that:
+ bq = self._html_pre_block_re.sub(self._dedent_two_spaces_sub, bq)
+
+ return "<blockquote>\n%s\n</blockquote>\n\n" % bq
+
+ def _do_block_quotes(self, text):
+ if '>' not in text:
+ return text
+ return self._block_quote_re.sub(self._block_quote_sub, text)
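+
+    # For example, the input
+    #
+    #     > foo
+    #     > bar
+    #
+    # becomes, roughly, "<blockquote>\n  <p>foo\nbar</p>\n</blockquote>".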
+
+ def _form_paragraphs(self, text):
+ # Strip leading and trailing lines:
+ text = text.strip('\n')
+
+ # Wrap <p> tags.
+ grafs = re.split(r"\n{2,}", text)
+ for i, graf in enumerate(grafs):
+ if graf in self.html_blocks:
+ # Unhashify HTML blocks
+ grafs[i] = self.html_blocks[graf]
+ else:
+ # Wrap <p> tags.
+ graf = self._run_span_gamut(graf)
+ grafs[i] = "<p>" + graf.lstrip(" \t") + "</p>"
+
+ return "\n\n".join(grafs)
+
+ def _add_footnotes(self, text):
+ if self.footnotes:
+ footer = [
+ '<div class="footnotes">',
+ '<hr' + self.empty_element_suffix,
+ '<ol>',
+ ]
+ for i, id in enumerate(self.footnote_ids):
+ if i != 0:
+ footer.append('')
+ footer.append('<li id="fn-%s">' % id)
+ footer.append(self._run_block_gamut(self.footnotes[id]))
+ backlink = ('<a href="#fnref-%s" '
+ 'class="footnoteBackLink" '
+ 'title="Jump back to footnote %d in the text.">'
+ '&#8617;</a>' % (id, i+1))
+ if footer[-1].endswith("</p>"):
+ footer[-1] = footer[-1][:-len("</p>")] \
+ + '&nbsp;' + backlink + "</p>"
+ else:
+ footer.append("\n<p>%s</p>" % backlink)
+ footer.append('</li>')
+ footer.append('</ol>')
+ footer.append('</div>')
+ return text + '\n\n' + '\n'.join(footer)
+ else:
+ return text
+
+ # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
+ # http://bumppo.net/projects/amputator/
+ _ampersand_re = re.compile(r'&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)')
+ _naked_lt_re = re.compile(r'<(?![a-z/?\$!])', re.I)
+ _naked_gt_re = re.compile(r'''(?<![a-z?!/'"-])>''', re.I)
+
+ def _encode_amps_and_angles(self, text):
+ # Smart processing for ampersands and angle brackets that need
+ # to be encoded.
+ text = self._ampersand_re.sub('&amp;', text)
+
+ # Encode naked <'s
+ text = self._naked_lt_re.sub('&lt;', text)
+
+ # Encode naked >'s
+ # Note: Other markdown implementations (e.g. Markdown.pl, PHP
+ # Markdown) don't do this.
+ text = self._naked_gt_re.sub('&gt;', text)
+ return text
+
+ def _encode_backslash_escapes(self, text):
+ for ch, escape in g_escape_table.items():
+ text = text.replace("\\"+ch, escape)
+ return text
+
+ _auto_link_re = re.compile(r'<((https?|ftp):[^\'">\s]+)>', re.I)
+ def _auto_link_sub(self, match):
+ g1 = match.group(1)
+ return '<a href="%s">%s</a>' % (g1, g1)
+
+ _auto_email_link_re = re.compile(r"""
+ <
+ (?:mailto:)?
+ (
+ [-.\w]+
+ \@
+ [-\w]+(\.[-\w]+)*\.[a-z]+
+ )
+ >
+ """, re.I | re.X | re.U)
+ def _auto_email_link_sub(self, match):
+ return self._encode_email_address(
+ self._unescape_special_chars(match.group(1)))
+
+ def _do_auto_links(self, text):
+ text = self._auto_link_re.sub(self._auto_link_sub, text)
+ text = self._auto_email_link_re.sub(self._auto_email_link_sub, text)
+ return text
+
+ def _encode_email_address(self, addr):
+ # Input: an email address, e.g. "foo@example.com"
+ #
+ # Output: the email address as a mailto link, with each character
+ # of the address encoded as either a decimal or hex entity, in
+ # the hopes of foiling most address harvesting spam bots. E.g.:
+ #
+ # <a href="&#x6D;&#97;&#105;&#108;&#x74;&#111;:&#102;&#111;&#111;&#64;&#101;
+ # x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;">&#102;&#111;&#111;
+ # &#64;&#101;x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;</a>
+ #
+ # Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
+ # mailing list: <http://tinyurl.com/yu7ue>
+ chars = [_xml_encode_email_char_at_random(ch)
+ for ch in "mailto:" + addr]
+ # Strip the mailto: from the visible part.
+ addr = '<a href="%s">%s</a>' \
+ % (''.join(chars), ''.join(chars[7:]))
+ return addr
+
+ def _do_link_patterns(self, text):
+ """Caveat emptor: there isn't much guarding against link
+ patterns being formed inside other standard Markdown links, e.g.
+ inside a [link def][like this].
+
+ Dev Notes: *Could* consider prefixing regexes with a negative
+ lookbehind assertion to attempt to guard against this.
+ """
+ link_from_hash = {}
+ for regex, repl in self.link_patterns:
+ replacements = []
+ for match in regex.finditer(text):
+ if hasattr(repl, "__call__"):
+ href = repl(match)
+ else:
+ href = match.expand(repl)
+ replacements.append((match.span(), href))
+ for (start, end), href in reversed(replacements):
+ escaped_href = (
+ href.replace('"', '&quot;') # b/c of attr quote
+ # To avoid markdown <em> and <strong>:
+ .replace('*', g_escape_table['*'])
+ .replace('_', g_escape_table['_']))
+ link = '<a href="%s">%s</a>' % (escaped_href, text[start:end])
+ hash = md5(link).hexdigest()
+ link_from_hash[hash] = link
+ text = text[:start] + hash + text[end:]
+ for hash, link in link_from_hash.items():
+ text = text.replace(hash, link)
+ return text
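+
+    # A sketch of a link_patterns entry (the regex and URL are
+    # illustrative):
+    #
+    #     link_patterns = [
+    #         (re.compile(r"RFC\s+(\d+)"),
+    #          r"http://www.rfc-editor.org/rfc/rfc\1.txt"),
+    #     ]
+    #
+    # Text matching the regex is hashed into an <a> tag and swapped back
+    # in at the end of this method.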
+
+ def _unescape_special_chars(self, text):
+ # Swap back in all the special characters we've hidden.
+ for ch, hash in g_escape_table.items():
+ text = text.replace(hash, ch)
+ return text
+
+ def _outdent(self, text):
+ # Remove one level of line-leading tabs or spaces
+ return self._outdent_re.sub('', text)
+
+
+class MarkdownWithExtras(Markdown):
+ """A markdowner class that enables most extras:
+
+ - footnotes
+ - code-color (only has effect if 'pygments' Python module on path)
+
+ These are not included:
+ - pyshell (specific to Python-related documenting)
+ - code-friendly (because it *disables* part of the syntax)
+ - link-patterns (because you need to specify some actual
+ link-patterns anyway)
+ """
+ extras = ["footnotes", "code-color"]
+
+
+#---- internal support functions
+
+# From http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52549
+def _curry(*args, **kwargs):
+ function, args = args[0], args[1:]
+ def result(*rest, **kwrest):
+ combined = kwargs.copy()
+ combined.update(kwrest)
+ return function(*args + rest, **combined)
+ return result
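+
+# For example (a sketch):
+#
+#     def add(a, b): return a + b
+#     inc = _curry(add, 1)
+#     inc(41)  # -> 42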
+
+# Recipe: regex_from_encoded_pattern (1.0)
+def _regex_from_encoded_pattern(s):
+ """'foo' -> re.compile(re.escape('foo'))
+ '/foo/' -> re.compile('foo')
+ '/foo/i' -> re.compile('foo', re.I)
+ """
+ if s.startswith('/') and s.rfind('/') != 0:
+ # Parse it: /PATTERN/FLAGS
+ idx = s.rfind('/')
+ pattern, flags_str = s[1:idx], s[idx+1:]
+ flag_from_char = {
+ "i": re.IGNORECASE,
+ "l": re.LOCALE,
+ "s": re.DOTALL,
+ "m": re.MULTILINE,
+ "u": re.UNICODE,
+ }
+ flags = 0
+ for char in flags_str:
+ try:
+ flags |= flag_from_char[char]
+ except KeyError:
+ raise ValueError("unsupported regex flag: '%s' in '%s' "
+ "(must be one of '%s')"
+ % (char, s, ''.join(flag_from_char.keys())))
+ return re.compile(s[1:idx], flags)
+ else: # not an encoded regex
+ return re.compile(re.escape(s))
+
+# Recipe: dedent (0.1.2)
+def _dedentlines(lines, tabsize=8, skip_first_line=False):
+ """_dedentlines(lines, tabsize=8, skip_first_line=False) -> dedented lines
+
+ "lines" is a list of lines to dedent.
+ "tabsize" is the tab width to use for indent width calculations.
+ "skip_first_line" is a boolean indicating if the first line should
+ be skipped for calculating the indent width and for dedenting.
+ This is sometimes useful for docstrings and similar.
+
+ Same as dedent() except operates on a sequence of lines. Note: the
+ lines list is modified **in-place**.
+ """
+ DEBUG = False
+ if DEBUG:
+ print "dedent: dedent(..., tabsize=%d, skip_first_line=%r)"\
+ % (tabsize, skip_first_line)
+ indents = []
+ margin = None
+ for i, line in enumerate(lines):
+ if i == 0 and skip_first_line: continue
+ indent = 0
+ for ch in line:
+ if ch == ' ':
+ indent += 1
+ elif ch == '\t':
+ indent += tabsize - (indent % tabsize)
+ elif ch in '\r\n':
+ continue # skip all-whitespace lines
+ else:
+ break
+ else:
+ continue # skip all-whitespace lines
+ if DEBUG: print "dedent: indent=%d: %r" % (indent, line)
+ if margin is None:
+ margin = indent
+ else:
+ margin = min(margin, indent)
+ if DEBUG: print "dedent: margin=%r" % margin
+
+ if margin is not None and margin > 0:
+ for i, line in enumerate(lines):
+ if i == 0 and skip_first_line: continue
+ removed = 0
+ for j, ch in enumerate(line):
+ if ch == ' ':
+ removed += 1
+ elif ch == '\t':
+ removed += tabsize - (removed % tabsize)
+ elif ch in '\r\n':
+ if DEBUG: print "dedent: %r: EOL -> strip up to EOL" % line
+ lines[i] = lines[i][j:]
+ break
+ else:
+ raise ValueError("unexpected non-whitespace char %r in "
+ "line %r while removing %d-space margin"
+ % (ch, line, margin))
+ if DEBUG:
+ print "dedent: %r: %r -> removed %d/%d"\
+ % (line, ch, removed, margin)
+ if removed == margin:
+ lines[i] = lines[i][j+1:]
+ break
+ elif removed > margin:
+ lines[i] = ' '*(removed-margin) + lines[i][j+1:]
+ break
+ else:
+ if removed:
+ lines[i] = lines[i][removed:]
+ return lines
+
+def _dedent(text, tabsize=8, skip_first_line=False):
+ """_dedent(text, tabsize=8, skip_first_line=False) -> dedented text
+
+ "text" is the text to dedent.
+ "tabsize" is the tab width to use for indent width calculations.
+ "skip_first_line" is a boolean indicating if the first line should
+ be skipped for calculating the indent width and for dedenting.
+ This is sometimes useful for docstrings and similar.
+
+ textwrap.dedent(s), but don't expand tabs to spaces
+ """
+ lines = text.splitlines(1)
+ _dedentlines(lines, tabsize=tabsize, skip_first_line=skip_first_line)
+ return ''.join(lines)
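+
+# For example:
+#
+#     _dedent("    foo\n        bar\n")  # -> "foo\n    bar\n"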
+
+
+class _memoized(object):
+ """Decorator that caches a function's return value each time it is called.
+ If called later with the same arguments, the cached value is returned, and
+ not re-evaluated.
+
+ http://wiki.python.org/moin/PythonDecoratorLibrary
+ """
+ def __init__(self, func):
+ self.func = func
+ self.cache = {}
+ def __call__(self, *args):
+ try:
+ return self.cache[args]
+ except KeyError:
+ self.cache[args] = value = self.func(*args)
+ return value
+ except TypeError:
+ # uncachable -- for instance, passing a list as an argument.
+ # Better to not cache than to blow up entirely.
+ return self.func(*args)
+ def __repr__(self):
+ """Return the function's docstring."""
+ return self.func.__doc__
+
+
+def _xml_oneliner_re_from_tab_width(tab_width):
+ """Standalone XML processing instruction regex."""
+ return re.compile(r"""
+ (?:
+ (?<=\n\n) # Starting after a blank line
+ | # or
+ \A\n? # the beginning of the doc
+ )
+ ( # save in $1
+ [ ]{0,%d}
+ (?:
+ <\?\w+\b\s+.*?\?> # XML processing instruction
+ |
+ <\w+:\w+\b\s+.*?/> # namespaced single tag
+ )
+ [ \t]*
+ (?=\n{2,}|\Z) # followed by a blank line or end of document
+ )
+ """ % (tab_width - 1), re.X)
+_xml_oneliner_re_from_tab_width = _memoized(_xml_oneliner_re_from_tab_width)
+
+def _hr_tag_re_from_tab_width(tab_width):
+ return re.compile(r"""
+ (?:
+ (?<=\n\n) # Starting after a blank line
+ | # or
+ \A\n? # the beginning of the doc
+ )
+ ( # save in \1
+ [ ]{0,%d}
+ <(hr) # start tag = \2
+ \b # word break
+ ([^<>])*? #
+ /?> # the matching end tag
+ [ \t]*
+ (?=\n{2,}|\Z) # followed by a blank line or end of document
+ )
+ """ % (tab_width - 1), re.X)
+_hr_tag_re_from_tab_width = _memoized(_hr_tag_re_from_tab_width)
+
+
+def _xml_encode_email_char_at_random(ch):
+ r = random()
+ # Roughly 10% raw, 45% hex, 45% dec.
+ # '@' *must* be encoded. I [John Gruber] insist.
+ # Issue 26: '_' must be encoded.
+ if r > 0.9 and ch not in "@_":
+ return ch
+ elif r < 0.45:
+ # The [1:] is to drop leading '0': 0x63 -> x63
+ return '&#%s;' % hex(ord(ch))[1:]
+ else:
+ return '&#%s;' % ord(ch)
+
+def _hash_text(text):
+ return 'md5:'+md5(text.encode("utf-8")).hexdigest()
+
+
+#---- mainline
+
+class _NoReflowFormatter(optparse.IndentedHelpFormatter):
+ """An optparse formatter that does NOT reflow the description."""
+ def format_description(self, description):
+ return description or ""
+
+def _test():
+ import doctest
+ doctest.testmod()
+
+def main(argv=None):
+ if argv is None:
+ argv = sys.argv
+ if not logging.root.handlers:
+ logging.basicConfig()
+
+ usage = "usage: %prog [PATHS...]"
+ version = "%prog "+__version__
+ parser = optparse.OptionParser(prog="markdown2", usage=usage,
+ version=version, description=cmdln_desc,
+ formatter=_NoReflowFormatter())
+ parser.add_option("-v", "--verbose", dest="log_level",
+ action="store_const", const=logging.DEBUG,
+ help="more verbose output")
+ parser.add_option("--encoding",
+ help="specify encoding of text content")
+ parser.add_option("--html4tags", action="store_true", default=False,
+ help="use HTML 4 style for empty element tags")
+ parser.add_option("-s", "--safe", metavar="MODE", dest="safe_mode",
+ help="sanitize literal HTML: 'escape' escapes "
+ "HTML meta chars, 'replace' replaces with an "
+ "[HTML_REMOVED] note")
+ parser.add_option("-x", "--extras", action="append",
+ help="Turn on specific extra features (not part of "
+ "the core Markdown spec). Supported values: "
+ "'code-friendly' disables _/__ for emphasis; "
+ "'code-color' adds code-block syntax coloring; "
+ "'link-patterns' adds auto-linking based on patterns; "
+                           "'footnotes' adds the footnotes syntax; "
+                           "'xml' passes one-liner processing instructions and namespaced XML tags; "
+ "'pyshell' to put unindented Python interactive shell sessions in a <code> block.")
+    parser.add_option("--use-file-vars", action="store_true",
+ help="Look for and use Emacs-style 'markdown-extras' "
+ "file var to turn on extras. See "
+ "<http://code.google.com/p/python-markdown2/wiki/Extras>.")
+ parser.add_option("--link-patterns-file",
+ help="path to a link pattern file")
+ parser.add_option("--self-test", action="store_true",
+ help="run internal self-tests (some doctests)")
+ parser.add_option("--compare", action="store_true",
+ help="run against Markdown.pl as well (for testing)")
+ parser.set_defaults(log_level=logging.INFO, compare=False,
+ encoding="utf-8", safe_mode=None, use_file_vars=False)
+ opts, paths = parser.parse_args()
+ log.setLevel(opts.log_level)
+
+ if opts.self_test:
+ return _test()
+
+ if opts.extras:
+ extras = {}
+ for s in opts.extras:
+ splitter = re.compile("[,;: ]+")
+ for e in splitter.split(s):
+ if '=' in e:
+ ename, earg = e.split('=', 1)
+ try:
+ earg = int(earg)
+ except ValueError:
+ pass
+ else:
+ ename, earg = e, None
+ extras[ename] = earg
+ else:
+ extras = None
+
+ if opts.link_patterns_file:
+ link_patterns = []
+ f = open(opts.link_patterns_file)
+ try:
+ for i, line in enumerate(f.readlines()):
+ if not line.strip(): continue
+ if line.lstrip().startswith("#"): continue
+ try:
+ pat, href = line.rstrip().rsplit(None, 1)
+ except ValueError:
+ raise MarkdownError("%s:%d: invalid link pattern line: %r"
+ % (opts.link_patterns_file, i+1, line))
+ link_patterns.append(
+ (_regex_from_encoded_pattern(pat), href))
+ finally:
+ f.close()
+ else:
+ link_patterns = None
+
+ from os.path import join, dirname, abspath, exists
+ markdown_pl = join(dirname(dirname(abspath(__file__))), "test",
+ "Markdown.pl")
+ for path in paths:
+ if opts.compare:
+ print "==== Markdown.pl ===="
+ perl_cmd = 'perl %s "%s"' % (markdown_pl, path)
+ o = os.popen(perl_cmd)
+ perl_html = o.read()
+ o.close()
+ sys.stdout.write(perl_html)
+ print "==== markdown2.py ===="
+ html = markdown_path(path, encoding=opts.encoding,
+ html4tags=opts.html4tags,
+ safe_mode=opts.safe_mode,
+ extras=extras, link_patterns=link_patterns,
+ use_file_vars=opts.use_file_vars)
+ sys.stdout.write(
+ html.encode(sys.stdout.encoding or "utf-8", 'xmlcharrefreplace'))
+ if opts.compare:
+ test_dir = join(dirname(dirname(abspath(__file__))), "test")
+ if exists(join(test_dir, "test_markdown2.py")):
+ sys.path.insert(0, test_dir)
+ from test_markdown2 import norm_html_from_html
+ norm_html = norm_html_from_html(html)
+ norm_perl_html = norm_html_from_html(perl_html)
+ else:
+ norm_html = html
+ norm_perl_html = perl_html
+ print "==== match? %r ====" % (norm_perl_html == norm_html)
+
+
+if __name__ == "__main__":
+    sys.exit(main(sys.argv))
+
diff --git a/lib/pagination/__init__.py b/lib/pagination/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/lib/pagination/__init__.py
@@ -0,0 +1 @@
+
diff --git a/lib/pagination/middleware.py b/lib/pagination/middleware.py
new file mode 100644
index 0000000..cf9f9cb
--- /dev/null
+++ b/lib/pagination/middleware.py
@@ -0,0 +1,25 @@
+class PaginationMiddleware(object):
+ """
+ Inserts a variable representing the current page onto the request object if
+ it exists in either **GET** or **POST** portions of the request.
+ """
+ def process_request(self, request):
+ try:
+ request.page = int(request.REQUEST['page'])
+ except (KeyError, ValueError):
+ request.page = 1
+
+ def process_view(self, request, view_func, view_args, view_kwargs):
+ if 'paginate' in view_kwargs:
+ del view_kwargs['paginate']
+
+ if 'page_url' in view_kwargs:
+ request.page_url = view_kwargs['page_url']
+ del view_kwargs['page_url']
+
+ if 'page' in view_kwargs:
+ request.page = int(view_kwargs['page'])
+ del view_kwargs['page']
+ else:
+ request.page = 1
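+
+# A sketch of URLconf entries this middleware cooperates with (view names
+# and URL patterns are illustrative, old-style Django urls):
+#
+#     url(r'^archive/$', archive_view, {'paginate': True}),
+#     url(r'^archive/page/(?P<page>\d+)/$', archive_view,
+#         {'paginate': True, 'page_url': '/archive/page/%d/'}),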
+
diff --git a/lib/pagination/models.py b/lib/pagination/models.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/lib/pagination/models.py
@@ -0,0 +1 @@
+
diff --git a/lib/pagination/templates/pagination/pagination.html b/lib/pagination/templates/pagination/pagination.html
new file mode 100644
index 0000000..2ecffb4
--- /dev/null
+++ b/lib/pagination/templates/pagination/pagination.html
@@ -0,0 +1,30 @@
+{% load pagination_tags %}
+{% if is_paginated %}
+<div class="pagination">
+ {% if page_obj.has_previous %}
+ <a href="{% if use_page_path %}{% page_path prev_page %}{% else %}?page={{ page_obj.previous_page_number }}{{ getvars }}{% endif %}" class="prev">&lsaquo;&lsaquo; previous</a>
+ {% else %}
+    <span class="disabled prev">&lsaquo;&lsaquo; previous</span>
+ {% endif %}
+ {% for page in pages %}
+ {% if page %}
+ {% ifequal page page_obj.number %}
+ <span class="current page">{{ page }}</span>
+ {% else %}
+ {% if use_page_path %}
+ <a href="{% page_path page %}" class="page">{{ page }}</a>
+ {% else %}
+ <a href="?page={{ page }}{{ getvars }}" class="page">{{ page }}</a>
+ {% endif %}
+ {% endifequal %}
+ {% else %}
+ ...
+ {% endif %}
+ {% endfor %}
+ {% if page_obj.has_next %}
+ <a href="{% if use_page_path %}{% page_path next_page %}{% else %}?page={{ page_obj.next_page_number }}{{ getvars }}{% endif %}" class="next">next &rsaquo;&rsaquo;</a>
+ {% else %}
+ <span class="disabled next">next &rsaquo;&rsaquo;</span>
+ {% endif %}
+</div>
+{% endif %}
diff --git a/lib/pagination/templatetags/__init__.py b/lib/pagination/templatetags/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/lib/pagination/templatetags/__init__.py
@@ -0,0 +1 @@
+
diff --git a/lib/pagination/templatetags/pagination_tags.py b/lib/pagination/templatetags/pagination_tags.py
new file mode 100644
index 0000000..ef9bf5e
--- /dev/null
+++ b/lib/pagination/templatetags/pagination_tags.py
@@ -0,0 +1,235 @@
+try:
+ set
+except NameError:
+ from sets import Set as set
+import re
+from django import template
+from django.db.models.query import QuerySet
+from django.core.paginator import Paginator, QuerySetPaginator, InvalidPage
+
+register = template.Library()
+
+DEFAULT_PAGINATION = 20
+DEFAULT_WINDOW = 4
+DEFAULT_ORPHANS = 0
+
+@register.tag
+def page_path(parser, token):
+ """Returns the path for the given page."""
+ bits = token.split_contents()
+ if len(bits) != 2:
+ raise template.TemplateSyntaxError(
+ 'page_path requires a page template var'
+ )
+
+ return PagePathNode(bits[1])
+
+class PagePathNode(template.Node):
+ """Renders the path for a given page number"""
+ def __init__(self, page_var):
+ self.page_var = page_var
+
+ def render(self, context):
+ try:
+ page = int(context[self.page_var])
+ path = context['request'].path
+ page_url = context['request'].page_url
+        except (KeyError, AttributeError, ValueError, TypeError):
+ return ''
+
+ return page_url % page
+
+def do_autopaginate(parser, token):
+ """
+ Splits the arguments to the autopaginate tag and formats them correctly.
+ """
+ split = token.split_contents()
+ if len(split) == 2:
+ return AutoPaginateNode(split[1])
+ elif len(split) == 3:
+ try:
+ paginate_by = int(split[2])
+ except ValueError:
+ raise template.TemplateSyntaxError(u'Got %s, but expected integer.' % split[2])
+ return AutoPaginateNode(split[1], paginate_by=paginate_by)
+ elif len(split) == 4:
+ try:
+ paginate_by = int(split[2])
+ except ValueError:
+ raise template.TemplateSyntaxError(u'Got %s, but expected integer.' % split[2])
+ try:
+ orphans = int(split[3])
+ except ValueError:
+ raise template.TemplateSyntaxError(u'Got %s, but expected integer.' % split[3])
+ return AutoPaginateNode(split[1], paginate_by=paginate_by, orphans=orphans)
+ else:
+        raise template.TemplateSyntaxError('%r tag takes one required argument and up to two optional arguments' % split[0])
+
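+# Typical template usage, as exercised in tests.py below ('object_list'
+# is an illustrative variable name):
+#
+#     {% load pagination_tags %}
+#     {% autopaginate object_list 20 %}
+#     ... render object_list ...
+#     {% paginate %}
+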
+class AutoPaginateNode(template.Node):
+ """
+ Emits the required objects to allow for Digg-style pagination.
+
+ First, it looks in the current context for the variable specified. This
+ should be either a QuerySet or a list.
+
+ 1. If it is a QuerySet, this ``AutoPaginateNode`` will emit a
+ ``QuerySetPaginator`` and the current page object into the context names
+ ``paginator`` and ``page_obj``, respectively.
+
+ 2. If it is a list, this ``AutoPaginateNode`` will emit a simple
+ ``Paginator`` and the current page object into the context names
+ ``paginator`` and ``page_obj``, respectively.
+
+ It will then replace the variable specified with only the objects for the
+ current page.
+
+ .. note::
+
+ It is recommended to use *{% paginate %}* after using the autopaginate
+ tag. If you choose not to use *{% paginate %}*, make sure to display the
+        list of available pages, or else the application may seem to be buggy.
+ """
+ def __init__(self, queryset_var, paginate_by=DEFAULT_PAGINATION, orphans=DEFAULT_ORPHANS):
+ self.queryset_var = template.Variable(queryset_var)
+ self.paginate_by = paginate_by
+ self.orphans = orphans
+
+ def render(self, context):
+ key = self.queryset_var.var
+ value = self.queryset_var.resolve(context)
+ if issubclass(value.__class__, QuerySet):
+ model = value.model
+ paginator_class = QuerySetPaginator
+ else:
+ value = list(value)
+ try:
+ model = value[0].__class__
+ except IndexError:
+ return u''
+ paginator_class = Paginator
+ paginator = paginator_class(value, self.paginate_by, self.orphans)
+ try:
+ page_obj = paginator.page(context['request'].page)
+ except InvalidPage:
+ context[key] = []
+ context['invalid_page'] = True
+ return u''
+ context[key] = page_obj.object_list
+ context['paginator'] = paginator
+ context['page_obj'] = page_obj
+ if hasattr(context['request'], 'page_url'):
+ context['use_page_path'] = True
+ return u''
+
+def paginate(context, window=DEFAULT_WINDOW):
+ """
+ Renders the ``pagination/pagination.html`` template, resulting in a
+ Digg-like display of the available pages, given the current page. If there
+ are too many pages to be displayed before and after the current page, then
+    ellipses will be used to indicate the undisplayed gap between page numbers.
+
+ Requires one argument, ``context``, which should be a dictionary-like data
+ structure and must contain the following keys:
+
+ ``paginator``
+ A ``Paginator`` or ``QuerySetPaginator`` object.
+
+ ``page_obj``
+ This should be the result of calling the page method on the
+ aforementioned ``Paginator`` or ``QuerySetPaginator`` object, given
+ the current page.
+
+ This same ``context`` dictionary-like data structure may also include:
+
+ ``getvars``
+ A dictionary of all of the **GET** parameters in the current request.
+ This is useful to maintain certain types of state, even when requesting
+ a different page.
+ """
+ try:
+ paginator = context['paginator']
+ page_obj = context['page_obj']
+ page_range = paginator.page_range
+ # First and last are simply the first *n* pages and the last *n* pages,
+ # where *n* is the current window size.
+ first = set(page_range[:window])
+ last = set(page_range[-window:])
+ # Now we look around our current page, making sure that we don't wrap
+ # around.
+ current_start = page_obj.number-1-window
+ if current_start < 0:
+ current_start = 0
+ current_end = page_obj.number-1+window
+ if current_end < 0:
+ current_end = 0
+ current = set(page_range[current_start:current_end])
+ pages = []
+ # If there's no overlap between the first set of pages and the current
+        # set of pages, then there's a possible need for elision.
+ if len(first.intersection(current)) == 0:
+ first_list = sorted(list(first))
+ second_list = sorted(list(current))
+ pages.extend(first_list)
+ diff = second_list[0] - first_list[-1]
+ # If there is a gap of two, between the last page of the first
+ # set and the first page of the current set, then we're missing a
+ # page.
+ if diff == 2:
+ pages.append(second_list[0] - 1)
+ # If the difference is just one, then there's nothing to be done,
+            # as the pages need no elision and are correct.
+ elif diff == 1:
+ pass
+ # Otherwise, there's a bigger gap which needs to be signaled for
+            # elision, by pushing a None value to the page list.
+ else:
+ pages.append(None)
+ pages.extend(second_list)
+ else:
+ pages.extend(sorted(list(first.union(current))))
+ # If there's no overlap between the current set of pages and the last
+        # set of pages, then there's a possible need for elision.
+ if len(current.intersection(last)) == 0:
+ second_list = sorted(list(last))
+ diff = second_list[0] - pages[-1]
+ # If there is a gap of two, between the last page of the current
+ # set and the first page of the last set, then we're missing a
+ # page.
+ if diff == 2:
+ pages.append(second_list[0] - 1)
+ # If the difference is just one, then there's nothing to be done,
+            # as the pages need no elision and are correct.
+ elif diff == 1:
+ pass
+ # Otherwise, there's a bigger gap which needs to be signaled for
+            # elision, by pushing a None value to the page list.
+ else:
+ pages.append(None)
+ pages.extend(second_list)
+ else:
+ pages.extend(sorted(list(last.difference(current))))
+ to_return = {
+ 'pages': pages,
+ 'page_obj': page_obj,
+ 'paginator': paginator,
+ 'is_paginated': paginator.count > paginator.per_page,
+ }
+ if 'request' in context:
+ if 'use_page_path' in context:
+ to_return['request'] = context['request']
+ to_return['use_page_path'] = context['use_page_path']
+ to_return['prev_page'] = page_obj.previous_page_number()
+ to_return['next_page'] = page_obj.next_page_number()
+
+ getvars = context['request'].GET.copy()
+ if 'page' in getvars:
+ del getvars['page']
+ if len(getvars.keys()) > 0:
+ to_return['getvars'] = "&%s" % getvars.urlencode()
+ else:
+ to_return['getvars'] = ''
+ return to_return
+ except KeyError:
+ return {}
+register.inclusion_tag('pagination/pagination.html', takes_context=True)(paginate)
+register.tag('autopaginate', do_autopaginate)
diff --git a/lib/pagination/tests.py b/lib/pagination/tests.py
new file mode 100644
index 0000000..837e55c
--- /dev/null
+++ b/lib/pagination/tests.py
@@ -0,0 +1,52 @@
+"""
+>>> from django.core.paginator import Paginator
+>>> from pagination.templatetags.pagination_tags import paginate
+>>> from django.template import Template, Context
+
+>>> p = Paginator(range(15), 2)
+>>> paginate({'paginator': p, 'page_obj': p.page(1)})['pages']
+[1, 2, 3, 4, 5, 6, 7, 8]
+
+>>> p = Paginator(range(17), 2)
+>>> paginate({'paginator': p, 'page_obj': p.page(1)})['pages']
+[1, 2, 3, 4, 5, 6, 7, 8, 9]
+
+>>> p = Paginator(range(19), 2)
+>>> paginate({'paginator': p, 'page_obj': p.page(1)})['pages']
+[1, 2, 3, 4, None, 7, 8, 9, 10]
+
+>>> p = Paginator(range(21), 2)
+>>> paginate({'paginator': p, 'page_obj': p.page(1)})['pages']
+[1, 2, 3, 4, None, 8, 9, 10, 11]
+
+# Testing orphans
+>>> p = Paginator(range(5), 2, 1)
+>>> paginate({'paginator': p, 'page_obj': p.page(1)})['pages']
+[1, 2]
+
+>>> p = Paginator(range(21), 2, 1)
+>>> paginate({'paginator': p, 'page_obj': p.page(1)})['pages']
+[1, 2, 3, 4, None, 7, 8, 9, 10]
+
+>>> t = Template("{% load pagination_tags %}{% autopaginate var 2 %}{% paginate %}")
+
+# WARNING: Please, please nobody read this portion of the code!
+>>> class GetProxy(object):
+... def __iter__(self): yield self.__dict__.__iter__
+... def copy(self): return self
+... def urlencode(self): return u''
+... def keys(self): return []
+>>> class RequestProxy(object):
+... page = 1
+... GET = GetProxy()
+>>>
+# ENDWARNING
+
+>>> t.render(Context({'var': range(21), 'request': RequestProxy()}))
+u'\\n<div class="pagination">...
+>>>
+>>> t = Template("{% load pagination_tags %}{% autopaginate var %}{% paginate %}")
+>>> t.render(Context({'var': range(21), 'request': RequestProxy()}))
+u'\\n<div class="pagination">...
+>>>
+""" \ No newline at end of file
diff --git a/lib/pydelicious.py b/lib/pydelicious.py
new file mode 100644
index 0000000..dd33788
--- /dev/null
+++ b/lib/pydelicious.py
@@ -0,0 +1,817 @@
+"""Library to access del.icio.us data via Python.
+
+:examples:
+
+ Using the API class directly:
+
+ >>> a = pydelicious.apiNew('user', 'passwd')
+ >>> # or:
+ >>> a = DeliciousAPI('user', 'passwd')
+ >>> a.tags_get() # Same as:
+ >>> a.request('tags/get', )
+
+ Or by calling one of the methods on the module:
+
+ - add(user, passwd, url, description, tags = "", extended = "", dt = "", replace="no")
+ - get(user, passwd, tag="", dt="", count = 0)
+ - get_all(user, passwd, tag = "")
+ - delete(user, passwd, url)
+ - rename_tag(user, passwd, oldtag, newtag)
+ - get_tags(user, passwd)
+
+ >>> a = apiNew(user, passwd)
+    >>> a.posts_add(url="http://my.com/", description="my.com", extended="the url is my.moc", tags="my com")
+ True
+ >>> len(a.posts_all())
+ 1
+ >>> get_all(user, passwd)
+ 1
+
+    These are shorthand functions for getrss calls.
+
+ >>> rss_
+
+def get_userposts(user):
+def get_tagposts(tag):
+def get_urlposts(url):
+def get_popular(tag = ""):
+
+ >>> json_posts()
+ >>> json_tags()
+ >>> json_network()
+ >>> json_fans()
+
+:License: pydelicious is released under the BSD license. See 'license.txt'
+ for more information.
+
+:todo, bvb:
+ - Rewrite comments in English. More documentation, examples.
+ - Add JSON-like return values for XML data (del.icio.us also serves some JSON...)
+ - Better error/exception classes and handling, work in progress.
+
+:todo:
+ - Source code SHOULD BE ASCII!
+ - More tests.
+ - handling different encodings, what, how?
+ >>> pydelicious.getrss(tag="t[a]g")
+ url: http://del.icio.us/rss/tag/t[a]g
+ - Parse datetimes in XML.
+ - Test RSS functionality? HTML scraping doesn't work yet?
+ - API functions need required argument checks.
+ - interesting functionality in other libraries (ruby, java, perl, etc)?
+ - what is pydelicious used for?
+ - distribute license and readme docs via setup.py?
+ - automatic release build
+
+:done:
+ * Refactored the API class, much cleaner now and functions dlcs_api_request, dlcs_parse_xml are available for who wants them.
+"""
+import sys
+import os
+import time
+import datetime
+import md5, httplib
+import urllib, urllib2, time
+from StringIO import StringIO
+
+try:
+ from elementtree.ElementTree import parse as parse_xml
+except ImportError:
+ from xml.etree.ElementTree import parse as parse_xml
+
+import feedparser
+
+
+### Static config
+
+__version__ = '0.5.0'
+__author__ = 'Frank Timmermann <regenkind_at_gmx_dot_de>' # GP: does not respond to emails
+__contributors__ = [
+ 'Greg Pinero',
+ 'Berend van Berkum <berend+pydelicious@dotmpe.com>']
+__url__ = 'http://code.google.com/p/pydelicious/'
+__author_email__ = ""
+# Old URL: 'http://deliciouspython.python-hosting.com/'
+
+__description__ = '''pydelicious.py allows you to access the web service of del.icio.us via its API through Python.'''
+__long_description__ = '''The goal is to design an easy-to-use and fully functional Python interface to del.icio.us.'''
+
+DLCS_OK_MESSAGES = ('done', 'ok') # Known text values of positive del.icio.us <result> answers
+DLCS_WAIT_TIME = 4
+DLCS_REQUEST_TIMEOUT = 444 # Seconds before socket triggers timeout
+#DLCS_API_REALM = 'del.icio.us API'
+DLCS_API_HOST = 'https://api.del.icio.us'
+DLCS_API_PATH = 'v1'
+DLCS_API = "%s/%s" % (DLCS_API_HOST, DLCS_API_PATH)
+DLCS_RSS = 'http://del.icio.us/rss/'
+
+ISO_8601_DATETIME = '%Y-%m-%dT%H:%M:%SZ'
+
+USER_AGENT = 'pydelicious.py/%s %s' % (__version__, __url__)
+
+DEBUG = 0
+if 'DLCS_DEBUG' in os.environ:
+ DEBUG = int(os.environ['DLCS_DEBUG'])
+
+
+# Taken from FeedParser.py
+# timeoutsocket allows feedparser to time out rather than hang forever on ultra-slow servers.
+# Python 2.3 now has this functionality available in the standard socket library, so under
+# 2.3 you don't need to install anything. But you probably should anyway, because the socket
+# module is buggy and timeoutsocket is better.
+try:
+ import timeoutsocket # http://www.timo-tasi.org/python/timeoutsocket.py
+ timeoutsocket.setDefaultSocketTimeout(DLCS_REQUEST_TIMEOUT)
+except ImportError:
+ import socket
+ if hasattr(socket, 'setdefaulttimeout'): socket.setdefaulttimeout(DLCS_REQUEST_TIMEOUT)
+if DEBUG: print >>sys.stderr, "Set socket timeout to %s seconds" % DLCS_REQUEST_TIMEOUT
+
+
+### Utility classes
+
+class _Waiter:
+ """Waiter makes sure a certain amount of time passes between
+ successive calls of `Waiter()`.
+
+ Some attributes:
+ :last: time of last call
+ :wait: the minimum time needed between calls
+ :waited: the number of calls throttled
+
+ pydelicious.Waiter is an instance created when the module is loaded.
+ """
+ def __init__(self, wait):
+ self.wait = wait
+ self.waited = 0
+        self.lastcall = 0
+
+ def __call__(self):
+ tt = time.time()
+ wait = self.wait
+
+ timeago = tt - self.lastcall
+
+ if timeago < wait:
+ wait = wait - timeago
+ if DEBUG>0: print >>sys.stderr, "Waiting %s seconds." % wait
+ time.sleep(wait)
+ self.waited += 1
+ self.lastcall = tt + wait
+ else:
+ self.lastcall = tt
+
+Waiter = _Waiter(DLCS_WAIT_TIME)
+
+class PyDeliciousException(Exception):
+ '''Std. pydelicious error'''
+ pass
+
+class DeliciousError(Exception):
+ """Raised when the server responds with a negative answer"""
+
+
+class DefaultErrorHandler(urllib2.HTTPDefaultErrorHandler):
+ '''xxx, bvb: Where is this used? should it be registered somewhere with urllib2?
+
+ Handles HTTP Error, currently only 503.
+ '''
+ def http_error_503(self, req, fp, code, msg, headers):
+        raise urllib2.HTTPError(req, code, msg, headers, fp)
+
+
+class post(dict):
+ """Post object, contains href, description, hash, dt, tags,
+ extended, user, count(, shared).
+
+ xxx, bvb: Not used in DeliciousAPI
+ """
+ def __init__(self, href="", description="", hash="", time="", tag="", extended="", user="", count="",
+ tags="", url="", dt=""): # tags or tag?
+ self["href"] = href
+ if url != "": self["href"] = url
+ self["description"] = description
+ self["hash"] = hash
+ self["dt"] = dt
+ if time != "": self["dt"] = time
+ self["tags"] = tags
+ if tag != "": self["tags"] = tag # tag or tags? # !! tags
+ self["extended"] = extended
+ self["user"] = user
+ self["count"] = count
+
+ def __getattr__(self, name):
+ try: return self[name]
+        except KeyError: return object.__getattribute__(self, name)
+
+
+class posts(list):
+ def __init__(self, *args):
+ for i in args: self.append(i)
+
+ def __getattr__(self, attr):
+ try: return [p[attr] for p in self]
+        except KeyError: return object.__getattribute__(self, attr)
+
+### Utility functions
+
+def str2uni(s):
+ # type(in) str or unicode
+ # type(out) unicode
+ return ("".join([unichr(ord(i)) for i in s]))
+
+def str2utf8(s):
+ # type(in) str or unicode
+ # type(out) str
+ return ("".join([unichr(ord(i)).encode("utf-8") for i in s]))
+
+def str2quote(s):
+ return urllib.quote_plus("".join([unichr(ord(i)).encode("utf-8") for i in s]))
+
+def dict0(d):
+ # Trims empty dict entries
+ # {'a':'a', 'b':'', 'c': 'c'} => {'a': 'a', 'c': 'c'}
+ dd = dict()
+ for i in d:
+ if d[i] != "": dd[i] = d[i]
+ return dd
+
+def delicious_datetime(s):
+    """Parse an ISO 8601 formatted string to a Python datetime.
+    """
+    return datetime.datetime(*time.strptime(s, ISO_8601_DATETIME)[0:6])
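+
+# For example:
+#
+#     >>> delicious_datetime('2008-03-09T12:30:45Z')
+#     datetime.datetime(2008, 3, 9, 12, 30, 45)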
+
+def http_request(url, user_agent=USER_AGENT, retry=4):
+ """Retrieve the contents referenced by the URL using urllib2.
+
+ Retries up to four times (default) on exceptions.
+ """
+ request = urllib2.Request(url, headers={'User-Agent':user_agent})
+
+ # Remember last error
+ e = None
+
+ # Repeat request on time-out errors
+    tries = retry
+ while tries:
+ try:
+ return urllib2.urlopen(request)
+
+ except urllib2.HTTPError, e: # protocol errors,
+ raise PyDeliciousException, "%s" % e
+
+ except urllib2.URLError, e:
+ # xxx: Ugly check for time-out errors
+ #if len(e)>0 and 'timed out' in arg[0]:
+ print >> sys.stderr, "%s, %s tries left." % (e, tries)
+ Waiter()
+ tries = tries - 1
+ #else:
+ # tries = None
+
+ # Give up
+ raise PyDeliciousException, \
+ "Unable to retrieve data at '%s', %s" % (url, e)
+
+def http_auth_request(url, host, user, passwd, user_agent=USER_AGENT):
+ """Call an HTTP server with authorization credentials using urllib2.
+ """
+ if DEBUG: httplib.HTTPConnection.debuglevel = 1
+
+ # Hook up handler/opener to urllib2
+ password_manager = urllib2.HTTPPasswordMgrWithDefaultRealm()
+ password_manager.add_password(None, host, user, passwd)
+ auth_handler = urllib2.HTTPBasicAuthHandler(password_manager)
+ opener = urllib2.build_opener(auth_handler)
+ urllib2.install_opener(opener)
+
+ return http_request(url, user_agent)
+
+def dlcs_api_request(path, params='', user='', passwd='', throttle=True):
+ """Retrieve/query a path within the del.icio.us API.
+
+ This implements a minimum interval between calls to avoid
+ throttling. [#]_ Use param 'throttle' to turn this behaviour off.
+
+ todo: back off on 503's (HTTPError, URLError? testing
+
+ Returned XML does not always correspond with given del.icio.us examples
+ [#]_.
+
+ .. [#] http://del.icio.us/help/api/
+ """
+ if throttle:
+ Waiter()
+
+ if params:
+ # params come as a dict, strip empty entries and urlencode
+ url = "%s/%s?%s" % (DLCS_API, path, urllib.urlencode(dict0(params)))
+ else:
+ url = "%s/%s" % (DLCS_API, path)
+
+ if DEBUG: print >>sys.stderr, "dlcs_api_request: %s" % url
+
+ try:
+ return http_auth_request(url, DLCS_API_HOST, user, passwd, USER_AGENT)
+
+ # bvb: Is this ever raised? When?
+ except DefaultErrorHandler, e:
+ print >>sys.stderr, "%s" % e
+
+def dlcs_parse_xml(data, split_tags=False):
+ """Parse any del.icio.us XML document and return Python data structure.
+
+ Recognizes all XML document formats as returned by the version 1 API and
+ translates to a JSON-like data structure (dicts 'n lists).
+
+ Returned instance is always a dictionary. Examples::
+
+ {'posts': [{'url':'...','hash':'...',},],}
+ {'tags':['tag1', 'tag2',]}
+ {'dates': [{'count':'...','date':'...'},], 'tag':'', 'user':'...'}
+ {'result':(True, "done")}
+ # etcetera.
+ """
+
+ if DEBUG>3: print >>sys.stderr, "dlcs_parse_xml: parsing from ", data
+
+ if not hasattr(data, 'read'):
+ data = StringIO(data)
+
+ doc = parse_xml(data)
+ root = doc.getroot()
+ fmt = root.tag
+
+ # Split up into three cases: Data, Result or Update
+ if fmt in ('tags', 'posts', 'dates', 'bundles'):
+
+ # Data: expect a list of data elements, 'resources'.
+ # Use `fmt` (without last 's') to find data elements, elements
+ # don't have contents, attributes contain all the data we need:
+ # append to list
+ elist = [el.attrib for el in doc.findall(fmt[:-1])]
+
+ # Return list in dict, use tagname of rootnode as keyname.
+ data = {fmt: elist}
+
+ # Root element might have attributes too, append dict.
+ data.update(root.attrib)
+
+ return data
+
+ elif fmt == 'result':
+
+ # Result: answer to operations
+ if root.attrib.has_key('code'):
+ msg = root.attrib['code']
+ else:
+ msg = root.text
+
+ # Return {'result':(True, msg)} for /known/ O.K. messages,
+ # use (False, msg) otherwise
+ v = msg in DLCS_OK_MESSAGES
+ return {fmt: (v, msg)}
+
+ elif fmt == 'update':
+
+ # Update: "time"
+ #return {fmt: root.attrib}
+ return {fmt: {'time':time.strptime(root.attrib['time'], ISO_8601_DATETIME)}}
+
+ else:
+ raise PyDeliciousException, "Unknown XML document format '%s'" % fmt
+
+def dlcs_rss_request(tag = "", popular = 0, user = "", url = ''):
+    """Handle a request for RSS.
+
+    RSS should work again now, but this try/except mess is not pretty.
+
+    The RSS feeds are assembled inconsistently; no uniform relationship
+    between the data (url, desc, ext, etc.) and the feed is apparent yet.
+
+    Why can't they make this uniform?
+    """
+ tag = str2quote(tag)
+ user = str2quote(user)
+ if url != '':
+ # http://del.icio.us/rss/url/efbfb246d886393d48065551434dab54
+ url = DLCS_RSS + '''url/%s'''%md5.new(url).hexdigest()
+ elif user != '' and tag != '':
+ url = DLCS_RSS + '''%(user)s/%(tag)s'''%dict(user=user, tag=tag)
+ elif user != '' and tag == '':
+ # http://del.icio.us/rss/delpy
+ url = DLCS_RSS + '''%s'''%user
+ elif popular == 0 and tag == '':
+ url = DLCS_RSS
+ elif popular == 0 and tag != '':
+ # http://del.icio.us/rss/tag/apple
+ # http://del.icio.us/rss/tag/web2.0
+ url = DLCS_RSS + "tag/%s"%tag
+ elif popular == 1 and tag == '':
+ url = DLCS_RSS + '''popular/'''
+ elif popular == 1 and tag != '':
+ url = DLCS_RSS + '''popular/%s'''%tag
+ rss = http_request(url).read()
+ rss = feedparser.parse(rss)
+ # print rss
+# for e in rss.entries: print e;print
+ l = posts()
+ for e in rss.entries:
+ if e.has_key("links") and e["links"]!=[] and e["links"][0].has_key("href"):
+ url = e["links"][0]["href"]
+ elif e.has_key("link"):
+ url = e["link"]
+ elif e.has_key("id"):
+ url = e["id"]
+ else:
+ url = ""
+ if e.has_key("title"):
+ description = e['title']
+        elif e.has_key("title_detail") and e["title_detail"].has_key("value"):
+            description = e["title_detail"]['value']
+ else:
+ description = ''
+ try: tags = e['categories'][0][1]
+        except (KeyError, IndexError):
+            try: tags = e["category"]
+            except KeyError: tags = ""
+ if e.has_key("modified"):
+ dt = e['modified']
+ else:
+ dt = ""
+ if e.has_key("summary"):
+ extended = e['summary']
+ elif e.has_key("summary_detail"):
+            extended = e['summary_detail']["value"]
+ else:
+ extended = ""
+ if e.has_key("author"):
+ user = e['author']
+ else:
+ user = ""
+        # time = dt points at a problem:
+        # the variable naming is not consistent; what we send to the
+        # API and the XML we get back are two different things,
+        # naming-wise :(
+ l.append(post(url = url, description = description, tags = tags, dt = dt, extended = extended, user = user))
+ return l
+
+
+### Main module class
+
+class DeliciousAPI:
+    """Class providing main interface to del.icio.us API.
+
+ Methods ``request`` and ``request_raw`` represent the core. For all API
+ paths there are furthermore methods (e.g. posts_add for 'posts/all') with
+ an explicit declaration of the parameters and documentation. These all call
+ ``request`` and pass on extra keywords like ``_raw``.
+ """
+
+ def __init__(self, user, passwd, codec='iso-8859-1', api_request=dlcs_api_request, xml_parser=dlcs_parse_xml):
+ """Initialize access to the API with ``user`` and ``passwd``.
+
+ ``codec`` sets the encoding of the arguments.
+
+ The ``api_request`` and ``xml_parser`` parameters by default point to
+ functions within this package with standard implementations to
+ request and parse a resource. See ``dlcs_api_request()`` and
+ ``dlcs_parse_xml()``. Note that ``api_request`` should return a
+ file-like instance with an HTTPMessage instance under ``info()``,
+ see ``urllib2.openurl`` for more info.
+ """
+ assert user != ""
+ self.user = user
+ self.passwd = passwd
+ self.codec = codec
+
+        # Implement communication to server and parsing of response messages:
+ assert callable(api_request)
+ self._api_request = api_request
+ assert callable(xml_parser)
+ self._parse_response = xml_parser
+
+ def _call_server(self, path, **params):
+ params = dict0(params)
+ for key in params:
+ params[key] = params[key].encode(self.codec)
+
+ # see __init__ for _api_request()
+ return self._api_request(path, params, self.user, self.passwd)
+
+
+ ### Core functionality
+
+ def request(self, path, _raw=False, **params):
+ """Calls a path in the API, parses the answer to a JSON-like structure by
+        default. Use with ``_raw=True`` or call ``request_raw()`` directly to
+ get the filehandler and process the response message manually.
+
+ Calls to some paths will return a `result` message, i.e.::
+
+ <result code="..." />
+
+ or::
+
+ <result>...</result>
+
+ These are all parsed to ``{'result':(Boolean, MessageString)}`` and this
+ method will raise ``DeliciousError`` on negative `result` answers. Using
+ ``_raw=True`` bypasses all parsing and will never raise ``DeliciousError``.
+
+ See ``dlcs_parse_xml()`` and ``self.request_raw()``."""
+
+ # method _parse_response is bound in `__init__()`, `_call_server`
+ # uses `_api_request` also set in `__init__()`
+ if _raw:
+ # return answer
+ return self.request_raw(path, **params)
+
+ else:
+ # get answer and parse
+ fl = self._call_server(path, **params)
+ rs = self._parse_response(fl)
+
+ # Raise an error for negative 'result' answers
+            if type(rs) == dict and 'result' in rs and not rs['result'][0]:
+                errmsg = ""
+                if len(rs['result']) > 1:
+                    errmsg = rs['result'][1]
+ raise DeliciousError, errmsg
+
+ return rs
+
+ def request_raw(self, path, **params):
+ """Calls the path in the API, returns the filehandle. Returned
+ file-like instances have an ``HTTPMessage`` instance with HTTP header
+ information available. Use ``filehandle.info()`` or refer to the
+ ``urllib2.openurl`` documentation.
+ """
+ # see `request()` on how the response can be handled
+ return self._call_server(path, **params)
+
+ ### Explicit declarations of API paths, their parameters and docs
+
+ # Tags
+ def tags_get(self, **kwds):
+        """Returns a list of tags and the number of times each is used by the user.
+ ::
+
+ <tags>
+ <tag tag="TagName" count="888">
+ """
+ return self.request("tags/get", **kwds)
+
+ def tags_rename(self, old, new, **kwds):
+ """Rename an existing tag with a new tag name. Returns a `result`
+        message or raises a ``DeliciousError``. See ``self.request()``.
+
+ &old (required)
+ Tag to rename.
+ &new (required)
+ New name.
+ """
+ return self.request("tags/rename", old=old, new=new, **kwds)
+
+ # Posts
+ def posts_update(self, **kwds):
+ """Returns the last update time for the user. Use this before calling
+ `posts_all` to see if the data has changed since the last fetch.
+ ::
+
+ <update time="CCYY-MM-DDThh:mm:ssZ">
+ """
+ return self.request("posts/update", **kwds)
+
+ def posts_dates(self, tag="", **kwds):
+ """Returns a list of dates with the number of posts at each date.
+ ::
+
+ <dates>
+ <date date="CCYY-MM-DD" count="888">
+
+ &tag (optional).
+ Filter by this tag.
+ """
+ return self.request("posts/dates", tag=tag, **kwds)
+
+ def posts_get(self, tag="", dt="", url="", **kwds):
+ """Returns posts matching the arguments. If no date or url is given,
+        the most recent date will be used.
+ ::
+
+ <posts dt="CCYY-MM-DD" tag="..." user="...">
+ <post ...>
+
+ &tag (optional).
+ Filter by this tag.
+ &dt (optional).
+ Filter by this date (CCYY-MM-DDThh:mm:ssZ).
+ &url (optional).
+ Filter by this url.
+ """
+ return self.request("posts/get", tag=tag, dt=dt, url=url, **kwds)
+
+ def posts_recent(self, tag="", count="", **kwds):
+ """Returns a list of the most recent posts, filtered by argument.
+ ::
+
+ <posts tag="..." user="...">
+ <post ...>
+
+ &tag (optional).
+ Filter by this tag.
+ &count (optional).
+ Number of items to retrieve (Default:15, Maximum:100).
+ """
+ return self.request("posts/recent", tag=tag, count=count, **kwds)
+
+ def posts_all(self, tag="", **kwds):
+ """Returns all posts. Please use sparingly. Call the `posts_update`
+ method to see if you need to fetch this at all.
+ ::
+
+ <posts tag="..." user="..." update="CCYY-MM-DDThh:mm:ssZ">
+ <post ...>
+
+ &tag (optional).
+ Filter by this tag.
+ """
+ return self.request("posts/all", tag=tag, **kwds)
+
+ def posts_add(self, url, description, extended="", tags="", dt="",
+ replace="no", shared="yes", **kwds):
+        """Add a post to del.icio.us. Returns a `result` message or raises a
+ ``DeliciousError``. See ``self.request()``.
+
+ &url (required)
+ the url of the item.
+ &description (required)
+ the description of the item.
+ &extended (optional)
+ notes for the item.
+ &tags (optional)
+ tags for the item (space delimited).
+ &dt (optional)
+ datestamp of the item (format "CCYY-MM-DDThh:mm:ssZ").
+
+ Requires a LITERAL "T" and "Z" like in ISO8601 at http://www.cl.cam.ac.uk/~mgk25/iso-time.html for example: "1984-09-01T14:21:31Z"
+ &replace=no (optional) - don't replace post if given url has already been posted.
+ &shared=no (optional) - make the item private
+ """
+ return self.request("posts/add", url=url, description=description,
+ extended=extended, tags=tags, dt=dt,
+ replace=replace, shared=shared, **kwds)
+
+ def posts_delete(self, url, **kwds):
+ """Delete a post from del.icio.us. Returns a `result` message or
+        raises a ``DeliciousError``. See ``self.request()``.
+
+ &url (required)
+ the url of the item.
+ """
+ return self.request("posts/delete", url=url, **kwds)
+
+ # Bundles
+ def bundles_all(self, **kwds):
+ """Retrieve user bundles from del.icio.us.
+ ::
+
+ <bundles>
+            <bundle name="..." tags="...">
+ """
+ return self.request("tags/bundles/all", **kwds)
+
+ def bundles_set(self, bundle, tags, **kwds):
+ """Assign a set of tags to a single bundle, wipes away previous
+        settings for bundle. Returns a `result` message or raises a
+ ``DeliciousError``. See ``self.request()``.
+
+ &bundle (required)
+ the bundle name.
+ &tags (required)
+            list of tags (space separated).
+ """
+ if type(tags)==list:
+ tags = " ".join(tags)
+ return self.request("tags/bundles/set", bundle=bundle, tags=tags,
+ **kwds)
+
+ def bundles_delete(self, bundle, **kwds):
+ """Delete a bundle from del.icio.us. Returns a `result` message or
+        raises a ``DeliciousError``. See ``self.request()``.
+
+ &bundle (required)
+ the bundle name.
+ """
+ return self.request("tags/bundles/delete", bundle=bundle, **kwds)
+
+ ### Utils
+
+ # Lookup table for del.icio.us url-path to DeliciousAPI method.
+ paths = {
+ 'tags/get': tags_get,
+ 'tags/rename': tags_rename,
+ 'posts/update': posts_update,
+ 'posts/dates': posts_dates,
+ 'posts/get': posts_get,
+ 'posts/recent': posts_recent,
+ 'posts/all': posts_all,
+ 'posts/add': posts_add,
+ 'posts/delete': posts_delete,
+ 'tags/bundles/all': bundles_all,
+ 'tags/bundles/set': bundles_set,
+ 'tags/bundles/delete': bundles_delete,
+ }
+
+ def get_url(self, url):
+ """Return the del.icio.us url at which the HTML page with posts for
+ ``url`` can be found.
+ """
+ return "http://del.icio.us/url/?url=%s" % (url,)
+
+
+### Convenience functions on this package
+
+def apiNew(user, passwd):
+    """Creates a new DeliciousAPI object.
+    Requires user(name) and passwd.
+    """
+ return DeliciousAPI(user=user, passwd=passwd)
+
+def add(user, passwd, url, description, tags="", extended="", dt="", replace="no"):
+ return apiNew(user, passwd).posts_add(url=url, description=description, extended=extended, tags=tags, dt=dt, replace=replace)
+
+def get(user, passwd, tag="", dt="", count = 0):
+ posts = apiNew(user, passwd).posts_get(tag=tag,dt=dt)
+ if count != 0: posts = posts[0:count]
+ return posts
+
+def get_all(user, passwd, tag=""):
+ return apiNew(user, passwd).posts_all(tag=tag)
+
+def delete(user, passwd, url):
+ return apiNew(user, passwd).posts_delete(url=url)
+
+def rename_tag(user, passwd, oldtag, newtag):
+ return apiNew(user=user, passwd=passwd).tags_rename(old=oldtag, new=newtag)
+
+def get_tags(user, passwd):
+ return apiNew(user=user, passwd=passwd).tags_get()
+
+
+### RSS functions bvb: still working...?
+def getrss(tag="", popular=0, url='', user=""):
+ """get posts from del.icio.us via parsing RSS (bvb:or HTML)
+
+ todo: not tested
+
+ tag (opt) sort by tag
+ popular (opt) look for the popular stuff
+    user (opt) get the posts by a user; this overrides popular
+ url (opt) get the posts by url
+ """
+ return dlcs_rss_request(tag=tag, popular=popular, user=user, url=url)
+
+def get_userposts(user):
+ return getrss(user = user)
+
+def get_tagposts(tag):
+ return getrss(tag = tag)
+
+def get_urlposts(url):
+ return getrss(url = url)
+
+def get_popular(tag = ""):
+ return getrss(tag = tag, popular = 1)
+
+
+### TODO: implement JSON fetching
+def json_posts(user, count=15):
+ """http://del.icio.us/feeds/json/mpe
+ http://del.icio.us/feeds/json/mpe/art+history
+ count=### the number of posts you want to get (default is 15, maximum is 100)
+ raw a raw JSON object is returned, instead of an object named Delicious.posts
+ """
+
+def json_tags(user, atleast, count, sort='alpha'):
+ """http://del.icio.us/feeds/json/tags/mpe
+ atleast=### include only tags for which there are at least ### number of posts
+ count=### include ### tags, counting down from the top
+ sort={alpha|count} construct the object with tags in alphabetic order (alpha), or by count of posts (count)
+ callback=NAME wrap the object definition in a function call NAME(...), thus invoking that function when the feed is executed
+ raw a pure JSON object is returned, instead of code that will construct an object named Delicious.tags
+ """
+
+def json_network(user):
+ """http://del.icio.us/feeds/json/network/mpe
+ callback=NAME wrap the object definition in a function call NAME(...)
+ ?raw a raw JSON object is returned, instead of an object named Delicious.posts
+ """
+
+def json_fans(user):
+ """http://del.icio.us/feeds/json/fans/mpe
+ callback=NAME wrap the object definition in a function call NAME(...)
+ ?raw a pure JSON object is returned, instead of an object named Delicious.
+ """
+
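+# The json_* functions above are unimplemented (see the TODO). A minimal
+# sketch of what json_posts could do, assuming the feed URL layout that the
+# docstrings document and returning the raw JSON text (stdlib only):
+#
+#   import urllib
+#   def json_posts_sketch(user, count=15):
+#       url = "http://del.icio.us/feeds/json/%s?count=%s&raw" % (
+#           urllib.quote(user), count)
+#       return urllib.urlopen(url).read()
+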
diff --git a/lib/strutils.py b/lib/strutils.py
new file mode 100644
index 0000000..368d3d8
--- /dev/null
+++ b/lib/strutils.py
@@ -0,0 +1,50 @@
+
+#
+# String/unicode conversion utils.
+#
+
+def safestr(s):
+ """
+    Safely coerce *anything* to a string. If the object can't be str'd, an
+    empty string will be returned.
+
+ You can (and I do) use this for really crappy unicode handling, but it's
+ a bit like killing a mosquito with a bazooka.
+ """
+ if s is None:
+ return ""
+ if isinstance(s, unicode):
+ return s.encode('ascii', 'xmlcharrefreplace')
+ else:
+ try:
+ return str(s)
+        except Exception:
+ return ""
+
+def safeint(s):
+ """Like safestr(), but always returns an int. Returns 0 on failure."""
+ try:
+ return int(safestr(s))
+ except ValueError:
+ return 0
+
+
+def convertentity(m):
+    """Convert an HTML entity into a normal string (ISO-8859-1)."""
+    import htmlentitydefs
+    if m.group(1) == '#':
+ try:
+ return chr(int(m.group(2)))
+ except ValueError:
+ return '&#%s;' % m.group(2)
+ try:
+ return htmlentitydefs.entitydefs[m.group(2)]
+ except KeyError:
+ return '&%s;' % m.group(2)
+
+def unquotehtml(s):
+    """Convert an HTML-quoted string into a normal string (ISO-8859-1).
+
+    Works with &#XX; and with &nbsp; &gt; etc."""
+    import re
+    return re.sub(r'&(#?)(.+?);', convertentity, s)
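+
+# Illustrative usage:
+#   >>> unquotehtml('6 &gt; 5 &amp; 4 &#60; 5')
+#   '6 > 5 & 4 < 5'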
diff --git a/lib/templatetags/__init__.py b/lib/templatetags/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/lib/templatetags/__init__.py
diff --git a/lib/templatetags/templatetags/__init__.py b/lib/templatetags/templatetags/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/lib/templatetags/templatetags/__init__.py
diff --git a/lib/templatetags/templatetags/get_latest.py b/lib/templatetags/templatetags/get_latest.py
new file mode 100644
index 0000000..6c9f9fa
--- /dev/null
+++ b/lib/templatetags/templatetags/get_latest.py
@@ -0,0 +1,22 @@
+from django.template import Library, Node, TemplateSyntaxError
+from django.db.models import get_model
+
+register = Library()
+
+class LatestContentNode(Node):
+    def __init__(self, model, num, varname):
+        self.num, self.varname = int(num), varname
+        self.model = get_model(*model.split('.'))
+
+    def render(self, context):
+        context[self.varname] = self.model._default_manager.all()[:self.num]
+        return ''
+
+def get_latest(parser, token):
+    bits = token.contents.split()
+    if len(bits) != 5:
+        raise TemplateSyntaxError, "get_latest tag takes exactly four arguments"
+    if bits[3] != 'as':
+        raise TemplateSyntaxError, "third argument to get_latest tag must be 'as'"
+    return LatestContentNode(bits[1], bits[2], bits[4])
+get_latest = register.tag(get_latest)
diff --git a/lib/templatetags/templatetags/get_latest_pub.py b/lib/templatetags/templatetags/get_latest_pub.py
new file mode 100644
index 0000000..151befa
--- /dev/null
+++ b/lib/templatetags/templatetags/get_latest_pub.py
@@ -0,0 +1,22 @@
+from django.template import Library, Node, TemplateSyntaxError
+from django.db.models import get_model
+
+register = Library()
+
+class LatestContentNode(Node):
+    def __init__(self, model, num, varname):
+        self.num, self.varname = int(num), varname
+        self.model = get_model(*model.split('.'))
+
+    def render(self, context):
+        context[self.varname] = self.model._default_manager.filter(status__exact=1)[:self.num]
+        return ''
+
+def get_latest_pub(parser, token):
+    bits = token.contents.split()
+    if len(bits) != 5:
+        raise TemplateSyntaxError, "get_latest_pub tag takes exactly four arguments"
+    if bits[3] != 'as':
+        raise TemplateSyntaxError, "third argument to get_latest_pub tag must be 'as'"
+    return LatestContentNode(bits[1], bits[2], bits[4])
+get_latest_pub = register.tag(get_latest_pub)
diff --git a/lib/templatetags/templatetags/markdown.py b/lib/templatetags/templatetags/markdown.py
new file mode 100644
index 0000000..dca51f2
--- /dev/null
+++ b/lib/templatetags/templatetags/markdown.py
@@ -0,0 +1,9 @@
+from django import template
+import markdown2 as markdown
+
+register = template.Library()
+
+def do_markdown(text):
+ return markdown.markdown(text, safe_mode = False)
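+
+# Template usage (illustrative):
+#   {% load markdown %}
+#   {{ entry.body|markdown }}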
+
+register.filter('markdown', do_markdown) \ No newline at end of file
diff --git a/lib/templatetags/templatetags/slugify_under.py b/lib/templatetags/templatetags/slugify_under.py
new file mode 100644
index 0000000..bbf01d2
--- /dev/null
+++ b/lib/templatetags/templatetags/slugify_under.py
@@ -0,0 +1,15 @@
+import re
+from django import template
+from django.utils.safestring import mark_safe
+register = template.Library()
+
+@register.filter
+def slugify_under(value):
+ """
+    Normalizes the string, converts it to lowercase, removes non-alpha
+    characters, and converts spaces and hyphens to underscores.
+ """
+ import unicodedata
+ value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore')
+ value = unicode(re.sub('[^\w\s-]', '', value).strip().lower())
+ return mark_safe(re.sub('[-\s]+', '_', value))
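+
+# Illustrative: slugify_under(u"Joe's CD #2") returns u'joes_cd_2'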
diff --git a/lib/templatetags/templatetags/smartypants.py b/lib/templatetags/templatetags/smartypants.py
new file mode 100644
index 0000000..07ddd03
--- /dev/null
+++ b/lib/templatetags/templatetags/smartypants.py
@@ -0,0 +1,878 @@
+r"""
+==============
+smartypants.py
+==============
+
+----------------------------
+SmartyPants ported to Python
+----------------------------
+
+Ported by `Chad Miller`_
+Copyright (c) 2004 Chad Miller
+
+original `SmartyPants`_ by `John Gruber`_
+Copyright (c) 2003 John Gruber
+
+
+Synopsis
+========
+
+A smart-quotes plugin for Pyblosxom_.
+
+The original "SmartyPants" is a free web publishing plug-in for Movable Type,
+Blosxom, and BBEdit that easily translates plain ASCII punctuation characters
+into "smart" typographic punctuation HTML entities.
+
+This software, *smartypants.py*, endeavours to be a functional port of
+SmartyPants to Python, for use with Pyblosxom_.
+
+
+Description
+===========
+
+SmartyPants can perform the following transformations:
+
+- Straight quotes ( " and ' ) into "curly" quote HTML entities
+- Backticks-style quotes (\`\`like this'') into "curly" quote HTML entities
+- Dashes (``--`` and ``---``) into en- and em-dash entities
+- Three consecutive dots (``...`` or ``. . .``) into an ellipsis entity
+
+This means you can write, edit, and save your posts using plain old
+ASCII straight quotes, plain dashes, and plain dots, but your published
+posts (and final HTML output) will appear with smart quotes, em-dashes,
+and proper ellipses.
+
+SmartyPants does not modify characters within ``<pre>``, ``<code>``, ``<kbd>``,
+``<math>`` or ``<script>`` tag blocks. Typically, these tags are used to
+display text where smart quotes and other "smart punctuation" would not be
+appropriate, such as source code or example markup.
+
+
+Backslash Escapes
+=================
+
+If you need to use literal straight quotes (or plain hyphens and
+periods), SmartyPants accepts the following backslash escape sequences
+to force non-smart punctuation. It does so by transforming the escape
+sequence into a decimal-encoded HTML entity:
+
+====== ===== =========
+Escape Value Character
+====== ===== =========
+\\\\   &#92; \\
+\\"    &#34; "
+\\'    &#39; '
+\\.    &#46; .
+\\-    &#45; \-
+\\`    &#96; \`
+====== ===== =========
+
+This is useful, for example, when you want to use straight quotes as
+foot and inch marks: 6'2" tall; a 17" iMac.
+
+Options
+=======
+
+For Pyblosxom users, the ``smartypants_attributes`` attribute is where you
+specify configuration options.
+
+Numeric values are the easiest way to configure SmartyPants' behavior:
+
+"0"
+ Suppress all transformations. (Do nothing.)
+"1"
+ Performs default SmartyPants transformations: quotes (including
+ \`\`backticks'' -style), em-dashes, and ellipses. "``--``" (dash dash)
+ is used to signify an em-dash; there is no support for en-dashes.
+
+"2"
+ Same as smarty_pants="1", except that it uses the old-school typewriter
+ shorthand for dashes: "``--``" (dash dash) for en-dashes, "``---``"
+ (dash dash dash)
+ for em-dashes.
+
+"3"
+ Same as smarty_pants="2", but inverts the shorthand for dashes:
+ "``--``" (dash dash) for em-dashes, and "``---``" (dash dash dash) for
+ en-dashes.
+
+"-1"
+ Stupefy mode. Reverses the SmartyPants transformation process, turning
+ the HTML entities produced by SmartyPants into their ASCII equivalents.
+ E.g. "&#8220;" is turned into a simple double-quote ("), "&#8212;" is
+ turned into two dashes, etc.
+
+
+The following single-character attribute values can be combined to toggle
+individual transformations from within the smarty_pants attribute. For
+example, to educate normal quotes and em-dashes, but not ellipses or
+\`\`backticks'' -style quotes:
+
+``py['smartypants_attributes'] = "qd"``
+
+"q"
+ Educates normal quote characters: (") and (').
+
+"b"
+ Educates \`\`backticks'' -style double quotes.
+
+"B"
+ Educates \`\`backticks'' -style double quotes and \`single' quotes.
+
+"d"
+ Educates em-dashes.
+
+"D"
+ Educates em-dashes and en-dashes, using old-school typewriter shorthand:
+ (dash dash) for en-dashes, (dash dash dash) for em-dashes.
+
+"i"
+ Educates em-dashes and en-dashes, using inverted old-school typewriter
+ shorthand: (dash dash) for em-dashes, (dash dash dash) for en-dashes.
+
+"e"
+ Educates ellipses.
+
+"w"
+ Translates any instance of ``&quot;`` into a normal double-quote character.
+ This should be of no interest to most people, but of particular interest
+ to anyone who writes their posts using Dreamweaver, as Dreamweaver
+ inexplicably uses this entity to represent a literal double-quote
+ character. SmartyPants only educates normal quotes, not entities (because
+ ordinarily, entities are used for the explicit purpose of representing the
+ specific character they represent). The "w" option must be used in
+ conjunction with one (or both) of the other quote options ("q" or "b").
+ Thus, if you wish to apply all SmartyPants transformations (quotes, en-
+ and em-dashes, and ellipses) and also translate ``&quot;`` entities into
+ regular quotes so SmartyPants can educate them, you should pass the
+ following to the smarty_pants attribute:
+
+``py['smartypants_attributes'] = "qDew"``
+
+The ``smartypants_forbidden_flavours`` list contains pyblosxom flavours for
+which no Smarty Pants rendering will occur.
+
+
+Caveats
+=======
+
+Why You Might Not Want to Use Smart Quotes in Your Weblog
+---------------------------------------------------------
+
+For one thing, you might not care.
+
+Most normal, mentally stable individuals do not take notice of proper
+typographic punctuation. Many design and typography nerds, however, break
+out in a nasty rash when they encounter, say, a restaurant sign that uses
+a straight apostrophe to spell "Joe's".
+
+If you're the sort of person who just doesn't care, you might well want to
+continue not caring. Using straight quotes -- and sticking to the 7-bit
+ASCII character set in general -- is certainly a simpler way to live.
+
+Even if you *do* care about accurate typography, you still might want to
+think twice before educating the quote characters in your weblog. One side
+effect of publishing curly quote HTML entities is that it makes your
+weblog a bit harder for others to quote from using copy-and-paste. What
+happens is that when someone copies text from your blog, the copied text
+contains the 8-bit curly quote characters (as well as the 8-bit characters
+for em-dashes and ellipses, if you use these options). These characters
+are not standard across different text encoding methods, which is why they
+need to be encoded as HTML entities.
+
+People copying text from your weblog, however, may not notice that you're
+using curly quotes, and they'll go ahead and paste the unencoded 8-bit
+characters copied from their browser into an email message or their own
+weblog. When pasted as raw "smart quotes", these characters are likely to
+get mangled beyond recognition.
+
+That said, my own opinion is that any decent text editor or email client
+makes it easy to stupefy smart quote characters into their 7-bit
+equivalents, and I don't consider it my problem if you're using an
+indecent text editor or email client.
+
+
+Algorithmic Shortcomings
+------------------------
+
+One situation in which quotes will get curled the wrong way is when
+apostrophes are used at the start of leading contractions. For example:
+
+``'Twas the night before Christmas.``
+
+In the case above, SmartyPants will turn the apostrophe into an opening
+single-quote, when in fact it should be a closing one. I don't think
+this problem can be solved in the general case -- every word processor
+I've tried gets this wrong as well. In such cases, it's best to use the
+proper HTML entity for closing single-quotes (``&#8217;``) by hand.
+
+
+Bugs
+====
+
+To file bug reports or feature requests (other than topics listed in the
+Caveats section above) please send email to: mailto:smartypantspy@chad.org
+
+If the bug involves quotes being curled the wrong way, please send example
+text to illustrate.
+
+To Do list
+----------
+
+- Provide a function for use within templates to quote anything at all.
+
+
+Version History
+===============
+
+1.5_1.5: Sat, 13 Aug 2005 15:50:24 -0400
+ - Fix bogus magical quotation when there is no hint that the
+ user wants it, e.g., in "21st century". Thanks to Nathan Hamblen.
+ - Be smarter about quotes before terminating numbers in an en-dash'ed
+ range.
+
+1.5_1.4: Thu, 10 Feb 2005 20:24:36 -0500
+ - Fix a date-processing bug, as reported by jacob childress.
+ - Begin a test-suite for ensuring correct output.
+ - Removed import of "string", since I didn't really need it.
+      (This was my first ever Python program. Sue me!)
+
+1.5_1.3: Wed, 15 Sep 2004 18:25:58 -0400
+ - Abort processing if the flavour is in forbidden-list. Default of
+ [ "rss" ] (Idea of Wolfgang SCHNERRING.)
+ - Remove stray virgules from en-dashes. Patch by Wolfgang SCHNERRING.
+
+1.5_1.2: Mon, 24 May 2004 08:14:54 -0400
+ - Some single quotes weren't replaced properly. Diff-tesuji played
+ by Benjamin GEIGER.
+
+1.5_1.1: Sun, 14 Mar 2004 14:38:28 -0500
+ - Support upcoming pyblosxom 0.9 plugin verification feature.
+
+1.5_1.0: Tue, 09 Mar 2004 08:08:35 -0500
+ - Initial release
+
+Version Information
+-------------------
+
+Version numbers will track the SmartyPants_ version numbers, with the addition
+of an underscore and the smartypants.py version on the end.
+
+New versions will be available at `http://wiki.chad.org/SmartyPantsPy`_
+
+.. _http://wiki.chad.org/SmartyPantsPy: http://wiki.chad.org/SmartyPantsPy
+
+Authors
+=======
+
+`John Gruber`_ did all of the hard work of writing this software in Perl for
+`Movable Type`_ and almost all of this useful documentation. `Chad Miller`_
+ported it to Python to use with Pyblosxom_.
+
+
+Additional Credits
+==================
+
+Portions of the SmartyPants original work are based on Brad Choate's nifty
+MTRegex plug-in. `Brad Choate`_ also contributed a few bits of source code to
+this plug-in. Brad Choate is a fine hacker indeed.
+
+`Jeremy Hedley`_ and `Charles Wiltgen`_ deserve mention for exemplary beta
+testing of the original SmartyPants.
+
+`Rael Dornfest`_ ported SmartyPants to Blosxom.
+
+.. _Brad Choate: http://bradchoate.com/
+.. _Jeremy Hedley: http://antipixel.com/
+.. _Charles Wiltgen: http://playbacktime.com/
+.. _Rael Dornfest: http://raelity.org/
+
+
+Copyright and License
+=====================
+
+SmartyPants_ license::
+
+ Copyright (c) 2003 John Gruber
+ (http://daringfireball.net/)
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+
+ * Neither the name "SmartyPants" nor the names of its contributors
+ may be used to endorse or promote products derived from this
+ software without specific prior written permission.
+
+ This software is provided by the copyright holders and contributors "as
+ is" and any express or implied warranties, including, but not limited
+ to, the implied warranties of merchantability and fitness for a
+ particular purpose are disclaimed. In no event shall the copyright
+ owner or contributors be liable for any direct, indirect, incidental,
+ special, exemplary, or consequential damages (including, but not
+ limited to, procurement of substitute goods or services; loss of use,
+ data, or profits; or business interruption) however caused and on any
+ theory of liability, whether in contract, strict liability, or tort
+ (including negligence or otherwise) arising in any way out of the use
+ of this software, even if advised of the possibility of such damage.
+
+
+smartypants.py license::
+
+ smartypants.py is a derivative work of SmartyPants.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+
+ This software is provided by the copyright holders and contributors "as
+ is" and any express or implied warranties, including, but not limited
+ to, the implied warranties of merchantability and fitness for a
+ particular purpose are disclaimed. In no event shall the copyright
+ owner or contributors be liable for any direct, indirect, incidental,
+ special, exemplary, or consequential damages (including, but not
+ limited to, procurement of substitute goods or services; loss of use,
+ data, or profits; or business interruption) however caused and on any
+ theory of liability, whether in contract, strict liability, or tort
+ (including negligence or otherwise) arising in any way out of the use
+ of this software, even if advised of the possibility of such damage.
+
+
+
+.. _John Gruber: http://daringfireball.net/
+.. _Chad Miller: http://web.chad.org/
+
+.. _Pyblosxom: http://roughingit.subtlehints.net/pyblosxom
+.. _SmartyPants: http://daringfireball.net/projects/smartypants/
+.. _Movable Type: http://www.movabletype.org/
+
+"""
+
+default_smartypants_attr = "1"
+
+import re
+
+tags_to_skip_regex = re.compile("<(/)?(?:pre|code|kbd|script|math)[^>]*>")
+
+
+def verify_installation(request):
+    # assert that the plugin is functional
+    return 1
+
+
+def cb_story(args):
+ global default_smartypants_attr
+
+ try:
+ forbidden_flavours = args["entry"]["smartypants_forbidden_flavours"]
+ except KeyError:
+ forbidden_flavours = [ "rss" ]
+
+ try:
+ attributes = args["entry"]["smartypants_attributes"]
+ except KeyError:
+ attributes = default_smartypants_attr
+
+ if attributes is None:
+ attributes = default_smartypants_attr
+
+ entryData = args["entry"].getData()
+
+ try:
+ if args["request"]["flavour"] in forbidden_flavours:
+ return
+ except KeyError:
+ if "&lt;" in args["entry"]["body"][0:15]: # sniff the stream
+ return # abort if it looks like escaped HTML. FIXME
+
+ # FIXME: make these configurable, perhaps?
+ args["entry"]["body"] = smartyPants(entryData, attributes)
+ args["entry"]["title"] = smartyPants(args["entry"]["title"], attributes)
+
+
+### internal functions below here
+
+def smartyPants(text, attr=default_smartypants_attr):
+    convert_quot = "0"  # should we translate &quot; entities into normal quotes?
+
+ # Parse attributes:
+ # 0 : do nothing
+ # 1 : set all
+ # 2 : set all, using old school en- and em- dash shortcuts
+ # 3 : set all, using inverted old school en and em- dash shortcuts
+ #
+ # q : quotes
+ # b : backtick quotes (``double'' only)
+ # B : backtick quotes (``double'' and `single')
+ # d : dashes
+ # D : old school dashes
+ # i : inverted old school dashes
+ # e : ellipses
+ # w : convert &quot; entities to " for Dreamweaver users
+
+ do_dashes = "0"
+ do_backticks = "0"
+ do_quotes = "0"
+ do_ellipses = "0"
+ do_stupefy = "0"
+
+ if attr == "0":
+ # Do nothing.
+ return text
+ elif attr == "1":
+ do_quotes = "1"
+ do_backticks = "1"
+ do_dashes = "1"
+ do_ellipses = "1"
+ elif attr == "2":
+ # Do everything, turn all options on, use old school dash shorthand.
+ do_quotes = "1"
+ do_backticks = "1"
+ do_dashes = "2"
+ do_ellipses = "1"
+ elif attr == "3":
+ # Do everything, turn all options on, use inverted old school dash shorthand.
+ do_quotes = "1"
+ do_backticks = "1"
+ do_dashes = "3"
+ do_ellipses = "1"
+ elif attr == "-1":
+ # Special "stupefy" mode.
+ do_stupefy = "1"
+ else:
+ for c in attr:
+ if c == "q": do_quotes = "1"
+ elif c == "b": do_backticks = "1"
+ elif c == "B": do_backticks = "2"
+ elif c == "d": do_dashes = "1"
+ elif c == "D": do_dashes = "2"
+ elif c == "i": do_dashes = "3"
+ elif c == "e": do_ellipses = "1"
+ elif c == "w": convert_quot = "1"
+ else:
+ pass
+ # ignore unknown option
+
+ tokens = _tokenize(text)
+ result = []
+ in_pre = False
+
+ prev_token_last_char = ""
+ # This is a cheat, used to get some context
+ # for one-character tokens that consist of
+ # just a quote char. What we do is remember
+ # the last character of the previous text
+ # token, to use as context to curl single-
+ # character quote tokens correctly.
+
+ for cur_token in tokens:
+ if cur_token[0] == "tag":
+ # Don't mess with quotes inside tags.
+ result.append(cur_token[1])
+ close_match = tags_to_skip_regex.match(cur_token[1])
+ if close_match is not None and close_match.group(1) == "":
+ in_pre = True
+ else:
+ in_pre = False
+ else:
+ t = cur_token[1]
+ last_char = t[-1:] # Remember last char of this token before processing.
+ if not in_pre:
+ oldstr = t
+ t = processEscapes(t)
+
+ if convert_quot != "0":
+ t = re.sub('&quot;', '"', t)
+
+ if do_dashes != "0":
+ if do_dashes == "1":
+ t = educateDashes(t)
+ if do_dashes == "2":
+ t = educateDashesOldSchool(t)
+ if do_dashes == "3":
+ t = educateDashesOldSchoolInverted(t)
+
+ if do_ellipses != "0":
+ t = educateEllipses(t)
+
+ # Note: backticks need to be processed before quotes.
+ if do_backticks != "0":
+ t = educateBackticks(t)
+
+ if do_backticks == "2":
+ t = educateSingleBackticks(t)
+
+ if do_quotes != "0":
+ if t == "'":
+ # Special case: single-character ' token
+ if re.match("\S", prev_token_last_char):
+ t = "&#8217;"
+ else:
+ t = "&#8216;"
+ elif t == '"':
+ # Special case: single-character " token
+ if re.match("\S", prev_token_last_char):
+ t = "&#8221;"
+ else:
+ t = "&#8220;"
+
+ else:
+ # Normal case:
+ t = educateQuotes(t)
+
+ if do_stupefy == "1":
+ t = stupefyEntities(t)
+
+ prev_token_last_char = last_char
+ result.append(t)
+
+ return "".join(result)
+
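+# For example, with old-school dash mode:
+#   smartyPants('"Hi" -- there...', "2")
+#   returns '&#8220;Hi&#8221; &#8211; there&#8230;'
+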
+
+def educateQuotes(str):
+ """
+ Parameter: String.
+
+ Returns: The string, with "educated" curly quote HTML entities.
+
+ Example input: "Isn't this fun?"
+ Example output: &#8220;Isn&#8217;t this fun?&#8221;
+ """
+
+ oldstr = str
+ punct_class = r"""[!"#\$\%'()*+,-.\/:;<=>?\@\[\\\]\^_`{|}~]"""
+
+ # Special case if the very first character is a quote
+ # followed by punctuation at a non-word-break. Close the quotes by brute force:
+ str = re.sub(r"""^'(?=%s\\B)""" % (punct_class,), r"""&#8217;""", str)
+ str = re.sub(r"""^"(?=%s\\B)""" % (punct_class,), r"""&#8221;""", str)
+
+ # Special case for double sets of quotes, e.g.:
+ # <p>He said, "'Quoted' words in a larger quote."</p>
+ str = re.sub(r""""'(?=\w)""", """&#8220;&#8216;""", str)
+ str = re.sub(r"""'"(?=\w)""", """&#8216;&#8220;""", str)
+
+ # Special case for decade abbreviations (the '80s):
+ str = re.sub(r"""\b'(?=\d{2}s)""", r"""&#8217;""", str)
+
+ close_class = r"""[^\ \t\r\n\[\{\(\-]"""
+ dec_dashes = r"""&#8211;|&#8212;"""
+
+ # Get most opening single quotes:
+ opening_single_quotes_regex = re.compile(r"""
+ (
+ \s | # a whitespace char, or
+ &nbsp; | # a non-breaking space entity, or
+ -- | # dashes, or
+ &[mn]dash; | # named dash entities
+ %s | # or decimal entities
+ &\#x201[34]; # or hex
+ )
+ ' # the quote
+ (?=\w) # followed by a word character
+ """ % (dec_dashes,), re.VERBOSE)
+ str = opening_single_quotes_regex.sub(r"""\1&#8216;""", str)
+
+ closing_single_quotes_regex = re.compile(r"""
+ (%s)
+ '
+ (?!\s | s\b | \d)
+ """ % (close_class,), re.VERBOSE)
+ str = closing_single_quotes_regex.sub(r"""\1&#8217;""", str)
+
+ closing_single_quotes_regex = re.compile(r"""
+ (%s)
+ '
+ (\s | s\b)
+ """ % (close_class,), re.VERBOSE)
+ str = closing_single_quotes_regex.sub(r"""\1&#8217;\2""", str)
+
+ # Any remaining single quotes should be opening ones:
+ str = re.sub(r"""'""", r"""&#8216;""", str)
+
+ # Get most opening double quotes:
+ opening_double_quotes_regex = re.compile(r"""
+ (
+ \s | # a whitespace char, or
+ &nbsp; | # a non-breaking space entity, or
+ -- | # dashes, or
+ &[mn]dash; | # named dash entities
+ %s | # or decimal entities
+ &\#x201[34]; # or hex
+ )
+ " # the quote
+ (?=\w) # followed by a word character
+ """ % (dec_dashes,), re.VERBOSE)
+ str = opening_double_quotes_regex.sub(r"""\1&#8220;""", str)
+
+ # Double closing quotes:
+ closing_double_quotes_regex = re.compile(r"""
+ #(%s)? # character that indicates the quote should be closing
+ "
+ (?=\s)
+ """ % (close_class,), re.VERBOSE)
+ str = closing_double_quotes_regex.sub(r"""&#8221;""", str)
+
+ closing_double_quotes_regex = re.compile(r"""
+ (%s) # character that indicates the quote should be closing
+ "
+ """ % (close_class,), re.VERBOSE)
+ str = closing_double_quotes_regex.sub(r"""\1&#8221;""", str)
+
+ # Any remaining quotes should be opening ones.
+ str = re.sub(r'"', r"""&#8220;""", str)
+
+ return str
+
+
+def educateBackticks(str):
+ """
+ Parameter: String.
+ Returns: The string, with ``backticks'' -style double quotes
+ translated into HTML curly quote entities.
+ Example input: ``Isn't this fun?''
+ Example output: &#8220;Isn't this fun?&#8221;
+ """
+
+ str = re.sub(r"""``""", r"""&#8220;""", str)
+ str = re.sub(r"""''""", r"""&#8221;""", str)
+ return str
+
+
+def educateSingleBackticks(str):
+ """
+ Parameter: String.
+ Returns: The string, with `backticks' -style single quotes
+ translated into HTML curly quote entities.
+
+ Example input: `Isn't this fun?'
+ Example output: &#8216;Isn&#8217;t this fun?&#8217;
+ """
+
+ str = re.sub(r"""`""", r"""&#8216;""", str)
+ str = re.sub(r"""'""", r"""&#8217;""", str)
+ return str
+
+
+def educateDashes(str):
+ """
+ Parameter: String.
+
+ Returns: The string, with each instance of "--" translated to
+               an em-dash HTML entity, and each "---" translated to
+               an en-dash HTML entity.
+ """
+
+ str = re.sub(r"""---""", r"""&#8211;""", str) # en (yes, backwards)
+ str = re.sub(r"""--""", r"""&#8212;""", str) # em (yes, backwards)
+ return str
+
+
+def educateDashesOldSchool(str):
+ """
+ Parameter: String.
+
+ Returns: The string, with each instance of "--" translated to
+ an en-dash HTML entity, and each "---" translated to
+ an em-dash HTML entity.
+ """
+
+ str = re.sub(r"""---""", r"""&#8212;""", str) # em (yes, backwards)
+ str = re.sub(r"""--""", r"""&#8211;""", str) # en (yes, backwards)
+ return str
+
+
+def educateDashesOldSchoolInverted(str):
+ """
+ Parameter: String.
+
+ Returns: The string, with each instance of "--" translated to
+ an em-dash HTML entity, and each "---" translated to
+ an en-dash HTML entity. Two reasons why: First, unlike the
+ en- and em-dash syntax supported by
+ EducateDashesOldSchool(), it's compatible with existing
+ entries written before SmartyPants 1.1, back when "--" was
+ only used for em-dashes. Second, em-dashes are more
+ common than en-dashes, and so it sort of makes sense that
+ the shortcut should be shorter to type. (Thanks to Aaron
+ Swartz for the idea.)
+ """
+ str = re.sub(r"""---""", r"""&#8211;""", str) # em
+ str = re.sub(r"""--""", r"""&#8212;""", str) # en
+ return str
+
+
+
+def educateEllipses(str):
+ """
+ Parameter: String.
+ Returns: The string, with each instance of "..." translated to
+ an ellipsis HTML entity.
+
+ Example input: Huh...?
+ Example output: Huh&#8230;?
+ """
+
+ str = re.sub(r"""\.\.\.""", r"""&#8230;""", str)
+ str = re.sub(r"""\. \. \.""", r"""&#8230;""", str)
+ return str
+
+
+def stupefyEntities(str):
+ """
+ Parameter: String.
+ Returns: The string, with each SmartyPants HTML entity translated to
+ its ASCII counterpart.
+
+ Example input: &#8220;Hello &#8212; world.&#8221;
+ Example output: "Hello -- world."
+ """
+
+ str = re.sub(r"""&#8211;""", r"""-""", str) # en-dash
+ str = re.sub(r"""&#8212;""", r"""--""", str) # em-dash
+
+ str = re.sub(r"""&#8216;""", r"""'""", str) # open single quote
+ str = re.sub(r"""&#8217;""", r"""'""", str) # close single quote
+
+ str = re.sub(r"""&#8220;""", r'''"''', str) # open double quote
+ str = re.sub(r"""&#8221;""", r'''"''', str) # close double quote
+
+ str = re.sub(r"""&#8230;""", r"""...""", str)# ellipsis
+
+ return str
+
+
+def processEscapes(str):
+ r"""
+ Parameter: String.
+ Returns: The string, with after processing the following backslash
+ escape sequences. This is useful if you want to force a "dumb"
+ quote or other character to appear.
+
+ Escape Value
+ ------ -----
+ \\ &#92;
+ \" &#34;
+ \' &#39;
+ \. &#46;
+ \- &#45;
+ \` &#96;
+ """
+ str = re.sub(r"""\\\\""", r"""&#92;""", str)
+ str = re.sub(r'''\\"''', r"""&#34;""", str)
+ str = re.sub(r"""\\'""", r"""&#39;""", str)
+ str = re.sub(r"""\\\.""", r"""&#46;""", str)
+ str = re.sub(r"""\\-""", r"""&#45;""", str)
+ str = re.sub(r"""\\`""", r"""&#96;""", str)
+
+ return str
+
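+# e.g. processEscapes(r'\"dumb\" quotes') returns '&#34;dumb&#34; quotes'
+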
+
+def _tokenize(str):
+ """
+ Parameter: String containing HTML markup.
+ Returns: Reference to an array of the tokens comprising the input
+ string. Each token is either a tag (possibly with nested,
+ tags contained therein, such as <a href="<MTFoo>">, or a
+ run of text between tags. Each element of the array is a
+ two-element array; the first is either 'tag' or 'text';
+ the second is the actual value.
+
+ Based on the _tokenize() subroutine from Brad Choate's MTRegex plugin.
+ <http://www.bradchoate.com/past/mtregex.php>
+ """
+
+ pos = 0
+ length = len(str)
+ tokens = []
+
+ depth = 6
+ nested_tags = "|".join(['(?:<(?:[^<>]',] * depth) + (')*>)' * depth)
+ #match = r"""(?: <! ( -- .*? -- \s* )+ > ) | # comments
+ # (?: <\? .*? \?> ) | # directives
+ # %s # nested tags """ % (nested_tags,)
+ tag_soup = re.compile(r"""([^<]*)(<[^>]*>)""")
+
+ token_match = tag_soup.search(str)
+
+ previous_end = 0
+ while token_match is not None:
+ if token_match.group(1) != "":
+ tokens.append(['text', token_match.group(1)])
+
+ tokens.append(['tag', token_match.group(2)])
+
+ previous_end = token_match.end()
+ token_match = tag_soup.search(str, token_match.end())
+
+ if previous_end < len(str):
+ tokens.append(['text', str[previous_end:]])
+
+ return tokens
+
+
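+# e.g. _tokenize('<p>Hi</p>') returns
+#   [['tag', '<p>'], ['text', 'Hi'], ['tag', '</p>']]
+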
+
+if __name__ == "__main__":
+
+ import locale
+
+ try:
+ locale.setlocale(locale.LC_ALL, '')
+ except:
+ pass
+
+ from docutils.core import publish_string
+ docstring_html = publish_string(__doc__, writer_name='html')
+
+ print docstring_html
+
+
+ # Unit test output goes out stderr. No worries.
+ import unittest
+ sp = smartyPants
+
+ class TestSmartypantsAllAttributes(unittest.TestCase):
+ # the default attribute is "1", which means "all".
+
+ def test_dates(self):
+ self.assertEqual(sp("1440-80's"), "1440-80&#8217;s")
+ self.assertEqual(sp("1440-'80s"), "1440-&#8216;80s")
+ self.assertEqual(sp("1440---'80s"), "1440&#8211;&#8216;80s")
+ self.assertEqual(sp("1960s"), "1960s") # no effect.
+ self.assertEqual(sp("1960's"), "1960&#8217;s")
+ self.assertEqual(sp("one two '60s"), "one two &#8216;60s")
+ self.assertEqual(sp("'60s"), "&#8216;60s")
+
+ def test_ordinal_numbers(self):
+ self.assertEqual(sp("21st century"), "21st century") # no effect.
+ self.assertEqual(sp("3rd"), "3rd") # no effect.
+
+ def test_educated_quotes(self):
+ self.assertEqual(sp('''"Isn't this fun?"'''), '''&#8220;Isn&#8217;t this fun?&#8221;''')
+
+ unittest.main()
+
+
+
+
+__author__ = "Chad Miller <smartypantspy@chad.org>"
+__version__ = "1.5_1.5: Sat, 13 Aug 2005 15:50:24 -0400"
+__url__ = "http://wiki.chad.org/SmartyPantsPy"
+__description__ = "Smart-quotes, smart-ellipses, and smart-dashes for weblog entries in pyblosxom"
diff --git a/lib/templatetags/templatetags/truncateletters.py b/lib/templatetags/templatetags/truncateletters.py
new file mode 100644
index 0000000..c492430
--- /dev/null
+++ b/lib/templatetags/templatetags/truncateletters.py
@@ -0,0 +1,24 @@
+from django import template
+register = template.Library()
+
+@register.filter
+def truncateletters(value, arg):
+ """
+ Truncates a string after a certain number of letters
+
+ Argument: Number of letters to truncate after
+ """
+ try:
+ length = int(arg)
+ except ValueError: # invalid literal for int()
+ return value # Fail silently
+ if not isinstance(value, basestring):
+ value = str(value)
+
+ if len(value) > length:
+ truncated = value[:length]
+ if not truncated.endswith('...'):
+ truncated += '...'
+ return truncated
+
+ return value \ No newline at end of file
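+
+# Template usage (illustrative): {{ title|truncateletters:"10" }}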
diff --git a/lib/templatetags/templatetags/typogrify.py b/lib/templatetags/templatetags/typogrify.py
new file mode 100644
index 0000000..fa4f0cf
--- /dev/null
+++ b/lib/templatetags/templatetags/typogrify.py
@@ -0,0 +1,216 @@
+# from django.conf import settings
+import re
+from django.conf import settings
+from django import template
+register = template.Library()
+
+def amp(text):
+ """Wraps apersands in html with ``<span class="amp">`` so they can be
+ styled with CSS. Apersands are also normalized to ``&amp;``. Requires
+ ampersands to have whitespace or an ``&nbsp;`` on both sides.
+
+ >>> amp('One & two')
+ 'One <span class="amp">&amp;</span> two'
+ >>> amp('One &amp; two')
+ 'One <span class="amp">&amp;</span> two'
+ >>> amp('One &#38; two')
+ 'One <span class="amp">&amp;</span> two'
+
+ >>> amp('One&nbsp;&amp;&nbsp;two')
+ 'One&nbsp;<span class="amp">&amp;</span>&nbsp;two'
+
+ It won't mess up & that are already wrapped, in entities or URLs
+
+ >>> amp('One <span class="amp">&amp;</span> two')
+ 'One <span class="amp">&amp;</span> two'
+ >>> amp('&ldquo;this&rdquo; & <a href="/?that&amp;test">that</a>')
+ '&ldquo;this&rdquo; <span class="amp">&amp;</span> <a href="/?that&amp;test">that</a>'
+ """
+ amp_finder = re.compile(r"(\s|&nbsp;)(&|&amp;|&\#38;)(\s|&nbsp;)")
+ return amp_finder.sub(r"""\1<span class="amp">&amp;</span>\3""", text)
+
+def caps(text):
+ """Wraps multiple capital letters in ``<span class="caps">``
+ so they can be styled with CSS.
+
+ >>> caps("A message from KU")
+ 'A message from <span class="caps">KU</span>'
+
+ Uses the smartypants tokenizer to not screw with HTML or with tags it shouldn't.
+
+ >>> caps("<PRE>CAPS</pre> more CAPS")
+ '<PRE>CAPS</pre> more <span class="caps">CAPS</span>'
+
+ >>> caps("A message from 2KU2 with digits")
+ 'A message from <span class="caps">2KU2</span> with digits'
+
+ >>> caps("Dotted caps followed by spaces should never include them in the wrap D.O.T. like so.")
+ 'Dotted caps followed by spaces should never include them in the wrap <span class="caps">D.O.T.</span> like so.'
+
+ >>> caps("<i>D.O.T.</i>HE34T<b>RFID</b>")
+ '<i><span class="caps">D.O.T.</span></i><span class="caps">HE34T</span><b><span class="caps">RFID</span></b>'
+ """
+ try:
+ import smartypants
+ except ImportError:
+ if settings.DEBUG:
+ raise template.TemplateSyntaxError, "Error in {% caps %} filter: The Python SmartyPants library isn't installed."
+ return text
+
+ tokens = smartypants._tokenize(text)
+ result = []
+ in_skipped_tag = False
+
+ cap_finder = re.compile(r"""(
+ (\b[A-Z\d]* # Group 2: Any amount of caps and digits
+                          [A-Z]\d*[A-Z]     # A cap string must include at least two caps (but they can have digits between them)
+ [A-Z\d]*\b) # Any amount of caps and digits
+ | (\b[A-Z]+\.\s? # OR: Group 3: Some caps, followed by a '.' and an optional space
+ (?:[A-Z]+\.\s?)+) # Followed by the same thing at least once more
+ (?:\s|\b|$))
+ """, re.VERBOSE)
+
+ def _cap_wrapper(matchobj):
+ """This is necessary to keep dotted cap strings to pick up extra spaces"""
+ if matchobj.group(2):
+ return """<span class="caps">%s</span>""" % matchobj.group(2)
+ else:
+ if matchobj.group(3)[-1] == " ":
+ caps = matchobj.group(3)[:-1]
+ tail = ' '
+ else:
+ caps = matchobj.group(3)
+ tail = ''
+ return """<span class="caps">%s</span>%s""" % (caps, tail)
+
+ tags_to_skip_regex = re.compile("<(/)?(?:pre|code|kbd|script|math)[^>]*>", re.IGNORECASE)
+
+
+ for token in tokens:
+ if token[0] == "tag":
+ # Don't mess with tags.
+ result.append(token[1])
+ close_match = tags_to_skip_regex.match(token[1])
+            if close_match and close_match.group(1) is None:
+ in_skipped_tag = True
+ else:
+ in_skipped_tag = False
+ else:
+ if in_skipped_tag:
+ result.append(token[1])
+ else:
+ result.append(cap_finder.sub(_cap_wrapper, token[1]))
+
+ return "".join(result)
+
+def initial_quotes(text):
+ """Wraps initial quotes in ``class="dquo"`` for double quotes or
+ ``class="quo"`` for single quotes. Works in these block tags ``(h1-h6, p, li)``
+ and also accounts for potential opening inline elements ``a, em, strong, span, b, i``
+
+ >>> initial_quotes('"With primes"')
+ '<span class="dquo">"</span>With primes"'
+ >>> initial_quotes("'With single primes'")
+ '<span class="quo">\\'</span>With single primes\\''
+
+ >>> initial_quotes('<a href="#">"With primes and a link"</a>')
+ '<a href="#"><span class="dquo">"</span>With primes and a link"</a>'
+
+ >>> initial_quotes('&#8220;With smartypanted quotes&#8221;')
+ '<span class="dquo">&#8220;</span>With smartypanted quotes&#8221;'
+ """
+ quote_finder = re.compile(r"""((<(p|h[1-6]|li)[^>]*>|^) # start with an opening p, h1-6, li or the start of the string
+ \s* # optional white space!
+ (<(a|em|span|strong|i|b)[^>]*>\s*)*) # optional opening inline tags, with more optional white space for each.
+ (("|&ldquo;|&\#8220;)|('|&lsquo;|&\#8216;)) # Find me a quote! (only need to find the left quotes and the primes)
+ # double quotes are in group 7, singles in group 8
+ """, re.VERBOSE)
+ def _quote_wrapper(matchobj):
+ if matchobj.group(7):
+ classname = "dquo"
+ quote = matchobj.group(7)
+ else:
+ classname = "quo"
+ quote = matchobj.group(8)
+ return """%s<span class="%s">%s</span>""" % (matchobj.group(1), classname, quote)
+
+ return quote_finder.sub(_quote_wrapper, text)
+
+def smartypants(text):
+ """Applies smarty pants to curl quotes.
+
+ >>> smartypants('The "Green" man')
+ 'The &#8220;Green&#8221; man'
+ """
+ try:
+ import smartypants
+ except ImportError:
+ if settings.DEBUG:
+ raise template.TemplateSyntaxError, "Error in {% smartypants %} filter: The Python smartypants library isn't installed."
+ return text
+ else:
+ return smartypants.smartyPants(text)
+
+def typogrify(text):
+ """The super typography filter
+
+ Applies the following filters: widont, smartypants, caps, amp, initial_quotes
+
+ >>> typogrify('<h2>"Jayhawks" & KU fans act extremely obnoxiously</h2>')
+ '<h2><span class="dquo">&#8220;</span>Jayhawks&#8221; <span class="amp">&amp;</span> <span class="caps">KU</span> fans act extremely&nbsp;obnoxiously</h2>'
+ """
+ text = amp(text)
+ text = widont(text)
+ text = smartypants(text)
+ text = caps(text)
+ text = initial_quotes(text)
+ return text
+
+def widont(text):
+ """Replaces the space between the last two words in a string with ``&nbsp;``
+ Works in these block tags ``(h1-h6, p, li)`` and also accounts for
+ potential closing inline elements ``a, em, strong, span, b, i``
+
+ >>> widont('A very simple test')
+ 'A very simple&nbsp;test'
+
+ >>> widont('<p>In a couple of paragraphs</p><p>paragraph two</p>')
+ '<p>In a couple of&nbsp;paragraphs</p><p>paragraph&nbsp;two</p>'
+
+ >>> widont('<h1><a href="#">In a link inside a heading</i> </a></h1>')
+ '<h1><a href="#">In a link inside a&nbsp;heading</i> </a></h1>'
+
+ >>> widont('<h1><a href="#">In a link</a> followed by other text</h1>')
+ '<h1><a href="#">In a link</a> followed by other&nbsp;text</h1>'
+
+ Empty HTMLs shouldn't error
+ >>> widont('<h1><a href="#"></a></h1>')
+ '<h1><a href="#"></a></h1>'
+
+ >>> widont('<div>Divs get no love!</div>')
+ '<div>Divs get no love!</div>'
+
+ >>> widont('<div><p>But divs with paragraphs do!</p></div>')
+ '<div><p>But divs with paragraphs&nbsp;do!</p></div>'
+ """
+ widont_finder = re.compile(r"""(\s+) # the space to replace
+                                   ([^<>\s]+                            # must be followed by non-tag non-space characters
+ \s* # optional white space!
+ (</(a|em|span|strong|i|b)[^>]*>\s*)* # optional closing inline tags with optional white space after each
+ (</(p|h[1-6]|li)|$)) # end with a closing p, h1-6, li or the end of the string
+ """, re.VERBOSE)
+ return widont_finder.sub(r'&nbsp;\2', text)
+
+register.filter('amp', amp)
+register.filter('caps', caps)
+register.filter('initial_quotes', initial_quotes)
+register.filter('smartypants', smartypants)
+register.filter('typogrify', typogrify)
+register.filter('widont', widont)
+
+def _test():
+ import doctest
+ doctest.testmod()
+
+if __name__ == "__main__":
+ _test()
diff --git a/lib/utils/GeoClient.py b/lib/utils/GeoClient.py
new file mode 100644
index 0000000..d1966ca
--- /dev/null
+++ b/lib/utils/GeoClient.py
@@ -0,0 +1,292 @@
+# -*- coding: utf-8 -*-
+
+"""Python wrapper for geoname web APIs
+
+created 20/03/2006 By Nicolas Laurance
+
+This module allows you to access geoname's web APIs,
+and get the results programmatically.
+Described here:
+ http://www.geonames.org/export/
+
+def postalCodeSearch(postalcode, placename='', country=COUNTRY, maxRows='10', http_proxy=None):
+def postalCodeLookupJSON(postalcode, placename='', country=COUNTRY, maxRows='10',gcallback='', http_proxy=None):
+def findNearbyPostalCodes(postalcode, placename='', country=COUNTRY, radius='5', maxRows='10',lat=None,lng=None, http_proxy=None):
+def postalCodeCountryInfo(http_proxy=None):
+def search(placename='', country=COUNTRY, maxRows='10', style='SHORT',lang=LANG, fclass=None, http_proxy=None):
+def findNearbyPlaceName(lat,lng, http_proxy=None):
+
+Sample usage:
+>>> import geoname
+>>> result=geoname.postalCodeSearch('35580','guichen','fr','10')
+>>> result.totalResultsCount.PCDATA
+u'1'
+>>> result.code[0].lat.PCDATA
+u'47.9666667'
+>>> result.code[0].lng.PCDATA
+u'-1.8'
+
+
+
+"""
+
+__author__ = "Nicolas Laurance (nlaurance@zindep.com)"
+__version__ = "2.0"
+__cvsversion__ = "$Revision: 2.0 $"[11:-2]
+__date__ = "$Date: 2003/06/20 22:40:53 $"[7:-2]
+__copyright__ = "Copyright (c) 2006 Nicolas Laurance"
+__license__ = "Python"
+
+import gnosis.xml.objectify as objectify
+
+import os, sys, urllib, re
+try:
+ import timeoutsocket # http://www.timo-tasi.org/python/timeoutsocket.py
+ timeoutsocket.setDefaultSocketTimeout(120)
+except ImportError:
+ pass
+
+HTTP_PROXY = None
+DEBUG = 0
+COUNTRY = 'FR'
+LANG ='fr'
+
+
+# don't touch the rest of these constants
+class GeonameError(Exception): pass
+
+## administrative functions
+def version():
+ print """PyGeoname %(__version__)s
+%(__copyright__)s
+released %(__date__)s
+""" % globals()
+
+def setProxy(http_proxy):
+ """set HTTP proxy"""
+ global HTTP_PROXY
+ HTTP_PROXY = http_proxy
+
+def getProxy(http_proxy = None):
+ """get HTTP proxy"""
+ return http_proxy or HTTP_PROXY
+
+def getProxies(http_proxy = None):
+ http_proxy = getProxy(http_proxy)
+ if http_proxy:
+ proxies = {"http": http_proxy}
+ else:
+ proxies = None
+ return proxies
+
+def _contentsOf(dirname, filename):
+ filename = os.path.join(dirname, filename)
+ if not os.path.exists(filename): return None
+ fsock = open(filename)
+ contents = fsock.read()
+ fsock.close()
+ return contents
+
+def _getScriptDir():
+ if __name__ == '__main__':
+ return os.path.abspath(os.path.dirname(sys.argv[0]))
+ else:
+ return os.path.abspath(os.path.dirname(sys.modules[__name__].__file__))
+
+class Bag: pass
+
+_intFields = ('totalResultsCount')
+_dateFields = ()
+_listFields = ('code','geoname','country',)
+_floatFields = ('lat','lng','distance')
+
+def unmarshal(element):
+ #import pdb;pdb.set_trace()
+ xml_obj = objectify.XML_Objectify(element)
+ rc = xml_obj.make_instance()
+ return rc
+
+def _do(url, http_proxy):
+ proxies = getProxies(http_proxy)
+ u = urllib.FancyURLopener(proxies)
+ usock = u.open(url)
+ rawdata = usock.read()
+ if DEBUG: print rawdata
+ usock.close()
+ data = unmarshal(rawdata)
+ return data
+
+## main functions
+
+def _buildfindNearbyPostalCodes(postalcode, placename, country, radius, maxRows ):
+ placename=urllib.quote(placename)
+ searchUrl = "http://ws.geonames.org/findNearbyPostalCodes?postalcode=%(postalcode)s&placename=%(placename)s&country=%(country)s&radius=%(radius)s&maxRows=%(maxRows)s" % vars()
+ return searchUrl
+
+
+def _buildpostalCodeLookupJSON(postalcode,placename,country,maxRows,gcallback):
+ placename=urllib.quote(placename)
+ searchUrl = "http://ws.geonames.org/postalCodeLookupJSON?postalcode=%(postalcode)s&placename=%(placename)s&country=%(country)s&maxRows=%(maxRows)s&callback=%(gcallback)s" % vars()
+ return searchUrl
+
+def _buildfindNearbyPostalCodesLL(lat,lng,radius,maxRows):
+ searchUrl = "http://ws.geonames.org/findNearbyPostalCodes?lat=%(lat)s&lng=%(lng)s&radius=%(radius)s&maxRows=%(maxRows)s" % vars()
+ return searchUrl
+
+def _buildfindCountrySubdivision(lat,lng):
+ searchUrl = "http://ws.geonames.org/countrySubdivision?lat=%(lat)s&lng=%(lng)s" % vars()
+ return searchUrl
+
+def _buildfindNearbyPlaceName(lat,lng):
+ searchUrl = "http://ws.geonames.org/findNearbyPlaceName?lat=%(lat)s&lng=%(lng)s" % vars()
+ return searchUrl
+
+def _buildpostalCodeSearch(postalcode, placename, country, maxRows ):
+ placename=urllib.quote(placename)
+ searchUrl = "http://ws.geonames.org/postalCodeSearch?postalcode=%(postalcode)s&placename=%(placename)s&country=%(country)s&maxRows=%(maxRows)s" % vars()
+ return searchUrl
+
+def _buildsearch(placename, country, maxRows, style, lang, fclass):
+    placename = urllib.quote(placename)
+    urlfclass = ''
+    if fclass:
+        for fc in fclass:
+            urlfclass += '&fclass=%s' % urllib.quote(fc)
+    searchUrl = "http://ws.geonames.org/search?q=%(placename)s&country=%(country)s&maxRows=%(maxRows)s&lang=%(lang)s&style=%(style)s%(urlfclass)s" % vars()
+    return searchUrl
+
+def postalCodeSearch(postalcode, placename='', country=COUNTRY, maxRows='10', http_proxy=None):
+ """
+ http://ws.geonames.org/postalCodeSearch?postalcode=35580&maxRows=10&country=fr
+ Url : ws.geonames.org/postalCodeSearch?
+ Parameters : postalcode ,placename,maxRows,country
+ <geonames>
+ <totalResultsCount>7</totalResultsCount>
+ -
+ <code>
+ <postalcode>35580</postalcode>
+ <name>St Senoux</name>
+ <countryCode>FR</countryCode>
+ <lat>47.9</lat>
+ <lng>-1.7833333</lng>
+ </code>
+ """
+ url = _buildpostalCodeSearch(postalcode,placename,country,maxRows)
+ if DEBUG: print url
+ return _do(url,http_proxy)
+
+def postalCodeLookupJSON(postalcode, placename='', country=COUNTRY, maxRows='10',gcallback='', http_proxy=None):
+ """
+ Webservice Type : REST /JSON
+ Url : ws.geonames.org/postalCodeLookupJSON?
+ Parameters : postalcode,country ,maxRows (default = 20),callback
+ Result : returns a list of places for the given postalcode in JSON format
+ """
+ url = _buildpostalCodeLookupJSON(postalcode,placename,country,maxRows,gcallback)
+# print url
+ proxies = getProxies(http_proxy)
+ u = urllib.FancyURLopener(proxies)
+ usock = u.open(url)
+ rawdata = usock.read()
+ if DEBUG: print rawdata
+ usock.close()
+ return eval(rawdata[:-3])
+
+def findNearbyPostalCodes(postalcode, placename='', country=COUNTRY, radius='5', maxRows='10',lat=None,lng=None, http_proxy=None):
+ """
+ Find nearby postal codes / reverse geocoding
+ This service comes in two flavors. You can either pass the lat/long or a postalcode/placename.
+
+ Webservice Type : REST
+ Url : ws.geonames.org/findNearbyPostalCodes?
+ Parameters :
+ lat,lng, radius (in km), maxRows (default = 5),country (default = all countries)
+ or
+ postalcode,country, radius (in Km), maxRows (default = 5)
+ Result : returns a list of postalcodes and places for the lat/lng query as xml document
+ Example:
+ http://ws.geonames.org/findNearbyPostalCodes?postalcode=35580&placename=guichen&country=FR&radius=5
+ <geonames>
+ -
+ <code>
+ <postalcode>35580</postalcode>
+ <name>Guichen</name>
+ <countryCode>FR</countryCode>
+ <lat>47.9666667</lat>
+ <lng>-1.8</lng>
+ <distance>0.0</distance>
+ </code>
+ """
+ if lat and lng :
+ url = _buildfindNearbyPostalCodesLL(lat,lng,radius,maxRows)
+ else:
+ url = _buildfindNearbyPostalCodes(postalcode,placename,country,radius,maxRows)
+ if DEBUG: print url
+# import pdb;pdb.set_trace()
+ return _do(url,http_proxy).code
+
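+# Illustrative (field access mirrors the module docstring sample):
+#   codes = findNearbyPostalCodes('35580', 'guichen', 'fr')
+#   codes[0].postalcode.PCDATA  ->  u'35580'
+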
+
+def postalCodeCountryInfo(http_proxy=None):
+ """
+ http://ws.geonames.org/postalCodeCountryInfo?
+ <country>
+ <countryCode>FR</countryCode>
+ <countryName>France</countryName>
+ <numPostalCodes>39163</numPostalCodes>
+ <minPostalCode>01000</minPostalCode>
+ <maxPostalCode>98000</maxPostalCode>
+ </country>
+
+ """
+ return _do("http://ws.geonames.org/postalCodeCountryInfo?",http_proxy).country
+
+def search(placename='', country=COUNTRY, maxRows='10', style='SHORT',lang=LANG, fclass=None, http_proxy=None):
+ """
+ Url : ws.geonames.org/search?
+ Parameters : q : place name (urlencoded utf8)
+ maxRows : maximal number of rows returned (default = 100)
+ country : iso country code, two characters (default = all countries)
+              fclass : featureclass(es) (default = all feature classes); this parameter may occur more than once, example: fclass=P&fclass=A
+ style : SHORT,MEDIUM,LONG (default = MEDIUM), verbosity of returned xml document
+ lang : ISO 2-letter language code. (default = en), countryName will be returned in the specified language.
+
+ http://ws.geonames.org/search?q=guichen&maxRows=10&style=SHORT&lang=fr&country=fr
+ <geonames>
+ <totalResultsCount>3</totalResultsCount>
+ -
+ <geoname>
+ <name>Laill�</name>
+ <lat>47.9833333</lat>
+ <lng>-1.7166667</lng>
+ </geoname>
+ """
+ url = _buildsearch(placename, country, maxRows,style,lang, fclass)
+ if DEBUG: print url
+ return _do(url,http_proxy)
+
+def findNearbyPlaceName(lat,lng, http_proxy=None):
+ """
+ Webservice Type : REST
+ Url : ws.geonames.org/findNearbyPlaceName?
+ Parameters : lat,lng
+ Result : returns the closest populated place for the lat/lng query as xml document
+ Example:
+ http://ws.geonames.org/findNearbyPlaceName?lat=47.3&lng=9
+ """
+ url = _buildfindNearbyPlaceName(lat,lng)
+ if DEBUG: print url
+ return _do(url,http_proxy)
+
+def findCountrySubdivision(lat,lng, http_proxy=None):
+ """
+ Webservice Type : REST
+    Url : ws.geonames.org/countrySubdivision?
+    Parameters : lat,lng
+    Result : returns the country and administrative subdivision for the lat/lng query as xml document
+    Example:
+    http://ws.geonames.org/countrySubdivision?lat=47.3&lng=9
+ """
+ url = _buildfindCountrySubdivision(lat,lng)
+ if DEBUG: print url
+ return _do(url,http_proxy)
+
diff --git a/lib/utils/__init__.py b/lib/utils/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/lib/utils/__init__.py
diff --git a/lib/utils/email_multipart.py b/lib/utils/email_multipart.py
new file mode 100644
index 0000000..4c2e154
--- /dev/null
+++ b/lib/utils/email_multipart.py
@@ -0,0 +1,80 @@
+# -*- coding: utf-8 -*-
+
+# Sending html emails in Django
+# Report any bugs to esat @t sleytr*net
+# Evren Esat Ozkan
+
+
+from feedparser import _sanitizeHTML
+from stripogram import html2text
+
+from django.conf import settings
+from django.template import loader, Context
+
+from email.MIMEMultipart import MIMEMultipart
+from email.MIMEText import MIMEText
+from email.MIMEImage import MIMEImage
+from smtplib import SMTP
+import email.Charset
+
+
+charset='utf-8'
+
+
+email.Charset.add_charset( charset, email.Charset.SHORTEST, None, None )
+
+def htmlmail(sbj,recip,msg,template='',texttemplate='',textmsg='',images=(), recip_name='',sender=settings.DEFAULT_FROM_EMAIL,sender_name='',charset=charset):
+ '''
+ if you want to use Django template system:
+ use `msg` and optionally `textmsg` as template context (dict)
+ and define `template` and optionally `texttemplate` variables.
+ otherwise msg and textmsg variables are used as html and text message sources.
+
+ if you want to use images in html message, define physical paths and ids in tuples.
+ (image paths are relative to MEDIA_ROOT)
+ example:
+ images=(('email_images/logo.gif','img1'),('email_images/footer.gif','img2'))
+ and use them in html like this:
+ <img src="cid:img1">
+ ...
+ <img src="cid:img2">
+ '''
+ html=render(msg,template)
+ if texttemplate or textmsg: text=render((textmsg or msg),texttemplate)
+ else: text= html2text(_sanitizeHTML(html,charset))
+
+ msgRoot = MIMEMultipart('related')
+ msgRoot['Subject'] = sbj
+ msgRoot['From'] = named(sender,sender_name)
+ msgRoot['To'] = named(recip,recip_name)
+ msgRoot.preamble = 'This is a multi-part message in MIME format.'
+
+ msgAlternative = MIMEMultipart('alternative')
+ msgRoot.attach(msgAlternative)
+
+ msgAlternative.attach(MIMEText(text, _charset=charset))
+ msgAlternative.attach(MIMEText(html, 'html', _charset=charset))
+
+ for img in images:
+ fp = open(img[0], 'rb')
+ msgImage = MIMEImage(fp.read())
+ fp.close()
+ msgImage.add_header('Content-ID', '<'+img[1]+'>')
+ msgRoot.attach(msgImage)
+
+ smtp = SMTP()
+ smtp.connect(settings.EMAIL_HOST)
+ smtp.login(settings.EMAIL_HOST_USER , settings.EMAIL_HOST_PASSWORD)
+ smtp.sendmail(sender, recip, msgRoot.as_string())
+ smtp.quit()
+
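+# Illustrative call (the template path and context keys are hypothetical):
+#
+#   htmlmail('Welcome', 'jane@example.com', {'name': 'Jane'},
+#            template='email/welcome.html',
+#            images=(('email_images/logo.gif', 'img1'),))
+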
+
+def render(context,template):
+ if template:
+ t = loader.get_template(template)
+ return t.render(Context(context))
+ return context
+
+def named(mail,name):
+ if name: return '%s <%s>' % (name,mail)
+ return mail \ No newline at end of file
diff --git a/lib/utils/pydelicious.py b/lib/utils/pydelicious.py
new file mode 100644
index 0000000..dd33788
--- /dev/null
+++ b/lib/utils/pydelicious.py
@@ -0,0 +1,817 @@
+"""Library to access del.icio.us data via Python.
+
+:examples:
+
+ Using the API class directly:
+
+ >>> a = pydelicious.apiNew('user', 'passwd')
+ >>> # or:
+ >>> a = DeliciousAPI('user', 'passwd')
+ >>> a.tags_get() # Same as:
+ >>> a.request('tags/get', )
+
+ Or by calling one of the methods on the module:
+
+ - add(user, passwd, url, description, tags = "", extended = "", dt = "", replace="no")
+ - get(user, passwd, tag="", dt="", count = 0)
+ - get_all(user, passwd, tag = "")
+ - delete(user, passwd, url)
+ - rename_tag(user, passwd, oldtag, newtag)
+ - get_tags(user, passwd)
+
+ >>> a = apiNew(user, passwd)
+    >>> a.posts_add(url="http://my.com/", description="my.com", extended="the url is my.moc", tags="my com")
+ True
+ >>> len(a.posts_all())
+ 1
+ >>> get_all(user, passwd)
+ 1
+
+    These are shorthand functions for getrss calls.
+
+ >>> rss_
+
+def get_userposts(user):
+def get_tagposts(tag):
+def get_urlposts(url):
+def get_popular(tag = ""):
+
+ >>> json_posts()
+ >>> json_tags()
+ >>> json_network()
+ >>> json_fans()
+
+:License: pydelicious is released under the BSD license. See 'license.txt'
+    for more information.
+
+:todo, bvb:
+    - Rewriting comments to English. More documentation, examples.
+ - Added JSON-like return values for XML data (del.icio.us also serves some JSON...)
+ - better error/exception classes and handling, work in progress.
+
+:todo:
+ - Source code SHOULD BE ASCII!
+ - More tests.
+ - handling different encodings, what, how?
+ >>> pydelicious.getrss(tag="t[a]g")
+ url: http://del.icio.us/rss/tag/t[a]g
+ - Parse datetimes in XML.
+ - Test RSS functionality? HTML scraping doesn't work yet?
+ - API functions need required argument checks.
+ - interesting functionality in other libraries (ruby, java, perl, etc)?
+ - what is pydelicious used for?
+    - distribute license, readme docs via setup.py?
+    - automatic release build
+
+:done:
+    * Refactored the API class, much cleaner now; the functions dlcs_api_request and dlcs_parse_xml are available for whoever wants them.
+"""
+import sys
+import os
+import time
+import datetime
+import md5, httplib
+import urllib, urllib2, time
+from StringIO import StringIO
+
+try:
+ from elementtree.ElementTree import parse as parse_xml
+except ImportError:
+ from xml.etree.ElementTree import parse as parse_xml
+
+import feedparser
+
+
+### Static config
+
+__version__ = '0.5.0'
+__author__ = 'Frank Timmermann <regenkind_at_gmx_dot_de>' # GP: does not respond to emails
+__contributors__ = [
+ 'Greg Pinero',
+ 'Berend van Berkum <berend+pydelicious@dotmpe.com>']
+__url__ = 'http://code.google.com/p/pydelicious/'
+__author_email__ = ""
+# Old URL: 'http://deliciouspython.python-hosting.com/'
+
+__description__ = '''pydelicious.py allows you to access the web service of del.icio.us via its API through Python.'''
+__long_description__ = '''The goal is to design an easy-to-use and fully functional Python interface to del.icio.us.'''
+
+DLCS_OK_MESSAGES = ('done', 'ok') # Known text values of positive del.icio.us <result> answers
+DLCS_WAIT_TIME = 4
+DLCS_REQUEST_TIMEOUT = 444 # Seconds before socket triggers timeout
+#DLCS_API_REALM = 'del.icio.us API'
+DLCS_API_HOST = 'https://api.del.icio.us'
+DLCS_API_PATH = 'v1'
+DLCS_API = "%s/%s" % (DLCS_API_HOST, DLCS_API_PATH)
+DLCS_RSS = 'http://del.icio.us/rss/'
+
+ISO_8601_DATETIME = '%Y-%m-%dT%H:%M:%SZ'
+
+USER_AGENT = 'pydelicious.py/%s %s' % (__version__, __url__)
+
+DEBUG = 0
+if 'DLCS_DEBUG' in os.environ:
+ DEBUG = int(os.environ['DLCS_DEBUG'])
+
+
+# Taken from FeedParser.py
+# timeoutsocket allows feedparser to time out rather than hang forever on ultra-slow servers.
+# Python 2.3 now has this functionality available in the standard socket library, so under
+# 2.3 you don't need to install anything. But you probably should anyway, because the socket
+# module is buggy and timeoutsocket is better.
+try:
+ import timeoutsocket # http://www.timo-tasi.org/python/timeoutsocket.py
+ timeoutsocket.setDefaultSocketTimeout(DLCS_REQUEST_TIMEOUT)
+except ImportError:
+ import socket
+ if hasattr(socket, 'setdefaulttimeout'): socket.setdefaulttimeout(DLCS_REQUEST_TIMEOUT)
+if DEBUG: print >>sys.stderr, "Set socket timeout to %s seconds" % DLCS_REQUEST_TIMEOUT
+
+
+### Utility classes
+
+class _Waiter:
+ """Waiter makes sure a certain amount of time passes between
+ successive calls of `Waiter()`.
+
+ Some attributes:
+ :lastcall: time of the last call
+ :wait: the minimum time needed between calls
+ :waited: the number of calls throttled so far
+
+ pydelicious.Waiter is an instance created when the module is loaded.
+ """
+ def __init__(self, wait):
+ self.wait = wait
+ self.waited = 0
+ self.lastcall = 0
+
+ def __call__(self):
+ tt = time.time()
+ wait = self.wait
+
+ timeago = tt - self.lastcall
+
+ if timeago < wait:
+ wait = wait - timeago
+ if DEBUG>0: print >>sys.stderr, "Waiting %s seconds." % wait
+ time.sleep(wait)
+ self.waited += 1
+ self.lastcall = tt + wait
+ else:
+ self.lastcall = tt
+
+Waiter = _Waiter(DLCS_WAIT_TIME)
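+# Example of the throttle (timing approximate): back-to-back calls are spaced
+# at least DLCS_WAIT_TIME seconds apart:
+#     Waiter()   # returns immediately
+#     Waiter()   # sleeps ~4 seconds before returning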
+
+class PyDeliciousException(Exception):
+ '''Std. pydelicious error'''
+ pass
+
+class DeliciousError(Exception):
+ """Raised when the server responds with a negative answer"""
+
+
+class DefaultErrorHandler(urllib2.HTTPDefaultErrorHandler):
+ '''xxx, bvb: Where is this used? Should it be registered somewhere with urllib2?
+
+ Handles HTTP errors, currently only 503.
+ '''
+ def http_error_503(self, req, fp, code, msg, headers):
+ # 503 means del.icio.us is throttling this client
+ raise urllib2.HTTPError(req, code, 'del.icio.us: 503 throttled', headers, fp)
+
+
+class post(dict):
+ """Post object, contains href, description, hash, dt, tags,
+ extended, user, count(, shared).
+
+ xxx, bvb: Not used in DeliciousAPI
+ """
+ def __init__(self, href="", description="", hash="", time="", tag="", extended="", user="", count="",
+ tags="", url="", dt=""): # tags or tag?
+ self["href"] = href
+ if url != "": self["href"] = url
+ self["description"] = description
+ self["hash"] = hash
+ self["dt"] = dt
+ if time != "": self["dt"] = time
+ self["tags"] = tags
+ if tag != "": self["tags"] = tag # tag or tags? # !! tags
+ self["extended"] = extended
+ self["user"] = user
+ self["count"] = count
+
+ def __getattr__(self, name):
+ try: return self[name]
+ except KeyError: return object.__getattribute__(self, name)
+
+
+class posts(list):
+ def __init__(self, *args):
+ for i in args: self.append(i)
+
+ def __getattr__(self, attr):
+ try: return [p[attr] for p in self]
+ except KeyError: return object.__getattribute__(self, attr)
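+
+# A small usage sketch (hypothetical values): attribute access on `post`
+# proxies to the dict keys, and `posts` collects a key across all items:
+#     p = post(url="http://example.com/", description="example")
+#     p.href            # -> "http://example.com/"
+#     posts(p, p).href  # -> ["http://example.com/", "http://example.com/"]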
+
+### Utility functions
+
+def str2uni(s):
+ # type(in) str or unicode
+ # type(out) unicode
+ return ("".join([unichr(ord(i)) for i in s]))
+
+def str2utf8(s):
+ # type(in) str or unicode
+ # type(out) str
+ return ("".join([unichr(ord(i)).encode("utf-8") for i in s]))
+
+def str2quote(s):
+ return urllib.quote_plus("".join([unichr(ord(i)).encode("utf-8") for i in s]))
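+
+# Note: the str2* helpers above map every character through unichr(ord(c)),
+# i.e. 8-bit input is effectively treated as Latin-1 before re-encoding to
+# UTF-8, e.g. str2quote(u'caf\xe9') -> 'caf%C3%A9'.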
+
+def dict0(d):
+ # Trims empty dict entries
+ # {'a':'a', 'b':'', 'c': 'c'} => {'a': 'a', 'c': 'c'}
+ dd = dict()
+ for i in d:
+ if d[i] != "": dd[i] = d[i]
+ return dd
+
+def delicious_datetime(str):
+ """Parse an ISO 8601 formatted string into a Python datetime.
+ """
+ return datetime.datetime(*time.strptime(str, ISO_8601_DATETIME)[0:6])
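+
+# e.g. delicious_datetime('2008-01-02T10:20:30Z')
+#      -> datetime.datetime(2008, 1, 2, 10, 20, 30)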
+
+def http_request(url, user_agent=USER_AGENT, retry=4):
+ """Retrieve the contents referenced by the URL using urllib2.
+
+ Retries up to `retry` times (default four) on URL/time-out errors;
+ protocol (HTTP) errors are raised immediately.
+ """
+ request = urllib2.Request(url, headers={'User-Agent':user_agent})
+
+ # Remember last error
+ e = None
+
+ # Repeat request on time-out errors
+ tries = retry
+ while tries:
+ try:
+ return urllib2.urlopen(request)
+
+ except urllib2.HTTPError, e: # protocol errors
+ raise PyDeliciousException, "%s" % e
+
+ except urllib2.URLError, e:
+ # xxx: Ugly check for time-out errors
+ #if len(e)>0 and 'timed out' in arg[0]:
+ print >> sys.stderr, "%s, %s tries left." % (e, tries)
+ Waiter()
+ tries = tries - 1
+ #else:
+ # tries = None
+
+ # Give up
+ raise PyDeliciousException, \
+ "Unable to retrieve data at '%s', %s" % (url, e)
+
+def http_auth_request(url, host, user, passwd, user_agent=USER_AGENT):
+ """Call an HTTP server with authorization credentials using urllib2.
+ """
+ if DEBUG: httplib.HTTPConnection.debuglevel = 1
+
+ # Hook up handler/opener to urllib2
+ password_manager = urllib2.HTTPPasswordMgrWithDefaultRealm()
+ password_manager.add_password(None, host, user, passwd)
+ auth_handler = urllib2.HTTPBasicAuthHandler(password_manager)
+ opener = urllib2.build_opener(auth_handler)
+ urllib2.install_opener(opener)
+
+ return http_request(url, user_agent)
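+
+# Note: build_opener()/install_opener() above registers the Basic-Auth handler
+# globally for the process, so subsequent urllib2 requests anywhere in the
+# program will reuse these credentials.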
+
+def dlcs_api_request(path, params='', user='', passwd='', throttle=True):
+ """Retrieve/query a path within the del.icio.us API.
+
+ This implements a minimum interval between calls to avoid
+ throttling. [#]_ Use param 'throttle' to turn this behaviour off.
+
+ todo: back off on 503s (HTTPError, URLError? needs testing).
+
+ Returned XML does not always correspond with the given del.icio.us
+ examples [#]_.
+
+ .. [#] http://del.icio.us/help/api/
+ """
+ if throttle:
+ Waiter()
+
+ if params:
+ # params come as a dict, strip empty entries and urlencode
+ url = "%s/%s?%s" % (DLCS_API, path, urllib.urlencode(dict0(params)))
+ else:
+ url = "%s/%s" % (DLCS_API, path)
+
+ if DEBUG: print >>sys.stderr, "dlcs_api_request: %s" % url
+
+ try:
+ return http_auth_request(url, DLCS_API_HOST, user, passwd, USER_AGENT)
+
+ # bvb: Is this ever raised? When? (DefaultErrorHandler is an urllib2
+ # handler, not an exception class, so it can never match an except
+ # clause; catch the HTTPError its http_error_503 would raise instead.)
+ except urllib2.HTTPError, e:
+ print >>sys.stderr, "%s" % e
+
+def dlcs_parse_xml(data, split_tags=False):
+ """Parse any del.icio.us XML document and return Python data structure.
+
+ Recognizes all XML document formats as returned by the version 1 API and
+ translates to a JSON-like data structure (dicts 'n lists).
+
+ Returned instance is always a dictionary. Examples::
+
+ {'posts': [{'url':'...','hash':'...',},],}
+ {'tags':['tag1', 'tag2',]}
+ {'dates': [{'count':'...','date':'...'},], 'tag':'', 'user':'...'}
+ {'result':(True, "done")}
+ # etcetera.
+ """
+
+ if DEBUG>3: print >>sys.stderr, "dlcs_parse_xml: parsing from ", data
+
+ if not hasattr(data, 'read'):
+ data = StringIO(data)
+
+ doc = parse_xml(data)
+ root = doc.getroot()
+ fmt = root.tag
+
+ # Split up into three cases: Data, Result or Update
+ if fmt in ('tags', 'posts', 'dates', 'bundles'):
+
+ # Data: expect a list of data elements, 'resources'.
+ # Use `fmt` (without last 's') to find data elements, elements
+ # don't have contents, attributes contain all the data we need:
+ # append to list
+ elist = [el.attrib for el in doc.findall(fmt[:-1])]
+
+ # Return list in dict, use tagname of rootnode as keyname.
+ data = {fmt: elist}
+
+ # Root element might have attributes too, append dict.
+ data.update(root.attrib)
+
+ return data
+
+ elif fmt == 'result':
+
+ # Result: answer to operations
+ if root.attrib.has_key('code'):
+ msg = root.attrib['code']
+ else:
+ msg = root.text
+
+ # Return {'result':(True, msg)} for /known/ O.K. messages,
+ # use (False, msg) otherwise
+ v = msg in DLCS_OK_MESSAGES
+ return {fmt: (v, msg)}
+
+ elif fmt == 'update':
+
+ # Update: "time"
+ #return {fmt: root.attrib}
+ return {fmt: {'time':time.strptime(root.attrib['time'], ISO_8601_DATETIME)}}
+
+ else:
+ raise PyDeliciousException, "Unknown XML document format '%s'" % fmt
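+
+# A quick sketch of the parser on canned responses:
+#     dlcs_parse_xml('<result code="done" />')
+#     # -> {'result': (True, 'done')}
+#     dlcs_parse_xml('<tags><tag tag="python" count="2" /></tags>')
+#     # -> {'tags': [{'count': '2', 'tag': 'python'}]}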
+
+def dlcs_rss_request(tag = "", popular = 0, user = "", url = ''):
+ """Handle a request for RSS
+
+ todo: translate from German
+
+ rss sollte nun wieder funktionieren, aber diese try, except scheisse ist so nicht schoen
+
+ rss wird unterschiedlich zusammengesetzt. ich kann noch keinen einheitlichen zusammenhang
+ zwischen daten (url, desc, ext, usw) und dem feed erkennen. warum k[o]nnen die das nicht einheitlich machen?
+ """
+ tag = str2quote(tag)
+ user = str2quote(user)
+ if url != '':
+ # http://del.icio.us/rss/url/efbfb246d886393d48065551434dab54
+ url = DLCS_RSS + '''url/%s'''%md5.new(url).hexdigest()
+ elif user != '' and tag != '':
+ url = DLCS_RSS + '''%(user)s/%(tag)s'''%dict(user=user, tag=tag)
+ elif user != '' and tag == '':
+ # http://del.icio.us/rss/delpy
+ url = DLCS_RSS + '''%s'''%user
+ elif popular == 0 and tag == '':
+ url = DLCS_RSS
+ elif popular == 0 and tag != '':
+ # http://del.icio.us/rss/tag/apple
+ # http://del.icio.us/rss/tag/web2.0
+ url = DLCS_RSS + "tag/%s"%tag
+ elif popular == 1 and tag == '':
+ url = DLCS_RSS + '''popular/'''
+ elif popular == 1 and tag != '':
+ url = DLCS_RSS + '''popular/%s'''%tag
+ rss = http_request(url).read()
+ rss = feedparser.parse(rss)
+ # print rss
+# for e in rss.entries: print e;print
+ l = posts()
+ for e in rss.entries:
+ if e.has_key("links") and e["links"]!=[] and e["links"][0].has_key("href"):
+ url = e["links"][0]["href"]
+ elif e.has_key("link"):
+ url = e["link"]
+ elif e.has_key("id"):
+ url = e["id"]
+ else:
+ url = ""
+ if e.has_key("title"):
+ description = e['title']
+ elif e.has_key("title_detail") and e["title_detail"].has_key("title"):
+ description = e["title_detail"]['value']
+ else:
+ description = ''
+ try: tags = e['categories'][0][1]
+ except (KeyError, IndexError):
+ try: tags = e["category"]
+ except KeyError: tags = ""
+ if e.has_key("modified"):
+ dt = e['modified']
+ else:
+ dt = ""
+ if e.has_key("summary"):
+ extended = e['summary']
+ elif e.has_key("summary_detail"):
+ e['summary_detail']["value"]
+ else:
+ extended = ""
+ if e.has_key("author"):
+ user = e['author']
+ else:
+ user = ""
+ # time = dt hints at a problem: the variable naming is not consistent.
+ # Sending to the API and getting XML back are two different shoes :(
+ l.append(post(url = url, description = description, tags = tags, dt = dt, extended = extended, user = user))
+ return l
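+
+# e.g. dlcs_rss_request(tag='python') fetches http://del.icio.us/rss/tag/python
+# and returns a `posts` list of `post` dicts.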
+
+
+### Main module class
+
+class DeliciousAPI:
+ """Class providing the main interface to the del.icio.us API.
+
+ Methods ``request`` and ``request_raw`` represent the core. For all API
+ paths there are furthermore methods (e.g. ``posts_add`` for 'posts/add') with
+ an explicit declaration of the parameters and documentation. These all call
+ ``request`` and pass on extra keywords like ``_raw``.
+ """
+
+ def __init__(self, user, passwd, codec='iso-8859-1', api_request=dlcs_api_request, xml_parser=dlcs_parse_xml):
+ """Initialize access to the API with ``user`` and ``passwd``.
+
+ ``codec`` sets the encoding of the arguments.
+
+ The ``api_request`` and ``xml_parser`` parameters by default point to
+ functions within this package with standard implementations to
+ request and parse a resource. See ``dlcs_api_request()`` and
+ ``dlcs_parse_xml()``. Note that ``api_request`` should return a
+ file-like instance with an HTTPMessage instance under ``info()``,
+ see ``urllib2.urlopen`` for more info.
+ """
+ assert user != ""
+ self.user = user
+ self.passwd = passwd
+ self.codec = codec
+
+ # Implement communication to server and parsing of response messages:
+ assert callable(api_request)
+ self._api_request = api_request
+ assert callable(xml_parser)
+ self._parse_response = xml_parser
+
+ def _call_server(self, path, **params):
+ params = dict0(params)
+ for key in params:
+ params[key] = params[key].encode(self.codec)
+
+ # see __init__ for _api_request()
+ return self._api_request(path, params, self.user, self.passwd)
+
+
+ ### Core functionality
+
+ def request(self, path, _raw=False, **params):
+ """Calls a path in the API, parses the answer to a JSON-like structure by
+ default. Use with ``_raw=True`` or ``call request_raw()`` directly to
+ get the filehandler and process the response message manually.
+
+ Calls to some paths will return a `result` message, i.e.::
+
+ <result code="..." />
+
+ or::
+
+ <result>...</result>
+
+ These are all parsed to ``{'result':(Boolean, MessageString)}`` and this
+ method will raise ``DeliciousError`` on negative `result` answers. Using
+ ``_raw=True`` bypasses all parsing and will never raise ``DeliciousError``.
+
+ See ``dlcs_parse_xml()`` and ``self.request_raw()``."""
+
+ # method _parse_response is bound in `__init__()`, `_call_server`
+ # uses `_api_request` also set in `__init__()`
+ if _raw:
+ # return answer
+ return self.request_raw(path, **params)
+
+ else:
+ # get answer and parse
+ fl = self._call_server(path, **params)
+ rs = self._parse_response(fl)
+
+ # Raise an error for negative 'result' answers
+ if type(rs) == dict and 'result' in rs and not rs['result'][0]:
+ errmsg = ""
+ if len(rs['result']) > 1:
+ errmsg = rs['result'][1]
+ raise DeliciousError, errmsg
+
+ return rs
+
+ def request_raw(self, path, **params):
+ """Calls the path in the API, returns the filehandle. Returned
+ file-like instances have an ``HTTPMessage`` instance with HTTP header
+ information available. Use ``filehandle.info()`` or refer to the
+ ``urllib2.urlopen`` documentation.
+ """
+ # see `request()` on how the response can be handled
+ return self._call_server(path, **params)
+
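+ # A usage sketch (hypothetical credentials):
+ #     api = DeliciousAPI('user', 'passwd')
+ #     api.request('posts/get', tag='python')             # parsed dict
+ #     api.request_raw('posts/get', tag='python').read()  # raw XML string
+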
+ ### Explicit declarations of API paths, their parameters and docs
+
+ # Tags
+ def tags_get(self, **kwds):
+ """Returns a list of tags and the number of times it is used by the user.
+ ::
+
+ <tags>
+ <tag tag="TagName" count="888">
+ """
+ return self.request("tags/get", **kwds)
+
+ def tags_rename(self, old, new, **kwds):
+ """Rename an existing tag with a new tag name. Returns a `result`
+ message or raises a ``DeliciousError``. See ``self.request()``.
+
+ &old (required)
+ Tag to rename.
+ &new (required)
+ New name.
+ """
+ return self.request("tags/rename", old=old, new=new, **kwds)
+
+ # Posts
+ def posts_update(self, **kwds):
+ """Returns the last update time for the user. Use this before calling
+ `posts_all` to see if the data has changed since the last fetch.
+ ::
+
+ <update time="CCYY-MM-DDThh:mm:ssZ">
+ """
+ return self.request("posts/update", **kwds)
+
+ def posts_dates(self, tag="", **kwds):
+ """Returns a list of dates with the number of posts at each date.
+ ::
+
+ <dates>
+ <date date="CCYY-MM-DD" count="888">
+
+ &tag (optional).
+ Filter by this tag.
+ """
+ return self.request("posts/dates", tag=tag, **kwds)
+
+ def posts_get(self, tag="", dt="", url="", **kwds):
+ """Returns posts matching the arguments. If no date or url is given,
+ most recent date will be used.
+ ::
+
+ <posts dt="CCYY-MM-DD" tag="..." user="...">
+ <post ...>
+
+ &tag (optional).
+ Filter by this tag.
+ &dt (optional).
+ Filter by this date (CCYY-MM-DDThh:mm:ssZ).
+ &url (optional).
+ Filter by this url.
+ """
+ return self.request("posts/get", tag=tag, dt=dt, url=url, **kwds)
+
+ def posts_recent(self, tag="", count="", **kwds):
+ """Returns a list of the most recent posts, filtered by argument.
+ ::
+
+ <posts tag="..." user="...">
+ <post ...>
+
+ &tag (optional).
+ Filter by this tag.
+ &count (optional).
+ Number of items to retrieve (Default:15, Maximum:100).
+ """
+ return self.request("posts/recent", tag=tag, count=count, **kwds)
+
+ def posts_all(self, tag="", **kwds):
+ """Returns all posts. Please use sparingly. Call the `posts_update`
+ method to see if you need to fetch this at all.
+ ::
+
+ <posts tag="..." user="..." update="CCYY-MM-DDThh:mm:ssZ">
+ <post ...>
+
+ &tag (optional).
+ Filter by this tag.
+ """
+ return self.request("posts/all", tag=tag, **kwds)
+
+ def posts_add(self, url, description, extended="", tags="", dt="",
+ replace="no", shared="yes", **kwds):
+ """Add a post to del.icio.us. Returns a `result` message or raises an
+ ``DeliciousError``. See ``self.request()``.
+
+ &url (required)
+ the url of the item.
+ &description (required)
+ the description of the item.
+ &extended (optional)
+ notes for the item.
+ &tags (optional)
+ tags for the item (space delimited).
+ &dt (optional)
+ datestamp of the item (format "CCYY-MM-DDThh:mm:ssZ").
+
+ Requires a literal "T" and "Z", as in ISO 8601 (see
+ http://www.cl.cam.ac.uk/~mgk25/iso-time.html), for example:
+ "1984-09-01T14:21:31Z".
+ &replace=no (optional) - don't replace the post if the given url has already been posted.
+ &shared=no (optional) - make the item private.
+ """
+ return self.request("posts/add", url=url, description=description,
+ extended=extended, tags=tags, dt=dt,
+ replace=replace, shared=shared, **kwds)
+
+ def posts_delete(self, url, **kwds):
+ """Delete a post from del.icio.us. Returns a `result` message or
+ raises an ``DeliciousError``. See ``self.request()``.
+
+ &url (required)
+ the url of the item.
+ """
+ return self.request("posts/delete", url=url, **kwds)
+
+ # Bundles
+ def bundles_all(self, **kwds):
+ """Retrieve user bundles from del.icio.us.
+ ::
+
+ <bundles>
+ <bundel name="..." tags=...">
+ """
+ return self.request("tags/bundles/all", **kwds)
+
+ def bundles_set(self, bundle, tags, **kwds):
+ """Assign a set of tags to a single bundle, wipes away previous
+ settings for bundle. Returns a `result` messages or raises an
+ ``DeliciousError``. See ``self.request()``.
+
+ &bundle (required)
+ the bundle name.
+ &tags (required)
+ list of tags (space separated).
+ """
+ if type(tags)==list:
+ tags = " ".join(tags)
+ return self.request("tags/bundles/set", bundle=bundle, tags=tags,
+ **kwds)
+
+ def bundles_delete(self, bundle, **kwds):
+ """Delete a bundle from del.icio.us. Returns a `result` message or
+ raises an ``DeliciousError``. See ``self.request()``.
+
+ &bundle (required)
+ the bundle name.
+ """
+ return self.request("tags/bundles/delete", bundle=bundle, **kwds)
+
+ ### Utils
+
+ # Lookup table for del.icio.us url-path to DeliciousAPI method.
+ paths = {
+ 'tags/get': tags_get,
+ 'tags/rename': tags_rename,
+ 'posts/update': posts_update,
+ 'posts/dates': posts_dates,
+ 'posts/get': posts_get,
+ 'posts/recent': posts_recent,
+ 'posts/all': posts_all,
+ 'posts/add': posts_add,
+ 'posts/delete': posts_delete,
+ 'tags/bundles/all': bundles_all,
+ 'tags/bundles/set': bundles_set,
+ 'tags/bundles/delete': bundles_delete,
+ }
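+
+ # e.g. DeliciousAPI.paths['posts/add'] is the plain posts_add function;
+ # given an instance `api`, DeliciousAPI.paths['tags/get'](api) is
+ # equivalent to api.tags_get().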
+
+ def get_url(self, url):
+ """Return the del.icio.us url at which the HTML page with posts for
+ ``url`` can be found.
+ """
+ return "http://del.icio.us/url/?url=%s" % (url,)
+
+
+### Convenience functions on this package
+
+def apiNew(user, passwd):
+ """creates a new DeliciousAPI object.
+ requires user(name) and passwd
+ """
+ return DeliciousAPI(user=user, passwd=passwd)
+
+def add(user, passwd, url, description, tags="", extended="", dt="", replace="no"):
+ return apiNew(user, passwd).posts_add(url=url, description=description, extended=extended, tags=tags, dt=dt, replace=replace)
+
+def get(user, passwd, tag="", dt="", count = 0):
+ posts = apiNew(user, passwd).posts_get(tag=tag,dt=dt)
+ if count != 0: posts = posts[0:count]
+ return posts
+
+def get_all(user, passwd, tag=""):
+ return apiNew(user, passwd).posts_all(tag=tag)
+
+def delete(user, passwd, url):
+ return apiNew(user, passwd).posts_delete(url=url)
+
+def rename_tag(user, passwd, oldtag, newtag):
+ return apiNew(user=user, passwd=passwd).tags_rename(old=oldtag, new=newtag)
+
+def get_tags(user, passwd):
+ return apiNew(user=user, passwd=passwd).tags_get()
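+
+# e.g. (hypothetical credentials):
+#     add('user', 'passwd', 'http://example.com/', 'An example bookmark',
+#         tags='example test')
+#     get_tags('user', 'passwd')   # -> {'tags': [...]}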
+
+
+### RSS functions. bvb: still working...?
+def getrss(tag="", popular=0, url='', user=""):
+ """Get posts from del.icio.us by parsing RSS (bvb: or HTML).
+
+ todo: not tested
+
+ tag (opt) sort by tag
+ popular (opt) look for the popular stuff
+ user (opt) get the posts by a user; this overrides popular
+ url (opt) get the posts by url
+ """
+ return dlcs_rss_request(tag=tag, popular=popular, user=user, url=url)
+
+def get_userposts(user):
+ return getrss(user = user)
+
+def get_tagposts(tag):
+ return getrss(tag = tag)
+
+def get_urlposts(url):
+ return getrss(url = url)
+
+def get_popular(tag = ""):
+ return getrss(tag = tag, popular = 1)
+
+
+### TODO: implement JSON fetching
+def json_posts(user, count=15):
+ """http://del.icio.us/feeds/json/mpe
+ http://del.icio.us/feeds/json/mpe/art+history
+ count=### the number of posts you want to get (default is 15, maximum is 100)
+ raw a raw JSON object is returned, instead of an object named Delicious.posts
+ """
+
+def json_tags(user, atleast, count, sort='alpha'):
+ """http://del.icio.us/feeds/json/tags/mpe
+ atleast=### include only tags for which there are at least ### number of posts
+ count=### include ### tags, counting down from the top
+ sort={alpha|count} construct the object with tags in alphabetic order (alpha), or by count of posts (count)
+ callback=NAME wrap the object definition in a function call NAME(...), thus invoking that function when the feed is executed
+ raw a pure JSON object is returned, instead of code that will construct an object named Delicious.tags
+ """
+
+def json_network(user):
+ """http://del.icio.us/feeds/json/network/mpe
+ callback=NAME wrap the object definition in a function call NAME(...)
+ ?raw a raw JSON object is returned, instead of an object named Delicious.posts
+ """
+
+def json_fans(user):
+ """http://del.icio.us/feeds/json/fans/mpe
+ callback=NAME wrap the object definition in a function call NAME(...)
+ ?raw a pure JSON object is returned, instead of an object named Delicious.
+ """
+
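+# A minimal sketch of what json_posts() might look like (assumptions: the
+# feed URL pattern documented above, and the third-party 'simplejson' module
+# as the JSON parser):
+#
+#     import simplejson
+#     def json_posts(user, count=15):
+#         url = 'http://del.icio.us/feeds/json/%s?raw&count=%s' % (user, count)
+#         return simplejson.loads(http_request(url).read())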