diff options
32 files changed, 5867 insertions, 0 deletions
diff --git a/lib/APIClients.py b/lib/APIClients.py new file mode 100644 index 0000000..24ab97b --- /dev/null +++ b/lib/APIClients.py @@ -0,0 +1,104 @@ +# APIClients for grabbing data from popular web services +# By Scott Gilbertson +# Copyright is lame, take what you want, except for those portions noted + +# Dependencies: +import sys, urllib +import xml.etree.cElementTree as xml_parser + + +DEBUG = 0 + +""" +base class -- handles GoodReads.com, but works for any rss feed, just send an empty string for anything you don't need +""" +class APIClient: + def __init__(self, base_path, api_key): + self.api_key = api_key + self.base_path = base_path + + def __getattr__(self, method): + def method(_self=self, _method=method, **params): + url = "%s%s?%s&" % (self.base_path, self.api_key, urllib.urlencode(params)) + if DEBUG: print url + data = self.fetch(url) + return data + + return method + + def fetch(self, url): + u = urllib.FancyURLopener(None) + usock = u.open(url) + rawdata = usock.read() + if DEBUG: print rawdata + usock.close() + return xml_parser.fromstring(rawdata) + +""" + Extend APIClient to work with the ma.gnolia.com API + (http://wiki.ma.gnolia.com/Ma.gnolia_API) + Adds some error handling as well +""" +class MagnoliaError(Exception): + def __init__(self, code, message): + self.code = code + self.message = message + + def __str__(self): + return 'Magnolia Error %s: %s' % (self.code, self.message) + + +class MagnoliaClient(APIClient): + def __getattr__(self, method): + def method(_self=self, _method=method, **params): + url = "%s%s?%s&api_key=%s" % (self.base_path, _method, urllib.urlencode(params), self.api_key) + if DEBUG: print url + data = APIClient.fetch(self, url) + return data + return method + + +""" + Extend APIClient to work with the Flickr API + (http://www.flickr.com/services/api/) + Adds error handling as well +""" + +class FlickrError(Exception): + def __init__(self, code, message): + self.code = code + self.message = message + + def 
__str__(self): + return 'Flickr Error %s: %s' % (self.code, self.message) + +class FlickrClient(APIClient): + def __getattr__(self, method): + def method(_self=self, _method=method, **params): + _method = _method.replace("_", ".") + url = "%s?method=%s&%s&api_key=%s" % (self.base_path, _method, urllib.urlencode(params), self.api_key) + if DEBUG: print url + data = APIClient.fetch(self, url) + return data + return method + +class TumblrClient: + def __init__(self, base_path): + self.base_path = base_path + + def __getattr__(self, method): + def method(_self=self, _method=method, **params): + url = "%s" % (self.base_path) + if DEBUG: print url + data = self.fetch(url) + return data + + return method + + def fetch(self, url): + u = urllib.FancyURLopener(None) + usock = u.open(url) + rawdata = usock.read() + if DEBUG: print rawdata + usock.close() + return xml_parser.fromstring(rawdata) diff --git a/lib/contact_form/__init__.py b/lib/contact_form/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/lib/contact_form/__init__.py diff --git a/lib/contact_form/forms.py b/lib/contact_form/forms.py new file mode 100644 index 0000000..921d8ee --- /dev/null +++ b/lib/contact_form/forms.py @@ -0,0 +1,233 @@ +""" +A base contact form for allowing users to send email messages through +a web interface, and a subclass demonstrating useful functionality. + +""" + + +from django import forms +from django.conf import settings +from django.core.mail import send_mail +from django.template import loader, RequestContext +from django.contrib.sites.models import Site + + +# I put this on all required fields, because it's easier to pick up +# on them with CSS or JavaScript if they have a class of "required" +# in the HTML. Your mileage may vary. +attrs_dict = { 'class': 'required' } + + +class ContactForm(forms.Form): + """ + Base contact form class from which all contact form classes should + inherit. 
+ + If you don't need any custom functionality, you can simply use + this form to provide basic contact functionality; it will collect + name, email address and message. + + The ``contact_form`` view included in this application knows how + to work with this form and can handle many types of subclasses as + well (see below for a discussion of the important points), so in + many cases it will be all that you need. If you'd like to use this + form or a subclass of it from one of your own views, just do the + following: + + 1. When you instantiate the form, pass the current + ``HttpRequest`` object to the constructor as the keyword + argument ``request``; this is used internally by the base + implementation, and also made available so that subclasses + can add functionality which relies on inspecting the + request. + + 2. To send the message, call the form's ``save`` method, which + accepts the keyword argument ``fail_silently`` and defaults + it to ``False``. This argument is passed directly to + ``send_mail``, and allows you to suppress or raise + exceptions as needed for debugging. The ``save`` method has + no return value. + + Other than that, treat it like any other form; validity checks and + validated data are handled normally, through the ``is_valid`` + method and the ``cleaned_data`` dictionary. + + + Base implementation + ------------------- + + Under the hood, this form uses a somewhat abstracted interface in + order to make it easier to subclass and add functionality. There + are several important attributes subclasses may want to look at + overriding, all of which will work (in the base implementation) as + either plain attributes or as callable methods: + + * ``from_email`` -- used to get the address to use in the + ``From:`` header of the message. The base implementation + returns the value of the ``DEFAULT_FROM_EMAIL`` setting. + + * ``message`` -- used to get the message body as a string. 
The + base implementation renders a template using the form's + ``cleaned_data`` dictionary as context. + + * ``recipient_list`` -- used to generate the list of + recipients for the message. The base implementation returns + the email addresses specified in the ``MANAGERS`` setting. + + * ``subject`` -- used to generate the subject line for the + message. The base implementation returns the string 'Message + sent through the web site', with the name of the current + ``Site`` prepended. + + * ``template_name`` -- used by the base ``message`` method to + determine which template to use for rendering the + message. Default is ``contact_form/contact_form.txt``. + + Internally, the base implementation ``_get_message_dict`` method + collects ``from_email``, ``message``, ``recipient_list`` and + ``subject`` into a dictionary, which the ``save`` method then + passes directly to ``send_mail`` as keyword arguments. + + Particularly important is the ``message`` attribute, with its base + implementation as a method which renders a template; because it + passes ``cleaned_data`` as the template context, any additional + fields added by a subclass will automatically be available in the + template. This means that many useful subclasses can get by with + just adding a few fields and possibly overriding + ``template_name``. + + Much useful functionality can be achieved in subclasses without + having to override much of the above; adding additional validation + methods works the same as any other form, and typically only a few + items -- ``recipient_list`` and ``subject_line``, for example, + need to be overridden to achieve customized behavior. + + + Other notes for subclassing + --------------------------- + + Subclasses which want to inspect the current ``HttpRequest`` to + add functionality can access it via the attribute ``request``; the + base ``message`` takes advantage of this to use ``RequestContext`` + when rendering its template. 
See the ``AkismetContactForm`` + subclass in this file for an example of using the request to + perform additional validation. + + Subclasses which override ``__init__`` need to accept ``*args`` + and ``**kwargs``, and pass them via ``super`` in order to ensure + proper behavior. + + Subclasses should be careful if overriding ``_get_message_dict``, + since that method **must** return a dictionary suitable for + passing directly to ``send_mail`` (unless ``save`` is overridden + as well). + + Overriding ``save`` is relatively safe, though remember that code + which uses your form will expect ``save`` to accept the + ``fail_silently`` keyword argument. In the base implementation, + that argument defaults to ``False``, on the assumption that it's + far better to notice errors than to silently not send mail from + the contact form (see also the Zen of Python: "Errors should never + pass silently, unless explicitly silenced"). + + """ + def __init__(self, data=None, files=None, request=None, *args, **kwargs): + if request is None: + raise TypeError("Keyword argument 'request' must be supplied") + super(ContactForm, self).__init__(data=data, files=files, *args, **kwargs) + self.request = request + + name = forms.CharField(max_length=100, + widget=forms.TextInput(attrs=attrs_dict), + label=u'Your name') + email = forms.EmailField(widget=forms.TextInput(attrs=dict(attrs_dict, + maxlength=200)), + label=u'Your email address') + body = forms.CharField(widget=forms.Textarea(attrs=attrs_dict), + label=u'Your message') + + #from_email = settings.DEFAULT_FROM_EMAIL + + recipient_list = [mail_tuple[1] for mail_tuple in settings.MANAGERS] + + subject_template_name = "contact_form/contact_form_subject.txt" + + template_name = 'contact_form/contact_form.txt' + + _context = None + + def message(self): + """ + Renders the body of the message to a string. 
+ + """ + if callable(self.template_name): + template_name = self.template_name() + else: + template_name = self.template_name + return loader.render_to_string(template_name, + self.get_context()) + + def subject(self): + """ + Renders the subject of the message to a string. + + """ + subject = loader.render_to_string(self.subject_template_name, + self.get_context()) + return ''.join(subject.splitlines()) + + def get_context(self): + if not self.is_valid(): + raise ValueError("Cannot generate Context from invalid contact form") + if self._context is None: + self.cleaned_data['site_name'] = settings.SITE_NAME + self._context = RequestContext(self.request, + dict(self.cleaned_data, + site=Site.objects.get_current())) + return self._context + + def get_message_dict(self): + if not self.is_valid(): + raise ValueError("Message cannot be sent from invalid contact form") + message_dict = {} + for message_part in ('from_email', 'message', 'recipient_list', 'subject'): + attr = getattr(self, message_part) + message_dict[message_part] = callable(attr) and attr() or attr + return message_dict + + def from_email(self): + addy = self.cleaned_data['email'] + return addy + + def save(self, fail_silently=False): + """ + Builds and sends the email message. + + """ + send_mail(fail_silently=fail_silently, **self.get_message_dict()) + + +class AkismetContactForm(ContactForm): + """ + Contact form which doesn't add any extra fields, but does add an + Akismet spam check to the validation routine. + + Requires the setting ``AKISMET_API_KEY``, which should be a valid + Akismet API key. 
+ + """ + def clean_body(self): + if 'body' in self.cleaned_data and getattr(settings, 'AKISMET_API_KEY', ''): + from akismet import Akismet + from django.utils.encoding import smart_str + akismet_api = Akismet(key=settings.AKISMET_API_KEY, + blog_url='http://%s/' % Site.objects.get_current().domain) + if akismet_api.verify_key(): + akismet_data = { 'comment_type': 'comment', + 'referer': self.request.META.get('HTTP_REFERER', ''), + 'user_ip': self.request.META.get('REMOTE_ADDR', ''), + 'user_agent': self.request.META.get('HTTP_USER_AGENT', '') } + if akismet_api.comment_check(smart_str(self.cleaned_data['body']), data=akismet_data, build_data=True): + raise forms.ValidationError(u"Akismet thinks this message is spam") + return self.cleaned_data['body'] diff --git a/lib/contact_form/urls.py b/lib/contact_form/urls.py new file mode 100644 index 0000000..f80c27f --- /dev/null +++ b/lib/contact_form/urls.py @@ -0,0 +1,28 @@ +""" +Example URLConf for a contact form. + +Because the ``contact_form`` view takes configurable arguments, it's +recommended that you manually place it somewhere in your URL +configuration with the arguments you want. If you just prefer the +default, however, you can hang this URLConf somewhere in your URL +hierarchy (for best results with the defaults, include it under +``/contact/``). + +""" + + +from django.conf.urls.defaults import * +from django.views.generic.simple import direct_to_template + +from contact_form.views import contact_form + + +urlpatterns = patterns('', + url(r'^$', + contact_form, + name='contact_form'), + url(r'^sent/$', + direct_to_template, + { 'template': 'contact_form/contact_form_sent.html' }, + name='contact_form_sent'), + ) diff --git a/lib/contact_form/views.py b/lib/contact_form/views.py new file mode 100644 index 0000000..fc33f4a --- /dev/null +++ b/lib/contact_form/views.py @@ -0,0 +1,66 @@ +""" +View which can render and send email from a contact form. 
+ +""" + + +from django.http import HttpResponseRedirect +from django.shortcuts import render_to_response +from django.template import RequestContext +from django.contrib.auth.views import redirect_to_login + +from contact_form.forms import ContactForm + + +def contact_form(request, form_class=ContactForm, + template_name='contact_form/contact_form.html', + success_url='/contact/sent/', login_required=False, + fail_silently=False): + """ + Renders a contact form, validates its input and sends an email + from it. + + To specify the form class to use, pass the ``form_class`` keyword + argument; if no ``form_class`` is specified, the base + ``ContactForm`` class will be used. + + To specify the template to use for rendering the form (*not* the + template used to render the email message sent from the form, + which is handled by the form class), pass the ``template_name`` + keyword argument; if not supplied, this will default to + ``contact_form/contact_form.html``. + + To specify a URL to redirect to after a successfully-sent message, + pass the ``success_url`` keyword argument; if not supplied, this + will default to ``/contact/sent/``. + + To allow only registered users to use the form, pass a ``True`` + value for the ``login_required`` keyword argument. + + To suppress exceptions raised during sending of the email, pass a + ``True`` value for the ``fail_silently`` keyword argument. This is + **not** recommended. + + Template:: + + Passed in the ``template_name`` argument. + + Context:: + + form + The form instance. 
+ + """ + if login_required and not request.user.is_authenticated(): + return redirect_to_login(request.path) + + if request.method == 'POST': + form = form_class(data=request.POST, request=request) + if form.is_valid(): + form.save(fail_silently=fail_silently) + return HttpResponseRedirect(success_url) + else: + form = form_class(request=request) + return render_to_response(template_name, + { 'form': form }, + context_instance=RequestContext(request)) diff --git a/lib/context-processors/__init__.py b/lib/context-processors/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/lib/context-processors/__init__.py diff --git a/lib/context-processors/context_processors.py b/lib/context-processors/context_processors.py new file mode 100644 index 0000000..5691102 --- /dev/null +++ b/lib/context-processors/context_processors.py @@ -0,0 +1,3 @@ +def media_url(request): + from django.conf import settings + return {'media_url': settings.MEDIA_URL, 'map_key':settings.MAP_API_KEY} diff --git a/lib/fdigg/__init__.py b/lib/fdigg/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/lib/fdigg/__init__.py diff --git a/lib/fdigg/middleware.py b/lib/fdigg/middleware.py new file mode 100644 index 0000000..c21ab7f --- /dev/null +++ b/lib/fdigg/middleware.py @@ -0,0 +1,11 @@ +from django.shortcuts import render_to_response +import re +import logging +digg_re = re.compile(r'http://digg.com/\w{1,8}/*(\?.*)?$') +class FckDiggMiddleware(object): + def process_request(self, request): + + if request.META.has_key('HTTP_REFERER'): + logging.info(request.META['HTTP_REFERER']) + if digg_re.search(request.META['HTTP_REFERER']): + return render_to_response('details/fck_digg.html')
\ No newline at end of file diff --git a/lib/markdown2.py b/lib/markdown2.py new file mode 100755 index 0000000..d72f414 --- /dev/null +++ b/lib/markdown2.py @@ -0,0 +1,1877 @@ +#!/usr/bin/env python +# Copyright (c) 2007-2008 ActiveState Corp. +# License: MIT (http://www.opensource.org/licenses/mit-license.php) + +r"""A fast and complete Python implementation of Markdown. + +[from http://daringfireball.net/projects/markdown/] +> Markdown is a text-to-HTML filter; it translates an easy-to-read / +> easy-to-write structured text format into HTML. Markdown's text +> format is most similar to that of plain text email, and supports +> features such as headers, *emphasis*, code blocks, blockquotes, and +> links. +> +> Markdown's syntax is designed not as a generic markup language, but +> specifically to serve as a front-end to (X)HTML. You can use span-level +> HTML tags anywhere in a Markdown document, and you can use block level +> HTML tags (like <div> and <table> as well). + +Module usage: + + >>> import markdown2 + >>> markdown2.markdown("*boo!*") # or use `html = markdown_path(PATH)` + u'<p><em>boo!</em></p>\n' + + >>> markdowner = Markdown() + >>> markdowner.convert("*boo!*") + u'<p><em>boo!</em></p>\n' + >>> markdowner.convert("**boom!**") + u'<p><strong>boom!</strong></p>\n' + +This implementation of Markdown implements the full "core" syntax plus a +number of extras (e.g., code syntax coloring, footnotes) as described on +<http://code.google.com/p/python-markdown2/wiki/Extras>. +""" + +cmdln_desc = """A fast and complete Python implementation of Markdown, a +text-to-HTML conversion tool for web writers. +""" + +# Dev Notes: +# - There is already a Python markdown processor +# (http://www.freewisdom.org/projects/python-markdown/). +# - Python's regex syntax doesn't have '\z', so I'm using '\Z'. I'm +# not yet sure if there implications with this. Compare 'pydoc sre' +# and 'perldoc perlre'. 
+ +__version_info__ = (1, 0, 1, 13) # first three nums match Markdown.pl +__version__ = '1.0.1.13' +__author__ = "Trent Mick" + +import os +import sys +from pprint import pprint +import re +import logging +try: + from hashlib import md5 +except ImportError: + from md5 import md5 +import optparse +from random import random +import codecs + + + +#---- Python version compat + +if sys.version_info[:2] < (2,4): + from sets import Set as set + def reversed(sequence): + for i in sequence[::-1]: + yield i + def _unicode_decode(s, encoding, errors='xmlcharrefreplace'): + return unicode(s, encoding, errors) +else: + def _unicode_decode(s, encoding, errors='strict'): + return s.decode(encoding, errors) + + +#---- globals + +DEBUG = False +log = logging.getLogger("markdown") + +DEFAULT_TAB_WIDTH = 4 + +# Table of hash values for escaped characters: +def _escape_hash(s): + # Lame attempt to avoid possible collision with someone actually + # using the MD5 hexdigest of one of these chars in there text. + # Other ideas: random.random(), uuid.uuid() + #return md5(s).hexdigest() # Markdown.pl effectively does this. 
+ return 'md5-'+md5(s).hexdigest() +g_escape_table = dict([(ch, _escape_hash(ch)) + for ch in '\\`*_{}[]()>#+-.!']) + + + +#---- exceptions + +class MarkdownError(Exception): + pass + + + +#---- public api + +def markdown_path(path, encoding="utf-8", + html4tags=False, tab_width=DEFAULT_TAB_WIDTH, + safe_mode=None, extras=None, link_patterns=None, + use_file_vars=False): + text = codecs.open(path, 'r', encoding).read() + return Markdown(html4tags=html4tags, tab_width=tab_width, + safe_mode=safe_mode, extras=extras, + link_patterns=link_patterns, + use_file_vars=use_file_vars).convert(text) + +def markdown(text, html4tags=False, tab_width=DEFAULT_TAB_WIDTH, + safe_mode=None, extras=None, link_patterns=None, + use_file_vars=False): + return Markdown(html4tags=html4tags, tab_width=tab_width, + safe_mode=safe_mode, extras=extras, + link_patterns=link_patterns, + use_file_vars=use_file_vars).convert(text) + +class Markdown(object): + # The dict of "extras" to enable in processing -- a mapping of + # extra name to argument for the extra. Most extras do not have an + # argument, in which case the value is None. + # + # This can be set via (a) subclassing and (b) the constructor + # "extras" argument. 
+ extras = None + + urls = None + titles = None + html_blocks = None + html_spans = None + html_removed_text = "[HTML_REMOVED]" # for compat with markdown.py + + # Used to track when we're inside an ordered or unordered list + # (see _ProcessListItems() for details): + list_level = 0 + + _ws_only_line_re = re.compile(r"^[ \t]+$", re.M) + + def __init__(self, html4tags=False, tab_width=4, safe_mode=None, + extras=None, link_patterns=None, use_file_vars=False): + if html4tags: + self.empty_element_suffix = ">" + else: + self.empty_element_suffix = " />" + self.tab_width = tab_width + + # For compatibility with earlier markdown2.py and with + # markdown.py's safe_mode being a boolean, + # safe_mode == True -> "replace" + if safe_mode is True: + self.safe_mode = "replace" + else: + self.safe_mode = safe_mode + + if self.extras is None: + self.extras = {} + elif not isinstance(self.extras, dict): + self.extras = dict([(e, None) for e in self.extras]) + if extras: + if not isinstance(extras, dict): + extras = dict([(e, None) for e in extras]) + self.extras.update(extras) + assert isinstance(self.extras, dict) + self._instance_extras = self.extras.copy() + self.link_patterns = link_patterns + self.use_file_vars = use_file_vars + self._outdent_re = re.compile(r'^(\t|[ ]{1,%d})' % tab_width, re.M) + + def reset(self): + self.urls = {} + self.titles = {} + self.html_blocks = {} + self.html_spans = {} + self.list_level = 0 + self.extras = self._instance_extras.copy() + if "footnotes" in self.extras: + self.footnotes = {} + self.footnote_ids = [] + + def convert(self, text): + """Convert the given text.""" + # Main function. The order in which other subs are called here is + # essential. Link and image substitutions need to happen before + # _EscapeSpecialChars(), so that any *'s or _'s in the <a> + # and <img> tags get encoded. + + # Clear the global hashes. 
If we don't clear these, you get conflicts + # from other articles when generating a page which contains more than + # one article (e.g. an index page that shows the N most recent + # articles): + self.reset() + + if not isinstance(text, unicode): + #TODO: perhaps shouldn't presume UTF-8 for string input? + text = unicode(text, 'utf-8') + + if self.use_file_vars: + # Look for emacs-style file variable hints. + emacs_vars = self._get_emacs_vars(text) + if "markdown-extras" in emacs_vars: + splitter = re.compile("[ ,]+") + for e in splitter.split(emacs_vars["markdown-extras"]): + if '=' in e: + ename, earg = e.split('=', 1) + try: + earg = int(earg) + except ValueError: + pass + else: + ename, earg = e, None + self.extras[ename] = earg + + # Standardize line endings: + text = re.sub("\r\n|\r", "\n", text) + + # Make sure $text ends with a couple of newlines: + text += "\n\n" + + # Convert all tabs to spaces. + text = self._detab(text) + + # Strip any lines consisting only of spaces and tabs. + # This makes subsequent regexen easier to write, because we can + # match consecutive blank lines with /\n+/ instead of something + # contorted like /[ \t]*\n+/ . + text = self._ws_only_line_re.sub("", text) + + if self.safe_mode: + text = self._hash_html_spans(text) + + # Turn block-level HTML blocks into hash entries + text = self._hash_html_blocks(text, raw=True) + + # Strip link definitions, store in hashes. 
+ if "footnotes" in self.extras: + # Must do footnotes first because an unlucky footnote defn + # looks like a link defn: + # [^4]: this "looks like a link defn" + text = self._strip_footnote_definitions(text) + text = self._strip_link_definitions(text) + + text = self._run_block_gamut(text) + + text = self._unescape_special_chars(text) + + if "footnotes" in self.extras: + text = self._add_footnotes(text) + + if self.safe_mode: + text = self._unhash_html_spans(text) + + text += "\n" + return text + + _emacs_oneliner_vars_pat = re.compile(r"-\*-\s*([^\r\n]*?)\s*-\*-", re.UNICODE) + # This regular expression is intended to match blocks like this: + # PREFIX Local Variables: SUFFIX + # PREFIX mode: Tcl SUFFIX + # PREFIX End: SUFFIX + # Some notes: + # - "[ \t]" is used instead of "\s" to specifically exclude newlines + # - "(\r\n|\n|\r)" is used instead of "$" because the sre engine does + # not like anything other than Unix-style line terminators. + _emacs_local_vars_pat = re.compile(r"""^ + (?P<prefix>(?:[^\r\n|\n|\r])*?) + [\ \t]*Local\ Variables:[\ \t]* + (?P<suffix>.*?)(?:\r\n|\n|\r) + (?P<content>.*?\1End:) + """, re.IGNORECASE | re.MULTILINE | re.DOTALL | re.VERBOSE) + + def _get_emacs_vars(self, text): + """Return a dictionary of emacs-style local variables. + + Parsing is done loosely according to this spec (and according to + some in-practice deviations from this): + http://www.gnu.org/software/emacs/manual/html_node/emacs/Specifying-File-Variables.html#Specifying-File-Variables + """ + emacs_vars = {} + SIZE = pow(2, 13) # 8kB + + # Search near the start for a '-*-'-style one-liner of variables. 
+ head = text[:SIZE] + if "-*-" in head: + match = self._emacs_oneliner_vars_pat.search(head) + if match: + emacs_vars_str = match.group(1) + assert '\n' not in emacs_vars_str + emacs_var_strs = [s.strip() for s in emacs_vars_str.split(';') + if s.strip()] + if len(emacs_var_strs) == 1 and ':' not in emacs_var_strs[0]: + # While not in the spec, this form is allowed by emacs: + # -*- Tcl -*- + # where the implied "variable" is "mode". This form + # is only allowed if there are no other variables. + emacs_vars["mode"] = emacs_var_strs[0].strip() + else: + for emacs_var_str in emacs_var_strs: + try: + variable, value = emacs_var_str.strip().split(':', 1) + except ValueError: + log.debug("emacs variables error: malformed -*- " + "line: %r", emacs_var_str) + continue + # Lowercase the variable name because Emacs allows "Mode" + # or "mode" or "MoDe", etc. + emacs_vars[variable.lower()] = value.strip() + + tail = text[-SIZE:] + if "Local Variables" in tail: + match = self._emacs_local_vars_pat.search(tail) + if match: + prefix = match.group("prefix") + suffix = match.group("suffix") + lines = match.group("content").splitlines(0) + #print "prefix=%r, suffix=%r, content=%r, lines: %s"\ + # % (prefix, suffix, match.group("content"), lines) + + # Validate the Local Variables block: proper prefix and suffix + # usage. + for i, line in enumerate(lines): + if not line.startswith(prefix): + log.debug("emacs variables error: line '%s' " + "does not use proper prefix '%s'" + % (line, prefix)) + return {} + # Don't validate suffix on last line. Emacs doesn't care, + # neither should we. + if i != len(lines)-1 and not line.endswith(suffix): + log.debug("emacs variables error: line '%s' " + "does not use proper suffix '%s'" + % (line, suffix)) + return {} + + # Parse out one emacs var per line. 
+ continued_for = None + for line in lines[:-1]: # no var on the last line ("PREFIX End:") + if prefix: line = line[len(prefix):] # strip prefix + if suffix: line = line[:-len(suffix)] # strip suffix + line = line.strip() + if continued_for: + variable = continued_for + if line.endswith('\\'): + line = line[:-1].rstrip() + else: + continued_for = None + emacs_vars[variable] += ' ' + line + else: + try: + variable, value = line.split(':', 1) + except ValueError: + log.debug("local variables error: missing colon " + "in local variables entry: '%s'" % line) + continue + # Do NOT lowercase the variable name, because Emacs only + # allows "mode" (and not "Mode", "MoDe", etc.) in this block. + value = value.strip() + if value.endswith('\\'): + value = value[:-1].rstrip() + continued_for = variable + else: + continued_for = None + emacs_vars[variable] = value + + # Unquote values. + for var, val in emacs_vars.items(): + if len(val) > 1 and (val.startswith('"') and val.endswith('"') + or val.startswith('"') and val.endswith('"')): + emacs_vars[var] = val[1:-1] + + return emacs_vars + + # Cribbed from a post by Bart Lateur: + # <http://www.nntp.perl.org/group/perl.macperl.anyperl/154> + _detab_re = re.compile(r'(.*?)\t', re.M) + def _detab_sub(self, match): + g1 = match.group(1) + return g1 + (' ' * (self.tab_width - len(g1) % self.tab_width)) + def _detab(self, text): + r"""Remove (leading?) tabs from a file. + + >>> m = Markdown() + >>> m._detab("\tfoo") + ' foo' + >>> m._detab(" \tfoo") + ' foo' + >>> m._detab("\t foo") + ' foo' + >>> m._detab(" foo") + ' foo' + >>> m._detab(" foo\n\tbar\tblam") + ' foo\n bar blam' + """ + if '\t' not in text: + return text + return self._detab_re.subn(self._detab_sub, text)[0] + + _block_tags_a = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del' + _strict_tag_block_re = re.compile(r""" + ( # save in \1 + ^ # start of line (with re.M) + <(%s) # start tag = \2 + \b # word break + (.*\n)*? 
# any number of lines, minimally matching + </\2> # the matching end tag + [ \t]* # trailing spaces/tabs + (?=\n+|\Z) # followed by a newline or end of document + ) + """ % _block_tags_a, + re.X | re.M) + + _block_tags_b = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math' + _liberal_tag_block_re = re.compile(r""" + ( # save in \1 + ^ # start of line (with re.M) + <(%s) # start tag = \2 + \b # word break + (.*\n)*? # any number of lines, minimally matching + .*</\2> # the matching end tag + [ \t]* # trailing spaces/tabs + (?=\n+|\Z) # followed by a newline or end of document + ) + """ % _block_tags_b, + re.X | re.M) + + def _hash_html_block_sub(self, match, raw=False): + html = match.group(1) + if raw and self.safe_mode: + html = self._sanitize_html(html) + key = _hash_text(html) + self.html_blocks[key] = html + return "\n\n" + key + "\n\n" + + def _hash_html_blocks(self, text, raw=False): + """Hashify HTML blocks + + We only want to do this for block-level HTML tags, such as headers, + lists, and tables. That's because we still want to wrap <p>s around + "paragraphs" that are wrapped in non-block-level tags, such as anchors, + phrase emphasis, and spans. The list of tags we're looking for is + hard-coded. + + @param raw {boolean} indicates if these are raw HTML blocks in + the original source. It makes a difference in "safe" mode. + """ + if '<' not in text: + return text + + # Pass `raw` value into our calls to self._hash_html_block_sub. + hash_html_block_sub = _curry(self._hash_html_block_sub, raw=raw) + + # First, look for nested blocks, e.g.: + # <div> + # <div> + # tags for inner block must be indented. + # </div> + # </div> + # + # The outermost tags must start at the left margin for this to match, and + # the inner nested divs must be indented. + # We need to do this before the next, more liberal match, because the next + # match will start at the first `<div>` and stop at the first `</div>`. 
+ text = self._strict_tag_block_re.sub(hash_html_block_sub, text) + + # Now match more liberally, simply from `\n<tag>` to `</tag>\n` + text = self._liberal_tag_block_re.sub(hash_html_block_sub, text) + + # Special case just for <hr />. It was easier to make a special + # case than to make the other regex more complicated. + if "<hr" in text: + _hr_tag_re = _hr_tag_re_from_tab_width(self.tab_width) + text = _hr_tag_re.sub(hash_html_block_sub, text) + + # Special case for standalone HTML comments: + if "<!--" in text: + start = 0 + while True: + # Delimiters for next comment block. + try: + start_idx = text.index("<!--", start) + except ValueError, ex: + break + try: + end_idx = text.index("-->", start_idx) + 3 + except ValueError, ex: + break + + # Start position for next comment block search. + start = end_idx + + # Validate whitespace before comment. + if start_idx: + # - Up to `tab_width - 1` spaces before start_idx. + for i in range(self.tab_width - 1): + if text[start_idx - 1] != ' ': + break + start_idx -= 1 + if start_idx == 0: + break + # - Must be preceded by 2 newlines or hit the start of + # the document. + if start_idx == 0: + pass + elif start_idx == 1 and text[0] == '\n': + start_idx = 0 # to match minute detail of Markdown.pl regex + elif text[start_idx-2:start_idx] == '\n\n': + pass + else: + break + + # Validate whitespace after comment. + # - Any number of spaces and tabs. + while end_idx < len(text): + if text[end_idx] not in ' \t': + break + end_idx += 1 + # - Must be following by 2 newlines or hit end of text. + if text[end_idx:end_idx+2] not in ('', '\n', '\n\n'): + continue + + # Escape and hash (must match `_hash_html_block_sub`). 
+ html = text[start_idx:end_idx] + if raw and self.safe_mode: + html = self._sanitize_html(html) + key = _hash_text(html) + self.html_blocks[key] = html + text = text[:start_idx] + "\n\n" + key + "\n\n" + text[end_idx:] + + if "xml" in self.extras: + # Treat XML processing instructions and namespaced one-liner + # tags as if they were block HTML tags. E.g., if standalone + # (i.e. are their own paragraph), the following do not get + # wrapped in a <p> tag: + # <?foo bar?> + # + # <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="chapter_1.md"/> + _xml_oneliner_re = _xml_oneliner_re_from_tab_width(self.tab_width) + text = _xml_oneliner_re.sub(hash_html_block_sub, text) + + return text + + def _strip_link_definitions(self, text): + # Strips link definitions from text, stores the URLs and titles in + # hash references. + less_than_tab = self.tab_width - 1 + + # Link defs are in the form: + # [id]: url "optional title" + _link_def_re = re.compile(r""" + ^[ ]{0,%d}\[(.+)\]: # id = \1 + [ \t]* + \n? # maybe *one* newline + [ \t]* + <?(.+?)>? # url = \2 + [ \t]* + (?: + \n? # maybe one newline + [ \t]* + (?<=\s) # lookbehind for whitespace + ['"(] + ([^\n]*) # title = \3 + ['")] + [ \t]* + )? # title is optional + (?:\n+|\Z) + """ % less_than_tab, re.X | re.M | re.U) + return _link_def_re.sub(self._extract_link_def_sub, text) + + def _extract_link_def_sub(self, match): + id, url, title = match.groups() + key = id.lower() # Link IDs are case-insensitive + self.urls[key] = self._encode_amps_and_angles(url) + if title: + self.titles[key] = title.replace('"', '"') + return "" + + def _extract_footnote_def_sub(self, match): + id, text = match.groups() + text = _dedent(text, skip_first_line=not text.startswith('\n')).strip() + normed_id = re.sub(r'\W', '-', id) + # Ensure footnote text ends with a couple newlines (for some + # block gamut matches). 
+ self.footnotes[normed_id] = text + "\n\n" + return "" + + def _strip_footnote_definitions(self, text): + """A footnote definition looks like this: + + [^note-id]: Text of the note. + + May include one or more indented paragraphs. + + Where, + - The 'note-id' can be pretty much anything, though typically it + is the number of the footnote. + - The first paragraph may start on the next line, like so: + + [^note-id]: + Text of the note. + """ + less_than_tab = self.tab_width - 1 + footnote_def_re = re.compile(r''' + ^[ ]{0,%d}\[\^(.+)\]: # id = \1 + [ \t]* + ( # footnote text = \2 + # First line need not start with the spaces. + (?:\s*.*\n+) + (?: + (?:[ ]{%d} | \t) # Subsequent lines must be indented. + .*\n+ + )* + ) + # Lookahead for non-space at line-start, or end of doc. + (?:(?=^[ ]{0,%d}\S)|\Z) + ''' % (less_than_tab, self.tab_width, self.tab_width), + re.X | re.M) + return footnote_def_re.sub(self._extract_footnote_def_sub, text) + + + _hr_res = [ + re.compile(r"^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$", re.M), + re.compile(r"^[ ]{0,2}([ ]?\-[ ]?){3,}[ \t]*$", re.M), + re.compile(r"^[ ]{0,2}([ ]?\_[ ]?){3,}[ \t]*$", re.M), + ] + + def _run_block_gamut(self, text): + # These are all the transformations that form block-level + # tags like paragraphs, headers, and list items. + + text = self._do_headers(text) + + # Do Horizontal Rules: + hr = "\n<hr"+self.empty_element_suffix+"\n" + for hr_re in self._hr_res: + text = hr_re.sub(hr, text) + + text = self._do_lists(text) + + if "pyshell" in self.extras: + text = self._prepare_pyshell_blocks(text) + + text = self._do_code_blocks(text) + + text = self._do_block_quotes(text) + + # We already ran _HashHTMLBlocks() before, in Markdown(), but that + # was to escape raw HTML in the original Markdown source. This time, + # we're escaping the markup we've just created, so that we don't wrap + # <p> tags around block-level tags. 
+ text = self._hash_html_blocks(text) + + text = self._form_paragraphs(text) + + return text + + def _pyshell_block_sub(self, match): + lines = match.group(0).splitlines(0) + _dedentlines(lines) + indent = ' ' * self.tab_width + s = ('\n' # separate from possible cuddled paragraph + + indent + ('\n'+indent).join(lines) + + '\n\n') + return s + + def _prepare_pyshell_blocks(self, text): + """Ensure that Python interactive shell sessions are put in + code blocks -- even if not properly indented. + """ + if ">>>" not in text: + return text + + less_than_tab = self.tab_width - 1 + _pyshell_block_re = re.compile(r""" + ^([ ]{0,%d})>>>[ ].*\n # first line + ^(\1.*\S+.*\n)* # any number of subsequent lines + ^\n # ends with a blank line + """ % less_than_tab, re.M | re.X) + + return _pyshell_block_re.sub(self._pyshell_block_sub, text) + + def _run_span_gamut(self, text): + # These are all the transformations that occur *within* block-level + # tags like paragraphs, headers, and list items. + + text = self._do_code_spans(text) + + text = self._escape_special_chars(text) + + # Process anchor and image tags. + text = self._do_links(text) + + # Make links out of things like `<http://example.com/>` + # Must come after _do_links(), because you can use < and > + # delimiters in inline links like [this](<url>). + text = self._do_auto_links(text) + + if "link-patterns" in self.extras: + text = self._do_link_patterns(text) + + text = self._encode_amps_and_angles(text) + + text = self._do_italics_and_bold(text) + + # Do hard breaks: + text = re.sub(r" {2,}\n", " <br%s\n" % self.empty_element_suffix, text) + + return text + + # "Sorta" because auto-links are identified as "tag" tokens. + _sorta_html_tokenize_re = re.compile(r""" + ( + # tag + </? 
+ (?:\w+) # tag name + (?:\s+(?:[\w-]+:)?[\w-]+=(?:".*?"|'.*?'))* # attributes + \s*/?> + | + # auto-link (e.g., <http://www.activestate.com/>) + <\w+[^>]*> + | + <!--.*?--> # comment + | + <\?.*?\?> # processing instruction + ) + """, re.X) + + def _escape_special_chars(self, text): + # Python markdown note: the HTML tokenization here differs from + # that in Markdown.pl, hence the behaviour for subtle cases can + # differ (I believe the tokenizer here does a better job because + # it isn't susceptible to unmatched '<' and '>' in HTML tags). + # Note, however, that '>' is not allowed in an auto-link URL + # here. + escaped = [] + is_html_markup = False + for token in self._sorta_html_tokenize_re.split(text): + if is_html_markup: + # Within tags/HTML-comments/auto-links, encode * and _ + # so they don't conflict with their use in Markdown for + # italics and strong. We're replacing each such + # character with its corresponding MD5 checksum value; + # this is likely overkill, but it should prevent us from + # colliding with the escape values by accident. + escaped.append(token.replace('*', g_escape_table['*']) + .replace('_', g_escape_table['_'])) + else: + escaped.append(self._encode_backslash_escapes(token)) + is_html_markup = not is_html_markup + return ''.join(escaped) + + def _hash_html_spans(self, text): + # Used for safe_mode. 
+ + def _is_auto_link(s): + if ':' in s and self._auto_link_re.match(s): + return True + elif '@' in s and self._auto_email_link_re.match(s): + return True + return False + + tokens = [] + is_html_markup = False + for token in self._sorta_html_tokenize_re.split(text): + if is_html_markup and not _is_auto_link(token): + sanitized = self._sanitize_html(token) + key = _hash_text(sanitized) + self.html_spans[key] = sanitized + tokens.append(key) + else: + tokens.append(token) + is_html_markup = not is_html_markup + return ''.join(tokens) + + def _unhash_html_spans(self, text): + for key, sanitized in self.html_spans.items(): + text = text.replace(key, sanitized) + return text + + def _sanitize_html(self, s): + if self.safe_mode == "replace": + return self.html_removed_text + elif self.safe_mode == "escape": + replacements = [ + ('&', '&'), + ('<', '<'), + ('>', '>'), + ] + for before, after in replacements: + s = s.replace(before, after) + return s + else: + raise MarkdownError("invalid value for 'safe_mode': %r (must be " + "'escape' or 'replace')" % self.safe_mode) + + _tail_of_inline_link_re = re.compile(r''' + # Match tail of: [text](/url/) or [text](/url/ "title") + \( # literal paren + [ \t]* + (?P<url> # \1 + <.*?> + | + .*? + ) + [ \t]* + ( # \2 + (['"]) # quote char = \3 + (?P<title>.*?) + \3 # matching quote + )? # title is optional + \) + ''', re.X | re.S) + _tail_of_reference_link_re = re.compile(r''' + # Match tail of: [text][id] + [ ]? # one optional space + (?:\n[ ]*)? # one optional newline followed by spaces + \[ + (?P<id>.*?) + \] + ''', re.X | re.S) + + def _do_links(self, text): + """Turn Markdown link shortcuts into XHTML <a> and <img> tags. + + This is a combination of Markdown.pl's _DoAnchors() and + _DoImages(). They are done together because that simplified the + approach. It was necessary to use a different approach than + Markdown.pl because of the lack of atomic matching support in + Python's regex engine used in $g_nested_brackets. 
+ """ + MAX_LINK_TEXT_SENTINEL = 3000 # markdown2 issue 24 + + # `anchor_allowed_pos` is used to support img links inside + # anchors, but not anchors inside anchors. An anchor's start + # pos must be `>= anchor_allowed_pos`. + anchor_allowed_pos = 0 + + curr_pos = 0 + while True: # Handle the next link. + # The next '[' is the start of: + # - an inline anchor: [text](url "title") + # - a reference anchor: [text][id] + # - an inline img: ![text](url "title") + # - a reference img: ![text][id] + # - a footnote ref: [^id] + # (Only if 'footnotes' extra enabled) + # - a footnote defn: [^id]: ... + # (Only if 'footnotes' extra enabled) These have already + # been stripped in _strip_footnote_definitions() so no + # need to watch for them. + # - a link definition: [id]: url "title" + # These have already been stripped in + # _strip_link_definitions() so no need to watch for them. + # - not markup: [...anything else... + try: + start_idx = text.index('[', curr_pos) + except ValueError: + break + text_length = len(text) + + # Find the matching closing ']'. + # Markdown.pl allows *matching* brackets in link text so we + # will here too. Markdown.pl *doesn't* currently allow + # matching brackets in img alt text -- we'll differ in that + # regard. + bracket_depth = 0 + for p in range(start_idx+1, min(start_idx+MAX_LINK_TEXT_SENTINEL, + text_length)): + ch = text[p] + if ch == ']': + bracket_depth -= 1 + if bracket_depth < 0: + break + elif ch == '[': + bracket_depth += 1 + else: + # Closing bracket not found within sentinel length. + # This isn't markup. + curr_pos = start_idx + 1 + continue + link_text = text[start_idx+1:p] + + # Possibly a footnote ref? 
+ if "footnotes" in self.extras and link_text.startswith("^"): + normed_id = re.sub(r'\W', '-', link_text[1:]) + if normed_id in self.footnotes: + self.footnote_ids.append(normed_id) + result = '<sup class="footnote-ref" id="fnref-%s">' \ + '<a href="#fn-%s">%s</a></sup>' \ + % (normed_id, normed_id, len(self.footnote_ids)) + text = text[:start_idx] + result + text[p+1:] + else: + # This id isn't defined, leave the markup alone. + curr_pos = p+1 + continue + + # Now determine what this is by the remainder. + p += 1 + if p == text_length: + return text + + # Inline anchor or img? + if text[p] == '(': # attempt at perf improvement + match = self._tail_of_inline_link_re.match(text, p) + if match: + # Handle an inline anchor or img. + is_img = start_idx > 0 and text[start_idx-1] == "!" + if is_img: + start_idx -= 1 + + url, title = match.group("url"), match.group("title") + if url and url[0] == '<': + url = url[1:-1] # '<url>' -> 'url' + # We've got to encode these to avoid conflicting + # with italics/bold. + url = url.replace('*', g_escape_table['*']) \ + .replace('_', g_escape_table['_']) + if title: + title_str = ' title="%s"' \ + % title.replace('*', g_escape_table['*']) \ + .replace('_', g_escape_table['_']) \ + .replace('"', '"') + else: + title_str = '' + if is_img: + result = '<img src="%s" alt="%s"%s%s' \ + % (url, link_text.replace('"', '"'), + title_str, self.empty_element_suffix) + curr_pos = start_idx + len(result) + text = text[:start_idx] + result + text[match.end():] + elif start_idx >= anchor_allowed_pos: + result_head = '<a href="%s"%s>' % (url, title_str) + result = '%s%s</a>' % (result_head, link_text) + # <img> allowed from curr_pos on, <a> from + # anchor_allowed_pos on. + curr_pos = start_idx + len(result_head) + anchor_allowed_pos = start_idx + len(result) + text = text[:start_idx] + result + text[match.end():] + else: + # Anchor not allowed here. + curr_pos = start_idx + 1 + continue + + # Reference anchor or img? 
+ else: + match = self._tail_of_reference_link_re.match(text, p) + if match: + # Handle a reference-style anchor or img. + is_img = start_idx > 0 and text[start_idx-1] == "!" + if is_img: + start_idx -= 1 + link_id = match.group("id").lower() + if not link_id: + link_id = link_text.lower() # for links like [this][] + if link_id in self.urls: + url = self.urls[link_id] + # We've got to encode these to avoid conflicting + # with italics/bold. + url = url.replace('*', g_escape_table['*']) \ + .replace('_', g_escape_table['_']) + title = self.titles.get(link_id) + if title: + title = title.replace('*', g_escape_table['*']) \ + .replace('_', g_escape_table['_']) + title_str = ' title="%s"' % title + else: + title_str = '' + if is_img: + result = '<img src="%s" alt="%s"%s%s' \ + % (url, link_text.replace('"', '"'), + title_str, self.empty_element_suffix) + curr_pos = start_idx + len(result) + text = text[:start_idx] + result + text[match.end():] + elif start_idx >= anchor_allowed_pos: + result = '<a href="%s"%s>%s</a>' \ + % (url, title_str, link_text) + result_head = '<a href="%s"%s>' % (url, title_str) + result = '%s%s</a>' % (result_head, link_text) + # <img> allowed from curr_pos on, <a> from + # anchor_allowed_pos on. + curr_pos = start_idx + len(result_head) + anchor_allowed_pos = start_idx + len(result) + text = text[:start_idx] + result + text[match.end():] + else: + # Anchor not allowed here. + curr_pos = start_idx + 1 + else: + # This id isn't defined, leave the markup alone. + curr_pos = match.end() + continue + + # Otherwise, it isn't markup. 
+ curr_pos = start_idx + 1 + + return text + + + _setext_h_re = re.compile(r'^(.+)[ \t]*\n(=+|-+)[ \t]*\n+', re.M) + def _setext_h_sub(self, match): + n = {"=": 1, "-": 2}[match.group(2)[0]] + demote_headers = self.extras.get("demote-headers") + if demote_headers: + n = min(n + demote_headers, 6) + return "<h%d>%s</h%d>\n\n" \ + % (n, self._run_span_gamut(match.group(1)), n) + + _atx_h_re = re.compile(r''' + ^(\#{1,6}) # \1 = string of #'s + [ \t]* + (.+?) # \2 = Header text + [ \t]* + (?<!\\) # ensure not an escaped trailing '#' + \#* # optional closing #'s (not counted) + \n+ + ''', re.X | re.M) + def _atx_h_sub(self, match): + n = len(match.group(1)) + demote_headers = self.extras.get("demote-headers") + if demote_headers: + n = min(n + demote_headers, 6) + return "<h%d>%s</h%d>\n\n" \ + % (n, self._run_span_gamut(match.group(2)), n) + + def _do_headers(self, text): + # Setext-style headers: + # Header 1 + # ======== + # + # Header 2 + # -------- + text = self._setext_h_re.sub(self._setext_h_sub, text) + + # atx-style headers: + # # Header 1 + # ## Header 2 + # ## Header 2 with closing hashes ## + # ... + # ###### Header 6 + text = self._atx_h_re.sub(self._atx_h_sub, text) + + return text + + + _marker_ul_chars = '*+-' + _marker_any = r'(?:[%s]|\d+\.)' % _marker_ul_chars + _marker_ul = '(?:[%s])' % _marker_ul_chars + _marker_ol = r'(?:\d+\.)' + + def _list_sub(self, match): + lst = match.group(1) + lst_type = match.group(3) in self._marker_ul_chars and "ul" or "ol" + result = self._process_list_items(lst) + if self.list_level: + return "<%s>\n%s</%s>\n" % (lst_type, result, lst_type) + else: + return "<%s>\n%s</%s>\n\n" % (lst_type, result, lst_type) + + def _do_lists(self, text): + # Form HTML ordered (numbered) and unordered (bulleted) lists. 
+ + for marker_pat in (self._marker_ul, self._marker_ol): + # Re-usable pattern to match any entire ul or ol list: + less_than_tab = self.tab_width - 1 + whole_list = r''' + ( # \1 = whole list + ( # \2 + [ ]{0,%d} + (%s) # \3 = first list item marker + [ \t]+ + ) + (?:.+?) + ( # \4 + \Z + | + \n{2,} + (?=\S) + (?! # Negative lookahead for another list item marker + [ \t]* + %s[ \t]+ + ) + ) + ) + ''' % (less_than_tab, marker_pat, marker_pat) + + # We use a different prefix before nested lists than top-level lists. + # See extended comment in _process_list_items(). + # + # Note: There's a bit of duplication here. My original implementation + # created a scalar regex pattern as the conditional result of the test on + # $g_list_level, and then only ran the $text =~ s{...}{...}egmx + # substitution once, using the scalar as the pattern. This worked, + # everywhere except when running under MT on my hosting account at Pair + # Networks. There, this caused all rebuilds to be killed by the reaper (or + # perhaps they crashed, but that seems incredibly unlikely given that the + # same script on the same server ran fine *except* under MT. I've spent + # more time trying to figure out why this is happening than I'd like to + # admit. My only guess, backed up by the fact that this workaround works, + # is that Perl optimizes the substition when it can figure out that the + # pattern will never change, and when this optimization isn't on, we run + # afoul of the reaper. Thus, the slightly redundant code to that uses two + # static s/// patterns rather than one conditional pattern. + + if self.list_level: + sub_list_re = re.compile("^"+whole_list, re.X | re.M | re.S) + text = sub_list_re.sub(self._list_sub, text) + else: + list_re = re.compile(r"(?:(?<=\n\n)|\A\n?)"+whole_list, + re.X | re.M | re.S) + text = list_re.sub(self._list_sub, text) + + return text + + _list_item_re = re.compile(r''' + (\n)? 
# leading line = \1 + (^[ \t]*) # leading whitespace = \2 + (%s) [ \t]+ # list marker = \3 + ((?:.+?) # list item text = \4 + (\n{1,2})) # eols = \5 + (?= \n* (\Z | \2 (%s) [ \t]+)) + ''' % (_marker_any, _marker_any), + re.M | re.X | re.S) + + _last_li_endswith_two_eols = False + def _list_item_sub(self, match): + item = match.group(4) + leading_line = match.group(1) + leading_space = match.group(2) + if leading_line or "\n\n" in item or self._last_li_endswith_two_eols: + item = self._run_block_gamut(self._outdent(item)) + else: + # Recursion for sub-lists: + item = self._do_lists(self._outdent(item)) + if item.endswith('\n'): + item = item[:-1] + item = self._run_span_gamut(item) + self._last_li_endswith_two_eols = (len(match.group(5)) == 2) + return "<li>%s</li>\n" % item + + def _process_list_items(self, list_str): + # Process the contents of a single ordered or unordered list, + # splitting it into individual list items. + + # The $g_list_level global keeps track of when we're inside a list. + # Each time we enter a list, we increment it; when we leave a list, + # we decrement. If it's zero, we're not in a list anymore. + # + # We do this because when we're not inside a list, we want to treat + # something like this: + # + # I recommend upgrading to version + # 8. Oops, now this line is treated + # as a sub-list. + # + # As a single paragraph, despite the fact that the second line starts + # with a digit-period-space sequence. + # + # Whereas when we're inside a list (or sub-list), that line will be + # treated as the start of a sub-list. What a kludge, huh? This is + # an aspect of Markdown's syntax that's hard to parse perfectly + # without resorting to mind-reading. Perhaps the solution is to + # change the syntax rules such that sub-lists must start with a + # starting cardinal number; e.g. "1." or "a.". 
+ self.list_level += 1 + self._last_li_endswith_two_eols = False + list_str = list_str.rstrip('\n') + '\n' + list_str = self._list_item_re.sub(self._list_item_sub, list_str) + self.list_level -= 1 + return list_str + + def _get_pygments_lexer(self, lexer_name): + try: + from pygments import lexers, util + except ImportError: + return None + try: + return lexers.get_lexer_by_name(lexer_name) + except util.ClassNotFound: + return None + + def _color_with_pygments(self, codeblock, lexer, **formatter_opts): + import pygments + import pygments.formatters + + class HtmlCodeFormatter(pygments.formatters.HtmlFormatter): + def _wrap_code(self, inner): + """A function for use in a Pygments Formatter which + wraps in <code> tags. + """ + yield 0, "<code>" + for tup in inner: + yield tup + yield 0, "</code>" + + def wrap(self, source, outfile): + """Return the source with a code, pre, and div.""" + return self._wrap_div(self._wrap_pre(self._wrap_code(source))) + + formatter = HtmlCodeFormatter(cssclass="codehilite", **formatter_opts) + return pygments.highlight(codeblock, lexer, formatter) + + def _code_block_sub(self, match): + codeblock = match.group(1) + codeblock = self._outdent(codeblock) + codeblock = self._detab(codeblock) + codeblock = codeblock.lstrip('\n') # trim leading newlines + codeblock = codeblock.rstrip() # trim trailing whitespace + + if "code-color" in self.extras and codeblock.startswith(":::"): + lexer_name, rest = codeblock.split('\n', 1) + lexer_name = lexer_name[3:].strip() + lexer = self._get_pygments_lexer(lexer_name) + codeblock = rest.lstrip("\n") # Remove lexer declaration line. 
+ if lexer: + formatter_opts = self.extras['code-color'] or {} + colored = self._color_with_pygments(codeblock, lexer, + **formatter_opts) + return "\n\n%s\n\n" % colored + + codeblock = self._encode_code(codeblock) + return "\n\n<pre><code>%s\n</code></pre>\n\n" % codeblock + + def _do_code_blocks(self, text): + """Process Markdown `<pre><code>` blocks.""" + code_block_re = re.compile(r''' + (?:\n\n|\A) + ( # $1 = the code block -- one or more lines, starting with a space/tab + (?: + (?:[ ]{%d} | \t) # Lines must start with a tab or a tab-width of spaces + .*\n+ + )+ + ) + ((?=^[ ]{0,%d}\S)|\Z) # Lookahead for non-space at line-start, or end of doc + ''' % (self.tab_width, self.tab_width), + re.M | re.X) + + return code_block_re.sub(self._code_block_sub, text) + + + # Rules for a code span: + # - backslash escapes are not interpreted in a code span + # - to include one or or a run of more backticks the delimiters must + # be a longer run of backticks + # - cannot start or end a code span with a backtick; pad with a + # space and that space will be removed in the emitted HTML + # See `test/tm-cases/escapes.text` for a number of edge-case + # examples. + _code_span_re = re.compile(r''' + (?<!\\) + (`+) # \1 = Opening run of ` + (?!`) # See Note A test/tm-cases/escapes.text + (.+?) # \2 = The code block + (?<!`) + \1 # Matching closer + (?!`) + ''', re.X | re.S) + + def _code_span_sub(self, match): + c = match.group(2).strip(" \t") + c = self._encode_code(c) + return "<code>%s</code>" % c + + def _do_code_spans(self, text): + # * Backtick quotes are used for <code></code> spans. + # + # * You can use multiple backticks as the delimiters if you want to + # include literal backticks in the code span. So, this input: + # + # Just type ``foo `bar` baz`` at the prompt. + # + # Will translate to: + # + # <p>Just type <code>foo `bar` baz</code> at the prompt.</p> + # + # There's no arbitrary limit to the number of backticks you + # can use as delimters. 
If you need three consecutive backticks + # in your code, use four for delimiters, etc. + # + # * You can use spaces to get literal backticks at the edges: + # + # ... type `` `bar` `` ... + # + # Turns to: + # + # ... type <code>`bar`</code> ... + return self._code_span_re.sub(self._code_span_sub, text) + + def _encode_code(self, text): + """Encode/escape certain characters inside Markdown code runs. + The point is that in code, these characters are literals, + and lose their special Markdown meanings. + """ + replacements = [ + # Encode all ampersands; HTML entities are not + # entities within a Markdown code span. + ('&', '&'), + # Do the angle bracket song and dance: + ('<', '<'), + ('>', '>'), + # Now, escape characters that are magic in Markdown: + ('*', g_escape_table['*']), + ('_', g_escape_table['_']), + ('{', g_escape_table['{']), + ('}', g_escape_table['}']), + ('[', g_escape_table['[']), + (']', g_escape_table[']']), + ('\\', g_escape_table['\\']), + ] + for before, after in replacements: + text = text.replace(before, after) + return text + + _strong_re = re.compile(r"(\*\*|__)(?=\S)(.+?[*_]*)(?<=\S)\1", re.S) + _em_re = re.compile(r"(\*|_)(?=\S)(.+?)(?<=\S)\1", re.S) + _code_friendly_strong_re = re.compile(r"\*\*(?=\S)(.+?[*_]*)(?<=\S)\*\*", re.S) + _code_friendly_em_re = re.compile(r"\*(?=\S)(.+?)(?<=\S)\*", re.S) + def _do_italics_and_bold(self, text): + # <strong> must go first: + if "code-friendly" in self.extras: + text = self._code_friendly_strong_re.sub(r"<strong>\1</strong>", text) + text = self._code_friendly_em_re.sub(r"<em>\1</em>", text) + else: + text = self._strong_re.sub(r"<strong>\2</strong>", text) + text = self._em_re.sub(r"<em>\2</em>", text) + return text + + + _block_quote_re = re.compile(r''' + ( # Wrap whole match in \1 + ( + ^[ \t]*>[ \t]? 
# '>' at the start of a line + .+\n # rest of the first line + (.+\n)* # subsequent consecutive lines + \n* # blanks + )+ + ) + ''', re.M | re.X) + _bq_one_level_re = re.compile('^[ \t]*>[ \t]?', re.M); + + _html_pre_block_re = re.compile(r'(\s*<pre>.+?</pre>)', re.S) + def _dedent_two_spaces_sub(self, match): + return re.sub(r'(?m)^ ', '', match.group(1)) + + def _block_quote_sub(self, match): + bq = match.group(1) + bq = self._bq_one_level_re.sub('', bq) # trim one level of quoting + bq = self._ws_only_line_re.sub('', bq) # trim whitespace-only lines + bq = self._run_block_gamut(bq) # recurse + + bq = re.sub('(?m)^', ' ', bq) + # These leading spaces screw with <pre> content, so we need to fix that: + bq = self._html_pre_block_re.sub(self._dedent_two_spaces_sub, bq) + + return "<blockquote>\n%s\n</blockquote>\n\n" % bq + + def _do_block_quotes(self, text): + if '>' not in text: + return text + return self._block_quote_re.sub(self._block_quote_sub, text) + + def _form_paragraphs(self, text): + # Strip leading and trailing lines: + text = text.strip('\n') + + # Wrap <p> tags. + grafs = re.split(r"\n{2,}", text) + for i, graf in enumerate(grafs): + if graf in self.html_blocks: + # Unhashify HTML blocks + grafs[i] = self.html_blocks[graf] + else: + # Wrap <p> tags. 
+ graf = self._run_span_gamut(graf) + grafs[i] = "<p>" + graf.lstrip(" \t") + "</p>" + + return "\n\n".join(grafs) + + def _add_footnotes(self, text): + if self.footnotes: + footer = [ + '<div class="footnotes">', + '<hr' + self.empty_element_suffix, + '<ol>', + ] + for i, id in enumerate(self.footnote_ids): + if i != 0: + footer.append('') + footer.append('<li id="fn-%s">' % id) + footer.append(self._run_block_gamut(self.footnotes[id])) + backlink = ('<a href="#fnref-%s" ' + 'class="footnoteBackLink" ' + 'title="Jump back to footnote %d in the text.">' + '↩</a>' % (id, i+1)) + if footer[-1].endswith("</p>"): + footer[-1] = footer[-1][:-len("</p>")] \ + + ' ' + backlink + "</p>" + else: + footer.append("\n<p>%s</p>" % backlink) + footer.append('</li>') + footer.append('</ol>') + footer.append('</div>') + return text + '\n\n' + '\n'.join(footer) + else: + return text + + # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin: + # http://bumppo.net/projects/amputator/ + _ampersand_re = re.compile(r'&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)') + _naked_lt_re = re.compile(r'<(?![a-z/?\$!])', re.I) + _naked_gt_re = re.compile(r'''(?<![a-z?!/'"-])>''', re.I) + + def _encode_amps_and_angles(self, text): + # Smart processing for ampersands and angle brackets that need + # to be encoded. + text = self._ampersand_re.sub('&', text) + + # Encode naked <'s + text = self._naked_lt_re.sub('<', text) + + # Encode naked >'s + # Note: Other markdown implementations (e.g. Markdown.pl, PHP + # Markdown) don't do this. + text = self._naked_gt_re.sub('>', text) + return text + + def _encode_backslash_escapes(self, text): + for ch, escape in g_escape_table.items(): + text = text.replace("\\"+ch, escape) + return text + + _auto_link_re = re.compile(r'<((https?|ftp):[^\'">\s]+)>', re.I) + def _auto_link_sub(self, match): + g1 = match.group(1) + return '<a href="%s">%s</a>' % (g1, g1) + + _auto_email_link_re = re.compile(r""" + < + (?:mailto:)? 
+ ( + [-.\w]+ + \@ + [-\w]+(\.[-\w]+)*\.[a-z]+ + ) + > + """, re.I | re.X | re.U) + def _auto_email_link_sub(self, match): + return self._encode_email_address( + self._unescape_special_chars(match.group(1))) + + def _do_auto_links(self, text): + text = self._auto_link_re.sub(self._auto_link_sub, text) + text = self._auto_email_link_re.sub(self._auto_email_link_sub, text) + return text + + def _encode_email_address(self, addr): + # Input: an email address, e.g. "foo@example.com" + # + # Output: the email address as a mailto link, with each character + # of the address encoded as either a decimal or hex entity, in + # the hopes of foiling most address harvesting spam bots. E.g.: + # + # <a href="mailto:foo@e + # xample.com">foo + # @example.com</a> + # + # Based on a filter by Matthew Wickline, posted to the BBEdit-Talk + # mailing list: <http://tinyurl.com/yu7ue> + chars = [_xml_encode_email_char_at_random(ch) + for ch in "mailto:" + addr] + # Strip the mailto: from the visible part. + addr = '<a href="%s">%s</a>' \ + % (''.join(chars), ''.join(chars[7:])) + return addr + + def _do_link_patterns(self, text): + """Caveat emptor: there isn't much guarding against link + patterns being formed inside other standard Markdown links, e.g. + inside a [link def][like this]. + + Dev Notes: *Could* consider prefixing regexes with a negative + lookbehind assertion to attempt to guard against this. 
+ """ + link_from_hash = {} + for regex, repl in self.link_patterns: + replacements = [] + for match in regex.finditer(text): + if hasattr(repl, "__call__"): + href = repl(match) + else: + href = match.expand(repl) + replacements.append((match.span(), href)) + for (start, end), href in reversed(replacements): + escaped_href = ( + href.replace('"', '"') # b/c of attr quote + # To avoid markdown <em> and <strong>: + .replace('*', g_escape_table['*']) + .replace('_', g_escape_table['_'])) + link = '<a href="%s">%s</a>' % (escaped_href, text[start:end]) + hash = md5(link).hexdigest() + link_from_hash[hash] = link + text = text[:start] + hash + text[end:] + for hash, link in link_from_hash.items(): + text = text.replace(hash, link) + return text + + def _unescape_special_chars(self, text): + # Swap back in all the special characters we've hidden. + for ch, hash in g_escape_table.items(): + text = text.replace(hash, ch) + return text + + def _outdent(self, text): + # Remove one level of line-leading tabs or spaces + return self._outdent_re.sub('', text) + + +class MarkdownWithExtras(Markdown): + """A markdowner class that enables most extras: + + - footnotes + - code-color (only has effect if 'pygments' Python module on path) + + These are not included: + - pyshell (specific to Python-related documenting) + - code-friendly (because it *disables* part of the syntax) + - link-patterns (because you need to specify some actual + link-patterns anyway) + """ + extras = ["footnotes", "code-color"] + + +#---- internal support functions + +# From http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52549 +def _curry(*args, **kwargs): + function, args = args[0], args[1:] + def result(*rest, **kwrest): + combined = kwargs.copy() + combined.update(kwrest) + return function(*args + rest, **combined) + return result + +# Recipe: regex_from_encoded_pattern (1.0) +def _regex_from_encoded_pattern(s): + """'foo' -> re.compile(re.escape('foo')) + '/foo/' -> re.compile('foo') + '/foo/i' 
-> re.compile('foo', re.I) + """ + if s.startswith('/') and s.rfind('/') != 0: + # Parse it: /PATTERN/FLAGS + idx = s.rfind('/') + pattern, flags_str = s[1:idx], s[idx+1:] + flag_from_char = { + "i": re.IGNORECASE, + "l": re.LOCALE, + "s": re.DOTALL, + "m": re.MULTILINE, + "u": re.UNICODE, + } + flags = 0 + for char in flags_str: + try: + flags |= flag_from_char[char] + except KeyError: + raise ValueError("unsupported regex flag: '%s' in '%s' " + "(must be one of '%s')" + % (char, s, ''.join(flag_from_char.keys()))) + return re.compile(s[1:idx], flags) + else: # not an encoded regex + return re.compile(re.escape(s)) + +# Recipe: dedent (0.1.2) +def _dedentlines(lines, tabsize=8, skip_first_line=False): + """_dedentlines(lines, tabsize=8, skip_first_line=False) -> dedented lines + + "lines" is a list of lines to dedent. + "tabsize" is the tab width to use for indent width calculations. + "skip_first_line" is a boolean indicating if the first line should + be skipped for calculating the indent width and for dedenting. + This is sometimes useful for docstrings and similar. + + Same as dedent() except operates on a sequence of lines. Note: the + lines list is modified **in-place**. 
+ """ + DEBUG = False + if DEBUG: + print "dedent: dedent(..., tabsize=%d, skip_first_line=%r)"\ + % (tabsize, skip_first_line) + indents = [] + margin = None + for i, line in enumerate(lines): + if i == 0 and skip_first_line: continue + indent = 0 + for ch in line: + if ch == ' ': + indent += 1 + elif ch == '\t': + indent += tabsize - (indent % tabsize) + elif ch in '\r\n': + continue # skip all-whitespace lines + else: + break + else: + continue # skip all-whitespace lines + if DEBUG: print "dedent: indent=%d: %r" % (indent, line) + if margin is None: + margin = indent + else: + margin = min(margin, indent) + if DEBUG: print "dedent: margin=%r" % margin + + if margin is not None and margin > 0: + for i, line in enumerate(lines): + if i == 0 and skip_first_line: continue + removed = 0 + for j, ch in enumerate(line): + if ch == ' ': + removed += 1 + elif ch == '\t': + removed += tabsize - (removed % tabsize) + elif ch in '\r\n': + if DEBUG: print "dedent: %r: EOL -> strip up to EOL" % line + lines[i] = lines[i][j:] + break + else: + raise ValueError("unexpected non-whitespace char %r in " + "line %r while removing %d-space margin" + % (ch, line, margin)) + if DEBUG: + print "dedent: %r: %r -> removed %d/%d"\ + % (line, ch, removed, margin) + if removed == margin: + lines[i] = lines[i][j+1:] + break + elif removed > margin: + lines[i] = ' '*(removed-margin) + lines[i][j+1:] + break + else: + if removed: + lines[i] = lines[i][removed:] + return lines + +def _dedent(text, tabsize=8, skip_first_line=False): + """_dedent(text, tabsize=8, skip_first_line=False) -> dedented text + + "text" is the text to dedent. + "tabsize" is the tab width to use for indent width calculations. + "skip_first_line" is a boolean indicating if the first line should + be skipped for calculating the indent width and for dedenting. + This is sometimes useful for docstrings and similar. 
+ + textwrap.dedent(s), but don't expand tabs to spaces + """ + lines = text.splitlines(1) + _dedentlines(lines, tabsize=tabsize, skip_first_line=skip_first_line) + return ''.join(lines) + + +class _memoized(object): + """Decorator that caches a function's return value each time it is called. + If called later with the same arguments, the cached value is returned, and + not re-evaluated. + + http://wiki.python.org/moin/PythonDecoratorLibrary + """ + def __init__(self, func): + self.func = func + self.cache = {} + def __call__(self, *args): + try: + return self.cache[args] + except KeyError: + self.cache[args] = value = self.func(*args) + return value + except TypeError: + # uncachable -- for instance, passing a list as an argument. + # Better to not cache than to blow up entirely. + return self.func(*args) + def __repr__(self): + """Return the function's docstring.""" + return self.func.__doc__ + + +def _xml_oneliner_re_from_tab_width(tab_width): + """Standalone XML processing instruction regex.""" + return re.compile(r""" + (?: + (?<=\n\n) # Starting after a blank line + | # or + \A\n? # the beginning of the doc + ) + ( # save in $1 + [ ]{0,%d} + (?: + <\?\w+\b\s+.*?\?> # XML processing instruction + | + <\w+:\w+\b\s+.*?/> # namespaced single tag + ) + [ \t]* + (?=\n{2,}|\Z) # followed by a blank line or end of document + ) + """ % (tab_width - 1), re.X) +_xml_oneliner_re_from_tab_width = _memoized(_xml_oneliner_re_from_tab_width) + +def _hr_tag_re_from_tab_width(tab_width): + return re.compile(r""" + (?: + (?<=\n\n) # Starting after a blank line + | # or + \A\n? # the beginning of the doc + ) + ( # save in \1 + [ ]{0,%d} + <(hr) # start tag = \2 + \b # word break + ([^<>])*? 
# + /?> # the matching end tag + [ \t]* + (?=\n{2,}|\Z) # followed by a blank line or end of document + ) + """ % (tab_width - 1), re.X) +_hr_tag_re_from_tab_width = _memoized(_hr_tag_re_from_tab_width) + + +def _xml_encode_email_char_at_random(ch): + r = random() + # Roughly 10% raw, 45% hex, 45% dec. + # '@' *must* be encoded. I [John Gruber] insist. + # Issue 26: '_' must be encoded. + if r > 0.9 and ch not in "@_": + return ch + elif r < 0.45: + # The [1:] is to drop leading '0': 0x63 -> x63 + return '&#%s;' % hex(ord(ch))[1:] + else: + return '&#%s;' % ord(ch) + +def _hash_text(text): + return 'md5:'+md5(text.encode("utf-8")).hexdigest() + + +#---- mainline + +class _NoReflowFormatter(optparse.IndentedHelpFormatter): + """An optparse formatter that does NOT reflow the description.""" + def format_description(self, description): + return description or "" + +def _test(): + import doctest + doctest.testmod() + +def main(argv=None): + if argv is None: + argv = sys.argv + if not logging.root.handlers: + logging.basicConfig() + + usage = "usage: %prog [PATHS...]" + version = "%prog "+__version__ + parser = optparse.OptionParser(prog="markdown2", usage=usage, + version=version, description=cmdln_desc, + formatter=_NoReflowFormatter()) + parser.add_option("-v", "--verbose", dest="log_level", + action="store_const", const=logging.DEBUG, + help="more verbose output") + parser.add_option("--encoding", + help="specify encoding of text content") + parser.add_option("--html4tags", action="store_true", default=False, + help="use HTML 4 style for empty element tags") + parser.add_option("-s", "--safe", metavar="MODE", dest="safe_mode", + help="sanitize literal HTML: 'escape' escapes " + "HTML meta chars, 'replace' replaces with an " + "[HTML_REMOVED] note") + parser.add_option("-x", "--extras", action="append", + help="Turn on specific extra features (not part of " + "the core Markdown spec). 
Supported values: " + "'code-friendly' disables _/__ for emphasis; " + "'code-color' adds code-block syntax coloring; " + "'link-patterns' adds auto-linking based on patterns; " + "'footnotes' adds the footnotes syntax;" + "'xml' passes one-liner processing instructions and namespaced XML tags;" + "'pyshell' to put unindented Python interactive shell sessions in a <code> block.") + parser.add_option("--use-file-vars", + help="Look for and use Emacs-style 'markdown-extras' " + "file var to turn on extras. See " + "<http://code.google.com/p/python-markdown2/wiki/Extras>.") + parser.add_option("--link-patterns-file", + help="path to a link pattern file") + parser.add_option("--self-test", action="store_true", + help="run internal self-tests (some doctests)") + parser.add_option("--compare", action="store_true", + help="run against Markdown.pl as well (for testing)") + parser.set_defaults(log_level=logging.INFO, compare=False, + encoding="utf-8", safe_mode=None, use_file_vars=False) + opts, paths = parser.parse_args() + log.setLevel(opts.log_level) + + if opts.self_test: + return _test() + + if opts.extras: + extras = {} + for s in opts.extras: + splitter = re.compile("[,;: ]+") + for e in splitter.split(s): + if '=' in e: + ename, earg = e.split('=', 1) + try: + earg = int(earg) + except ValueError: + pass + else: + ename, earg = e, None + extras[ename] = earg + else: + extras = None + + if opts.link_patterns_file: + link_patterns = [] + f = open(opts.link_patterns_file) + try: + for i, line in enumerate(f.readlines()): + if not line.strip(): continue + if line.lstrip().startswith("#"): continue + try: + pat, href = line.rstrip().rsplit(None, 1) + except ValueError: + raise MarkdownError("%s:%d: invalid link pattern line: %r" + % (opts.link_patterns_file, i+1, line)) + link_patterns.append( + (_regex_from_encoded_pattern(pat), href)) + finally: + f.close() + else: + link_patterns = None + + from os.path import join, dirname, abspath, exists + markdown_pl = 
join(dirname(dirname(abspath(__file__))), "test", + "Markdown.pl") + for path in paths: + if opts.compare: + print "==== Markdown.pl ====" + perl_cmd = 'perl %s "%s"' % (markdown_pl, path) + o = os.popen(perl_cmd) + perl_html = o.read() + o.close() + sys.stdout.write(perl_html) + print "==== markdown2.py ====" + html = markdown_path(path, encoding=opts.encoding, + html4tags=opts.html4tags, + safe_mode=opts.safe_mode, + extras=extras, link_patterns=link_patterns, + use_file_vars=opts.use_file_vars) + sys.stdout.write( + html.encode(sys.stdout.encoding or "utf-8", 'xmlcharrefreplace')) + if opts.compare: + test_dir = join(dirname(dirname(abspath(__file__))), "test") + if exists(join(test_dir, "test_markdown2.py")): + sys.path.insert(0, test_dir) + from test_markdown2 import norm_html_from_html + norm_html = norm_html_from_html(html) + norm_perl_html = norm_html_from_html(perl_html) + else: + norm_html = html + norm_perl_html = perl_html + print "==== match? %r ====" % (norm_perl_html == norm_html) + + +if __name__ == "__main__": + sys.exit( main(sys.argv) ) + diff --git a/lib/pagination/__init__.py b/lib/pagination/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/lib/pagination/__init__.py @@ -0,0 +1 @@ + diff --git a/lib/pagination/middleware.py b/lib/pagination/middleware.py new file mode 100644 index 0000000..cf9f9cb --- /dev/null +++ b/lib/pagination/middleware.py @@ -0,0 +1,25 @@ +class PaginationMiddleware(object): + """ + Inserts a variable representing the current page onto the request object if + it exists in either **GET** or **POST** portions of the request. 
+ """ + def process_request(self, request): + try: + request.page = int(request.REQUEST['page']) + except (KeyError, ValueError): + request.page = 1 + + def process_view(self, request, view_func, view_args, view_kwargs): + if 'paginate' in view_kwargs: + del view_kwargs['paginate'] + + if 'page_url' in view_kwargs: + request.page_url = view_kwargs['page_url'] + del view_kwargs['page_url'] + + if 'page' in view_kwargs: + request.page = int(view_kwargs['page']) + del view_kwargs['page'] + else: + request.page = 1 + diff --git a/lib/pagination/models.py b/lib/pagination/models.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/lib/pagination/models.py @@ -0,0 +1 @@ + diff --git a/lib/pagination/templates/pagination/pagination.html b/lib/pagination/templates/pagination/pagination.html new file mode 100644 index 0000000..2ecffb4 --- /dev/null +++ b/lib/pagination/templates/pagination/pagination.html @@ -0,0 +1,30 @@ +{% load pagination_tags %} +{% if is_paginated %} +<div class="pagination"> + {% if page_obj.has_previous %} + <a href="{% if use_page_path %}{% page_path prev_page %}{% else %}?page={{ page_obj.previous_page_number }}{{ getvars }}{% endif %}" class="prev">‹‹ previous</a> + {% else %} + <span class="disabled prev"></span> + {% endif %} + {% for page in pages %} + {% if page %} + {% ifequal page page_obj.number %} + <span class="current page">{{ page }}</span> + {% else %} + {% if use_page_path %} + <a href="{% page_path page %}" class="page">{{ page }}</a> + {% else %} + <a href="?page={{ page }}{{ getvars }}" class="page">{{ page }}</a> + {% endif %} + {% endifequal %} + {% else %} + ... 
class PagePathNode(template.Node):
    """Renders the path for a given page number.

    ``page_var`` names a template variable holding the page number; the
    request must carry a ``page_url`` printf-style pattern (set by
    ``PaginationMiddleware``) into which the number is substituted.
    """
    def __init__(self, page_var):
        self.page_var = page_var

    def render(self, context):
        # Fix: the original used a bare `except:`, which swallowed *every*
        # error (including SystemExit/KeyboardInterrupt).  Catch only the
        # expected failures: missing context entries, a non-numeric page,
        # or a request without a `page_url` attribute.  Also dropped the
        # dead `path = context['request'].path` local, which was never used.
        try:
            page = int(context[self.page_var])
            page_url = context['request'].page_url
        except (KeyError, AttributeError, TypeError, ValueError):
            return ''
        return page_url % page
+ """ + split = token.split_contents() + if len(split) == 2: + return AutoPaginateNode(split[1]) + elif len(split) == 3: + try: + paginate_by = int(split[2]) + except ValueError: + raise template.TemplateSyntaxError(u'Got %s, but expected integer.' % split[2]) + return AutoPaginateNode(split[1], paginate_by=paginate_by) + elif len(split) == 4: + try: + paginate_by = int(split[2]) + except ValueError: + raise template.TemplateSyntaxError(u'Got %s, but expected integer.' % split[2]) + try: + orphans = int(split[3]) + except ValueError: + raise template.TemplateSyntaxError(u'Got %s, but expected integer.' % split[3]) + return AutoPaginateNode(split[1], paginate_by=paginate_by, orphans=orphans) + else: + raise template.TemplateSyntaxError('%r tag takes one required argument and one optional argument' % split[0]) + +class AutoPaginateNode(template.Node): + """ + Emits the required objects to allow for Digg-style pagination. + + First, it looks in the current context for the variable specified. This + should be either a QuerySet or a list. + + 1. If it is a QuerySet, this ``AutoPaginateNode`` will emit a + ``QuerySetPaginator`` and the current page object into the context names + ``paginator`` and ``page_obj``, respectively. + + 2. If it is a list, this ``AutoPaginateNode`` will emit a simple + ``Paginator`` and the current page object into the context names + ``paginator`` and ``page_obj``, respectively. + + It will then replace the variable specified with only the objects for the + current page. + + .. note:: + + It is recommended to use *{% paginate %}* after using the autopaginate + tag. If you choose not to use *{% paginate %}*, make sure to display the + list of availabale pages, or else the application may seem to be buggy. 
+ """ + def __init__(self, queryset_var, paginate_by=DEFAULT_PAGINATION, orphans=DEFAULT_ORPHANS): + self.queryset_var = template.Variable(queryset_var) + self.paginate_by = paginate_by + self.orphans = orphans + + def render(self, context): + key = self.queryset_var.var + value = self.queryset_var.resolve(context) + if issubclass(value.__class__, QuerySet): + model = value.model + paginator_class = QuerySetPaginator + else: + value = list(value) + try: + model = value[0].__class__ + except IndexError: + return u'' + paginator_class = Paginator + paginator = paginator_class(value, self.paginate_by, self.orphans) + try: + page_obj = paginator.page(context['request'].page) + except InvalidPage: + context[key] = [] + context['invalid_page'] = True + return u'' + context[key] = page_obj.object_list + context['paginator'] = paginator + context['page_obj'] = page_obj + if hasattr(context['request'], 'page_url'): + context['use_page_path'] = True + return u'' + +def paginate(context, window=DEFAULT_WINDOW): + """ + Renders the ``pagination/pagination.html`` template, resulting in a + Digg-like display of the available pages, given the current page. If there + are too many pages to be displayed before and after the current page, then + elipses will be used to indicate the undisplayed gap between page numbers. + + Requires one argument, ``context``, which should be a dictionary-like data + structure and must contain the following keys: + + ``paginator`` + A ``Paginator`` or ``QuerySetPaginator`` object. + + ``page_obj`` + This should be the result of calling the page method on the + aforementioned ``Paginator`` or ``QuerySetPaginator`` object, given + the current page. + + This same ``context`` dictionary-like data structure may also include: + + ``getvars`` + A dictionary of all of the **GET** parameters in the current request. + This is useful to maintain certain types of state, even when requesting + a different page. 
+ """ + try: + paginator = context['paginator'] + page_obj = context['page_obj'] + page_range = paginator.page_range + # First and last are simply the first *n* pages and the last *n* pages, + # where *n* is the current window size. + first = set(page_range[:window]) + last = set(page_range[-window:]) + # Now we look around our current page, making sure that we don't wrap + # around. + current_start = page_obj.number-1-window + if current_start < 0: + current_start = 0 + current_end = page_obj.number-1+window + if current_end < 0: + current_end = 0 + current = set(page_range[current_start:current_end]) + pages = [] + # If there's no overlap between the first set of pages and the current + # set of pages, then there's a possible need for elusion. + if len(first.intersection(current)) == 0: + first_list = sorted(list(first)) + second_list = sorted(list(current)) + pages.extend(first_list) + diff = second_list[0] - first_list[-1] + # If there is a gap of two, between the last page of the first + # set and the first page of the current set, then we're missing a + # page. + if diff == 2: + pages.append(second_list[0] - 1) + # If the difference is just one, then there's nothing to be done, + # as the pages need no elusion and are correct. + elif diff == 1: + pass + # Otherwise, there's a bigger gap which needs to be signaled for + # elusion, by pushing a None value to the page list. + else: + pages.append(None) + pages.extend(second_list) + else: + pages.extend(sorted(list(first.union(current)))) + # If there's no overlap between the current set of pages and the last + # set of pages, then there's a possible need for elusion. + if len(current.intersection(last)) == 0: + second_list = sorted(list(last)) + diff = second_list[0] - pages[-1] + # If there is a gap of two, between the last page of the current + # set and the first page of the last set, then we're missing a + # page. 
+ if diff == 2: + pages.append(second_list[0] - 1) + # If the difference is just one, then there's nothing to be done, + # as the pages need no elusion and are correct. + elif diff == 1: + pass + # Otherwise, there's a bigger gap which needs to be signaled for + # elusion, by pushing a None value to the page list. + else: + pages.append(None) + pages.extend(second_list) + else: + pages.extend(sorted(list(last.difference(current)))) + to_return = { + 'pages': pages, + 'page_obj': page_obj, + 'paginator': paginator, + 'is_paginated': paginator.count > paginator.per_page, + } + if 'request' in context: + if 'use_page_path' in context: + to_return['request'] = context['request'] + to_return['use_page_path'] = context['use_page_path'] + to_return['prev_page'] = page_obj.previous_page_number() + to_return['next_page'] = page_obj.next_page_number() + + getvars = context['request'].GET.copy() + if 'page' in getvars: + del getvars['page'] + if len(getvars.keys()) > 0: + to_return['getvars'] = "&%s" % getvars.urlencode() + else: + to_return['getvars'] = '' + return to_return + except KeyError: + return {} +register.inclusion_tag('pagination/pagination.html', takes_context=True)(paginate) +register.tag('autopaginate', do_autopaginate) diff --git a/lib/pagination/tests.py b/lib/pagination/tests.py new file mode 100644 index 0000000..837e55c --- /dev/null +++ b/lib/pagination/tests.py @@ -0,0 +1,52 @@ +""" +>>> from django.core.paginator import Paginator +>>> from pagination.templatetags.pagination_tags import paginate +>>> from django.template import Template, Context + +>>> p = Paginator(range(15), 2) +>>> paginate({'paginator': p, 'page_obj': p.page(1)})['pages'] +[1, 2, 3, 4, 5, 6, 7, 8] + +>>> p = Paginator(range(17), 2) +>>> paginate({'paginator': p, 'page_obj': p.page(1)})['pages'] +[1, 2, 3, 4, 5, 6, 7, 8, 9] + +>>> p = Paginator(range(19), 2) +>>> paginate({'paginator': p, 'page_obj': p.page(1)})['pages'] +[1, 2, 3, 4, None, 7, 8, 9, 10] + +>>> p = Paginator(range(21), 
2) +>>> paginate({'paginator': p, 'page_obj': p.page(1)})['pages'] +[1, 2, 3, 4, None, 8, 9, 10, 11] + +# Testing orphans +>>> p = Paginator(range(5), 2, 1) +>>> paginate({'paginator': p, 'page_obj': p.page(1)})['pages'] +[1, 2] + +>>> p = Paginator(range(21), 2, 1) +>>> paginate({'paginator': p, 'page_obj': p.page(1)})['pages'] +[1, 2, 3, 4, None, 7, 8, 9, 10] + +>>> t = Template("{% load pagination_tags %}{% autopaginate var 2 %}{% paginate %}") + +# WARNING: Please, please nobody read this portion of the code! +>>> class GetProxy(object): +... def __iter__(self): yield self.__dict__.__iter__ +... def copy(self): return self +... def urlencode(self): return u'' +... def keys(self): return [] +>>> class RequestProxy(object): +... page = 1 +... GET = GetProxy() +>>> +# ENDWARNING + +>>> t.render(Context({'var': range(21), 'request': RequestProxy()})) +u'\\n<div class="pagination">... +>>> +>>> t = Template("{% load pagination_tags %}{% autopaginate var %}{% paginate %}") +>>> t.render(Context({'var': range(21), 'request': RequestProxy()})) +u'\\n<div class="pagination">... +>>> +"""
\ No newline at end of file diff --git a/lib/pydelicious.py b/lib/pydelicious.py new file mode 100644 index 0000000..dd33788 --- /dev/null +++ b/lib/pydelicious.py @@ -0,0 +1,817 @@ +"""Library to access del.icio.us data via Python. + +:examples: + + Using the API class directly: + + >>> a = pydelicious.apiNew('user', 'passwd') + >>> # or: + >>> a = DeliciousAPI('user', 'passwd') + >>> a.tags_get() # Same as: + >>> a.request('tags/get', ) + + Or by calling one of the methods on the module: + + - add(user, passwd, url, description, tags = "", extended = "", dt = "", replace="no") + - get(user, passwd, tag="", dt="", count = 0) + - get_all(user, passwd, tag = "") + - delete(user, passwd, url) + - rename_tag(user, passwd, oldtag, newtag) + - get_tags(user, passwd) + + >>> a = apiNew(user, passwd) + >>> a.posts_add(url="http://my.com/", desciption="my.com", extended="the url is my.moc", tags="my com") + True + >>> len(a.posts_all()) + 1 + >>> get_all(user, passwd) + 1 + + This are short functions for getrss calls. + + >>> rss_ + +def get_userposts(user): +def get_tagposts(tag): +def get_urlposts(url): +def get_popular(tag = ""): + + >>> json_posts() + >>> json_tags() + >>> json_network() + >>> json_fans() + +:License: pydelicious is released under the BSD license. See 'license.txt' + for more informations. + +:todo, bvb: + - Rewriting comments to english. More documentation, examples. + - Added JSON-like return values for XML data (del.icio.us also serves some JSON...) + - better error/exception classes and handling, work in progress. + +:todo: + - Source code SHOULD BE ASCII! + - More tests. + - handling different encodings, what, how? + >>> pydelicious.getrss(tag="t[a]g") + url: http://del.icio.us/rss/tag/t[a]g + - Parse datetimes in XML. + - Test RSS functionality? HTML scraping doesn't work yet? + - API functions need required argument checks. + - interesting functionality in other libraries (ruby, java, perl, etc)? + - what is pydelicious used for? 
+ - license, readme docs via setup.py verdelen? + - automatic releas build + +:done: + * Refactored the API class, much cleaner now and functions dlcs_api_request, dlcs_parse_xml are available for who wants them. +""" +import sys +import os +import time +import datetime +import md5, httplib +import urllib, urllib2, time +from StringIO import StringIO + +try: + from elementtree.ElementTree import parse as parse_xml +except ImportError: + from xml.etree.ElementTree import parse as parse_xml + +import feedparser + + +### Static config + +__version__ = '0.5.0' +__author__ = 'Frank Timmermann <regenkind_at_gmx_dot_de>' # GP: does not respond to emails +__contributors__ = [ + 'Greg Pinero', + 'Berend van Berkum <berend+pydelicious@dotmpe.com>'] +__url__ = 'http://code.google.com/p/pydelicious/' +__author_email__ = "" +# Old URL: 'http://deliciouspython.python-hosting.com/' + +__description__ = '''pydelicious.py allows you to access the web service of del.icio.us via it's API through python.''' +__long_description__ = '''the goal is to design an easy to use and fully functional python interface to del.icio.us. ''' + +DLCS_OK_MESSAGES = ('done', 'ok') # Known text values of positive del.icio.us <result> answers +DLCS_WAIT_TIME = 4 +DLCS_REQUEST_TIMEOUT = 444 # Seconds before socket triggers timeout +#DLCS_API_REALM = 'del.icio.us API' +DLCS_API_HOST = 'https://api.del.icio.us' +DLCS_API_PATH = 'v1' +DLCS_API = "%s/%s" % (DLCS_API_HOST, DLCS_API_PATH) +DLCS_RSS = 'http://del.icio.us/rss/' + +ISO_8601_DATETIME = '%Y-%m-%dT%H:%M:%SZ' + +USER_AGENT = 'pydelicious.py/%s %s' % (__version__, __url__) + +DEBUG = 0 +if 'DLCS_DEBUG' in os.environ: + DEBUG = int(os.environ['DLCS_DEBUG']) + + +# Taken from FeedParser.py +# timeoutsocket allows feedparser to time out rather than hang forever on ultra-slow servers. +# Python 2.3 now has this functionality available in the standard socket library, so under +# 2.3 you don't need to install anything. 
class _Waiter:
    """Enforce a minimum interval between successive calls.

    Attributes:
      wait: minimum number of seconds required between calls
      waited: how many calls have been throttled so far
      lastcall: (projected) time of the most recent call

    ``pydelicious.Waiter`` is the shared instance created at import time.
    """
    def __init__(self, wait):
        self.wait = wait
        self.waited = 0
        self.lastcall = 0

    def __call__(self):
        now = time.time()
        pause = self.wait
        elapsed = now - self.lastcall
        if elapsed < pause:
            pause = pause - elapsed
            if DEBUG > 0:
                # Same bytes as the original `print >>sys.stderr, ...`.
                sys.stderr.write("Waiting %s seconds.\n" % pause)
            time.sleep(pause)
            self.waited += 1
            self.lastcall = now + pause
        else:
            self.lastcall = now

# Shared, module-level rate limiter for del.icio.us API calls.
Waiter = _Waiter(DLCS_WAIT_TIME)

class PyDeliciousException(Exception):
    '''Std. pydelicious error'''
    pass

class DeliciousError(Exception):
    """Raised when the server responds with a negative answer"""


class DefaultErrorHandler(urllib2.HTTPDefaultErrorHandler):
    '''xxx, bvb: Where is this used? should it be registered somewhere with urllib2?

    Handles HTTP Error, currently only 503.
    '''
    def http_error_503(self, req, fp, code, msg, headers):
        # NOTE(review): `throttled_message` is not defined anywhere in the
        # visible portion of this module -- confirm it exists at module
        # scope, otherwise a 503 raises NameError here instead of HTTPError.
        raise urllib2.HTTPError(req, code, throttled_message, headers, fp)
class posts(list):
    """A list of post dicts with column-style attribute access:
    ``ps.href`` returns the list of every member's 'href' value.
    """
    def __init__(self, *args):
        for item in args:
            self.append(item)

    def __getattr__(self, attr):
        # Fix: the original used a bare `except:` and then discarded the
        # result of object.__getattribute__.  Translate a missing key (or a
        # non-subscriptable member) into the normal AttributeError contract,
        # which is what the original ultimately produced anyway.
        try:
            return [p[attr] for p in self]
        except (KeyError, TypeError):
            raise AttributeError(attr)

### Utility functions

def str2uni(s):
    # str or unicode in -> unicode out (Python 2 only: uses unichr).
    return ("".join([unichr(ord(i)) for i in s]))

def str2utf8(s):
    # str or unicode in -> UTF-8 encoded str out (Python 2 only).
    return ("".join([unichr(ord(i)).encode("utf-8") for i in s]))

def str2quote(s):
    # URL-quote `s` after forcing it through UTF-8 (Python 2 only).
    return urllib.quote_plus("".join([unichr(ord(i)).encode("utf-8") for i in s]))

def dict0(d):
    """Return a copy of `d` with empty-string values removed.

    {'a':'a', 'b':'', 'c':'c'} => {'a': 'a', 'c': 'c'}
    """
    dd = dict()
    for k in d:
        if d[k] != "":
            dd[k] = d[k]
    return dd

def delicious_datetime(str):
    """Parse an ISO 8601 formatted string to a Python datetime."""
    return datetime.datetime(*time.strptime(str, ISO_8601_DATETIME)[0:6])
+ """ + request = urllib2.Request(url, headers={'User-Agent':user_agent}) + + # Remember last error + e = None + + # Repeat request on time-out errors + tries = retry; + while tries: + try: + return urllib2.urlopen(request) + + except urllib2.HTTPError, e: # protocol errors, + raise PyDeliciousException, "%s" % e + + except urllib2.URLError, e: + # xxx: Ugly check for time-out errors + #if len(e)>0 and 'timed out' in arg[0]: + print >> sys.stderr, "%s, %s tries left." % (e, tries) + Waiter() + tries = tries - 1 + #else: + # tries = None + + # Give up + raise PyDeliciousException, \ + "Unable to retrieve data at '%s', %s" % (url, e) + +def http_auth_request(url, host, user, passwd, user_agent=USER_AGENT): + """Call an HTTP server with authorization credentials using urllib2. + """ + if DEBUG: httplib.HTTPConnection.debuglevel = 1 + + # Hook up handler/opener to urllib2 + password_manager = urllib2.HTTPPasswordMgrWithDefaultRealm() + password_manager.add_password(None, host, user, passwd) + auth_handler = urllib2.HTTPBasicAuthHandler(password_manager) + opener = urllib2.build_opener(auth_handler) + urllib2.install_opener(opener) + + return http_request(url, user_agent) + +def dlcs_api_request(path, params='', user='', passwd='', throttle=True): + """Retrieve/query a path within the del.icio.us API. + + This implements a minimum interval between calls to avoid + throttling. [#]_ Use param 'throttle' to turn this behaviour off. + + todo: back off on 503's (HTTPError, URLError? testing + + Returned XML does not always correspond with given del.icio.us examples + [#]_. + + .. 
def dlcs_parse_xml(data, split_tags=False):
    """Parse any del.icio.us XML document and return Python data structure.

    Recognizes all XML document formats as returned by the version 1 API and
    translates to a JSON-like data structure (dicts 'n lists).

    `data` may be a string or a file-like object (anything with .read()).
    NOTE(review): `split_tags` is accepted but never used in this function --
    confirm whether it was meant to split space-separated tag strings.

    Returned instance is always a dictionary. Examples::

        {'posts': [{'url':'...','hash':'...',},],}
        {'tags':['tag1', 'tag2',]}
        {'dates': [{'count':'...','date':'...'},], 'tag':'', 'user':'...'}
        {'result':(True, "done")}
        # etcetera.
    """

    if DEBUG>3: print >>sys.stderr, "dlcs_parse_xml: parsing from ", data

    # Accept raw strings by wrapping them in a file-like object.
    if not hasattr(data, 'read'):
        data = StringIO(data)

    doc = parse_xml(data)
    root = doc.getroot()
    # The root tag name decides which of the three shapes we parse.
    fmt = root.tag

    # Split up into three cases: Data, Result or Update
    if fmt in ('tags', 'posts', 'dates', 'bundles'):

        # Data: expect a list of data elements, 'resources'.
        # Use `fmt` (without last 's') to find data elements, elements
        # don't have contents, attributes contain all the data we need:
        # append to list
        elist = [el.attrib for el in doc.findall(fmt[:-1])]

        # Return list in dict, use tagname of rootnode as keyname.
        data = {fmt: elist}

        # Root element might have attributes too, append dict.
        data.update(root.attrib)

        return data

    elif fmt == 'result':

        # Result: answer to operations; the message may come either as a
        # 'code' attribute or as the element's text content.
        if root.attrib.has_key('code'):
            msg = root.attrib['code']
        else:
            msg = root.text

        # Return {'result':(True, msg)} for /known/ O.K. messages,
        # use (False, msg) otherwise
        v = msg in DLCS_OK_MESSAGES
        return {fmt: (v, msg)}

    elif fmt == 'update':

        # Update: single 'time' attribute, parsed to a struct_time.
        #return {fmt: root.attrib}
        return {fmt: {'time':time.strptime(root.attrib['time'], ISO_8601_DATETIME)}}

    else:
        raise PyDeliciousException, "Unknown XML document format '%s'" % fmt
e["title_detail"]['value'] + else: + description = '' + try: tags = e['categories'][0][1] + except: + try: tags = e["category"] + except: tags = "" + if e.has_key("modified"): + dt = e['modified'] + else: + dt = "" + if e.has_key("summary"): + extended = e['summary'] + elif e.has_key("summary_detail"): + e['summary_detail']["value"] + else: + extended = "" + if e.has_key("author"): + user = e['author'] + else: + user = "" + # time = dt ist weist auf ein problem hin + # die benennung der variablen ist nicht einheitlich + # api senden und + # xml bekommen sind zwei verschiedene schuhe :( + l.append(post(url = url, description = description, tags = tags, dt = dt, extended = extended, user = user)) + return l + + +### Main module class + +class DeliciousAPI: + """Class providing main interace to del.icio.us API. + + Methods ``request`` and ``request_raw`` represent the core. For all API + paths there are furthermore methods (e.g. posts_add for 'posts/all') with + an explicit declaration of the parameters and documentation. These all call + ``request`` and pass on extra keywords like ``_raw``. + """ + + def __init__(self, user, passwd, codec='iso-8859-1', api_request=dlcs_api_request, xml_parser=dlcs_parse_xml): + """Initialize access to the API with ``user`` and ``passwd``. + + ``codec`` sets the encoding of the arguments. + + The ``api_request`` and ``xml_parser`` parameters by default point to + functions within this package with standard implementations to + request and parse a resource. See ``dlcs_api_request()`` and + ``dlcs_parse_xml()``. Note that ``api_request`` should return a + file-like instance with an HTTPMessage instance under ``info()``, + see ``urllib2.openurl`` for more info. 
+ """ + assert user != "" + self.user = user + self.passwd = passwd + self.codec = codec + + # Implement communication to server and parsing of respons messages: + assert callable(api_request) + self._api_request = api_request + assert callable(xml_parser) + self._parse_response = xml_parser + + def _call_server(self, path, **params): + params = dict0(params) + for key in params: + params[key] = params[key].encode(self.codec) + + # see __init__ for _api_request() + return self._api_request(path, params, self.user, self.passwd) + + + ### Core functionality + + def request(self, path, _raw=False, **params): + """Calls a path in the API, parses the answer to a JSON-like structure by + default. Use with ``_raw=True`` or ``call request_raw()`` directly to + get the filehandler and process the response message manually. + + Calls to some paths will return a `result` message, i.e.:: + + <result code="..." /> + + or:: + + <result>...</result> + + These are all parsed to ``{'result':(Boolean, MessageString)}`` and this + method will raise ``DeliciousError`` on negative `result` answers. Using + ``_raw=True`` bypasses all parsing and will never raise ``DeliciousError``. + + See ``dlcs_parse_xml()`` and ``self.request_raw()``.""" + + # method _parse_response is bound in `__init__()`, `_call_server` + # uses `_api_request` also set in `__init__()` + if _raw: + # return answer + return self.request_raw(path, **params) + + else: + # get answer and parse + fl = self._call_server(path, **params) + rs = self._parse_response(fl) + + # Raise an error for negative 'result' answers + if type(rs) == dict and rs == 'result' and not rs['result'][0]: + errmsg = "" + if len(rs['result'])>0: + errmsg = rs['result'][1:] + raise DeliciousError, errmsg + + return rs + + def request_raw(self, path, **params): + """Calls the path in the API, returns the filehandle. Returned + file-like instances have an ``HTTPMessage`` instance with HTTP header + information available. 
Use ``filehandle.info()`` or refer to the + ``urllib2.openurl`` documentation. + """ + # see `request()` on how the response can be handled + return self._call_server(path, **params) + + ### Explicit declarations of API paths, their parameters and docs + + # Tags + def tags_get(self, **kwds): + """Returns a list of tags and the number of times it is used by the user. + :: + + <tags> + <tag tag="TagName" count="888"> + """ + return self.request("tags/get", **kwds) + + def tags_rename(self, old, new, **kwds): + """Rename an existing tag with a new tag name. Returns a `result` + message or raises an ``DeliciousError``. See ``self.request()``. + + &old (required) + Tag to rename. + &new (required) + New name. + """ + return self.request("tags/rename", old=old, new=new, **kwds) + + # Posts + def posts_update(self, **kwds): + """Returns the last update time for the user. Use this before calling + `posts_all` to see if the data has changed since the last fetch. + :: + + <update time="CCYY-MM-DDThh:mm:ssZ"> + """ + return self.request("posts/update", **kwds) + + def posts_dates(self, tag="", **kwds): + """Returns a list of dates with the number of posts at each date. + :: + + <dates> + <date date="CCYY-MM-DD" count="888"> + + &tag (optional). + Filter by this tag. + """ + return self.request("posts/dates", tag=tag, **kwds) + + def posts_get(self, tag="", dt="", url="", **kwds): + """Returns posts matching the arguments. If no date or url is given, + most recent date will be used. + :: + + <posts dt="CCYY-MM-DD" tag="..." user="..."> + <post ...> + + &tag (optional). + Filter by this tag. + &dt (optional). + Filter by this date (CCYY-MM-DDThh:mm:ssZ). + &url (optional). + Filter by this url. + """ + return self.request("posts/get", tag=tag, dt=dt, url=url, **kwds) + + def posts_recent(self, tag="", count="", **kwds): + """Returns a list of the most recent posts, filtered by argument. + :: + + <posts tag="..." user="..."> + <post ...> + + &tag (optional). 
+ Filter by this tag. + &count (optional). + Number of items to retrieve (Default:15, Maximum:100). + """ + return self.request("posts/recent", tag=tag, count=count, **kwds) + + def posts_all(self, tag="", **kwds): + """Returns all posts. Please use sparingly. Call the `posts_update` + method to see if you need to fetch this at all. + :: + + <posts tag="..." user="..." update="CCYY-MM-DDThh:mm:ssZ"> + <post ...> + + &tag (optional). + Filter by this tag. + """ + return self.request("posts/all", tag=tag, **kwds) + + def posts_add(self, url, description, extended="", tags="", dt="", + replace="no", shared="yes", **kwds): + """Add a post to del.icio.us. Returns a `result` message or raises an + ``DeliciousError``. See ``self.request()``. + + &url (required) + the url of the item. + &description (required) + the description of the item. + &extended (optional) + notes for the item. + &tags (optional) + tags for the item (space delimited). + &dt (optional) + datestamp of the item (format "CCYY-MM-DDThh:mm:ssZ"). + + Requires a LITERAL "T" and "Z" like in ISO8601 at http://www.cl.cam.ac.uk/~mgk25/iso-time.html for example: "1984-09-01T14:21:31Z" + &replace=no (optional) - don't replace post if given url has already been posted. + &shared=no (optional) - make the item private + """ + return self.request("posts/add", url=url, description=description, + extended=extended, tags=tags, dt=dt, + replace=replace, shared=shared, **kwds) + + def posts_delete(self, url, **kwds): + """Delete a post from del.icio.us. Returns a `result` message or + raises an ``DeliciousError``. See ``self.request()``. + + &url (required) + the url of the item. + """ + return self.request("posts/delete", url=url, **kwds) + + # Bundles + def bundles_all(self, **kwds): + """Retrieve user bundles from del.icio.us. + :: + + <bundles> + <bundel name="..." 
tags=..."> + """ + return self.request("tags/bundles/all", **kwds) + + def bundles_set(self, bundle, tags, **kwds): + """Assign a set of tags to a single bundle, wipes away previous + settings for bundle. Returns a `result` messages or raises an + ``DeliciousError``. See ``self.request()``. + + &bundle (required) + the bundle name. + &tags (required) + list of tags (space seperated). + """ + if type(tags)==list: + tags = " ".join(tags) + return self.request("tags/bundles/set", bundle=bundle, tags=tags, + **kwds) + + def bundles_delete(self, bundle, **kwds): + """Delete a bundle from del.icio.us. Returns a `result` message or + raises an ``DeliciousError``. See ``self.request()``. + + &bundle (required) + the bundle name. + """ + return self.request("tags/bundles/delete", bundle=bundle, **kwds) + + ### Utils + + # Lookup table for del.icio.us url-path to DeliciousAPI method. + paths = { + 'tags/get': tags_get, + 'tags/rename': tags_rename, + 'posts/update': posts_update, + 'posts/dates': posts_dates, + 'posts/get': posts_get, + 'posts/recent': posts_recent, + 'posts/all': posts_all, + 'posts/add': posts_add, + 'posts/delete': posts_delete, + 'tags/bundles/all': bundles_all, + 'tags/bundles/set': bundles_set, + 'tags/bundles/delete': bundles_delete, + } + + def get_url(self, url): + """Return the del.icio.us url at which the HTML page with posts for + ``url`` can be found. + """ + return "http://del.icio.us/url/?url=%s" % (url,) + + +### Convenience functions on this package + +def apiNew(user, passwd): + """creates a new DeliciousAPI object. 
+ requires user(name) and passwd + """ + return DeliciousAPI(user=user, passwd=passwd) + +def add(user, passwd, url, description, tags="", extended="", dt="", replace="no"): + return apiNew(user, passwd).posts_add(url=url, description=description, extended=extended, tags=tags, dt=dt, replace=replace) + +def get(user, passwd, tag="", dt="", count = 0): + posts = apiNew(user, passwd).posts_get(tag=tag,dt=dt) + if count != 0: posts = posts[0:count] + return posts + +def get_all(user, passwd, tag=""): + return apiNew(user, passwd).posts_all(tag=tag) + +def delete(user, passwd, url): + return apiNew(user, passwd).posts_delete(url=url) + +def rename_tag(user, passwd, oldtag, newtag): + return apiNew(user=user, passwd=passwd).tags_rename(old=oldtag, new=newtag) + +def get_tags(user, passwd): + return apiNew(user=user, passwd=passwd).tags_get() + + +### RSS functions bvb: still working...? +def getrss(tag="", popular=0, url='', user=""): + """get posts from del.icio.us via parsing RSS (bvb:or HTML) + + todo: not tested + + tag (opt) sort by tag + popular (opt) look for the popular stuff + user (opt) get the posts by a user, this striks popular + url (opt) get the posts by url + """ + return dlcs_rss_request(tag=tag, popular=popular, user=user, url=url) + +def get_userposts(user): + return getrss(user = user) + +def get_tagposts(tag): + return getrss(tag = tag) + +def get_urlposts(url): + return getrss(url = url) + +def get_popular(tag = ""): + return getrss(tag = tag, popular = 1) + + +### TODO: implement JSON fetching +def json_posts(user, count=15): + """http://del.icio.us/feeds/json/mpe + http://del.icio.us/feeds/json/mpe/art+history + count=### the number of posts you want to get (default is 15, maximum is 100) + raw a raw JSON object is returned, instead of an object named Delicious.posts + """ + +def json_tags(user, atleast, count, sort='alpha'): + """http://del.icio.us/feeds/json/tags/mpe + atleast=### include only tags for which there are at least ### number of 
posts + count=### include ### tags, counting down from the top + sort={alpha|count} construct the object with tags in alphabetic order (alpha), or by count of posts (count) + callback=NAME wrap the object definition in a function call NAME(...), thus invoking that function when the feed is executed + raw a pure JSON object is returned, instead of code that will construct an object named Delicious.tags + """ + +def json_network(user): + """http://del.icio.us/feeds/json/network/mpe + callback=NAME wrap the object definition in a function call NAME(...) + ?raw a raw JSON object is returned, instead of an object named Delicious.posts + """ + +def json_fans(user): + """http://del.icio.us/feeds/json/fans/mpe + callback=NAME wrap the object definition in a function call NAME(...) + ?raw a pure JSON object is returned, instead of an object named Delicious. + """ + diff --git a/lib/strutils.py b/lib/strutils.py new file mode 100644 index 0000000..368d3d8 --- /dev/null +++ b/lib/strutils.py @@ -0,0 +1,50 @@ + +# +# String/unicode conversion utils. +# + +def safestr(s): + """ + Safely corerce *anything* to a string. If the object can't be str'd, an + empty string will be returned. + + You can (and I do) use this for really crappy unicode handling, but it's + a bit like killing a mosquito with a bazooka. + """ + if s is None: + return "" + if isinstance(s, unicode): + return s.encode('ascii', 'xmlcharrefreplace') + else: + try: + return str(s) + except: + return "" + +def safeint(s): + """Like safestr(), but always returns an int. 
Returns 0 on failure.""" + try: + return int(safestr(s)) + except ValueError: + return 0 + + +def convertentity(m): + import htmlentitydefs + """Convert a HTML entity into normal string (ISO-8859-1)""" + if m.group(1)=='#': + try: + return chr(int(m.group(2))) + except ValueError: + return '&#%s;' % m.group(2) + try: + return htmlentitydefs.entitydefs[m.group(2)] + except KeyError: + return '&%s;' % m.group(2) + +def unquotehtml(s): + import re + """Convert a HTML quoted string into normal string (ISO-8859-1). + + Works with &#XX; and with > etc.""" + return re.sub(r'&(#?)(.+?);',convertentity,s) diff --git a/lib/templatetags/__init__.py b/lib/templatetags/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/lib/templatetags/__init__.py diff --git a/lib/templatetags/templatetags/__init__.py b/lib/templatetags/templatetags/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/lib/templatetags/templatetags/__init__.py diff --git a/lib/templatetags/templatetags/get_latest.py b/lib/templatetags/templatetags/get_latest.py new file mode 100644 index 0000000..6c9f9fa --- /dev/null +++ b/lib/templatetags/templatetags/get_latest.py @@ -0,0 +1 @@ +from django.template import Library, Node
from django.db.models import get_model
register = Library()
class LatestContentNode(Node):
    """Template node that stores the latest ``num`` objects of a model in context.

    ``model`` is an ``app_label.ModelName`` string, ``num`` the item count
    (arrives as a raw token string from the template parser), and ``varname``
    the context variable to populate.
    """
    def __init__(self, model, num, varname):
        # The parser hands us the count as a string; coerce it once here so
        # the QuerySet slice below gets a real integer bound.
        self.num, self.varname = int(num), varname
        self.model = get_model(*model.split('.'))
    def render(self, context):
        context[self.varname] = self.model._default_manager.all()[:self.num]
        return ''
def get_latest(parser, token):
    """Compile the ``{% get_latest app.Model num as varname %}`` tag.

    Example: ``{% get_latest blog.Entry 5 as latest_entries %}``.
    Raises ``TemplateSyntaxError`` on a malformed invocation.
    """
    # Imported locally: the module-level import only brings in Library and
    # Node, so without this the raises below would fail with NameError.
    from django.template import TemplateSyntaxError
    bits = token.contents.split()
    if len(bits) != 5:
        raise TemplateSyntaxError("get_latest tag takes exactly four arguments")
    if bits[3] != 'as':
        raise TemplateSyntaxError("third argument to get_latest tag must be 'as'")
    return LatestContentNode(bits[1], bits[2], bits[4])
get_latest = register.tag(get_latest)
\ No newline at end of file diff --git a/lib/templatetags/templatetags/get_latest_pub.py b/lib/templatetags/templatetags/get_latest_pub.py new file mode 100644 index 0000000..151befa --- /dev/null +++ b/lib/templatetags/templatetags/get_latest_pub.py @@ -0,0 +1 @@ +from django.template import Library, Node
from django.db.models import get_model
register = Library()
class LatestContentNode(Node):
    """Template node that stores the latest ``num`` *published* objects in context.

    Identical to the plain ``get_latest`` node except that only objects with
    ``status == 1`` (published) are returned. ``num`` arrives as a raw token
    string from the template parser.
    """
    def __init__(self, model, num, varname):
        # Coerce the count to int once so the QuerySet slice below is valid.
        self.num, self.varname = int(num), varname
        self.model = get_model(*model.split('.'))
    def render(self, context):
        context[self.varname] = self.model._default_manager.filter(status__exact=1)[:self.num]
        return ''
def get_latest_pub(parser, token):
    """Compile the ``{% get_latest_pub app.Model num as varname %}`` tag.

    Like ``get_latest`` but only published (``status == 1``) objects are
    placed in the context variable. Raises ``TemplateSyntaxError`` on a
    malformed invocation.
    """
    # Imported locally: the module-level import only brings in Library and
    # Node, so without this the raises below would fail with NameError.
    from django.template import TemplateSyntaxError
    bits = token.contents.split()
    if len(bits) != 5:
        raise TemplateSyntaxError("get_latest_pub tag takes exactly four arguments")
    if bits[3] != 'as':
        raise TemplateSyntaxError("third argument to get_latest_pub tag must be 'as'")
    return LatestContentNode(bits[1], bits[2], bits[4])
get_latest_pub = register.tag(get_latest_pub)
\ No newline at end of file diff --git a/lib/templatetags/templatetags/markdown.py b/lib/templatetags/templatetags/markdown.py new file mode 100644 index 0000000..dca51f2 --- /dev/null +++ b/lib/templatetags/templatetags/markdown.py @@ -0,0 +1,9 @@ +from django import template +import markdown2 as markdown + +register = template.Library() + +def do_markdown(text): + return markdown.markdown(text, safe_mode = False) + +register.filter('markdown', do_markdown)
\ No newline at end of file diff --git a/lib/templatetags/templatetags/slugify_under.py b/lib/templatetags/templatetags/slugify_under.py new file mode 100644 index 0000000..bbf01d2 --- /dev/null +++ b/lib/templatetags/templatetags/slugify_under.py @@ -0,0 +1,15 @@ +import re +from django import template +from django.utils.safestring import mark_safe +register = template.Library() + +@register.filter +def slugify_under(value): + """ + Normalizes string, converts to lowercase, removes non-alpha characters, + and converts spaces to hyphens. + """ + import unicodedata + value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore') + value = unicode(re.sub('[^\w\s-]', '', value).strip().lower()) + return mark_safe(re.sub('[-\s]+', '_', value)) diff --git a/lib/templatetags/templatetags/smartypants.py b/lib/templatetags/templatetags/smartypants.py new file mode 100644 index 0000000..07ddd03 --- /dev/null +++ b/lib/templatetags/templatetags/smartypants.py @@ -0,0 +1,878 @@ +r""" +============== +smartypants.py +============== + +---------------------------- +SmartyPants ported to Python +---------------------------- + +Ported by `Chad Miller`_ +Copyright (c) 2004 Chad Miller + +original `SmartyPants`_ by `John Gruber`_ +Copyright (c) 2003 John Gruber + + +Synopsis +======== + +A smart-quotes plugin for Pyblosxom_. + +The priginal "SmartyPants" is a free web publishing plug-in for Movable Type, +Blosxom, and BBEdit that easily translates plain ASCII punctuation characters +into "smart" typographic punctuation HTML entities. + +This software, *smartypants.py*, endeavours to be a functional port of +SmartyPants to Python, for use with Pyblosxom_. 
+ + +Description +=========== + +SmartyPants can perform the following transformations: + +- Straight quotes ( " and ' ) into "curly" quote HTML entities +- Backticks-style quotes (\`\`like this'') into "curly" quote HTML entities +- Dashes (``--`` and ``---``) into en- and em-dash entities +- Three consecutive dots (``...`` or ``. . .``) into an ellipsis entity + +This means you can write, edit, and save your posts using plain old +ASCII straight quotes, plain dashes, and plain dots, but your published +posts (and final HTML output) will appear with smart quotes, em-dashes, +and proper ellipses. + +SmartyPants does not modify characters within ``<pre>``, ``<code>``, ``<kbd>``, +``<math>`` or ``<script>`` tag blocks. Typically, these tags are used to +display text where smart quotes and other "smart punctuation" would not be +appropriate, such as source code or example markup. + + +Backslash Escapes +================= + +If you need to use literal straight quotes (or plain hyphens and +periods), SmartyPants accepts the following backslash escape sequences +to force non-smart punctuation. It does so by transforming the escape +sequence into a decimal-encoded HTML entity: + +(FIXME: table here.) + +.. comment It sucks that there's a disconnect between the visual layout and table markup when special characters are involved. +.. comment ====== ===== ========= +.. comment Escape Value Character +.. comment ====== ===== ========= +.. comment \\\\\\\\ \ \\\\ +.. comment \\\\" " " +.. comment \\\\' ' ' +.. comment \\\\. . . +.. comment \\\\- - \- +.. comment \\\\` ` \` +.. comment ====== ===== ========= + +This is useful, for example, when you want to use straight quotes as +foot and inch marks: 6'2" tall; a 17" iMac. + +Options +======= + +For Pyblosxom users, the ``smartypants_attributes`` attribute is where you +specify configuration options. + +Numeric values are the easiest way to configure SmartyPants' behavior: + +"0" + Suppress all transformations. (Do nothing.) 
+"1" + Performs default SmartyPants transformations: quotes (including + \`\`backticks'' -style), em-dashes, and ellipses. "``--``" (dash dash) + is used to signify an em-dash; there is no support for en-dashes. + +"2" + Same as smarty_pants="1", except that it uses the old-school typewriter + shorthand for dashes: "``--``" (dash dash) for en-dashes, "``---``" + (dash dash dash) + for em-dashes. + +"3" + Same as smarty_pants="2", but inverts the shorthand for dashes: + "``--``" (dash dash) for em-dashes, and "``---``" (dash dash dash) for + en-dashes. + +"-1" + Stupefy mode. Reverses the SmartyPants transformation process, turning + the HTML entities produced by SmartyPants into their ASCII equivalents. + E.g. "“" is turned into a simple double-quote ("), "—" is + turned into two dashes, etc. + + +The following single-character attribute values can be combined to toggle +individual transformations from within the smarty_pants attribute. For +example, to educate normal quotes and em-dashes, but not ellipses or +\`\`backticks'' -style quotes: + +``py['smartypants_attributes'] = "1"`` + +"q" + Educates normal quote characters: (") and ('). + +"b" + Educates \`\`backticks'' -style double quotes. + +"B" + Educates \`\`backticks'' -style double quotes and \`single' quotes. + +"d" + Educates em-dashes. + +"D" + Educates em-dashes and en-dashes, using old-school typewriter shorthand: + (dash dash) for en-dashes, (dash dash dash) for em-dashes. + +"i" + Educates em-dashes and en-dashes, using inverted old-school typewriter + shorthand: (dash dash) for em-dashes, (dash dash dash) for en-dashes. + +"e" + Educates ellipses. + +"w" + Translates any instance of ``"`` into a normal double-quote character. + This should be of no interest to most people, but of particular interest + to anyone who writes their posts using Dreamweaver, as Dreamweaver + inexplicably uses this entity to represent a literal double-quote + character. 
SmartyPants only educates normal quotes, not entities (because + ordinarily, entities are used for the explicit purpose of representing the + specific character they represent). The "w" option must be used in + conjunction with one (or both) of the other quote options ("q" or "b"). + Thus, if you wish to apply all SmartyPants transformations (quotes, en- + and em-dashes, and ellipses) and also translate ``"`` entities into + regular quotes so SmartyPants can educate them, you should pass the + following to the smarty_pants attribute: + +The ``smartypants_forbidden_flavours`` list contains pyblosxom flavours for +which no Smarty Pants rendering will occur. + + +Caveats +======= + +Why You Might Not Want to Use Smart Quotes in Your Weblog +--------------------------------------------------------- + +For one thing, you might not care. + +Most normal, mentally stable individuals do not take notice of proper +typographic punctuation. Many design and typography nerds, however, break +out in a nasty rash when they encounter, say, a restaurant sign that uses +a straight apostrophe to spell "Joe's". + +If you're the sort of person who just doesn't care, you might well want to +continue not caring. Using straight quotes -- and sticking to the 7-bit +ASCII character set in general -- is certainly a simpler way to live. + +Even if you I *do* care about accurate typography, you still might want to +think twice before educating the quote characters in your weblog. One side +effect of publishing curly quote HTML entities is that it makes your +weblog a bit harder for others to quote from using copy-and-paste. What +happens is that when someone copies text from your blog, the copied text +contains the 8-bit curly quote characters (as well as the 8-bit characters +for em-dashes and ellipses, if you use these options). These characters +are not standard across different text encoding methods, which is why they +need to be encoded as HTML entities. 
+ +People copying text from your weblog, however, may not notice that you're +using curly quotes, and they'll go ahead and paste the unencoded 8-bit +characters copied from their browser into an email message or their own +weblog. When pasted as raw "smart quotes", these characters are likely to +get mangled beyond recognition. + +That said, my own opinion is that any decent text editor or email client +makes it easy to stupefy smart quote characters into their 7-bit +equivalents, and I don't consider it my problem if you're using an +indecent text editor or email client. + + +Algorithmic Shortcomings +------------------------ + +One situation in which quotes will get curled the wrong way is when +apostrophes are used at the start of leading contractions. For example: + +``'Twas the night before Christmas.`` + +In the case above, SmartyPants will turn the apostrophe into an opening +single-quote, when in fact it should be a closing one. I don't think +this problem can be solved in the general case -- every word processor +I've tried gets this wrong as well. In such cases, it's best to use the +proper HTML entity for closing single-quotes (``’``) by hand. + + +Bugs +==== + +To file bug reports or feature requests (other than topics listed in the +Caveats section above) please send email to: mailto:smartypantspy@chad.org + +If the bug involves quotes being curled the wrong way, please send example +text to illustrate. + +To Do list +---------- + +- Provide a function for use within templates to quote anything at all. + + +Version History +=============== + +1.5_1.5: Sat, 13 Aug 2005 15:50:24 -0400 + - Fix bogus magical quotation when there is no hint that the + user wants it, e.g., in "21st century". Thanks to Nathan Hamblen. + - Be smarter about quotes before terminating numbers in an en-dash'ed + range. + +1.5_1.4: Thu, 10 Feb 2005 20:24:36 -0500 + - Fix a date-processing bug, as reported by jacob childress. + - Begin a test-suite for ensuring correct output. 
+ - Removed import of "string", since I didn't really need it. + (This was my first every Python program. Sue me!) + +1.5_1.3: Wed, 15 Sep 2004 18:25:58 -0400 + - Abort processing if the flavour is in forbidden-list. Default of + [ "rss" ] (Idea of Wolfgang SCHNERRING.) + - Remove stray virgules from en-dashes. Patch by Wolfgang SCHNERRING. + +1.5_1.2: Mon, 24 May 2004 08:14:54 -0400 + - Some single quotes weren't replaced properly. Diff-tesuji played + by Benjamin GEIGER. + +1.5_1.1: Sun, 14 Mar 2004 14:38:28 -0500 + - Support upcoming pyblosxom 0.9 plugin verification feature. + +1.5_1.0: Tue, 09 Mar 2004 08:08:35 -0500 + - Initial release + +Version Information +------------------- + +Version numbers will track the SmartyPants_ version numbers, with the addition +of an underscore and the smartypants.py version on the end. + +New versions will be available at `http://wiki.chad.org/SmartyPantsPy`_ + +.. _http://wiki.chad.org/SmartyPantsPy: http://wiki.chad.org/SmartyPantsPy + +Authors +======= + +`John Gruber`_ did all of the hard work of writing this software in Perl for +`Movable Type`_ and almost all of this useful documentation. `Chad Miller`_ +ported it to Python to use with Pyblosxom_. + + +Additional Credits +================== + +Portions of the SmartyPants original work are based on Brad Choate's nifty +MTRegex plug-in. `Brad Choate`_ also contributed a few bits of source code to +this plug-in. Brad Choate is a fine hacker indeed. + +`Jeremy Hedley`_ and `Charles Wiltgen`_ deserve mention for exemplary beta +testing of the original SmartyPants. + +`Rael Dornfest`_ ported SmartyPants to Blosxom. + +.. _Brad Choate: http://bradchoate.com/ +.. _Jeremy Hedley: http://antipixel.com/ +.. _Charles Wiltgen: http://playbacktime.com/ +.. _Rael Dornfest: http://raelity.org/ + + +Copyright and License +===================== + +SmartyPants_ license:: + + Copyright (c) 2003 John Gruber + (http://daringfireball.net/) + All rights reserved. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + + * Neither the name "SmartyPants" nor the names of its contributors + may be used to endorse or promote products derived from this + software without specific prior written permission. + + This software is provided by the copyright holders and contributors "as + is" and any express or implied warranties, including, but not limited + to, the implied warranties of merchantability and fitness for a + particular purpose are disclaimed. In no event shall the copyright + owner or contributors be liable for any direct, indirect, incidental, + special, exemplary, or consequential damages (including, but not + limited to, procurement of substitute goods or services; loss of use, + data, or profits; or business interruption) however caused and on any + theory of liability, whether in contract, strict liability, or tort + (including negligence or otherwise) arising in any way out of the use + of this software, even if advised of the possibility of such damage. + + +smartypants.py license:: + + smartypants.py is a derivative work of SmartyPants. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + + This software is provided by the copyright holders and contributors "as + is" and any express or implied warranties, including, but not limited + to, the implied warranties of merchantability and fitness for a + particular purpose are disclaimed. In no event shall the copyright + owner or contributors be liable for any direct, indirect, incidental, + special, exemplary, or consequential damages (including, but not + limited to, procurement of substitute goods or services; loss of use, + data, or profits; or business interruption) however caused and on any + theory of liability, whether in contract, strict liability, or tort + (including negligence or otherwise) arising in any way out of the use + of this software, even if advised of the possibility of such damage. + + + +.. _John Gruber: http://daringfireball.net/ +.. _Chad Miller: http://web.chad.org/ + +.. _Pyblosxom: http://roughingit.subtlehints.net/pyblosxom +.. _SmartyPants: http://daringfireball.net/projects/smartypants/ +.. 
_Movable Type: http://www.movabletype.org/ + +""" + +default_smartypants_attr = "1" + +import re + +tags_to_skip_regex = re.compile("<(/)?(?:pre|code|kbd|script|math)[^>]*>") + + +def verify_installation(request): + return 1 + # assert the plugin is functional + + +def cb_story(args): + global default_smartypants_attr + + try: + forbidden_flavours = args["entry"]["smartypants_forbidden_flavours"] + except KeyError: + forbidden_flavours = [ "rss" ] + + try: + attributes = args["entry"]["smartypants_attributes"] + except KeyError: + attributes = default_smartypants_attr + + if attributes is None: + attributes = default_smartypants_attr + + entryData = args["entry"].getData() + + try: + if args["request"]["flavour"] in forbidden_flavours: + return + except KeyError: + if "<" in args["entry"]["body"][0:15]: # sniff the stream + return # abort if it looks like escaped HTML. FIXME + + # FIXME: make these configurable, perhaps? + args["entry"]["body"] = smartyPants(entryData, attributes) + args["entry"]["title"] = smartyPants(args["entry"]["title"], attributes) + + +### interal functions below here + +def smartyPants(text, attr=default_smartypants_attr): + convert_quot = False # should we translate " entities into normal quotes? + + # Parse attributes: + # 0 : do nothing + # 1 : set all + # 2 : set all, using old school en- and em- dash shortcuts + # 3 : set all, using inverted old school en and em- dash shortcuts + # + # q : quotes + # b : backtick quotes (``double'' only) + # B : backtick quotes (``double'' and `single') + # d : dashes + # D : old school dashes + # i : inverted old school dashes + # e : ellipses + # w : convert " entities to " for Dreamweaver users + + do_dashes = "0" + do_backticks = "0" + do_quotes = "0" + do_ellipses = "0" + do_stupefy = "0" + + if attr == "0": + # Do nothing. 
+ return text + elif attr == "1": + do_quotes = "1" + do_backticks = "1" + do_dashes = "1" + do_ellipses = "1" + elif attr == "2": + # Do everything, turn all options on, use old school dash shorthand. + do_quotes = "1" + do_backticks = "1" + do_dashes = "2" + do_ellipses = "1" + elif attr == "3": + # Do everything, turn all options on, use inverted old school dash shorthand. + do_quotes = "1" + do_backticks = "1" + do_dashes = "3" + do_ellipses = "1" + elif attr == "-1": + # Special "stupefy" mode. + do_stupefy = "1" + else: + for c in attr: + if c == "q": do_quotes = "1" + elif c == "b": do_backticks = "1" + elif c == "B": do_backticks = "2" + elif c == "d": do_dashes = "1" + elif c == "D": do_dashes = "2" + elif c == "i": do_dashes = "3" + elif c == "e": do_ellipses = "1" + elif c == "w": convert_quot = "1" + else: + pass + # ignore unknown option + + tokens = _tokenize(text) + result = [] + in_pre = False + + prev_token_last_char = "" + # This is a cheat, used to get some context + # for one-character tokens that consist of + # just a quote char. What we do is remember + # the last character of the previous text + # token, to use as context to curl single- + # character quote tokens correctly. + + for cur_token in tokens: + if cur_token[0] == "tag": + # Don't mess with quotes inside tags. + result.append(cur_token[1]) + close_match = tags_to_skip_regex.match(cur_token[1]) + if close_match is not None and close_match.group(1) == "": + in_pre = True + else: + in_pre = False + else: + t = cur_token[1] + last_char = t[-1:] # Remember last char of this token before processing. 
+ if not in_pre: + oldstr = t + t = processEscapes(t) + + if convert_quot != "0": + t = re.sub('"', '"', t) + + if do_dashes != "0": + if do_dashes == "1": + t = educateDashes(t) + if do_dashes == "2": + t = educateDashesOldSchool(t) + if do_dashes == "3": + t = educateDashesOldSchoolInverted(t) + + if do_ellipses != "0": + t = educateEllipses(t) + + # Note: backticks need to be processed before quotes. + if do_backticks != "0": + t = educateBackticks(t) + + if do_backticks == "2": + t = educateSingleBackticks(t) + + if do_quotes != "0": + if t == "'": + # Special case: single-character ' token + if re.match("\S", prev_token_last_char): + t = "’" + else: + t = "‘" + elif t == '"': + # Special case: single-character " token + if re.match("\S", prev_token_last_char): + t = "”" + else: + t = "“" + + else: + # Normal case: + t = educateQuotes(t) + + if do_stupefy == "1": + t = stupefyEntities(t) + + prev_token_last_char = last_char + result.append(t) + + return "".join(result) + + +def educateQuotes(str): + """ + Parameter: String. + + Returns: The string, with "educated" curly quote HTML entities. + + Example input: "Isn't this fun?" + Example output: “Isn’t this fun?” + """ + + oldstr = str + punct_class = r"""[!"#\$\%'()*+,-.\/:;<=>?\@\[\\\]\^_`{|}~]""" + + # Special case if the very first character is a quote + # followed by punctuation at a non-word-break. 
Close the quotes by brute force: + str = re.sub(r"""^'(?=%s\\B)""" % (punct_class,), r"""’""", str) + str = re.sub(r"""^"(?=%s\\B)""" % (punct_class,), r"""”""", str) + + # Special case for double sets of quotes, e.g.: + # <p>He said, "'Quoted' words in a larger quote."</p> + str = re.sub(r""""'(?=\w)""", """“‘""", str) + str = re.sub(r"""'"(?=\w)""", """‘“""", str) + + # Special case for decade abbreviations (the '80s): + str = re.sub(r"""\b'(?=\d{2}s)""", r"""’""", str) + + close_class = r"""[^\ \t\r\n\[\{\(\-]""" + dec_dashes = r"""–|—""" + + # Get most opening single quotes: + opening_single_quotes_regex = re.compile(r""" + ( + \s | # a whitespace char, or + | # a non-breaking space entity, or + -- | # dashes, or + &[mn]dash; | # named dash entities + %s | # or decimal entities + &\#x201[34]; # or hex + ) + ' # the quote + (?=\w) # followed by a word character + """ % (dec_dashes,), re.VERBOSE) + str = opening_single_quotes_regex.sub(r"""\1‘""", str) + + closing_single_quotes_regex = re.compile(r""" + (%s) + ' + (?!\s | s\b | \d) + """ % (close_class,), re.VERBOSE) + str = closing_single_quotes_regex.sub(r"""\1’""", str) + + closing_single_quotes_regex = re.compile(r""" + (%s) + ' + (\s | s\b) + """ % (close_class,), re.VERBOSE) + str = closing_single_quotes_regex.sub(r"""\1’\2""", str) + + # Any remaining single quotes should be opening ones: + str = re.sub(r"""'""", r"""‘""", str) + + # Get most opening double quotes: + opening_double_quotes_regex = re.compile(r""" + ( + \s | # a whitespace char, or + | # a non-breaking space entity, or + -- | # dashes, or + &[mn]dash; | # named dash entities + %s | # or decimal entities + &\#x201[34]; # or hex + ) + " # the quote + (?=\w) # followed by a word character + """ % (dec_dashes,), re.VERBOSE) + str = opening_double_quotes_regex.sub(r"""\1“""", str) + + # Double closing quotes: + closing_double_quotes_regex = re.compile(r""" + #(%s)? 
# character that indicates the quote should be closing + " + (?=\s) + """ % (close_class,), re.VERBOSE) + str = closing_double_quotes_regex.sub(r"""”""", str) + + closing_double_quotes_regex = re.compile(r""" + (%s) # character that indicates the quote should be closing + " + """ % (close_class,), re.VERBOSE) + str = closing_double_quotes_regex.sub(r"""\1”""", str) + + # Any remaining quotes should be opening ones. + str = re.sub(r'"', r"""“""", str) + + return str + + +def educateBackticks(str): + """ + Parameter: String. + Returns: The string, with ``backticks'' -style double quotes + translated into HTML curly quote entities. + Example input: ``Isn't this fun?'' + Example output: “Isn't this fun?” + """ + + str = re.sub(r"""``""", r"""“""", str) + str = re.sub(r"""''""", r"""”""", str) + return str + + +def educateSingleBackticks(str): + """ + Parameter: String. + Returns: The string, with `backticks' -style single quotes + translated into HTML curly quote entities. + + Example input: `Isn't this fun?' + Example output: ‘Isn’t this fun?’ + """ + + str = re.sub(r"""`""", r"""‘""", str) + str = re.sub(r"""'""", r"""’""", str) + return str + + +def educateDashes(str): + """ + Parameter: String. + + Returns: The string, with each instance of "--" translated to + an em-dash HTML entity. + """ + + str = re.sub(r"""---""", r"""–""", str) # en (yes, backwards) + str = re.sub(r"""--""", r"""—""", str) # em (yes, backwards) + return str + + +def educateDashesOldSchool(str): + """ + Parameter: String. + + Returns: The string, with each instance of "--" translated to + an en-dash HTML entity, and each "---" translated to + an em-dash HTML entity. + """ + + str = re.sub(r"""---""", r"""—""", str) # em (yes, backwards) + str = re.sub(r"""--""", r"""–""", str) # en (yes, backwards) + return str + + +def educateDashesOldSchoolInverted(str): + """ + Parameter: String. 
+ + Returns: The string, with each instance of "--" translated to + an em-dash HTML entity, and each "---" translated to + an en-dash HTML entity. Two reasons why: First, unlike the + en- and em-dash syntax supported by + EducateDashesOldSchool(), it's compatible with existing + entries written before SmartyPants 1.1, back when "--" was + only used for em-dashes. Second, em-dashes are more + common than en-dashes, and so it sort of makes sense that + the shortcut should be shorter to type. (Thanks to Aaron + Swartz for the idea.) + """ + str = re.sub(r"""---""", r"""–""", str) # em + str = re.sub(r"""--""", r"""—""", str) # en + return str + + + +def educateEllipses(str): + """ + Parameter: String. + Returns: The string, with each instance of "..." translated to + an ellipsis HTML entity. + + Example input: Huh...? + Example output: Huh…? + """ + + str = re.sub(r"""\.\.\.""", r"""…""", str) + str = re.sub(r"""\. \. \.""", r"""…""", str) + return str + + +def stupefyEntities(str): + """ + Parameter: String. + Returns: The string, with each SmartyPants HTML entity translated to + its ASCII counterpart. + + Example input: “Hello — world.” + Example output: "Hello -- world." + """ + + str = re.sub(r"""–""", r"""-""", str) # en-dash + str = re.sub(r"""—""", r"""--""", str) # em-dash + + str = re.sub(r"""‘""", r"""'""", str) # open single quote + str = re.sub(r"""’""", r"""'""", str) # close single quote + + str = re.sub(r"""“""", r'''"''', str) # open double quote + str = re.sub(r"""”""", r'''"''', str) # close double quote + + str = re.sub(r"""…""", r"""...""", str)# ellipsis + + return str + + +def processEscapes(str): + r""" + Parameter: String. + Returns: The string, with after processing the following backslash + escape sequences. This is useful if you want to force a "dumb" + quote or other character to appear. + + Escape Value + ------ ----- + \\ \ + \" " + \' ' + \. . 
+ \- - + \` ` + """ + str = re.sub(r"""\\\\""", r"""\""", str) + str = re.sub(r'''\\"''', r""""""", str) + str = re.sub(r"""\\'""", r"""'""", str) + str = re.sub(r"""\\\.""", r""".""", str) + str = re.sub(r"""\\-""", r"""-""", str) + str = re.sub(r"""\\`""", r"""`""", str) + + return str + + +def _tokenize(str): + """ + Parameter: String containing HTML markup. + Returns: Reference to an array of the tokens comprising the input + string. Each token is either a tag (possibly with nested, + tags contained therein, such as <a href="<MTFoo>">, or a + run of text between tags. Each element of the array is a + two-element array; the first is either 'tag' or 'text'; + the second is the actual value. + + Based on the _tokenize() subroutine from Brad Choate's MTRegex plugin. + <http://www.bradchoate.com/past/mtregex.php> + """ + + pos = 0 + length = len(str) + tokens = [] + + depth = 6 + nested_tags = "|".join(['(?:<(?:[^<>]',] * depth) + (')*>)' * depth) + #match = r"""(?: <! ( -- .*? -- \s* )+ > ) | # comments + # (?: <\? .*? \?> ) | # directives + # %s # nested tags """ % (nested_tags,) + tag_soup = re.compile(r"""([^<]*)(<[^>]*>)""") + + token_match = tag_soup.search(str) + + previous_end = 0 + while token_match is not None: + if token_match.group(1) != "": + tokens.append(['text', token_match.group(1)]) + + tokens.append(['tag', token_match.group(2)]) + + previous_end = token_match.end() + token_match = tag_soup.search(str, token_match.end()) + + if previous_end < len(str): + tokens.append(['text', str[previous_end:]]) + + return tokens + + + +if __name__ == "__main__": + + import locale + + try: + locale.setlocale(locale.LC_ALL, '') + except: + pass + + from docutils.core import publish_string + docstring_html = publish_string(__doc__, writer_name='html') + + print docstring_html + + + # Unit test output goes out stderr. No worries. 
+ import unittest + sp = smartyPants + + class TestSmartypantsAllAttributes(unittest.TestCase): + # the default attribute is "1", which means "all". + + def test_dates(self): + self.assertEqual(sp("1440-80's"), "1440-80’s") + self.assertEqual(sp("1440-'80s"), "1440-‘80s") + self.assertEqual(sp("1440---'80s"), "1440–‘80s") + self.assertEqual(sp("1960s"), "1960s") # no effect. + self.assertEqual(sp("1960's"), "1960’s") + self.assertEqual(sp("one two '60s"), "one two ‘60s") + self.assertEqual(sp("'60s"), "‘60s") + + def test_ordinal_numbers(self): + self.assertEqual(sp("21st century"), "21st century") # no effect. + self.assertEqual(sp("3rd"), "3rd") # no effect. + + def test_educated_quotes(self): + self.assertEqual(sp('''"Isn't this fun?"'''), '''“Isn’t this fun?”''') + + unittest.main() + + + + +__author__ = "Chad Miller <smartypantspy@chad.org>" +__version__ = "1.5_1.5: Sat, 13 Aug 2005 15:50:24 -0400" +__url__ = "http://wiki.chad.org/SmartyPantsPy" +__description__ = "Smart-quotes, smart-ellipses, and smart-dashes for weblog entries in pyblosxom" diff --git a/lib/templatetags/templatetags/truncateletters.py b/lib/templatetags/templatetags/truncateletters.py new file mode 100644 index 0000000..c492430 --- /dev/null +++ b/lib/templatetags/templatetags/truncateletters.py @@ -0,0 +1,24 @@ +from django import template +register = template.Library() + +@register.filter +def truncateletters(value, arg): + """ + Truncates a string after a certain number of letters + + Argument: Number of letters to truncate after + """ + try: + length = int(arg) + except ValueError: # invalid literal for int() + return value # Fail silently + if not isinstance(value, basestring): + value = str(value) + + if len(value) > length: + truncated = value[:length] + if not truncated.endswith('...'): + truncated += '...' + return truncated + + return value
\ No newline at end of file diff --git a/lib/templatetags/templatetags/typogrify.py b/lib/templatetags/templatetags/typogrify.py new file mode 100644 index 0000000..fa4f0cf --- /dev/null +++ b/lib/templatetags/templatetags/typogrify.py @@ -0,0 +1,216 @@ +# from django.conf import settings +import re +from django.conf import settings +from django import template +register = template.Library() + +def amp(text): + """Wraps apersands in html with ``<span class="amp">`` so they can be + styled with CSS. Apersands are also normalized to ``&``. Requires + ampersands to have whitespace or an `` `` on both sides. + + >>> amp('One & two') + 'One <span class="amp">&</span> two' + >>> amp('One & two') + 'One <span class="amp">&</span> two' + >>> amp('One & two') + 'One <span class="amp">&</span> two' + + >>> amp('One & two') + 'One <span class="amp">&</span> two' + + It won't mess up & that are already wrapped, in entities or URLs + + >>> amp('One <span class="amp">&</span> two') + 'One <span class="amp">&</span> two' + >>> amp('“this” & <a href="/?that&test">that</a>') + '“this” <span class="amp">&</span> <a href="/?that&test">that</a>' + """ + amp_finder = re.compile(r"(\s| )(&|&|&\#38;)(\s| )") + return amp_finder.sub(r"""\1<span class="amp">&</span>\3""", text) + +def caps(text): + """Wraps multiple capital letters in ``<span class="caps">`` + so they can be styled with CSS. + + >>> caps("A message from KU") + 'A message from <span class="caps">KU</span>' + + Uses the smartypants tokenizer to not screw with HTML or with tags it shouldn't. + + >>> caps("<PRE>CAPS</pre> more CAPS") + '<PRE>CAPS</pre> more <span class="caps">CAPS</span>' + + >>> caps("A message from 2KU2 with digits") + 'A message from <span class="caps">2KU2</span> with digits' + + >>> caps("Dotted caps followed by spaces should never include them in the wrap D.O.T. like so.") + 'Dotted caps followed by spaces should never include them in the wrap <span class="caps">D.O.T.</span> like so.' 
+ + >>> caps("<i>D.O.T.</i>HE34T<b>RFID</b>") + '<i><span class="caps">D.O.T.</span></i><span class="caps">HE34T</span><b><span class="caps">RFID</span></b>' + """ + try: + import smartypants + except ImportError: + if settings.DEBUG: + raise template.TemplateSyntaxError, "Error in {% caps %} filter: The Python SmartyPants library isn't installed." + return text + + tokens = smartypants._tokenize(text) + result = [] + in_skipped_tag = False + + cap_finder = re.compile(r"""( + (\b[A-Z\d]* # Group 2: Any amount of caps and digits + [A-Z]\d*[A-Z] # A cap string much at least include two caps (but they can have digits between them) + [A-Z\d]*\b) # Any amount of caps and digits + | (\b[A-Z]+\.\s? # OR: Group 3: Some caps, followed by a '.' and an optional space + (?:[A-Z]+\.\s?)+) # Followed by the same thing at least once more + (?:\s|\b|$)) + """, re.VERBOSE) + + def _cap_wrapper(matchobj): + """This is necessary to keep dotted cap strings to pick up extra spaces""" + if matchobj.group(2): + return """<span class="caps">%s</span>""" % matchobj.group(2) + else: + if matchobj.group(3)[-1] == " ": + caps = matchobj.group(3)[:-1] + tail = ' ' + else: + caps = matchobj.group(3) + tail = '' + return """<span class="caps">%s</span>%s""" % (caps, tail) + + tags_to_skip_regex = re.compile("<(/)?(?:pre|code|kbd|script|math)[^>]*>", re.IGNORECASE) + + + for token in tokens: + if token[0] == "tag": + # Don't mess with tags. + result.append(token[1]) + close_match = tags_to_skip_regex.match(token[1]) + if close_match and close_match.group(1) == None: + in_skipped_tag = True + else: + in_skipped_tag = False + else: + if in_skipped_tag: + result.append(token[1]) + else: + result.append(cap_finder.sub(_cap_wrapper, token[1])) + + return "".join(result) + +def initial_quotes(text): + """Wraps initial quotes in ``class="dquo"`` for double quotes or + ``class="quo"`` for single quotes. 
Works in these block tags ``(h1-h6, p, li)`` + and also accounts for potential opening inline elements ``a, em, strong, span, b, i`` + + >>> initial_quotes('"With primes"') + '<span class="dquo">"</span>With primes"' + >>> initial_quotes("'With single primes'") + '<span class="quo">\\'</span>With single primes\\'' + + >>> initial_quotes('<a href="#">"With primes and a link"</a>') + '<a href="#"><span class="dquo">"</span>With primes and a link"</a>' + + >>> initial_quotes('“With smartypanted quotes”') + '<span class="dquo">“</span>With smartypanted quotes”' + """ + quote_finder = re.compile(r"""((<(p|h[1-6]|li)[^>]*>|^) # start with an opening p, h1-6, li or the start of the string + \s* # optional white space! + (<(a|em|span|strong|i|b)[^>]*>\s*)*) # optional opening inline tags, with more optional white space for each. + (("|“|&\#8220;)|('|‘|&\#8216;)) # Find me a quote! (only need to find the left quotes and the primes) + # double quotes are in group 7, singles in group 8 + """, re.VERBOSE) + def _quote_wrapper(matchobj): + if matchobj.group(7): + classname = "dquo" + quote = matchobj.group(7) + else: + classname = "quo" + quote = matchobj.group(8) + return """%s<span class="%s">%s</span>""" % (matchobj.group(1), classname, quote) + + return quote_finder.sub(_quote_wrapper, text) + +def smartypants(text): + """Applies smarty pants to curl quotes. + + >>> smartypants('The "Green" man') + 'The “Green” man' + """ + try: + import smartypants + except ImportError: + if settings.DEBUG: + raise template.TemplateSyntaxError, "Error in {% smartypants %} filter: The Python smartypants library isn't installed." 
+ return text + else: + return smartypants.smartyPants(text) + +def typogrify(text): + """The super typography filter + + Applies the following filters: widont, smartypants, caps, amp, initial_quotes + + >>> typogrify('<h2>"Jayhawks" & KU fans act extremely obnoxiously</h2>') + '<h2><span class="dquo">“</span>Jayhawks” <span class="amp">&</span> <span class="caps">KU</span> fans act extremely obnoxiously</h2>' + """ + text = amp(text) + text = widont(text) + text = smartypants(text) + text = caps(text) + text = initial_quotes(text) + return text + +def widont(text): + """Replaces the space between the last two words in a string with `` `` + Works in these block tags ``(h1-h6, p, li)`` and also accounts for + potential closing inline elements ``a, em, strong, span, b, i`` + + >>> widont('A very simple test') + 'A very simple test' + + >>> widont('<p>In a couple of paragraphs</p><p>paragraph two</p>') + '<p>In a couple of paragraphs</p><p>paragraph two</p>' + + >>> widont('<h1><a href="#">In a link inside a heading</i> </a></h1>') + '<h1><a href="#">In a link inside a heading</i> </a></h1>' + + >>> widont('<h1><a href="#">In a link</a> followed by other text</h1>') + '<h1><a href="#">In a link</a> followed by other text</h1>' + + Empty HTMLs shouldn't error + >>> widont('<h1><a href="#"></a></h1>') + '<h1><a href="#"></a></h1>' + + >>> widont('<div>Divs get no love!</div>') + '<div>Divs get no love!</div>' + + >>> widont('<div><p>But divs with paragraphs do!</p></div>') + '<div><p>But divs with paragraphs do!</p></div>' + """ + widont_finder = re.compile(r"""(\s+) # the space to replace + ([^<>\s]+ # must be flollowed by non-tag non-space characters + \s* # optional white space! 
+ (</(a|em|span|strong|i|b)[^>]*>\s*)* # optional closing inline tags with optional white space after each + (</(p|h[1-6]|li)|$)) # end with a closing p, h1-6, li or the end of the string + """, re.VERBOSE) + return widont_finder.sub(r' \2', text) + +register.filter('amp', amp) +register.filter('caps', caps) +register.filter('initial_quotes', initial_quotes) +register.filter('smartypants', smartypants) +register.filter('typogrify', typogrify) +register.filter('widont', widont) + +def _test(): + import doctest + doctest.testmod() + +if __name__ == "__main__": + _test() diff --git a/lib/utils/GeoClient.py b/lib/utils/GeoClient.py new file mode 100644 index 0000000..d1966ca --- /dev/null +++ b/lib/utils/GeoClient.py @@ -0,0 +1,292 @@ +# -*- coding: utf-8 -*- + +"""Python wrapper for geoname web APIs + +created 20/03/2006 By Nicolas Laurance + +This module allows you to access geoname's web APIs, +and get the results programmatically. +Described here: + http://www.geonames.org/export/ + +def postalCodeSearch(postalcode, placename='', country=COUNTRY, maxRows='10', http_proxy=None): +def postalCodeLookupJSON(postalcode, placename='', country=COUNTRY, maxRows='10',gcallback='', http_proxy=None): +def findNearbyPostalCodes(postalcode, placename='', country=COUNTRY, radius='5', maxRows='10',lat=None,lng=None, http_proxy=None): +def postalCodeCountryInfo(http_proxy=None): +def search(placename='', country=COUNTRY, maxRows='10', style='SHORT',lang=LANG, fclass=None, http_proxy=None): +def findNearbyPlaceName(lat,lng, http_proxy=None): + +Sample usage: +>>> import geoname +>>> result=geoname.postalCodeSearch('35580','guichen','fr','10') +>>> result.totalResultsCount.PCDATA +u'1' +>>> result.code[0].lat.PCDATA +u'47.9666667' +>>> result.code[0].lng.PCDATA +u'-1.8' + + + +""" + +__author__ = "Nicolas Laurance (nlaurance@zindep.com)" +__version__ = "2.0" +__cvsversion__ = "$Revision: 2.0 $"[11:-2] +__date__ = "$Date: 2003/06/20 22:40:53 $"[7:-2] +__copyright__ = "Copyright (c) 
2006 Nicolas Laurance" +__license__ = "Python" + +import gnosis.xml.objectify as objectify + +import os, sys, urllib, re +try: + import timeoutsocket # http://www.timo-tasi.org/python/timeoutsocket.py + timeoutsocket.setDefaultSocketTimeout(120) +except ImportError: + pass + +HTTP_PROXY = None +DEBUG = 0 +COUNTRY = 'FR' +LANG ='fr' + + +# don't touch the rest of these constants +class GeonameError(Exception): pass + +## administrative functions +def version(): + print """PyGeoname %(__version__)s +%(__copyright__)s +released %(__date__)s +""" % globals() + +def setProxy(http_proxy): + """set HTTP proxy""" + global HTTP_PROXY + HTTP_PROXY = http_proxy + +def getProxy(http_proxy = None): + """get HTTP proxy""" + return http_proxy or HTTP_PROXY + +def getProxies(http_proxy = None): + http_proxy = getProxy(http_proxy) + if http_proxy: + proxies = {"http": http_proxy} + else: + proxies = None + return proxies + +def _contentsOf(dirname, filename): + filename = os.path.join(dirname, filename) + if not os.path.exists(filename): return None + fsock = open(filename) + contents = fsock.read() + fsock.close() + return contents + +def _getScriptDir(): + if __name__ == '__main__': + return os.path.abspath(os.path.dirname(sys.argv[0])) + else: + return os.path.abspath(os.path.dirname(sys.modules[__name__].__file__)) + +class Bag: pass + +_intFields = ('totalResultsCount') +_dateFields = () +_listFields = ('code','geoname','country',) +_floatFields = ('lat','lng','distance') + +def unmarshal(element): + #import pdb;pdb.set_trace() + xml_obj = objectify.XML_Objectify(element) + rc = xml_obj.make_instance() + return rc + +def _do(url, http_proxy): + proxies = getProxies(http_proxy) + u = urllib.FancyURLopener(proxies) + usock = u.open(url) + rawdata = usock.read() + if DEBUG: print rawdata + usock.close() + data = unmarshal(rawdata) + return data + +## main functions + +def _buildfindNearbyPostalCodes(postalcode, placename, country, radius, maxRows ): + 
placename=urllib.quote(placename) + searchUrl = "http://ws.geonames.org/findNearbyPostalCodes?postalcode=%(postalcode)s&placename=%(placename)s&country=%(country)s&radius=%(radius)s&maxRows=%(maxRows)s" % vars() + return searchUrl + + +def _buildpostalCodeLookupJSON(postalcode,placename,country,maxRows,gcallback): + placename=urllib.quote(placename) + searchUrl = "http://ws.geonames.org/postalCodeLookupJSON?postalcode=%(postalcode)s&placename=%(placename)s&country=%(country)s&maxRows=%(maxRows)s&callback=%(gcallback)s" % vars() + return searchUrl + +def _buildfindNearbyPostalCodesLL(lat,lng,radius,maxRows): + searchUrl = "http://ws.geonames.org/findNearbyPostalCodes?lat=%(lat)s&lng=%(lng)s&radius=%(radius)s&maxRows=%(maxRows)s" % vars() + return searchUrl + +def _buildfindCountrySubdivision(lat,lng): + searchUrl = "http://ws.geonames.org/countrySubdivision?lat=%(lat)s&lng=%(lng)s" % vars() + return searchUrl + +def _buildfindNearbyPlaceName(lat,lng): + searchUrl = "http://ws.geonames.org/findNearbyPlaceName?lat=%(lat)s&lng=%(lng)s" % vars() + return searchUrl + +def _buildpostalCodeSearch(postalcode, placename, country, maxRows ): + placename=urllib.quote(placename) + searchUrl = "http://ws.geonames.org/postalCodeSearch?postalcode=%(postalcode)s&placename=%(placename)s&country=%(country)s&maxRows=%(maxRows)s" % vars() + return searchUrl + +def _buildsearch(placename, country, maxRows,style,lang, fclass): + placename=urllib.quote(placename) + if fclass: + urlfclass='' + for fc in fclass: + urlfclass+=urllib.quote("&fclass=%s" % fc) + searchUrl = "http://ws.geonames.org/search?q=%(placename)s&country=%(country)s&maxRows=%(maxRows)s&lang=%(lang)s&style=%(style)s&fclass=%(fclass)s" % vars() + return searchUrl + +def postalCodeSearch(postalcode, placename='', country=COUNTRY, maxRows='10', http_proxy=None): + """ + http://ws.geonames.org/postalCodeSearch?postalcode=35580&maxRows=10&country=fr + Url : ws.geonames.org/postalCodeSearch? 
+ Parameters : postalcode ,placename,maxRows,country + <geonames> + <totalResultsCount>7</totalResultsCount> + - + <code> + <postalcode>35580</postalcode> + <name>St Senoux</name> + <countryCode>FR</countryCode> + <lat>47.9</lat> + <lng>-1.7833333</lng> + </code> + """ + url = _buildpostalCodeSearch(postalcode,placename,country,maxRows) + if DEBUG: print url + return _do(url,http_proxy) + +def postalCodeLookupJSON(postalcode, placename='', country=COUNTRY, maxRows='10',gcallback='', http_proxy=None): + """ + Webservice Type : REST /JSON + Url : ws.geonames.org/postalCodeLookupJSON? + Parameters : postalcode,country ,maxRows (default = 20),callback + Result : returns a list of places for the given postalcode in JSON format + """ + url = _buildpostalCodeLookupJSON(postalcode,placename,country,maxRows,gcallback) +# print url + proxies = getProxies(http_proxy) + u = urllib.FancyURLopener(proxies) + usock = u.open(url) + rawdata = usock.read() + if DEBUG: print rawdata + usock.close() + return eval(rawdata[:-3]) + +def findNearbyPostalCodes(postalcode, placename='', country=COUNTRY, radius='5', maxRows='10',lat=None,lng=None, http_proxy=None): + """ + Find nearby postal codes / reverse geocoding + This service comes in two flavors. You can either pass the lat/long or a postalcode/placename. + + Webservice Type : REST + Url : ws.geonames.org/findNearbyPostalCodes? 
+ Parameters : + lat,lng, radius (in km), maxRows (default = 5),country (default = all countries) + or + postalcode,country, radius (in Km), maxRows (default = 5) + Result : returns a list of postalcodes and places for the lat/lng query as xml document + Example: + http://ws.geonames.org/findNearbyPostalCodes?postalcode=35580&placename=guichen&country=FR&radius=5 + <geonames> + - + <code> + <postalcode>35580</postalcode> + <name>Guichen</name> + <countryCode>FR</countryCode> + <lat>47.9666667</lat> + <lng>-1.8</lng> + <distance>0.0</distance> + </code> + """ + if lat and lng : + url = _buildfindNearbyPostalCodesLL(lat,lng,radius,maxRows) + else: + url = _buildfindNearbyPostalCodes(postalcode,placename,country,radius,maxRows) + if DEBUG: print url +# import pdb;pdb.set_trace() + return _do(url,http_proxy).code + + +def postalCodeCountryInfo(http_proxy=None): + """ + http://ws.geonames.org/postalCodeCountryInfo? + <country> + <countryCode>FR</countryCode> + <countryName>France</countryName> + <numPostalCodes>39163</numPostalCodes> + <minPostalCode>01000</minPostalCode> + <maxPostalCode>98000</maxPostalCode> + </country> + + """ + return _do("http://ws.geonames.org/postalCodeCountryInfo?",http_proxy).country + +def search(placename='', country=COUNTRY, maxRows='10', style='SHORT',lang=LANG, fclass=None, http_proxy=None): + """ + Url : ws.geonames.org/search? + Parameters : q : place name (urlencoded utf8) + maxRows : maximal number of rows returned (default = 100) + country : iso country code, two characters (default = all countries) + fclass : featureclass(es) (default= all feature classes); this parameter may occur more then once, example: fclass=P&fclass=A + style : SHORT,MEDIUM,LONG (default = MEDIUM), verbosity of returned xml document + lang : ISO 2-letter language code. (default = en), countryName will be returned in the specified language. 
+
+    http://ws.geonames.org/search?q=guichen&maxRows=10&style=SHORT&lang=fr&country=fr
+    <geonames>
+    <totalResultsCount>3</totalResultsCount>
+    -
+    <geoname>
+    <name>Laillé</name>
+    <lat>47.9833333</lat>
+    <lng>-1.7166667</lng>
+    </geoname>
+    """
+    url = _buildsearch(placename, country, maxRows,style,lang, fclass)
+    if DEBUG: print url
+    return _do(url,http_proxy)
+
+def findNearbyPlaceName(lat,lng, http_proxy=None):
+    """
+    Webservice Type : REST
+    Url : ws.geonames.org/findNearbyPlaceName?
+    Parameters : lat,lng
+    Result : returns the closest populated place for the lat/lng query as xml document
+    Example:
+    http://ws.geonames.org/findNearbyPlaceName?lat=47.3&lng=9
+    """
+    url = _buildfindNearbyPlaceName(lat,lng)
+    if DEBUG: print url
+    return _do(url,http_proxy)
+
+def findCountrySubdivision(lat,lng, http_proxy=None):
+    """
+    Webservice Type : REST
+    Url : ws.geonames.org/countrySubdivision?
+    Parameters : lat,lng
+    Result : returns the country and administrative subdivision for the lat/lng query as xml document
+    Example:
+    http://ws.geonames.org/countrySubdivision?lat=47.3&lng=9
+    """
+    url = _buildfindCountrySubdivision(lat,lng)
+    if DEBUG: print url
+    return _do(url,http_proxy)
+
diff --git a/lib/utils/__init__.py b/lib/utils/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/lib/utils/__init__.py
diff --git a/lib/utils/email_multipart.py b/lib/utils/email_multipart.py
new file mode 100644
index 0000000..4c2e154
--- /dev/null
+++ b/lib/utils/email_multipart.py
@@ -0,0 +1,80 @@
+# -*- coding: utf-8 -*-
+
+# Sending html emails in Django
+# Report any bugs to esat @t sleytr*net
+# Evren Esat Ozkan
+
+
+from feedparser import _sanitizeHTML
+from stripogram import html2text
+
+from django.conf import settings
+from django.template import loader, Context
+
+from email.MIMEMultipart import MIMEMultipart
+from email.MIMEText import MIMEText
+from email.MIMEImage import MIMEImage
+from smtplib import SMTP
+import email.Charset
+
+
+charset='utf-8'
+
+
+# Register the charset with the email package so message parts use the
+# shortest encoding (quoted-printable vs base64) available for it.
+email.Charset.add_charset( charset, email.Charset.SHORTEST, None, None )
+
+def htmlmail(sbj,recip,msg,template='',texttemplate='',textmsg='',images=(), recip_name='',sender=settings.DEFAULT_FROM_EMAIL,sender_name='',charset=charset):
+    '''
+    Build and send a multipart (HTML + plain text) email, optionally with
+    inline images, via the SMTP server configured in Django settings.
+
+    if you want to use Django template system:
+    use `msg` and optionally `textmsg` as template context (dict)
+    and define `template` and optionally `texttemplate` variables.
+    otherwise msg and textmsg variables are used as html and text message sources.
+
+    if you want to use images in html message, define physical paths and ids in tuples.
+    (image paths are relative to MEDIA_ROOT)
+    example:
+    images=(('email_images/logo.gif','img1'),('email_images/footer.gif','img2'))
+    and use them in html like this:
+    <img src="cid:img1">
+    ...
+    <img src="cid:img2">
+    '''
+    # HTML part: either a rendered template (msg is then the context dict)
+    # or msg itself when no template name is given.
+    html=render(msg,template)
+    # Plain-text part: explicit text template/message if supplied, otherwise
+    # derived from the HTML by sanitizing it and stripping the tags.
+    if texttemplate or textmsg: text=render((textmsg or msg),texttemplate)
+    else: text= html2text(_sanitizeHTML(html,charset))
+
+    # 'related' root lets the HTML part reference attached images by CID.
+    msgRoot = MIMEMultipart('related')
+    msgRoot['Subject'] = sbj
+    msgRoot['From'] = named(sender,sender_name)
+    msgRoot['To'] = named(recip,recip_name)
+    msgRoot.preamble = 'This is a multi-part message in MIME format.'
+
+    # 'alternative' container: clients pick the text or HTML version.
+    msgAlternative = MIMEMultipart('alternative')
+    msgRoot.attach(msgAlternative)
+
+    msgAlternative.attach(MIMEText(text, _charset=charset))
+    msgAlternative.attach(MIMEText(html, 'html', _charset=charset))
+
+    # Attach each inline image, tagged with the Content-ID used in the HTML.
+    for img in images:
+        fp = open(img[0], 'rb')
+        msgImage = MIMEImage(fp.read())
+        fp.close()
+        msgImage.add_header('Content-ID', '<'+img[1]+'>')
+        msgRoot.attach(msgImage)
+
+    # Deliver through the host/credentials from Django settings.
+    # NOTE(review): login() is attempted unconditionally -- assumes the SMTP
+    # server requires/permits AUTH; confirm for servers that reject it.
+    smtp = SMTP()
+    smtp.connect(settings.EMAIL_HOST)
+    smtp.login(settings.EMAIL_HOST_USER , settings.EMAIL_HOST_PASSWORD)
+    smtp.sendmail(sender, recip, msgRoot.as_string())
+    smtp.quit()
+
+
+def render(context,template):
+    # Render `context` through the named Django template; with no template,
+    # `context` is already the final string and is returned unchanged.
+    if template:
+        t = loader.get_template(template)
+        return t.render(Context(context))
+    return context
+
+def named(mail,name):
+    # Format an address as 'Name <addr>' when a display name is supplied.
+    if name: return '%s <%s>' % (name,mail)
+    return mail
\ No newline at end of file
diff --git a/lib/utils/pydelicious.py b/lib/utils/pydelicious.py
new file mode 100644
index 0000000..dd33788
--- /dev/null
+++ b/lib/utils/pydelicious.py
@@ -0,0 +1,817 @@
+"""Library to access del.icio.us data via Python.

+
+:examples:
+
+    Using the API class directly:
+
+    >>> a = pydelicious.apiNew('user', 'passwd')
+    >>> # or:
+    >>> a = DeliciousAPI('user', 'passwd')
+    >>> a.tags_get() # Same as:
+    >>> a.request('tags/get', )
+
+    Or by calling one of the methods on the module:
+
+    - add(user, passwd, url, description, tags = "", extended = "", dt = "", replace="no")
+    - get(user, passwd, tag="", dt="", count = 0)
+    - get_all(user, passwd, tag = "")
+    - delete(user, passwd, url)
+    - rename_tag(user, passwd, oldtag, newtag)
+    - get_tags(user, passwd)
+
+    >>> a = apiNew(user, passwd)
+    >>> a.posts_add(url="http://my.com/", description="my.com", extended="the url is my.moc", tags="my com")
+    True
+    >>> len(a.posts_all())
+    1
+    >>> get_all(user, passwd)
+    1
+
+    These are short functions for getrss calls.
+
+    >>> rss_
+
+def get_userposts(user):
+def get_tagposts(tag):
+def get_urlposts(url):
+def get_popular(tag = ""):
+
+    >>> json_posts()
+    >>> json_tags()
+    >>> json_network()
+    >>> json_fans()
+
+:License: pydelicious is released under the BSD license. See 'license.txt'
+    for more information.
+
+:todo, bvb:
+    - Rewriting comments to English. More documentation, examples.
+    - Added JSON-like return values for XML data (del.icio.us also serves some JSON...)
+    - better error/exception classes and handling, work in progress.
+
+:todo:
+    - Source code SHOULD BE ASCII!
+    - More tests.
+    - handling different encodings, what, how?
+      >>> pydelicious.getrss(tag="t[a]g")
+      url: http://del.icio.us/rss/tag/t[a]g
+    - Parse datetimes in XML.
+    - Test RSS functionality? HTML scraping doesn't work yet?
+    - API functions need required argument checks.
+    - interesting functionality in other libraries (ruby, java, perl, etc)?
+ - what is pydelicious used for? + - license, readme docs via setup.py verdelen? + - automatic releas build + +:done: + * Refactored the API class, much cleaner now and functions dlcs_api_request, dlcs_parse_xml are available for who wants them. +""" +import sys +import os +import time +import datetime +import md5, httplib +import urllib, urllib2, time +from StringIO import StringIO + +try: + from elementtree.ElementTree import parse as parse_xml +except ImportError: + from xml.etree.ElementTree import parse as parse_xml + +import feedparser + + +### Static config + +__version__ = '0.5.0' +__author__ = 'Frank Timmermann <regenkind_at_gmx_dot_de>' # GP: does not respond to emails +__contributors__ = [ + 'Greg Pinero', + 'Berend van Berkum <berend+pydelicious@dotmpe.com>'] +__url__ = 'http://code.google.com/p/pydelicious/' +__author_email__ = "" +# Old URL: 'http://deliciouspython.python-hosting.com/' + +__description__ = '''pydelicious.py allows you to access the web service of del.icio.us via it's API through python.''' +__long_description__ = '''the goal is to design an easy to use and fully functional python interface to del.icio.us. ''' + +DLCS_OK_MESSAGES = ('done', 'ok') # Known text values of positive del.icio.us <result> answers +DLCS_WAIT_TIME = 4 +DLCS_REQUEST_TIMEOUT = 444 # Seconds before socket triggers timeout +#DLCS_API_REALM = 'del.icio.us API' +DLCS_API_HOST = 'https://api.del.icio.us' +DLCS_API_PATH = 'v1' +DLCS_API = "%s/%s" % (DLCS_API_HOST, DLCS_API_PATH) +DLCS_RSS = 'http://del.icio.us/rss/' + +ISO_8601_DATETIME = '%Y-%m-%dT%H:%M:%SZ' + +USER_AGENT = 'pydelicious.py/%s %s' % (__version__, __url__) + +DEBUG = 0 +if 'DLCS_DEBUG' in os.environ: + DEBUG = int(os.environ['DLCS_DEBUG']) + + +# Taken from FeedParser.py +# timeoutsocket allows feedparser to time out rather than hang forever on ultra-slow servers. +# Python 2.3 now has this functionality available in the standard socket library, so under +# 2.3 you don't need to install anything. 
But you probably should anyway, because the socket +# module is buggy and timeoutsocket is better. +try: + import timeoutsocket # http://www.timo-tasi.org/python/timeoutsocket.py + timeoutsocket.setDefaultSocketTimeout(DLCS_REQUEST_TIMEOUT) +except ImportError: + import socket + if hasattr(socket, 'setdefaulttimeout'): socket.setdefaulttimeout(DLCS_REQUEST_TIMEOUT) +if DEBUG: print >>sys.stderr, "Set socket timeout to %s seconds" % DLCS_REQUEST_TIMEOUT + + +### Utility classes + +class _Waiter: + """Waiter makes sure a certain amount of time passes between + successive calls of `Waiter()`. + + Some attributes: + :last: time of last call + :wait: the minimum time needed between calls + :waited: the number of calls throttled + + pydelicious.Waiter is an instance created when the module is loaded. + """ + def __init__(self, wait): + self.wait = wait + self.waited = 0 + self.lastcall = 0; + + def __call__(self): + tt = time.time() + wait = self.wait + + timeago = tt - self.lastcall + + if timeago < wait: + wait = wait - timeago + if DEBUG>0: print >>sys.stderr, "Waiting %s seconds." % wait + time.sleep(wait) + self.waited += 1 + self.lastcall = tt + wait + else: + self.lastcall = tt + +Waiter = _Waiter(DLCS_WAIT_TIME) + +class PyDeliciousException(Exception): + '''Std. pydelicious error''' + pass + +class DeliciousError(Exception): + """Raised when the server responds with a negative answer""" + + +class DefaultErrorHandler(urllib2.HTTPDefaultErrorHandler): + '''xxx, bvb: Where is this used? should it be registered somewhere with urllib2? + + Handles HTTP Error, currently only 503. + ''' + def http_error_503(self, req, fp, code, msg, headers): + raise urllib2.HTTPError(req, code, throttled_message, headers, fp) + + +class post(dict): + """Post object, contains href, description, hash, dt, tags, + extended, user, count(, shared). 
+ + xxx, bvb: Not used in DeliciousAPI + """ + def __init__(self, href="", description="", hash="", time="", tag="", extended="", user="", count="", + tags="", url="", dt=""): # tags or tag? + self["href"] = href + if url != "": self["href"] = url + self["description"] = description + self["hash"] = hash + self["dt"] = dt + if time != "": self["dt"] = time + self["tags"] = tags + if tag != "": self["tags"] = tag # tag or tags? # !! tags + self["extended"] = extended + self["user"] = user + self["count"] = count + + def __getattr__(self, name): + try: return self[name] + except: object.__getattribute__(self, name) + + +class posts(list): + def __init__(self, *args): + for i in args: self.append(i) + + def __getattr__(self, attr): + try: return [p[attr] for p in self] + except: object.__getattribute__(self, attr) + +### Utility functions + +def str2uni(s): + # type(in) str or unicode + # type(out) unicode + return ("".join([unichr(ord(i)) for i in s])) + +def str2utf8(s): + # type(in) str or unicode + # type(out) str + return ("".join([unichr(ord(i)).encode("utf-8") for i in s])) + +def str2quote(s): + return urllib.quote_plus("".join([unichr(ord(i)).encode("utf-8") for i in s])) + +def dict0(d): + # Trims empty dict entries + # {'a':'a', 'b':'', 'c': 'c'} => {'a': 'a', 'c': 'c'} + dd = dict() + for i in d: + if d[i] != "": dd[i] = d[i] + return dd + +def delicious_datetime(str): + """Parse a ISO 8601 formatted string to a Python datetime ... + """ + return datetime.datetime(*time.strptime(str, ISO_8601_DATETIME)[0:6]) + +def http_request(url, user_agent=USER_AGENT, retry=4): + """Retrieve the contents referenced by the URL using urllib2. + + Retries up to four times (default) on exceptions. 
+ """ + request = urllib2.Request(url, headers={'User-Agent':user_agent}) + + # Remember last error + e = None + + # Repeat request on time-out errors + tries = retry; + while tries: + try: + return urllib2.urlopen(request) + + except urllib2.HTTPError, e: # protocol errors, + raise PyDeliciousException, "%s" % e + + except urllib2.URLError, e: + # xxx: Ugly check for time-out errors + #if len(e)>0 and 'timed out' in arg[0]: + print >> sys.stderr, "%s, %s tries left." % (e, tries) + Waiter() + tries = tries - 1 + #else: + # tries = None + + # Give up + raise PyDeliciousException, \ + "Unable to retrieve data at '%s', %s" % (url, e) + +def http_auth_request(url, host, user, passwd, user_agent=USER_AGENT): + """Call an HTTP server with authorization credentials using urllib2. + """ + if DEBUG: httplib.HTTPConnection.debuglevel = 1 + + # Hook up handler/opener to urllib2 + password_manager = urllib2.HTTPPasswordMgrWithDefaultRealm() + password_manager.add_password(None, host, user, passwd) + auth_handler = urllib2.HTTPBasicAuthHandler(password_manager) + opener = urllib2.build_opener(auth_handler) + urllib2.install_opener(opener) + + return http_request(url, user_agent) + +def dlcs_api_request(path, params='', user='', passwd='', throttle=True): + """Retrieve/query a path within the del.icio.us API. + + This implements a minimum interval between calls to avoid + throttling. [#]_ Use param 'throttle' to turn this behaviour off. + + todo: back off on 503's (HTTPError, URLError? testing + + Returned XML does not always correspond with given del.icio.us examples + [#]_. + + .. 
[#] http://del.icio.us/help/api/ + """ + if throttle: + Waiter() + + if params: + # params come as a dict, strip empty entries and urlencode + url = "%s/%s?%s" % (DLCS_API, path, urllib.urlencode(dict0(params))) + else: + url = "%s/%s" % (DLCS_API, path) + + if DEBUG: print >>sys.stderr, "dlcs_api_request: %s" % url + + try: + return http_auth_request(url, DLCS_API_HOST, user, passwd, USER_AGENT) + + # bvb: Is this ever raised? When? + except DefaultErrorHandler, e: + print >>sys.stderr, "%s" % e + +def dlcs_parse_xml(data, split_tags=False): + """Parse any del.icio.us XML document and return Python data structure. + + Recognizes all XML document formats as returned by the version 1 API and + translates to a JSON-like data structure (dicts 'n lists). + + Returned instance is always a dictionary. Examples:: + + {'posts': [{'url':'...','hash':'...',},],} + {'tags':['tag1', 'tag2',]} + {'dates': [{'count':'...','date':'...'},], 'tag':'', 'user':'...'} + {'result':(True, "done")} + # etcetera. + """ + + if DEBUG>3: print >>sys.stderr, "dlcs_parse_xml: parsing from ", data + + if not hasattr(data, 'read'): + data = StringIO(data) + + doc = parse_xml(data) + root = doc.getroot() + fmt = root.tag + + # Split up into three cases: Data, Result or Update + if fmt in ('tags', 'posts', 'dates', 'bundles'): + + # Data: expect a list of data elements, 'resources'. + # Use `fmt` (without last 's') to find data elements, elements + # don't have contents, attributes contain all the data we need: + # append to list + elist = [el.attrib for el in doc.findall(fmt[:-1])] + + # Return list in dict, use tagname of rootnode as keyname. + data = {fmt: elist} + + # Root element might have attributes too, append dict. + data.update(root.attrib) + + return data + + elif fmt == 'result': + + # Result: answer to operations + if root.attrib.has_key('code'): + msg = root.attrib['code'] + else: + msg = root.text + + # Return {'result':(True, msg)} for /known/ O.K. 
messages, + # use (False, msg) otherwise + v = msg in DLCS_OK_MESSAGES + return {fmt: (v, msg)} + + elif fmt == 'update': + + # Update: "time" + #return {fmt: root.attrib} + return {fmt: {'time':time.strptime(root.attrib['time'], ISO_8601_DATETIME)}} + + else: + raise PyDeliciousException, "Unknown XML document format '%s'" % fmt + +def dlcs_rss_request(tag = "", popular = 0, user = "", url = ''): + """Handle a request for RSS + + todo: translate from German + + rss sollte nun wieder funktionieren, aber diese try, except scheisse ist so nicht schoen + + rss wird unterschiedlich zusammengesetzt. ich kann noch keinen einheitlichen zusammenhang + zwischen daten (url, desc, ext, usw) und dem feed erkennen. warum k[o]nnen die das nicht einheitlich machen? + """ + tag = str2quote(tag) + user = str2quote(user) + if url != '': + # http://del.icio.us/rss/url/efbfb246d886393d48065551434dab54 + url = DLCS_RSS + '''url/%s'''%md5.new(url).hexdigest() + elif user != '' and tag != '': + url = DLCS_RSS + '''%(user)s/%(tag)s'''%dict(user=user, tag=tag) + elif user != '' and tag == '': + # http://del.icio.us/rss/delpy + url = DLCS_RSS + '''%s'''%user + elif popular == 0 and tag == '': + url = DLCS_RSS + elif popular == 0 and tag != '': + # http://del.icio.us/rss/tag/apple + # http://del.icio.us/rss/tag/web2.0 + url = DLCS_RSS + "tag/%s"%tag + elif popular == 1 and tag == '': + url = DLCS_RSS + '''popular/''' + elif popular == 1 and tag != '': + url = DLCS_RSS + '''popular/%s'''%tag + rss = http_request(url).read() + rss = feedparser.parse(rss) + # print rss +# for e in rss.entries: print e;print + l = posts() + for e in rss.entries: + if e.has_key("links") and e["links"]!=[] and e["links"][0].has_key("href"): + url = e["links"][0]["href"] + elif e.has_key("link"): + url = e["link"] + elif e.has_key("id"): + url = e["id"] + else: + url = "" + if e.has_key("title"): + description = e['title'] + elif e.has_key("title_detail") and e["title_detail"].has_key("title"): + description = 
e["title_detail"]['value'] + else: + description = '' + try: tags = e['categories'][0][1] + except: + try: tags = e["category"] + except: tags = "" + if e.has_key("modified"): + dt = e['modified'] + else: + dt = "" + if e.has_key("summary"): + extended = e['summary'] + elif e.has_key("summary_detail"): + e['summary_detail']["value"] + else: + extended = "" + if e.has_key("author"): + user = e['author'] + else: + user = "" + # time = dt ist weist auf ein problem hin + # die benennung der variablen ist nicht einheitlich + # api senden und + # xml bekommen sind zwei verschiedene schuhe :( + l.append(post(url = url, description = description, tags = tags, dt = dt, extended = extended, user = user)) + return l + + +### Main module class + +class DeliciousAPI: + """Class providing main interace to del.icio.us API. + + Methods ``request`` and ``request_raw`` represent the core. For all API + paths there are furthermore methods (e.g. posts_add for 'posts/all') with + an explicit declaration of the parameters and documentation. These all call + ``request`` and pass on extra keywords like ``_raw``. + """ + + def __init__(self, user, passwd, codec='iso-8859-1', api_request=dlcs_api_request, xml_parser=dlcs_parse_xml): + """Initialize access to the API with ``user`` and ``passwd``. + + ``codec`` sets the encoding of the arguments. + + The ``api_request`` and ``xml_parser`` parameters by default point to + functions within this package with standard implementations to + request and parse a resource. See ``dlcs_api_request()`` and + ``dlcs_parse_xml()``. Note that ``api_request`` should return a + file-like instance with an HTTPMessage instance under ``info()``, + see ``urllib2.openurl`` for more info. 
+ """ + assert user != "" + self.user = user + self.passwd = passwd + self.codec = codec + + # Implement communication to server and parsing of respons messages: + assert callable(api_request) + self._api_request = api_request + assert callable(xml_parser) + self._parse_response = xml_parser + + def _call_server(self, path, **params): + params = dict0(params) + for key in params: + params[key] = params[key].encode(self.codec) + + # see __init__ for _api_request() + return self._api_request(path, params, self.user, self.passwd) + + + ### Core functionality + + def request(self, path, _raw=False, **params): + """Calls a path in the API, parses the answer to a JSON-like structure by + default. Use with ``_raw=True`` or ``call request_raw()`` directly to + get the filehandler and process the response message manually. + + Calls to some paths will return a `result` message, i.e.:: + + <result code="..." /> + + or:: + + <result>...</result> + + These are all parsed to ``{'result':(Boolean, MessageString)}`` and this + method will raise ``DeliciousError`` on negative `result` answers. Using + ``_raw=True`` bypasses all parsing and will never raise ``DeliciousError``. + + See ``dlcs_parse_xml()`` and ``self.request_raw()``.""" + + # method _parse_response is bound in `__init__()`, `_call_server` + # uses `_api_request` also set in `__init__()` + if _raw: + # return answer + return self.request_raw(path, **params) + + else: + # get answer and parse + fl = self._call_server(path, **params) + rs = self._parse_response(fl) + + # Raise an error for negative 'result' answers + if type(rs) == dict and rs == 'result' and not rs['result'][0]: + errmsg = "" + if len(rs['result'])>0: + errmsg = rs['result'][1:] + raise DeliciousError, errmsg + + return rs + + def request_raw(self, path, **params): + """Calls the path in the API, returns the filehandle. Returned + file-like instances have an ``HTTPMessage`` instance with HTTP header + information available. 
Use ``filehandle.info()`` or refer to the + ``urllib2.openurl`` documentation. + """ + # see `request()` on how the response can be handled + return self._call_server(path, **params) + + ### Explicit declarations of API paths, their parameters and docs + + # Tags + def tags_get(self, **kwds): + """Returns a list of tags and the number of times it is used by the user. + :: + + <tags> + <tag tag="TagName" count="888"> + """ + return self.request("tags/get", **kwds) + + def tags_rename(self, old, new, **kwds): + """Rename an existing tag with a new tag name. Returns a `result` + message or raises an ``DeliciousError``. See ``self.request()``. + + &old (required) + Tag to rename. + &new (required) + New name. + """ + return self.request("tags/rename", old=old, new=new, **kwds) + + # Posts + def posts_update(self, **kwds): + """Returns the last update time for the user. Use this before calling + `posts_all` to see if the data has changed since the last fetch. + :: + + <update time="CCYY-MM-DDThh:mm:ssZ"> + """ + return self.request("posts/update", **kwds) + + def posts_dates(self, tag="", **kwds): + """Returns a list of dates with the number of posts at each date. + :: + + <dates> + <date date="CCYY-MM-DD" count="888"> + + &tag (optional). + Filter by this tag. + """ + return self.request("posts/dates", tag=tag, **kwds) + + def posts_get(self, tag="", dt="", url="", **kwds): + """Returns posts matching the arguments. If no date or url is given, + most recent date will be used. + :: + + <posts dt="CCYY-MM-DD" tag="..." user="..."> + <post ...> + + &tag (optional). + Filter by this tag. + &dt (optional). + Filter by this date (CCYY-MM-DDThh:mm:ssZ). + &url (optional). + Filter by this url. + """ + return self.request("posts/get", tag=tag, dt=dt, url=url, **kwds) + + def posts_recent(self, tag="", count="", **kwds): + """Returns a list of the most recent posts, filtered by argument. + :: + + <posts tag="..." user="..."> + <post ...> + + &tag (optional). 
+ Filter by this tag. + &count (optional). + Number of items to retrieve (Default:15, Maximum:100). + """ + return self.request("posts/recent", tag=tag, count=count, **kwds) + + def posts_all(self, tag="", **kwds): + """Returns all posts. Please use sparingly. Call the `posts_update` + method to see if you need to fetch this at all. + :: + + <posts tag="..." user="..." update="CCYY-MM-DDThh:mm:ssZ"> + <post ...> + + &tag (optional). + Filter by this tag. + """ + return self.request("posts/all", tag=tag, **kwds) + + def posts_add(self, url, description, extended="", tags="", dt="", + replace="no", shared="yes", **kwds): + """Add a post to del.icio.us. Returns a `result` message or raises an + ``DeliciousError``. See ``self.request()``. + + &url (required) + the url of the item. + &description (required) + the description of the item. + &extended (optional) + notes for the item. + &tags (optional) + tags for the item (space delimited). + &dt (optional) + datestamp of the item (format "CCYY-MM-DDThh:mm:ssZ"). + + Requires a LITERAL "T" and "Z" like in ISO8601 at http://www.cl.cam.ac.uk/~mgk25/iso-time.html for example: "1984-09-01T14:21:31Z" + &replace=no (optional) - don't replace post if given url has already been posted. + &shared=no (optional) - make the item private + """ + return self.request("posts/add", url=url, description=description, + extended=extended, tags=tags, dt=dt, + replace=replace, shared=shared, **kwds) + + def posts_delete(self, url, **kwds): + """Delete a post from del.icio.us. Returns a `result` message or + raises an ``DeliciousError``. See ``self.request()``. + + &url (required) + the url of the item. + """ + return self.request("posts/delete", url=url, **kwds) + + # Bundles + def bundles_all(self, **kwds): + """Retrieve user bundles from del.icio.us. + :: + + <bundles> + <bundel name="..." 
tags=..."> + """ + return self.request("tags/bundles/all", **kwds) + + def bundles_set(self, bundle, tags, **kwds): + """Assign a set of tags to a single bundle, wipes away previous + settings for bundle. Returns a `result` messages or raises an + ``DeliciousError``. See ``self.request()``. + + &bundle (required) + the bundle name. + &tags (required) + list of tags (space seperated). + """ + if type(tags)==list: + tags = " ".join(tags) + return self.request("tags/bundles/set", bundle=bundle, tags=tags, + **kwds) + + def bundles_delete(self, bundle, **kwds): + """Delete a bundle from del.icio.us. Returns a `result` message or + raises an ``DeliciousError``. See ``self.request()``. + + &bundle (required) + the bundle name. + """ + return self.request("tags/bundles/delete", bundle=bundle, **kwds) + + ### Utils + + # Lookup table for del.icio.us url-path to DeliciousAPI method. + paths = { + 'tags/get': tags_get, + 'tags/rename': tags_rename, + 'posts/update': posts_update, + 'posts/dates': posts_dates, + 'posts/get': posts_get, + 'posts/recent': posts_recent, + 'posts/all': posts_all, + 'posts/add': posts_add, + 'posts/delete': posts_delete, + 'tags/bundles/all': bundles_all, + 'tags/bundles/set': bundles_set, + 'tags/bundles/delete': bundles_delete, + } + + def get_url(self, url): + """Return the del.icio.us url at which the HTML page with posts for + ``url`` can be found. + """ + return "http://del.icio.us/url/?url=%s" % (url,) + + +### Convenience functions on this package + +def apiNew(user, passwd): + """creates a new DeliciousAPI object. 
+ requires user(name) and passwd + """ + return DeliciousAPI(user=user, passwd=passwd) + +def add(user, passwd, url, description, tags="", extended="", dt="", replace="no"): + return apiNew(user, passwd).posts_add(url=url, description=description, extended=extended, tags=tags, dt=dt, replace=replace) + +def get(user, passwd, tag="", dt="", count = 0): + posts = apiNew(user, passwd).posts_get(tag=tag,dt=dt) + if count != 0: posts = posts[0:count] + return posts + +def get_all(user, passwd, tag=""): + return apiNew(user, passwd).posts_all(tag=tag) + +def delete(user, passwd, url): + return apiNew(user, passwd).posts_delete(url=url) + +def rename_tag(user, passwd, oldtag, newtag): + return apiNew(user=user, passwd=passwd).tags_rename(old=oldtag, new=newtag) + +def get_tags(user, passwd): + return apiNew(user=user, passwd=passwd).tags_get() + + +### RSS functions bvb: still working...? +def getrss(tag="", popular=0, url='', user=""): + """get posts from del.icio.us via parsing RSS (bvb:or HTML) + + todo: not tested + + tag (opt) sort by tag + popular (opt) look for the popular stuff + user (opt) get the posts by a user, this striks popular + url (opt) get the posts by url + """ + return dlcs_rss_request(tag=tag, popular=popular, user=user, url=url) + +def get_userposts(user): + return getrss(user = user) + +def get_tagposts(tag): + return getrss(tag = tag) + +def get_urlposts(url): + return getrss(url = url) + +def get_popular(tag = ""): + return getrss(tag = tag, popular = 1) + + +### TODO: implement JSON fetching +def json_posts(user, count=15): + """http://del.icio.us/feeds/json/mpe + http://del.icio.us/feeds/json/mpe/art+history + count=### the number of posts you want to get (default is 15, maximum is 100) + raw a raw JSON object is returned, instead of an object named Delicious.posts + """ + +def json_tags(user, atleast, count, sort='alpha'): + """http://del.icio.us/feeds/json/tags/mpe + atleast=### include only tags for which there are at least ### number of 
posts + count=### include ### tags, counting down from the top + sort={alpha|count} construct the object with tags in alphabetic order (alpha), or by count of posts (count) + callback=NAME wrap the object definition in a function call NAME(...), thus invoking that function when the feed is executed + raw a pure JSON object is returned, instead of code that will construct an object named Delicious.tags + """ + +def json_network(user): + """http://del.icio.us/feeds/json/network/mpe + callback=NAME wrap the object definition in a function call NAME(...) + ?raw a raw JSON object is returned, instead of an object named Delicious.posts + """ + +def json_fans(user): + """http://del.icio.us/feeds/json/fans/mpe + callback=NAME wrap the object definition in a function call NAME(...) + ?raw a pure JSON object is returned, instead of an object named Delicious. + """ + |