From c54417f2eb53c2bbeff7e94acef688c97cd949d5 Mon Sep 17 00:00:00 2001 From: luxagraf Date: Thu, 8 May 2014 19:47:44 -0400 Subject: moved taggit to virtualenv and cleaned up lib app --- app/lib/taggit/__init__.py | 1 - app/lib/taggit/admin.py | 16 - app/lib/taggit/forms.py | 21 - app/lib/taggit/locale/de/LC_MESSAGES/django.mo | Bin 1331 -> 0 bytes app/lib/taggit/locale/de/LC_MESSAGES/django.po | 67 - app/lib/taggit/locale/en/LC_MESSAGES/django.po | 68 - app/lib/taggit/locale/he/LC_MESSAGES/django.mo | Bin 847 -> 0 bytes app/lib/taggit/locale/he/LC_MESSAGES/django.po | 69 - app/lib/taggit/locale/nl/LC_MESSAGES/django.mo | Bin 1217 -> 0 bytes app/lib/taggit/locale/nl/LC_MESSAGES/django.po | 64 - app/lib/taggit/locale/ru/LC_MESSAGES/django.mo | Bin 1513 -> 0 bytes app/lib/taggit/locale/ru/LC_MESSAGES/django.po | 70 - app/lib/taggit/managers.py | 244 --- app/lib/taggit/models.py | 160 -- app/lib/taggit/tests/__init__.py | 0 app/lib/taggit/tests/forms.py | 20 - app/lib/taggit/tests/models.py | 143 -- app/lib/taggit/tests/runtests.py | 34 - app/lib/taggit/tests/tests.py | 475 ------ app/lib/taggit/utils.py | 126 -- app/lib/taggit/views.py | 18 - app/lib/templatetags/templatetags/markdown.py | 9 - app/lib/templatetags/templatetags/smartypants.py | 878 ---------- app/lib/templatetags/templatetags/typogrify.py | 216 --- app/lib/utils/markdown2.py | 1877 ---------------------- 25 files changed, 4576 deletions(-) delete mode 100644 app/lib/taggit/__init__.py delete mode 100644 app/lib/taggit/admin.py delete mode 100644 app/lib/taggit/forms.py delete mode 100644 app/lib/taggit/locale/de/LC_MESSAGES/django.mo delete mode 100644 app/lib/taggit/locale/de/LC_MESSAGES/django.po delete mode 100644 app/lib/taggit/locale/en/LC_MESSAGES/django.po delete mode 100644 app/lib/taggit/locale/he/LC_MESSAGES/django.mo delete mode 100644 app/lib/taggit/locale/he/LC_MESSAGES/django.po delete mode 100644 app/lib/taggit/locale/nl/LC_MESSAGES/django.mo delete mode 100644 app/lib/taggit/locale/nl/LC_MESSAGES/django.po delete mode 100644 app/lib/taggit/locale/ru/LC_MESSAGES/django.mo delete mode 100644 app/lib/taggit/locale/ru/LC_MESSAGES/django.po delete mode 100644 app/lib/taggit/managers.py delete mode 100644 app/lib/taggit/models.py delete mode 100644 app/lib/taggit/tests/__init__.py delete mode 100644 app/lib/taggit/tests/forms.py delete mode 100644 app/lib/taggit/tests/models.py delete mode 100755 app/lib/taggit/tests/runtests.py delete mode 100644 app/lib/taggit/tests/tests.py delete mode 100644 app/lib/taggit/utils.py delete mode 100644 app/lib/taggit/views.py delete mode 100644 app/lib/templatetags/templatetags/markdown.py delete mode 100644 app/lib/templatetags/templatetags/smartypants.py delete mode 100644 app/lib/templatetags/templatetags/typogrify.py delete mode 100755 app/lib/utils/markdown2.py (limited to 'app/lib') diff --git a/app/lib/taggit/__init__.py b/app/lib/taggit/__init__.py deleted file mode 100644 index 3a055a1..0000000 --- a/app/lib/taggit/__init__.py +++ /dev/null @@ -1 +0,0 @@ -VERSION = (0, 9, 2) diff --git a/app/lib/taggit/admin.py b/app/lib/taggit/admin.py deleted file mode 100644 index 5155b6f..0000000 --- a/app/lib/taggit/admin.py +++ /dev/null @@ -1,16 +0,0 @@ -from django.contrib import admin - -from taggit.models import Tag, TaggedItem - - -class TaggedItemInline(admin.StackedInline): - model = TaggedItem - -class TagAdmin(admin.ModelAdmin): - list_display = ["name"] - inlines = [ - TaggedItemInline - ] - - -admin.site.register(Tag, TagAdmin) diff --git a/app/lib/taggit/forms.py 
b/app/lib/taggit/forms.py deleted file mode 100644 index e0198bd..0000000 --- a/app/lib/taggit/forms.py +++ /dev/null @@ -1,21 +0,0 @@ -from django import forms -from django.utils.translation import ugettext as _ - -from taggit.utils import parse_tags, edit_string_for_tags - - -class TagWidget(forms.TextInput): - def render(self, name, value, attrs=None): - if value is not None and not isinstance(value, basestring): - value = edit_string_for_tags([o.tag for o in value.select_related("tag")]) - return super(TagWidget, self).render(name, value, attrs) - -class TagField(forms.CharField): - widget = TagWidget - - def clean(self, value): - value = super(TagField, self).clean(value) - try: - return parse_tags(value) - except ValueError: - raise forms.ValidationError(_("Please provide a comma-separated list of tags.")) diff --git a/app/lib/taggit/locale/de/LC_MESSAGES/django.mo b/app/lib/taggit/locale/de/LC_MESSAGES/django.mo deleted file mode 100644 index 3d9eaaf..0000000 Binary files a/app/lib/taggit/locale/de/LC_MESSAGES/django.mo and /dev/null differ diff --git a/app/lib/taggit/locale/de/LC_MESSAGES/django.po b/app/lib/taggit/locale/de/LC_MESSAGES/django.po deleted file mode 100644 index 98ecdac..0000000 --- a/app/lib/taggit/locale/de/LC_MESSAGES/django.po +++ /dev/null @@ -1,67 +0,0 @@ -#, fuzzy -msgid "" -msgstr "" -"Project-Id-Version: django-taggit\n" -"Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2010-09-07 09:26-0700\n" -"PO-Revision-Date: 2010-09-07 09:26-0700\n" -"Last-Translator: Jannis Leidel \n" -"Language-Team: German \n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=UTF-8\n" -"Content-Transfer-Encoding: 8bit\n" -"Plural-Forms: nplurals=2; plural=(n != 1)\n" - -#: forms.py:20 -msgid "Please provide a comma-separated list of tags." -msgstr "Bitte eine durch Komma getrennte Schlagwortliste eingeben." - -#: managers.py:39 managers.py:83 models.py:50 -msgid "Tags" -msgstr "Schlagwörter" - -#: managers.py:84 -msgid "A comma-separated list of tags." -msgstr "Eine durch Komma getrennte Schlagwortliste." - -#: models.py:10 -msgid "Name" -msgstr "Name" - -#: models.py:11 -msgid "Slug" -msgstr "Kürzel" - -#: models.py:49 -msgid "Tag" -msgstr "Schlagwort" - -#: models.py:56 -#, python-format -msgid "%(object)s tagged with %(tag)s" -msgstr "%(object)s verschlagwortet mit %(tag)s" - -#: models.py:100 -msgid "Object id" -msgstr "Objekt-ID" - -#: models.py:104 models.py:110 -msgid "Content type" -msgstr "Inhaltstyp" - -#: models.py:138 -msgid "Tagged Item" -msgstr "Verschlagwortetes Objekt" - -#: models.py:139 -msgid "Tagged Items" -msgstr "Verschlagwortete Objekte" - -#: contrib/suggest/models.py:57 -msgid "" -"Enter a valid Regular Expression. To make it case-insensitive include \"(?i)" -"\" in your expression." -msgstr "" -"Bitte einen regulären Ausdruck eingeben. Fügen Sie \"(?i) \" dem " -"Ausdruck hinzu, um nicht zwischen Groß- und Kleinschreibung zu " -"unterscheiden." diff --git a/app/lib/taggit/locale/en/LC_MESSAGES/django.po b/app/lib/taggit/locale/en/LC_MESSAGES/django.po deleted file mode 100644 index c5642c7..0000000 --- a/app/lib/taggit/locale/en/LC_MESSAGES/django.po +++ /dev/null @@ -1,68 +0,0 @@ -# SOME DESCRIPTIVE TITLE. -# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER -# This file is distributed under the same license as the PACKAGE package. -# FIRST AUTHOR , YEAR. 
-# -#, fuzzy -msgid "" -msgstr "" -"Project-Id-Version: PACKAGE VERSION\n" -"Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2010-09-07 09:45-0700\n" -"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" -"Last-Translator: FULL NAME \n" -"Language-Team: LANGUAGE \n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=UTF-8\n" -"Content-Transfer-Encoding: 8bit\n" - -#: forms.py:20 -msgid "Please provide a comma-separated list of tags." -msgstr "" - -#: managers.py:39 managers.py:83 models.py:50 -msgid "Tags" -msgstr "" - -#: managers.py:84 -msgid "A comma-separated list of tags." -msgstr "" - -#: models.py:10 -msgid "Name" -msgstr "" - -#: models.py:11 -msgid "Slug" -msgstr "" - -#: models.py:49 -msgid "Tag" -msgstr "" - -#: models.py:56 -#, python-format -msgid "%(object)s tagged with %(tag)s" -msgstr "" - -#: models.py:100 -msgid "Object id" -msgstr "" - -#: models.py:104 models.py:110 -msgid "Content type" -msgstr "" - -#: models.py:138 -msgid "Tagged Item" -msgstr "" - -#: models.py:139 -msgid "Tagged Items" -msgstr "" - -#: contrib/suggest/models.py:57 -msgid "" -"Enter a valid Regular Expression. To make it case-insensitive include \"(?i)" -"\" in your expression." -msgstr "" diff --git a/app/lib/taggit/locale/he/LC_MESSAGES/django.mo b/app/lib/taggit/locale/he/LC_MESSAGES/django.mo deleted file mode 100644 index 562db71..0000000 Binary files a/app/lib/taggit/locale/he/LC_MESSAGES/django.mo and /dev/null differ diff --git a/app/lib/taggit/locale/he/LC_MESSAGES/django.po b/app/lib/taggit/locale/he/LC_MESSAGES/django.po deleted file mode 100644 index e27a878..0000000 --- a/app/lib/taggit/locale/he/LC_MESSAGES/django.po +++ /dev/null @@ -1,69 +0,0 @@ -# SOME DESCRIPTIVE TITLE. -# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER -# This file is distributed under the same license as the PACKAGE package. -# FIRST AUTHOR , YEAR. -# -msgid "" -msgstr "" -"Project-Id-Version: Django Taggit\n" -"Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2010-06-26 12:47-0500\n" -"PO-Revision-Date: 2010-06-26 12:54-0600\n" -"Last-Translator: Alex \n" -"Language-Team: LANGUAGE \n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=UTF-8\n" -"Content-Transfer-Encoding: 8bit\n" -"Plural-Forms: nplurals=2; plural=(n != 1);\n" - -#: forms.py:20 -msgid "Please provide a comma-separated list of tags." -msgstr "נא לספק רשימה של תגים מופרדת עם פסיקים." - -#: managers.py:41 -#: managers.py:113 -#: models.py:18 -msgid "Tags" -msgstr "תגיות" - -#: managers.py:114 -msgid "A comma-separated list of tags." -msgstr "רשימה של תגים מופרדת עם פסיקים." - -#: models.py:10 -msgid "Name" -msgstr "שם" - -#: models.py:11 -msgid "Slug" -msgstr "" - -#: models.py:17 -msgid "Tag" -msgstr "תג" - -#: models.py:56 -#, python-format -msgid "%(object)s tagged with %(tag)s" -msgstr "%(object)s מתויג עם %(tag)s" - -#: models.py:86 -msgid "Object id" -msgstr "" - -#: models.py:87 -msgid "Content type" -msgstr "" - -#: models.py:92 -msgid "Tagged Item" -msgstr "" - -#: models.py:93 -msgid "Tagged Items" -msgstr "" - -#: contrib/suggest/models.py:57 -msgid "Enter a valid Regular Expression. To make it case-insensitive include \"(?i)\" in your expression." 
-msgstr "" - diff --git a/app/lib/taggit/locale/nl/LC_MESSAGES/django.mo b/app/lib/taggit/locale/nl/LC_MESSAGES/django.mo deleted file mode 100644 index 28e7b7e..0000000 Binary files a/app/lib/taggit/locale/nl/LC_MESSAGES/django.mo and /dev/null differ diff --git a/app/lib/taggit/locale/nl/LC_MESSAGES/django.po b/app/lib/taggit/locale/nl/LC_MESSAGES/django.po deleted file mode 100644 index 7871b0b..0000000 --- a/app/lib/taggit/locale/nl/LC_MESSAGES/django.po +++ /dev/null @@ -1,64 +0,0 @@ -msgid "" -msgstr "" -"Project-Id-Version: django-taggit\n" -"Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2010-09-07 09:45-0700\n" -"PO-Revision-Date: 2010-09-07 23:04+0100\n" -"Last-Translator: Jeffrey Gelens \n" -"Language-Team: Dutch\n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=UTF-8\n" -"Content-Transfer-Encoding: 8bit\n" - -#: forms.py:20 -msgid "Please provide a comma-separated list of tags." -msgstr "Geef een door komma gescheiden lijst van tags." - -#: managers.py:39 -#: managers.py:83 -#: models.py:50 -msgid "Tags" -msgstr "Tags" - -#: managers.py:84 -msgid "A comma-separated list of tags." -msgstr "Een door komma gescheiden lijst van tags." - -#: models.py:10 -msgid "Name" -msgstr "Naam" - -#: models.py:11 -msgid "Slug" -msgstr "Slug" - -#: models.py:49 -msgid "Tag" -msgstr "Tag" - -#: models.py:56 -#, python-format -msgid "%(object)s tagged with %(tag)s" -msgstr "%(object)s getagged met %(tag)s" - -#: models.py:100 -msgid "Object id" -msgstr "Object-id" - -#: models.py:104 -#: models.py:110 -msgid "Content type" -msgstr "Inhoudstype" - -#: models.py:138 -msgid "Tagged Item" -msgstr "Object getagged" - -#: models.py:139 -msgid "Tagged Items" -msgstr "Objecten getagged" - -#: contrib/suggest/models.py:57 -msgid "Enter a valid Regular Expression. To make it case-insensitive include \"(?i)\" in your expression." -msgstr "Voer een valide reguliere expressie in. Voeg \"(?i)\" aan de expressie toe om deze hoofdletter ongevoelig te maken." - diff --git a/app/lib/taggit/locale/ru/LC_MESSAGES/django.mo b/app/lib/taggit/locale/ru/LC_MESSAGES/django.mo deleted file mode 100644 index 61a7e39..0000000 Binary files a/app/lib/taggit/locale/ru/LC_MESSAGES/django.mo and /dev/null differ diff --git a/app/lib/taggit/locale/ru/LC_MESSAGES/django.po b/app/lib/taggit/locale/ru/LC_MESSAGES/django.po deleted file mode 100644 index 42e3ebe..0000000 --- a/app/lib/taggit/locale/ru/LC_MESSAGES/django.po +++ /dev/null @@ -1,70 +0,0 @@ -# SOME DESCRIPTIVE TITLE. -# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER -# This file is distributed under the same license as the PACKAGE package. -# FIRST AUTHOR , YEAR. -# -msgid "" -msgstr "" -"Project-Id-Version: Django Taggit\n" -"Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2010-06-11 11:28+0700\n" -"PO-Revision-Date: 2010-06-11 11:30+0700\n" -"Last-Translator: Igor 'idle sign' Starikov \n" -"Language-Team: \n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=UTF-8\n" -"Content-Transfer-Encoding: 8bit\n" -"Plural-Forms: nplurals=3; plural=n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2;\n" -"X-Poedit-Language: Russian\n" - -#: forms.py:20 -msgid "Please provide a comma-separated list of tags." -msgstr "Укажите метки через запятую." - -#: managers.py:41 -#: managers.py:101 -#: models.py:17 -msgid "Tags" -msgstr "Метки" - -#: managers.py:102 -msgid "A comma-separated list of tags." -msgstr "Список меток через запятую." 
- -#: models.py:9 -msgid "Name" -msgstr "Название" - -#: models.py:10 -msgid "Slug" -msgstr "Слаг" - -#: models.py:16 -msgid "Tag" -msgstr "Метка" - -#: models.py:55 -#, python-format -msgid "%(object)s tagged with %(tag)s" -msgstr "элемент «%(object)s» с меткой «%(tag)s»" - -#: models.py:82 -msgid "Object id" -msgstr "ID объекта" - -#: models.py:83 -msgid "Content type" -msgstr "Тип содержимого" - -#: models.py:87 -msgid "Tagged Item" -msgstr "Элемент с меткой" - -#: models.py:88 -msgid "Tagged Items" -msgstr "Элементы с меткой" - -#: contrib/suggest/models.py:57 -msgid "Enter a valid Regular Expression. To make it case-insensitive include \"(?i)\" in your expression." -msgstr "Введите регулярное выражение. Чтобы сделать его чувствительным к регистру укажите \"(?i)\"." - diff --git a/app/lib/taggit/managers.py b/app/lib/taggit/managers.py deleted file mode 100644 index 55201b4..0000000 --- a/app/lib/taggit/managers.py +++ /dev/null @@ -1,244 +0,0 @@ -from django.contrib.contenttypes.generic import GenericRelation -from django.contrib.contenttypes.models import ContentType -from django.db import models -from django.db.models.fields.related import ManyToManyRel, RelatedField, add_lazy_relation -from django.db.models.related import RelatedObject -from django.utils.text import capfirst -from django.utils.translation import ugettext_lazy as _ - -from taggit.forms import TagField -from taggit.models import TaggedItem, GenericTaggedItemBase -from taggit.utils import require_instance_manager - - -try: - all -except NameError: - # 2.4 compat - try: - from django.utils.itercompat import all - except ImportError: - # 1.1.X compat - def all(iterable): - for item in iterable: - if not item: - return False - return True - - -class TaggableRel(ManyToManyRel): - def __init__(self): - self.related_name = None - self.limit_choices_to = {} - self.symmetrical = True - self.multiple = True - self.through = None - - -class TaggableManager(RelatedField): - def __init__(self, verbose_name=_("Tags"), - help_text=_("A comma-separated list of tags."), through=None, blank=False): - self.through = through or TaggedItem - self.rel = TaggableRel() - self.verbose_name = verbose_name - self.help_text = help_text - self.blank = blank - self.editable = True - self.unique = False - self.creates_table = False - self.db_column = None - self.choices = None - self.serialize = False - self.null = True - self.creation_counter = models.Field.creation_counter - models.Field.creation_counter += 1 - - def __get__(self, instance, model): - if instance is not None and instance.pk is None: - raise ValueError("%s objects need to have a primary key value " - "before you can access their tags." 
% model.__name__) - manager = _TaggableManager( - through=self.through, model=model, instance=instance - ) - return manager - - def contribute_to_class(self, cls, name): - self.name = self.column = name - self.model = cls - cls._meta.add_field(self) - setattr(cls, name, self) - if not cls._meta.abstract: - if isinstance(self.through, basestring): - def resolve_related_class(field, model, cls): - self.through = model - self.post_through_setup(cls) - add_lazy_relation( - cls, self, self.through, resolve_related_class - ) - else: - self.post_through_setup(cls) - - def post_through_setup(self, cls): - self.use_gfk = ( - self.through is None or issubclass(self.through, GenericTaggedItemBase) - ) - self.rel.to = self.through._meta.get_field("tag").rel.to - if self.use_gfk: - tagged_items = GenericRelation(self.through) - tagged_items.contribute_to_class(cls, "tagged_items") - - def save_form_data(self, instance, value): - getattr(instance, self.name).set(*value) - - def formfield(self, form_class=TagField, **kwargs): - defaults = { - "label": capfirst(self.verbose_name), - "help_text": self.help_text, - "required": not self.blank - } - defaults.update(kwargs) - return form_class(**defaults) - - def value_from_object(self, instance): - if instance.pk: - return self.through.objects.filter(**self.through.lookup_kwargs(instance)) - return self.through.objects.none() - - def related_query_name(self): - return self.model._meta.module_name - - def m2m_reverse_name(self): - return self.through._meta.get_field_by_name("tag")[0].column - - def m2m_target_field_name(self): - return self.model._meta.pk.name - - def m2m_reverse_target_field_name(self): - return self.rel.to._meta.pk.name - - def m2m_column_name(self): - if self.use_gfk: - return self.through._meta.virtual_fields[0].fk_field - return self.through._meta.get_field('content_object').column - - def db_type(self, connection=None): - return None - - def m2m_db_table(self): - return self.through._meta.db_table - - def extra_filters(self, pieces, pos, negate): - if negate or not self.use_gfk: - return [] - prefix = "__".join(["tagged_items"] + pieces[:pos-2]) - cts = map(ContentType.objects.get_for_model, _get_subclasses(self.model)) - if len(cts) == 1: - return [("%s__content_type" % prefix, cts[0])] - return [("%s__content_type__in" % prefix, cts)] - - def bulk_related_objects(self, new_objs, using): - return [] - - -class _TaggableManager(models.Manager): - def __init__(self, through, model, instance): - self.through = through - self.model = model - self.instance = instance - - def get_query_set(self): - return self.through.tags_for(self.model, self.instance) - - def _lookup_kwargs(self): - return self.through.lookup_kwargs(self.instance) - - @require_instance_manager - def add(self, *tags): - str_tags = set([ - t - for t in tags - if not isinstance(t, self.through.tag_model()) - ]) - tag_objs = set(tags) - str_tags - # If str_tags has 0 elements Django actually optimizes that to not do a - # query. Malcolm is very smart. 
- existing = self.through.tag_model().objects.filter( - name__in=str_tags - ) - tag_objs.update(existing) - - for new_tag in str_tags - set(t.name for t in existing): - tag_objs.add(self.through.tag_model().objects.create(name=new_tag)) - - for tag in tag_objs: - self.through.objects.get_or_create(tag=tag, **self._lookup_kwargs()) - - @require_instance_manager - def set(self, *tags): - self.clear() - self.add(*tags) - - @require_instance_manager - def remove(self, *tags): - self.through.objects.filter(**self._lookup_kwargs()).filter( - tag__name__in=tags).delete() - - @require_instance_manager - def clear(self): - self.through.objects.filter(**self._lookup_kwargs()).delete() - - def most_common(self): - return self.get_query_set().annotate( - num_times=models.Count(self.through.tag_relname()) - ).order_by('-num_times') - - @require_instance_manager - def similar_objects(self): - lookup_kwargs = self._lookup_kwargs() - lookup_keys = sorted(lookup_kwargs) - qs = self.through.objects.values(*lookup_kwargs.keys()) - qs = qs.annotate(n=models.Count('pk')) - qs = qs.exclude(**lookup_kwargs) - qs = qs.filter(tag__in=self.all()) - qs = qs.order_by('-n') - - # TODO: This all feels like a bit of a hack. - items = {} - if len(lookup_keys) == 1: - # Can we do this without a second query by using a select_related() - # somehow? - f = self.through._meta.get_field_by_name(lookup_keys[0])[0] - objs = f.rel.to._default_manager.filter(**{ - "%s__in" % f.rel.field_name: [r["content_object"] for r in qs] - }) - for obj in objs: - items[(getattr(obj, f.rel.field_name),)] = obj - else: - preload = {} - for result in qs: - preload.setdefault(result['content_type'], set()) - preload[result["content_type"]].add(result["object_id"]) - - for ct, obj_ids in preload.iteritems(): - ct = ContentType.objects.get_for_id(ct) - for obj in ct.model_class()._default_manager.filter(pk__in=obj_ids): - items[(ct.pk, obj.pk)] = obj - - results = [] - for result in qs: - obj = items[ - tuple(result[k] for k in lookup_keys) - ] - obj.similar_tags = result["n"] - results.append(obj) - return results - - -def _get_subclasses(model): - subclasses = [model] - for f in model._meta.get_all_field_names(): - field = model._meta.get_field_by_name(f)[0] - if (isinstance(field, RelatedObject) and - getattr(field.field.rel, "parent_link", None)): - subclasses.extend(_get_subclasses(field.model)) - return subclasses diff --git a/app/lib/taggit/models.py b/app/lib/taggit/models.py deleted file mode 100644 index d8a0a41..0000000 --- a/app/lib/taggit/models.py +++ /dev/null @@ -1,160 +0,0 @@ -import django -from django.contrib.contenttypes.models import ContentType -from django.contrib.contenttypes.generic import GenericForeignKey -from django.db import models, IntegrityError, transaction -from django.template.defaultfilters import slugify as default_slugify -from django.utils.translation import ugettext_lazy as _, ugettext - - -class TagBase(models.Model): - name = models.CharField(verbose_name=_('Name'), max_length=100) - slug = models.SlugField(verbose_name=_('Slug'), unique=True, max_length=100) - - def __unicode__(self): - return self.name - - class Meta: - abstract = True - - def save(self, *args, **kwargs): - if not self.pk and not self.slug: - self.slug = self.slugify(self.name) - if django.VERSION >= (1, 2): - from django.db import router - using = kwargs.get("using") or router.db_for_write( - type(self), instance=self) - # Make sure we write to the same db for all attempted writes, - # with a multi-master setup, theoretically we could 
try to - # write and rollback on different DBs - kwargs["using"] = using - trans_kwargs = {"using": using} - else: - trans_kwargs = {} - i = 0 - while True: - i += 1 - try: - sid = transaction.savepoint(**trans_kwargs) - res = super(TagBase, self).save(*args, **kwargs) - transaction.savepoint_commit(sid, **trans_kwargs) - return res - except IntegrityError: - transaction.savepoint_rollback(sid, **trans_kwargs) - self.slug = self.slugify(self.name, i) - else: - return super(TagBase, self).save(*args, **kwargs) - - def slugify(self, tag, i=None): - slug = default_slugify(tag) - if i is not None: - slug += "_%d" % i - return slug - - -class Tag(TagBase): - class Meta: - verbose_name = _("Tag") - verbose_name_plural = _("Tags") - - - -class ItemBase(models.Model): - def __unicode__(self): - return ugettext("%(object)s tagged with %(tag)s") % { - "object": self.content_object, - "tag": self.tag - } - - class Meta: - abstract = True - - @classmethod - def tag_model(cls): - return cls._meta.get_field_by_name("tag")[0].rel.to - - @classmethod - def tag_relname(cls): - return cls._meta.get_field_by_name('tag')[0].rel.related_name - - @classmethod - def lookup_kwargs(cls, instance): - return { - 'content_object': instance - } - - @classmethod - def bulk_lookup_kwargs(cls, instances): - return { - "content_object__in": instances, - } - - -class TaggedItemBase(ItemBase): - if django.VERSION < (1, 2): - tag = models.ForeignKey(Tag, related_name="%(class)s_items") - else: - tag = models.ForeignKey(Tag, related_name="%(app_label)s_%(class)s_items") - - class Meta: - abstract = True - - @classmethod - def tags_for(cls, model, instance=None): - if instance is not None: - return cls.tag_model().objects.filter(**{ - '%s__content_object' % cls.tag_relname(): instance - }) - return cls.tag_model().objects.filter(**{ - '%s__content_object__isnull' % cls.tag_relname(): False - }).distinct() - - -class GenericTaggedItemBase(ItemBase): - object_id = models.IntegerField(verbose_name=_('Object id'), db_index=True) - if django.VERSION < (1, 2): - content_type = models.ForeignKey( - ContentType, - verbose_name=_('Content type'), - related_name="%(class)s_tagged_items" - ) - else: - content_type = models.ForeignKey( - ContentType, - verbose_name=_('Content type'), - related_name="%(app_label)s_%(class)s_tagged_items" - ) - content_object = GenericForeignKey() - - class Meta: - abstract=True - - @classmethod - def lookup_kwargs(cls, instance): - return { - 'object_id': instance.pk, - 'content_type': ContentType.objects.get_for_model(instance) - } - - @classmethod - def bulk_lookup_kwargs(cls, instances): - # TODO: instances[0], can we assume there are instances. 
- return { - "object_id__in": [instance.pk for instance in instances], - "content_type": ContentType.objects.get_for_model(instances[0]), - } - - @classmethod - def tags_for(cls, model, instance=None): - ct = ContentType.objects.get_for_model(model) - kwargs = { - "%s__content_type" % cls.tag_relname(): ct - } - if instance is not None: - kwargs["%s__object_id" % cls.tag_relname()] = instance.pk - return cls.tag_model().objects.filter(**kwargs).distinct() - - -class TaggedItem(GenericTaggedItemBase, TaggedItemBase): - class Meta: - verbose_name = _("Tagged Item") - verbose_name_plural = _("Tagged Items") diff --git a/app/lib/taggit/tests/__init__.py b/app/lib/taggit/tests/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/app/lib/taggit/tests/forms.py b/app/lib/taggit/tests/forms.py deleted file mode 100644 index 2cdc6a8..0000000 --- a/app/lib/taggit/tests/forms.py +++ /dev/null @@ -1,20 +0,0 @@ -from django import forms - -from taggit.tests.models import Food, DirectFood, CustomPKFood, OfficialFood - - -class FoodForm(forms.ModelForm): - class Meta: - model = Food - -class DirectFoodForm(forms.ModelForm): - class Meta: - model = DirectFood - -class CustomPKFoodForm(forms.ModelForm): - class Meta: - model = CustomPKFood - -class OfficialFoodForm(forms.ModelForm): - class Meta: - model = OfficialFood diff --git a/app/lib/taggit/tests/models.py b/app/lib/taggit/tests/models.py deleted file mode 100644 index a0e21e0..0000000 --- a/app/lib/taggit/tests/models.py +++ /dev/null @@ -1,143 +0,0 @@ -from django.db import models - -from taggit.managers import TaggableManager -from taggit.models import (TaggedItemBase, GenericTaggedItemBase, TaggedItem, - TagBase, Tag) - - -class Food(models.Model): - name = models.CharField(max_length=50) - - tags = TaggableManager() - - def __unicode__(self): - return self.name - -class Pet(models.Model): - name = models.CharField(max_length=50) - - tags = TaggableManager() - - def __unicode__(self): - return self.name - -class HousePet(Pet): - trained = models.BooleanField() - - -# Test direct-tagging with custom through model - -class TaggedFood(TaggedItemBase): - content_object = models.ForeignKey('DirectFood') - -class TaggedPet(TaggedItemBase): - content_object = models.ForeignKey('DirectPet') - -class DirectFood(models.Model): - name = models.CharField(max_length=50) - - tags = TaggableManager(through="TaggedFood") - -class DirectPet(models.Model): - name = models.CharField(max_length=50) - - tags = TaggableManager(through=TaggedPet) - - def __unicode__(self): - return self.name - -class DirectHousePet(DirectPet): - trained = models.BooleanField() - - -# Test custom through model to model with custom PK - -class TaggedCustomPKFood(TaggedItemBase): - content_object = models.ForeignKey('CustomPKFood') - -class TaggedCustomPKPet(TaggedItemBase): - content_object = models.ForeignKey('CustomPKPet') - -class CustomPKFood(models.Model): - name = models.CharField(max_length=50, primary_key=True) - - tags = TaggableManager(through=TaggedCustomPKFood) - - def __unicode__(self): - return self.name - -class CustomPKPet(models.Model): - name = models.CharField(max_length=50, primary_key=True) - - tags = TaggableManager(through=TaggedCustomPKPet) - - def __unicode__(self): - return self.name - -class CustomPKHousePet(CustomPKPet): - trained = models.BooleanField() - -# Test custom through model to a custom tag model - -class OfficialTag(TagBase): - official = models.BooleanField() - -class OfficialThroughModel(GenericTaggedItemBase): - tag = 
models.ForeignKey(OfficialTag, related_name="tagged_items") - -class OfficialFood(models.Model): - name = models.CharField(max_length=50) - - tags = TaggableManager(through=OfficialThroughModel) - - def __unicode__(self): - return self.name - -class OfficialPet(models.Model): - name = models.CharField(max_length=50) - - tags = TaggableManager(through=OfficialThroughModel) - - def __unicode__(self): - return self.name - -class OfficialHousePet(OfficialPet): - trained = models.BooleanField() - - -class Media(models.Model): - tags = TaggableManager() - - class Meta: - abstract = True - -class Photo(Media): - pass - -class Movie(Media): - pass - - -class ArticleTag(Tag): - class Meta: - proxy = True - - def slugify(self, tag, i=None): - slug = "category-%s" % tag.lower() - - if i is not None: - slug += "-%d" % i - return slug - -class ArticleTaggedItem(TaggedItem): - class Meta: - proxy = True - - @classmethod - def tag_model(self): - return ArticleTag - -class Article(models.Model): - title = models.CharField(max_length=100) - - tags = TaggableManager(through=ArticleTaggedItem) diff --git a/app/lib/taggit/tests/runtests.py b/app/lib/taggit/tests/runtests.py deleted file mode 100755 index 23bfb91..0000000 --- a/app/lib/taggit/tests/runtests.py +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/env python -import os -import sys - -from django.conf import settings - -if not settings.configured: - settings.configure( - DATABASE_ENGINE='sqlite3', - INSTALLED_APPS=[ - 'django.contrib.contenttypes', - 'taggit', - 'taggit.tests', - ] - ) - -from django.test.simple import run_tests - - -def runtests(*test_args): - if not test_args: - test_args = ['tests'] - parent = os.path.join( - os.path.dirname(os.path.abspath(__file__)), - "..", - "..", - ) - sys.path.insert(0, parent) - failures = run_tests(test_args, verbosity=1, interactive=True) - sys.exit(failures) - - -if __name__ == '__main__': - runtests(*sys.argv[1:]) diff --git a/app/lib/taggit/tests/tests.py b/app/lib/taggit/tests/tests.py deleted file mode 100644 index e627701..0000000 --- a/app/lib/taggit/tests/tests.py +++ /dev/null @@ -1,475 +0,0 @@ -from unittest import TestCase as UnitTestCase - -import django -from django.conf import settings -from django.core.exceptions import ValidationError -from django.db import connection -from django.test import TestCase, TransactionTestCase - -from taggit.managers import TaggableManager -from taggit.models import Tag, TaggedItem -from taggit.tests.forms import (FoodForm, DirectFoodForm, CustomPKFoodForm, - OfficialFoodForm) -from taggit.tests.models import (Food, Pet, HousePet, DirectFood, DirectPet, - DirectHousePet, TaggedPet, CustomPKFood, CustomPKPet, CustomPKHousePet, - TaggedCustomPKPet, OfficialFood, OfficialPet, OfficialHousePet, - OfficialThroughModel, OfficialTag, Photo, Movie, Article) -from taggit.utils import parse_tags, edit_string_for_tags - - -class BaseTaggingTest(object): - def assert_tags_equal(self, qs, tags, sort=True, attr="name"): - got = map(lambda tag: getattr(tag, attr), qs) - if sort: - got.sort() - tags.sort() - self.assertEqual(got, tags) - - def assert_num_queries(self, n, f, *args, **kwargs): - original_DEBUG = settings.DEBUG - settings.DEBUG = True - current = len(connection.queries) - try: - f(*args, **kwargs) - self.assertEqual( - len(connection.queries) - current, - n, - ) - finally: - settings.DEBUG = original_DEBUG - - def _get_form_str(self, form_str): - if django.VERSION >= (1, 3): - form_str %= { - "help_start": '', - "help_stop": "" - } - else: - form_str %= { - 
"help_start": "", - "help_stop": "" - } - return form_str - - def assert_form_renders(self, form, html): - self.assertEqual(str(form), self._get_form_str(html)) - -class BaseTaggingTestCase(TestCase, BaseTaggingTest): - pass - -class BaseTaggingTransactionTestCase(TransactionTestCase, BaseTaggingTest): - pass - - -class TagModelTestCase(BaseTaggingTransactionTestCase): - food_model = Food - tag_model = Tag - - def test_unique_slug(self): - apple = self.food_model.objects.create(name="apple") - apple.tags.add("Red", "red") - - def test_update(self): - special = self.tag_model.objects.create(name="special") - special.save() - - def test_add(self): - apple = self.food_model.objects.create(name="apple") - yummy = self.tag_model.objects.create(name="yummy") - apple.tags.add(yummy) - - def test_slugify(self): - a = Article.objects.create(title="django-taggit 1.0 Released") - a.tags.add("awesome", "release", "AWESOME") - self.assert_tags_equal(a.tags.all(), [ - "category-awesome", - "category-release", - "category-awesome-1" - ], attr="slug") - -class TagModelDirectTestCase(TagModelTestCase): - food_model = DirectFood - tag_model = Tag - -class TagModelCustomPKTestCase(TagModelTestCase): - food_model = CustomPKFood - tag_model = Tag - -class TagModelOfficialTestCase(TagModelTestCase): - food_model = OfficialFood - tag_model = OfficialTag - -class TaggableManagerTestCase(BaseTaggingTestCase): - food_model = Food - pet_model = Pet - housepet_model = HousePet - taggeditem_model = TaggedItem - tag_model = Tag - - def test_add_tag(self): - apple = self.food_model.objects.create(name="apple") - self.assertEqual(list(apple.tags.all()), []) - self.assertEqual(list(self.food_model.tags.all()), []) - - apple.tags.add('green') - self.assert_tags_equal(apple.tags.all(), ['green']) - self.assert_tags_equal(self.food_model.tags.all(), ['green']) - - pear = self.food_model.objects.create(name="pear") - pear.tags.add('green') - self.assert_tags_equal(pear.tags.all(), ['green']) - self.assert_tags_equal(self.food_model.tags.all(), ['green']) - - apple.tags.add('red') - self.assert_tags_equal(apple.tags.all(), ['green', 'red']) - self.assert_tags_equal(self.food_model.tags.all(), ['green', 'red']) - - self.assert_tags_equal( - self.food_model.tags.most_common(), - ['green', 'red'], - sort=False - ) - - apple.tags.remove('green') - self.assert_tags_equal(apple.tags.all(), ['red']) - self.assert_tags_equal(self.food_model.tags.all(), ['green', 'red']) - tag = self.tag_model.objects.create(name="delicious") - apple.tags.add(tag) - self.assert_tags_equal(apple.tags.all(), ["red", "delicious"]) - - apple.delete() - self.assert_tags_equal(self.food_model.tags.all(), ["green"]) - - def test_add_queries(self): - apple = self.food_model.objects.create(name="apple") - # 1 query to see which tags exist - # + 3 queries to create the tags. - # + 6 queries to create the intermediary things (including SELECTs, to - # make sure we don't double create. - self.assert_num_queries(10, apple.tags.add, "red", "delicious", "green") - - pear = self.food_model.objects.create(name="pear") - # 1 query to see which tags exist - # + 4 queries to create the intermeidary things (including SELECTs, to - # make sure we dont't double create. 
- self.assert_num_queries(5, pear.tags.add, "green", "delicious") - - self.assert_num_queries(0, pear.tags.add) - - def test_require_pk(self): - food_instance = self.food_model() - self.assertRaises(ValueError, lambda: food_instance.tags.all()) - - def test_delete_obj(self): - apple = self.food_model.objects.create(name="apple") - apple.tags.add("red") - self.assert_tags_equal(apple.tags.all(), ["red"]) - strawberry = self.food_model.objects.create(name="strawberry") - strawberry.tags.add("red") - apple.delete() - self.assert_tags_equal(strawberry.tags.all(), ["red"]) - - def test_delete_bulk(self): - apple = self.food_model.objects.create(name="apple") - kitty = self.pet_model.objects.create(pk=apple.pk, name="kitty") - - apple.tags.add("red", "delicious", "fruit") - kitty.tags.add("feline") - - self.food_model.objects.all().delete() - - self.assert_tags_equal(kitty.tags.all(), ["feline"]) - - def test_lookup_by_tag(self): - apple = self.food_model.objects.create(name="apple") - apple.tags.add("red", "green") - pear = self.food_model.objects.create(name="pear") - pear.tags.add("green") - - self.assertEqual( - list(self.food_model.objects.filter(tags__name__in=["red"])), - [apple] - ) - self.assertEqual( - list(self.food_model.objects.filter(tags__name__in=["green"])), - [apple, pear] - ) - - kitty = self.pet_model.objects.create(name="kitty") - kitty.tags.add("fuzzy", "red") - dog = self.pet_model.objects.create(name="dog") - dog.tags.add("woof", "red") - self.assertEqual( - list(self.food_model.objects.filter(tags__name__in=["red"]).distinct()), - [apple] - ) - - tag = self.tag_model.objects.get(name="woof") - self.assertEqual(list(self.pet_model.objects.filter(tags__in=[tag])), [dog]) - - cat = self.housepet_model.objects.create(name="cat", trained=True) - cat.tags.add("fuzzy") - - self.assertEqual( - map(lambda o: o.pk, self.pet_model.objects.filter(tags__name__in=["fuzzy"])), - [kitty.pk, cat.pk] - ) - - def test_exclude(self): - apple = self.food_model.objects.create(name="apple") - apple.tags.add("red", "green", "delicious") - - pear = self.food_model.objects.create(name="pear") - pear.tags.add("green", "delicious") - - guava = self.food_model.objects.create(name="guava") - - self.assertEqual( - map(lambda o: o.pk, self.food_model.objects.exclude(tags__name__in=["red"])), - [pear.pk, guava.pk], - ) - - def test_similarity_by_tag(self): - """Test that pears are more similar to apples than watermelons""" - apple = self.food_model.objects.create(name="apple") - apple.tags.add("green", "juicy", "small", "sour") - - pear = self.food_model.objects.create(name="pear") - pear.tags.add("green", "juicy", "small", "sweet") - - watermelon = self.food_model.objects.create(name="watermelon") - watermelon.tags.add("green", "juicy", "large", "sweet") - - similar_objs = apple.tags.similar_objects() - self.assertEqual(similar_objs, [pear, watermelon]) - self.assertEqual(map(lambda x: x.similar_tags, similar_objs), [3, 2]) - - def test_tag_reuse(self): - apple = self.food_model.objects.create(name="apple") - apple.tags.add("juicy", "juicy") - self.assert_tags_equal(apple.tags.all(), ['juicy']) - - def test_query_traverse(self): - spot = self.pet_model.objects.create(name='Spot') - spike = self.pet_model.objects.create(name='Spike') - spot.tags.add('scary') - spike.tags.add('fluffy') - lookup_kwargs = { - '%s__name' % self.pet_model._meta.module_name: 'Spot' - } - self.assert_tags_equal( - self.tag_model.objects.filter(**lookup_kwargs), - ['scary'] - ) - - def test_taggeditem_unicode(self): - ross = 
self.pet_model.objects.create(name="ross") - # I keep Ross Perot for a pet, what's it to you? - ross.tags.add("president") - - self.assertEqual( - unicode(self.taggeditem_model.objects.all()[0]), - "ross tagged with president" - ) - - def test_abstract_subclasses(self): - p = Photo.objects.create() - p.tags.add("outdoors", "pretty") - self.assert_tags_equal( - p.tags.all(), - ["outdoors", "pretty"] - ) - - m = Movie.objects.create() - m.tags.add("hd") - self.assert_tags_equal( - m.tags.all(), - ["hd"], - ) - - -class TaggableManagerDirectTestCase(TaggableManagerTestCase): - food_model = DirectFood - pet_model = DirectPet - housepet_model = DirectHousePet - taggeditem_model = TaggedPet - -class TaggableManagerCustomPKTestCase(TaggableManagerTestCase): - food_model = CustomPKFood - pet_model = CustomPKPet - housepet_model = CustomPKHousePet - taggeditem_model = TaggedCustomPKPet - - def test_require_pk(self): - # TODO with a charfield pk, pk is never None, so taggit has no way to - # tell if the instance is saved or not - pass - -class TaggableManagerOfficialTestCase(TaggableManagerTestCase): - food_model = OfficialFood - pet_model = OfficialPet - housepet_model = OfficialHousePet - taggeditem_model = OfficialThroughModel - tag_model = OfficialTag - - def test_extra_fields(self): - self.tag_model.objects.create(name="red") - self.tag_model.objects.create(name="delicious", official=True) - apple = self.food_model.objects.create(name="apple") - apple.tags.add("delicious", "red") - - pear = self.food_model.objects.create(name="Pear") - pear.tags.add("delicious") - - self.assertEqual( - map(lambda o: o.pk, self.food_model.objects.filter(tags__official=False)), - [apple.pk], - ) - - -class TaggableFormTestCase(BaseTaggingTestCase): - form_class = FoodForm - food_model = Food - - def test_form(self): - self.assertEqual(self.form_class.base_fields.keys(), ['name', 'tags']) - - f = self.form_class({'name': 'apple', 'tags': 'green, red, yummy'}) - self.assert_form_renders(f, """ -
%(help_start)sA comma-separated list of tags.%(help_stop)s""") - f.save() - apple = self.food_model.objects.get(name='apple') - self.assert_tags_equal(apple.tags.all(), ['green', 'red', 'yummy']) - - f = self.form_class({'name': 'apple', 'tags': 'green, red, yummy, delicious'}, instance=apple) - f.save() - apple = self.food_model.objects.get(name='apple') - self.assert_tags_equal(apple.tags.all(), ['green', 'red', 'yummy', 'delicious']) - self.assertEqual(self.food_model.objects.count(), 1) - - f = self.form_class({"name": "raspberry"}) - self.assertFalse(f.is_valid()) - - f = self.form_class(instance=apple) - self.assert_form_renders(f, """ -
%(help_start)sA comma-separated list of tags.%(help_stop)s""") - - apple.tags.add('has,comma') - f = self.form_class(instance=apple) - self.assert_form_renders(f, """ -
%(help_start)sA comma-separated list of tags.%(help_stop)s""") - - apple.tags.add('has space') - f = self.form_class(instance=apple) - self.assert_form_renders(f, """ -
%(help_start)sA comma-separated list of tags.%(help_stop)s""") - - def test_formfield(self): - tm = TaggableManager(verbose_name='categories', help_text='Add some categories', blank=True) - ff = tm.formfield() - self.assertEqual(ff.label, 'Categories') - self.assertEqual(ff.help_text, u'Add some categories') - self.assertEqual(ff.required, False) - - self.assertEqual(ff.clean(""), []) - - tm = TaggableManager() - ff = tm.formfield() - self.assertRaises(ValidationError, ff.clean, "") - -class TaggableFormDirectTestCase(TaggableFormTestCase): - form_class = DirectFoodForm - food_model = DirectFood - -class TaggableFormCustomPKTestCase(TaggableFormTestCase): - form_class = CustomPKFoodForm - food_model = CustomPKFood - -class TaggableFormOfficialTestCase(TaggableFormTestCase): - form_class = OfficialFoodForm - food_model = OfficialFood - - -class TagStringParseTestCase(UnitTestCase): - """ - Ported from Jonathan Buchanan's `django-tagging - `_ - """ - - def test_with_simple_space_delimited_tags(self): - """ - Test with simple space-delimited tags. - """ - self.assertEqual(parse_tags('one'), [u'one']) - self.assertEqual(parse_tags('one two'), [u'one', u'two']) - self.assertEqual(parse_tags('one two three'), [u'one', u'three', u'two']) - self.assertEqual(parse_tags('one one two two'), [u'one', u'two']) - - def test_with_comma_delimited_multiple_words(self): - """ - Test with comma-delimited multiple words. - An unquoted comma in the input will trigger this. - """ - self.assertEqual(parse_tags(',one'), [u'one']) - self.assertEqual(parse_tags(',one two'), [u'one two']) - self.assertEqual(parse_tags(',one two three'), [u'one two three']) - self.assertEqual(parse_tags('a-one, a-two and a-three'), - [u'a-one', u'a-two and a-three']) - - def test_with_double_quoted_multiple_words(self): - """ - Test with double-quoted multiple words. - A completed quote will trigger this. Unclosed quotes are ignored. - """ - self.assertEqual(parse_tags('"one'), [u'one']) - self.assertEqual(parse_tags('"one two'), [u'one', u'two']) - self.assertEqual(parse_tags('"one two three'), [u'one', u'three', u'two']) - self.assertEqual(parse_tags('"one two"'), [u'one two']) - self.assertEqual(parse_tags('a-one "a-two and a-three"'), - [u'a-one', u'a-two and a-three']) - - def test_with_no_loose_commas(self): - """ - Test with no loose commas -- split on spaces. - """ - self.assertEqual(parse_tags('one two "thr,ee"'), [u'one', u'thr,ee', u'two']) - - def test_with_loose_commas(self): - """ - Loose commas - split on commas - """ - self.assertEqual(parse_tags('"one", two three'), [u'one', u'two three']) - - def test_tags_with_double_quotes_can_contain_commas(self): - """ - Double quotes can contain commas - """ - self.assertEqual(parse_tags('a-one "a-two, and a-three"'), - [u'a-one', u'a-two, and a-three']) - self.assertEqual(parse_tags('"two", one, one, two, "one"'), - [u'one', u'two']) - - def test_with_naughty_input(self): - """ - Test with naughty input. - """ - # Bad users! Naughty users! 
- self.assertEqual(parse_tags(None), []) - self.assertEqual(parse_tags(''), []) - self.assertEqual(parse_tags('"'), []) - self.assertEqual(parse_tags('""'), []) - self.assertEqual(parse_tags('"' * 7), []) - self.assertEqual(parse_tags(',,,,,,'), []) - self.assertEqual(parse_tags('",",",",",",","'), [u',']) - self.assertEqual(parse_tags('a-one "a-two" and "a-three'), - [u'a-one', u'a-three', u'a-two', u'and']) - - def test_recreation_of_tag_list_string_representations(self): - plain = Tag.objects.create(name='plain') - spaces = Tag.objects.create(name='spa ces') - comma = Tag.objects.create(name='com,ma') - self.assertEqual(edit_string_for_tags([plain]), u'plain') - self.assertEqual(edit_string_for_tags([plain, spaces]), u'"spa ces", plain') - self.assertEqual(edit_string_for_tags([plain, spaces, comma]), u'"com,ma", "spa ces", plain') - self.assertEqual(edit_string_for_tags([plain, comma]), u'"com,ma", plain') - self.assertEqual(edit_string_for_tags([comma, spaces]), u'"com,ma", "spa ces"') diff --git a/app/lib/taggit/utils.py b/app/lib/taggit/utils.py deleted file mode 100644 index 1b5e5a7..0000000 --- a/app/lib/taggit/utils.py +++ /dev/null @@ -1,126 +0,0 @@ -from django.utils.encoding import force_unicode -from django.utils.functional import wraps - - -def parse_tags(tagstring): - """ - Parses tag input, with multiple word input being activated and - delineated by commas and double quotes. Quotes take precedence, so - they may contain commas. - - Returns a sorted list of unique tag names. - - Ported from Jonathan Buchanan's `django-tagging - `_ - """ - if not tagstring: - return [] - - tagstring = force_unicode(tagstring) - - # Special case - if there are no commas or double quotes in the - # input, we don't *do* a recall... I mean, we know we only need to - # split on spaces. - if u',' not in tagstring and u'"' not in tagstring: - words = list(set(split_strip(tagstring, u' '))) - words.sort() - return words - - words = [] - buffer = [] - # Defer splitting of non-quoted sections until we know if there are - # any unquoted commas. - to_be_split = [] - saw_loose_comma = False - open_quote = False - i = iter(tagstring) - try: - while True: - c = i.next() - if c == u'"': - if buffer: - to_be_split.append(u''.join(buffer)) - buffer = [] - # Find the matching quote - open_quote = True - c = i.next() - while c != u'"': - buffer.append(c) - c = i.next() - if buffer: - word = u''.join(buffer).strip() - if word: - words.append(word) - buffer = [] - open_quote = False - else: - if not saw_loose_comma and c == u',': - saw_loose_comma = True - buffer.append(c) - except StopIteration: - # If we were parsing an open quote which was never closed treat - # the buffer as unquoted. - if buffer: - if open_quote and u',' in buffer: - saw_loose_comma = True - to_be_split.append(u''.join(buffer)) - if to_be_split: - if saw_loose_comma: - delimiter = u',' - else: - delimiter = u' ' - for chunk in to_be_split: - words.extend(split_strip(chunk, delimiter)) - words = list(set(words)) - words.sort() - return words - - -def split_strip(string, delimiter=u','): - """ - Splits ``string`` on ``delimiter``, stripping each resulting string - and returning a list of non-empty strings. 
- - Ported from Jonathan Buchanan's `django-tagging - `_ - """ - if not string: - return [] - - words = [w.strip() for w in string.split(delimiter)] - return [w for w in words if w] - - -def edit_string_for_tags(tags): - """ - Given list of ``Tag`` instances, creates a string representation of - the list suitable for editing by the user, such that submitting the - given string representation back without changing it will give the - same list of tags. - - Tag names which contain commas will be double quoted. - - If any tag name which isn't being quoted contains whitespace, the - resulting string of tag names will be comma-delimited, otherwise - it will be space-delimited. - - Ported from Jonathan Buchanan's `django-tagging - `_ - """ - names = [] - for tag in tags: - name = tag.name - if u',' in name or u' ' in name: - names.append('"%s"' % name) - else: - names.append(name) - return u', '.join(sorted(names)) - - -def require_instance_manager(func): - @wraps(func) - def inner(self, *args, **kwargs): - if self.instance is None: - raise TypeError("Can't call %s with a non-instance manager" % func.__name__) - return func(self, *args, **kwargs) - return inner diff --git a/app/lib/taggit/views.py b/app/lib/taggit/views.py deleted file mode 100644 index 68e955b..0000000 --- a/app/lib/taggit/views.py +++ /dev/null @@ -1,18 +0,0 @@ -from django.contrib.contenttypes.models import ContentType -from django.shortcuts import get_object_or_404 -from django.views.generic.list_detail import object_list - -from taggit.models import TaggedItem, Tag - - -def tagged_object_list(request, slug, queryset, **kwargs): - if callable(queryset): - queryset = queryset() - tag = get_object_or_404(Tag, slug=slug) - qs = queryset.filter(pk__in=TaggedItem.objects.filter( - tag=tag, content_type=ContentType.objects.get_for_model(queryset.model) - ).values_list("object_id", flat=True)) - if "extra_context" not in kwargs: - kwargs["extra_context"] = {} - kwargs["extra_context"]["tag"] = tag - return object_list(request, qs, **kwargs) diff --git a/app/lib/templatetags/templatetags/markdown.py b/app/lib/templatetags/templatetags/markdown.py deleted file mode 100644 index dca51f2..0000000 --- a/app/lib/templatetags/templatetags/markdown.py +++ /dev/null @@ -1,9 +0,0 @@ -from django import template -import markdown2 as markdown - -register = template.Library() - -def do_markdown(text): - return markdown.markdown(text, safe_mode = False) - -register.filter('markdown', do_markdown) \ No newline at end of file diff --git a/app/lib/templatetags/templatetags/smartypants.py b/app/lib/templatetags/templatetags/smartypants.py deleted file mode 100644 index 07ddd03..0000000 --- a/app/lib/templatetags/templatetags/smartypants.py +++ /dev/null @@ -1,878 +0,0 @@ -r""" -============== -smartypants.py -============== - ----------------------------- -SmartyPants ported to Python ----------------------------- - -Ported by `Chad Miller`_ -Copyright (c) 2004 Chad Miller - -original `SmartyPants`_ by `John Gruber`_ -Copyright (c) 2003 John Gruber - - -Synopsis -======== - -A smart-quotes plugin for Pyblosxom_. - -The priginal "SmartyPants" is a free web publishing plug-in for Movable Type, -Blosxom, and BBEdit that easily translates plain ASCII punctuation characters -into "smart" typographic punctuation HTML entities. - -This software, *smartypants.py*, endeavours to be a functional port of -SmartyPants to Python, for use with Pyblosxom_. 
- - -Description -=========== - -SmartyPants can perform the following transformations: - -- Straight quotes ( " and ' ) into "curly" quote HTML entities -- Backticks-style quotes (\`\`like this'') into "curly" quote HTML entities -- Dashes (``--`` and ``---``) into en- and em-dash entities -- Three consecutive dots (``...`` or ``. . .``) into an ellipsis entity - -This means you can write, edit, and save your posts using plain old -ASCII straight quotes, plain dashes, and plain dots, but your published -posts (and final HTML output) will appear with smart quotes, em-dashes, -and proper ellipses. - -SmartyPants does not modify characters within ``
<pre>``, ``<code>``, ``<kbd>``,
-``<script>`` or ``<math>`` tag blocks. - - # Special case for double sets of quotes, e.g.: - #   <p>He said, "'Quoted' words in a larger quote."</p>
- str = re.sub(r""""'(?=\w)""", """“‘""", str) - str = re.sub(r"""'"(?=\w)""", """‘“""", str) - - # Special case for decade abbreviations (the '80s): - str = re.sub(r"""\b'(?=\d{2}s)""", r"""’""", str) - - close_class = r"""[^\ \t\r\n\[\{\(\-]""" - dec_dashes = r"""–|—""" - - # Get most opening single quotes: - opening_single_quotes_regex = re.compile(r""" - ( - \s | # a whitespace char, or -   | # a non-breaking space entity, or - -- | # dashes, or - &[mn]dash; | # named dash entities - %s | # or decimal entities - &\#x201[34]; # or hex - ) - ' # the quote - (?=\w) # followed by a word character - """ % (dec_dashes,), re.VERBOSE) - str = opening_single_quotes_regex.sub(r"""\1‘""", str) - - closing_single_quotes_regex = re.compile(r""" - (%s) - ' - (?!\s | s\b | \d) - """ % (close_class,), re.VERBOSE) - str = closing_single_quotes_regex.sub(r"""\1’""", str) - - closing_single_quotes_regex = re.compile(r""" - (%s) - ' - (\s | s\b) - """ % (close_class,), re.VERBOSE) - str = closing_single_quotes_regex.sub(r"""\1’\2""", str) - - # Any remaining single quotes should be opening ones: - str = re.sub(r"""'""", r"""‘""", str) - - # Get most opening double quotes: - opening_double_quotes_regex = re.compile(r""" - ( - \s | # a whitespace char, or -   | # a non-breaking space entity, or - -- | # dashes, or - &[mn]dash; | # named dash entities - %s | # or decimal entities - &\#x201[34]; # or hex - ) - " # the quote - (?=\w) # followed by a word character - """ % (dec_dashes,), re.VERBOSE) - str = opening_double_quotes_regex.sub(r"""\1“""", str) - - # Double closing quotes: - closing_double_quotes_regex = re.compile(r""" - #(%s)? # character that indicates the quote should be closing - " - (?=\s) - """ % (close_class,), re.VERBOSE) - str = closing_double_quotes_regex.sub(r"""”""", str) - - closing_double_quotes_regex = re.compile(r""" - (%s) # character that indicates the quote should be closing - " - """ % (close_class,), re.VERBOSE) - str = closing_double_quotes_regex.sub(r"""\1”""", str) - - # Any remaining quotes should be opening ones. - str = re.sub(r'"', r"""“""", str) - - return str - - -def educateBackticks(str): - """ - Parameter: String. - Returns: The string, with ``backticks'' -style double quotes - translated into HTML curly quote entities. - Example input: ``Isn't this fun?'' - Example output: “Isn't this fun?” - """ - - str = re.sub(r"""``""", r"""“""", str) - str = re.sub(r"""''""", r"""”""", str) - return str - - -def educateSingleBackticks(str): - """ - Parameter: String. - Returns: The string, with `backticks' -style single quotes - translated into HTML curly quote entities. - - Example input: `Isn't this fun?' - Example output: ‘Isn’t this fun?’ - """ - - str = re.sub(r"""`""", r"""‘""", str) - str = re.sub(r"""'""", r"""’""", str) - return str - - -def educateDashes(str): - """ - Parameter: String. - - Returns: The string, with each instance of "--" translated to - an em-dash HTML entity. - """ - - str = re.sub(r"""---""", r"""–""", str) # en (yes, backwards) - str = re.sub(r"""--""", r"""—""", str) # em (yes, backwards) - return str - - -def educateDashesOldSchool(str): - """ - Parameter: String. - - Returns: The string, with each instance of "--" translated to - an en-dash HTML entity, and each "---" translated to - an em-dash HTML entity. - """ - - str = re.sub(r"""---""", r"""—""", str) # em (yes, backwards) - str = re.sub(r"""--""", r"""–""", str) # en (yes, backwards) - return str - - -def educateDashesOldSchoolInverted(str): - """ - Parameter: String. 
- - Returns: The string, with each instance of "--" translated to - an em-dash HTML entity, and each "---" translated to - an en-dash HTML entity. Two reasons why: First, unlike the - en- and em-dash syntax supported by - EducateDashesOldSchool(), it's compatible with existing - entries written before SmartyPants 1.1, back when "--" was - only used for em-dashes. Second, em-dashes are more - common than en-dashes, and so it sort of makes sense that - the shortcut should be shorter to type. (Thanks to Aaron - Swartz for the idea.) - """ - str = re.sub(r"""---""", r"""–""", str) # em - str = re.sub(r"""--""", r"""—""", str) # en - return str - - - -def educateEllipses(str): - """ - Parameter: String. - Returns: The string, with each instance of "..." translated to - an ellipsis HTML entity. - - Example input: Huh...? - Example output: Huh…? - """ - - str = re.sub(r"""\.\.\.""", r"""…""", str) - str = re.sub(r"""\. \. \.""", r"""…""", str) - return str - - -def stupefyEntities(str): - """ - Parameter: String. - Returns: The string, with each SmartyPants HTML entity translated to - its ASCII counterpart. - - Example input: “Hello — world.” - Example output: "Hello -- world." - """ - - str = re.sub(r"""–""", r"""-""", str) # en-dash - str = re.sub(r"""—""", r"""--""", str) # em-dash - - str = re.sub(r"""‘""", r"""'""", str) # open single quote - str = re.sub(r"""’""", r"""'""", str) # close single quote - - str = re.sub(r"""“""", r'''"''', str) # open double quote - str = re.sub(r"""”""", r'''"''', str) # close double quote - - str = re.sub(r"""…""", r"""...""", str)# ellipsis - - return str - - -def processEscapes(str): - r""" - Parameter: String. - Returns: The string, with after processing the following backslash - escape sequences. This is useful if you want to force a "dumb" - quote or other character to appear. - - Escape Value - ------ ----- - \\ \ - \" " - \' ' - \. . - \- - - \` ` - """ - str = re.sub(r"""\\\\""", r"""\""", str) - str = re.sub(r'''\\"''', r""""""", str) - str = re.sub(r"""\\'""", r"""'""", str) - str = re.sub(r"""\\\.""", r""".""", str) - str = re.sub(r"""\\-""", r"""-""", str) - str = re.sub(r"""\\`""", r"""`""", str) - - return str - - -def _tokenize(str): - """ - Parameter: String containing HTML markup. - Returns: Reference to an array of the tokens comprising the input - string. Each token is either a tag (possibly with nested, - tags contained therein, such as , or a - run of text between tags. Each element of the array is a - two-element array; the first is either 'tag' or 'text'; - the second is the actual value. - - Based on the _tokenize() subroutine from Brad Choate's MTRegex plugin. - - """ - - pos = 0 - length = len(str) - tokens = [] - - depth = 6 - nested_tags = "|".join(['(?:<(?:[^<>]',] * depth) + (')*>)' * depth) - #match = r"""(?: ) | # comments - # (?: <\? .*? 
\?> ) | # directives - # %s # nested tags """ % (nested_tags,) - tag_soup = re.compile(r"""([^<]*)(<[^>]*>)""") - - token_match = tag_soup.search(str) - - previous_end = 0 - while token_match is not None: - if token_match.group(1) != "": - tokens.append(['text', token_match.group(1)]) - - tokens.append(['tag', token_match.group(2)]) - - previous_end = token_match.end() - token_match = tag_soup.search(str, token_match.end()) - - if previous_end < len(str): - tokens.append(['text', str[previous_end:]]) - - return tokens - - - -if __name__ == "__main__": - - import locale - - try: - locale.setlocale(locale.LC_ALL, '') - except: - pass - - from docutils.core import publish_string - docstring_html = publish_string(__doc__, writer_name='html') - - print docstring_html - - - # Unit test output goes out stderr. No worries. - import unittest - sp = smartyPants - - class TestSmartypantsAllAttributes(unittest.TestCase): - # the default attribute is "1", which means "all". - - def test_dates(self): - self.assertEqual(sp("1440-80's"), "1440-80’s") - self.assertEqual(sp("1440-'80s"), "1440-‘80s") - self.assertEqual(sp("1440---'80s"), "1440–‘80s") - self.assertEqual(sp("1960s"), "1960s") # no effect. - self.assertEqual(sp("1960's"), "1960’s") - self.assertEqual(sp("one two '60s"), "one two ‘60s") - self.assertEqual(sp("'60s"), "‘60s") - - def test_ordinal_numbers(self): - self.assertEqual(sp("21st century"), "21st century") # no effect. - self.assertEqual(sp("3rd"), "3rd") # no effect. - - def test_educated_quotes(self): - self.assertEqual(sp('''"Isn't this fun?"'''), '''“Isn’t this fun?”''') - - unittest.main() - - - - -__author__ = "Chad Miller " -__version__ = "1.5_1.5: Sat, 13 Aug 2005 15:50:24 -0400" -__url__ = "http://wiki.chad.org/SmartyPantsPy" -__description__ = "Smart-quotes, smart-ellipses, and smart-dashes for weblog entries in pyblosxom" diff --git a/app/lib/templatetags/templatetags/typogrify.py b/app/lib/templatetags/templatetags/typogrify.py deleted file mode 100644 index fa4f0cf..0000000 --- a/app/lib/templatetags/templatetags/typogrify.py +++ /dev/null @@ -1,216 +0,0 @@ -# from django.conf import settings -import re -from django.conf import settings -from django import template -register = template.Library() - -def amp(text): - """Wraps apersands in html with ```` so they can be - styled with CSS. Apersands are also normalized to ``&``. Requires - ampersands to have whitespace or an `` `` on both sides. - - >>> amp('One & two') - 'One & two' - >>> amp('One & two') - 'One & two' - >>> amp('One & two') - 'One & two' - - >>> amp('One & two') - 'One & two' - - It won't mess up & that are already wrapped, in entities or URLs - - >>> amp('One & two') - 'One & two' - >>> amp('“this” & that') - '“this” & that' - """ - amp_finder = re.compile(r"(\s| )(&|&|&\#38;)(\s| )") - return amp_finder.sub(r"""\1&\3""", text) - -def caps(text): - """Wraps multiple capital letters in ```` - so they can be styled with CSS. - - >>> caps("A message from KU") - 'A message from KU' - - Uses the smartypants tokenizer to not screw with HTML or with tags it shouldn't. - - >>> caps("
<PRE>CAPS</pre> more CAPS") - '<PRE>CAPS</pre>
more CAPS' - - >>> caps("A message from 2KU2 with digits") - 'A message from 2KU2 with digits' - - >>> caps("Dotted caps followed by spaces should never include them in the wrap D.O.T. like so.") - 'Dotted caps followed by spaces should never include them in the wrap D.O.T. like so.' - - >>> caps("D.O.T.HE34TRFID") - 'D.O.T.HE34TRFID' - """ - try: - import smartypants - except ImportError: - if settings.DEBUG: - raise template.TemplateSyntaxError, "Error in {% caps %} filter: The Python SmartyPants library isn't installed." - return text - - tokens = smartypants._tokenize(text) - result = [] - in_skipped_tag = False - - cap_finder = re.compile(r"""( - (\b[A-Z\d]* # Group 2: Any amount of caps and digits - [A-Z]\d*[A-Z] # A cap string much at least include two caps (but they can have digits between them) - [A-Z\d]*\b) # Any amount of caps and digits - | (\b[A-Z]+\.\s? # OR: Group 3: Some caps, followed by a '.' and an optional space - (?:[A-Z]+\.\s?)+) # Followed by the same thing at least once more - (?:\s|\b|$)) - """, re.VERBOSE) - - def _cap_wrapper(matchobj): - """This is necessary to keep dotted cap strings to pick up extra spaces""" - if matchobj.group(2): - return """%s""" % matchobj.group(2) - else: - if matchobj.group(3)[-1] == " ": - caps = matchobj.group(3)[:-1] - tail = ' ' - else: - caps = matchobj.group(3) - tail = '' - return """%s%s""" % (caps, tail) - - tags_to_skip_regex = re.compile("<(/)?(?:pre|code|kbd|script|math)[^>]*>", re.IGNORECASE) - - - for token in tokens: - if token[0] == "tag": - # Don't mess with tags. - result.append(token[1]) - close_match = tags_to_skip_regex.match(token[1]) - if close_match and close_match.group(1) == None: - in_skipped_tag = True - else: - in_skipped_tag = False - else: - if in_skipped_tag: - result.append(token[1]) - else: - result.append(cap_finder.sub(_cap_wrapper, token[1])) - - return "".join(result) - -def initial_quotes(text): - """Wraps initial quotes in ``class="dquo"`` for double quotes or - ``class="quo"`` for single quotes. Works in these block tags ``(h1-h6, p, li)`` - and also accounts for potential opening inline elements ``a, em, strong, span, b, i`` - - >>> initial_quotes('"With primes"') - '"With primes"' - >>> initial_quotes("'With single primes'") - '\\'With single primes\\'' - - >>> initial_quotes('"With primes and a link"') - '"With primes and a link"' - - >>> initial_quotes('“With smartypanted quotes”') - 'With smartypanted quotes”' - """ - quote_finder = re.compile(r"""((<(p|h[1-6]|li)[^>]*>|^) # start with an opening p, h1-6, li or the start of the string - \s* # optional white space! - (<(a|em|span|strong|i|b)[^>]*>\s*)*) # optional opening inline tags, with more optional white space for each. - (("|“|&\#8220;)|('|‘|&\#8216;)) # Find me a quote! (only need to find the left quotes and the primes) - # double quotes are in group 7, singles in group 8 - """, re.VERBOSE) - def _quote_wrapper(matchobj): - if matchobj.group(7): - classname = "dquo" - quote = matchobj.group(7) - else: - classname = "quo" - quote = matchobj.group(8) - return """%s%s""" % (matchobj.group(1), classname, quote) - - return quote_finder.sub(_quote_wrapper, text) - -def smartypants(text): - """Applies smarty pants to curl quotes. - - >>> smartypants('The "Green" man') - 'The “Green” man' - """ - try: - import smartypants - except ImportError: - if settings.DEBUG: - raise template.TemplateSyntaxError, "Error in {% smartypants %} filter: The Python smartypants library isn't installed." 
- return text - else: - return smartypants.smartyPants(text) - -def typogrify(text): - """The super typography filter - - Applies the following filters: widont, smartypants, caps, amp, initial_quotes - - >>> typogrify('

"Jayhawks" & KU fans act extremely obnoxiously

') - '

Jayhawks” & KU fans act extremely obnoxiously

' - """ - text = amp(text) - text = widont(text) - text = smartypants(text) - text = caps(text) - text = initial_quotes(text) - return text - -def widont(text): - """Replaces the space between the last two words in a string with `` `` - Works in these block tags ``(h1-h6, p, li)`` and also accounts for - potential closing inline elements ``a, em, strong, span, b, i`` - - >>> widont('A very simple test') - 'A very simple test' - - >>> widont('

<p>In a couple of paragraphs</p><p>paragraph two</p>') - '<p>In a couple of&nbsp;paragraphs</p><p>paragraph&nbsp;two</p>' - - >>> widont('<h1><a href="#">In a link inside a heading</i> </a></h1>') - '<h1><a href="#">In a link inside a&nbsp;heading</i> </a></h1>' - - >>> widont('<h1><a href="#">In a link</a> followed by other text</h1>') - '<h1><a href="#">In a link</a> followed by other&nbsp;text</h1>' - - Empty HTMLs shouldn't error - >>> widont('<h1><a href="#"></a></h1>') - '<h1><a href="#"></a></h1>' - - >>> widont('<div>Divs get no love!</div>') - '<div>Divs get no love!</div>' - - >>> widont('<div><p>But divs with paragraphs do!</p></div>') - '<div><p>But divs with paragraphs&nbsp;do!</p></div>
' - """ - widont_finder = re.compile(r"""(\s+) # the space to replace - ([^<>\s]+ # must be flollowed by non-tag non-space characters - \s* # optional white space! - (]*>\s*)* # optional closing inline tags with optional white space after each - ( Markdown is a text-to-HTML filter; it translates an easy-to-read / -> easy-to-write structured text format into HTML. Markdown's text -> format is most similar to that of plain text email, and supports -> features such as headers, *emphasis*, code blocks, blockquotes, and -> links. -> -> Markdown's syntax is designed not as a generic markup language, but -> specifically to serve as a front-end to (X)HTML. You can use span-level -> HTML tags anywhere in a Markdown document, and you can use block level -> HTML tags (like
<div> and <table> as well). - -Module usage: - - >>> import markdown2 - >>> markdown2.markdown("*boo!*") # or use `html = markdown_path(PATH)` - u'

<p><em>boo!</em></p>\n' - - >>> markdowner = Markdown() - >>> markdowner.convert("*boo!*") - u'<p><em>boo!</em></p>\n' - >>> markdowner.convert("**boom!**") - u'<p><strong>boom!</strong></p>

\n' - -This implementation of Markdown implements the full "core" syntax plus a -number of extras (e.g., code syntax coloring, footnotes) as described on -. -""" - -cmdln_desc = """A fast and complete Python implementation of Markdown, a -text-to-HTML conversion tool for web writers. -""" - -# Dev Notes: -# - There is already a Python markdown processor -# (http://www.freewisdom.org/projects/python-markdown/). -# - Python's regex syntax doesn't have '\z', so I'm using '\Z'. I'm -# not yet sure if there implications with this. Compare 'pydoc sre' -# and 'perldoc perlre'. - -__version_info__ = (1, 0, 1, 13) # first three nums match Markdown.pl -__version__ = '1.0.1.13' -__author__ = "Trent Mick" - -import os -import sys -from pprint import pprint -import re -import logging -try: - from hashlib import md5 -except ImportError: - from md5 import md5 -import optparse -from random import random -import codecs - - - -#---- Python version compat - -if sys.version_info[:2] < (2,4): - from sets import Set as set - def reversed(sequence): - for i in sequence[::-1]: - yield i - def _unicode_decode(s, encoding, errors='xmlcharrefreplace'): - return unicode(s, encoding, errors) -else: - def _unicode_decode(s, encoding, errors='strict'): - return s.decode(encoding, errors) - - -#---- globals - -DEBUG = False -log = logging.getLogger("markdown") - -DEFAULT_TAB_WIDTH = 4 - -# Table of hash values for escaped characters: -def _escape_hash(s): - # Lame attempt to avoid possible collision with someone actually - # using the MD5 hexdigest of one of these chars in there text. - # Other ideas: random.random(), uuid.uuid() - #return md5(s).hexdigest() # Markdown.pl effectively does this. - return 'md5-'+md5(s).hexdigest() -g_escape_table = dict([(ch, _escape_hash(ch)) - for ch in '\\`*_{}[]()>#+-.!']) - - - -#---- exceptions - -class MarkdownError(Exception): - pass - - - -#---- public api - -def markdown_path(path, encoding="utf-8", - html4tags=False, tab_width=DEFAULT_TAB_WIDTH, - safe_mode=None, extras=None, link_patterns=None, - use_file_vars=False): - text = codecs.open(path, 'r', encoding).read() - return Markdown(html4tags=html4tags, tab_width=tab_width, - safe_mode=safe_mode, extras=extras, - link_patterns=link_patterns, - use_file_vars=use_file_vars).convert(text) - -def markdown(text, html4tags=False, tab_width=DEFAULT_TAB_WIDTH, - safe_mode=None, extras=None, link_patterns=None, - use_file_vars=False): - return Markdown(html4tags=html4tags, tab_width=tab_width, - safe_mode=safe_mode, extras=extras, - link_patterns=link_patterns, - use_file_vars=use_file_vars).convert(text) - -class Markdown(object): - # The dict of "extras" to enable in processing -- a mapping of - # extra name to argument for the extra. Most extras do not have an - # argument, in which case the value is None. - # - # This can be set via (a) subclassing and (b) the constructor - # "extras" argument. 
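# A minimal sketch of how a caller supplies that argument, assuming this
# module is importable as `markdown2` (a plain list of names is mapped to a
# {name: None} dict by the constructor; "footnotes" and "code-friendly" are
# extras referenced elsewhere in this file):
#
#     import markdown2
#     md = markdown2.Markdown(extras=["footnotes", "code-friendly"])
#     html = md.convert("Text with a note.[^1]\n\n[^1]: The note body.")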
- extras = None - - urls = None - titles = None - html_blocks = None - html_spans = None - html_removed_text = "[HTML_REMOVED]" # for compat with markdown.py - - # Used to track when we're inside an ordered or unordered list - # (see _ProcessListItems() for details): - list_level = 0 - - _ws_only_line_re = re.compile(r"^[ \t]+$", re.M) - - def __init__(self, html4tags=False, tab_width=4, safe_mode=None, - extras=None, link_patterns=None, use_file_vars=False): - if html4tags: - self.empty_element_suffix = ">" - else: - self.empty_element_suffix = " />" - self.tab_width = tab_width - - # For compatibility with earlier markdown2.py and with - # markdown.py's safe_mode being a boolean, - # safe_mode == True -> "replace" - if safe_mode is True: - self.safe_mode = "replace" - else: - self.safe_mode = safe_mode - - if self.extras is None: - self.extras = {} - elif not isinstance(self.extras, dict): - self.extras = dict([(e, None) for e in self.extras]) - if extras: - if not isinstance(extras, dict): - extras = dict([(e, None) for e in extras]) - self.extras.update(extras) - assert isinstance(self.extras, dict) - self._instance_extras = self.extras.copy() - self.link_patterns = link_patterns - self.use_file_vars = use_file_vars - self._outdent_re = re.compile(r'^(\t|[ ]{1,%d})' % tab_width, re.M) - - def reset(self): - self.urls = {} - self.titles = {} - self.html_blocks = {} - self.html_spans = {} - self.list_level = 0 - self.extras = self._instance_extras.copy() - if "footnotes" in self.extras: - self.footnotes = {} - self.footnote_ids = [] - - def convert(self, text): - """Convert the given text.""" - # Main function. The order in which other subs are called here is - # essential. Link and image substitutions need to happen before - # _EscapeSpecialChars(), so that any *'s or _'s in the - # and tags get encoded. - - # Clear the global hashes. If we don't clear these, you get conflicts - # from other articles when generating a page which contains more than - # one article (e.g. an index page that shows the N most recent - # articles): - self.reset() - - if not isinstance(text, unicode): - #TODO: perhaps shouldn't presume UTF-8 for string input? - text = unicode(text, 'utf-8') - - if self.use_file_vars: - # Look for emacs-style file variable hints. - emacs_vars = self._get_emacs_vars(text) - if "markdown-extras" in emacs_vars: - splitter = re.compile("[ ,]+") - for e in splitter.split(emacs_vars["markdown-extras"]): - if '=' in e: - ename, earg = e.split('=', 1) - try: - earg = int(earg) - except ValueError: - pass - else: - ename, earg = e, None - self.extras[ename] = earg - - # Standardize line endings: - text = re.sub("\r\n|\r", "\n", text) - - # Make sure $text ends with a couple of newlines: - text += "\n\n" - - # Convert all tabs to spaces. - text = self._detab(text) - - # Strip any lines consisting only of spaces and tabs. - # This makes subsequent regexen easier to write, because we can - # match consecutive blank lines with /\n+/ instead of something - # contorted like /[ \t]*\n+/ . - text = self._ws_only_line_re.sub("", text) - - if self.safe_mode: - text = self._hash_html_spans(text) - - # Turn block-level HTML blocks into hash entries - text = self._hash_html_blocks(text, raw=True) - - # Strip link definitions, store in hashes. 
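# A sketch of the reference-link definition syntax that stripping step
# collects (the id, URL and title are illustrative):
#
#     text = 'See [the docs][docs].\n\n[docs]: http://example.com/docs "Optional title"\n'
#
# After _strip_link_definitions() runs, self.urls maps "docs" to the URL,
# self.titles maps "docs" to the title, and the definition line itself is
# removed from the text.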
- if "footnotes" in self.extras: - # Must do footnotes first because an unlucky footnote defn - # looks like a link defn: - # [^4]: this "looks like a link defn" - text = self._strip_footnote_definitions(text) - text = self._strip_link_definitions(text) - - text = self._run_block_gamut(text) - - text = self._unescape_special_chars(text) - - if "footnotes" in self.extras: - text = self._add_footnotes(text) - - if self.safe_mode: - text = self._unhash_html_spans(text) - - text += "\n" - return text - - _emacs_oneliner_vars_pat = re.compile(r"-\*-\s*([^\r\n]*?)\s*-\*-", re.UNICODE) - # This regular expression is intended to match blocks like this: - # PREFIX Local Variables: SUFFIX - # PREFIX mode: Tcl SUFFIX - # PREFIX End: SUFFIX - # Some notes: - # - "[ \t]" is used instead of "\s" to specifically exclude newlines - # - "(\r\n|\n|\r)" is used instead of "$" because the sre engine does - # not like anything other than Unix-style line terminators. - _emacs_local_vars_pat = re.compile(r"""^ - (?P(?:[^\r\n|\n|\r])*?) - [\ \t]*Local\ Variables:[\ \t]* - (?P.*?)(?:\r\n|\n|\r) - (?P.*?\1End:) - """, re.IGNORECASE | re.MULTILINE | re.DOTALL | re.VERBOSE) - - def _get_emacs_vars(self, text): - """Return a dictionary of emacs-style local variables. - - Parsing is done loosely according to this spec (and according to - some in-practice deviations from this): - http://www.gnu.org/software/emacs/manual/html_node/emacs/Specifying-File-Variables.html#Specifying-File-Variables - """ - emacs_vars = {} - SIZE = pow(2, 13) # 8kB - - # Search near the start for a '-*-'-style one-liner of variables. - head = text[:SIZE] - if "-*-" in head: - match = self._emacs_oneliner_vars_pat.search(head) - if match: - emacs_vars_str = match.group(1) - assert '\n' not in emacs_vars_str - emacs_var_strs = [s.strip() for s in emacs_vars_str.split(';') - if s.strip()] - if len(emacs_var_strs) == 1 and ':' not in emacs_var_strs[0]: - # While not in the spec, this form is allowed by emacs: - # -*- Tcl -*- - # where the implied "variable" is "mode". This form - # is only allowed if there are no other variables. - emacs_vars["mode"] = emacs_var_strs[0].strip() - else: - for emacs_var_str in emacs_var_strs: - try: - variable, value = emacs_var_str.strip().split(':', 1) - except ValueError: - log.debug("emacs variables error: malformed -*- " - "line: %r", emacs_var_str) - continue - # Lowercase the variable name because Emacs allows "Mode" - # or "mode" or "MoDe", etc. - emacs_vars[variable.lower()] = value.strip() - - tail = text[-SIZE:] - if "Local Variables" in tail: - match = self._emacs_local_vars_pat.search(tail) - if match: - prefix = match.group("prefix") - suffix = match.group("suffix") - lines = match.group("content").splitlines(0) - #print "prefix=%r, suffix=%r, content=%r, lines: %s"\ - # % (prefix, suffix, match.group("content"), lines) - - # Validate the Local Variables block: proper prefix and suffix - # usage. - for i, line in enumerate(lines): - if not line.startswith(prefix): - log.debug("emacs variables error: line '%s' " - "does not use proper prefix '%s'" - % (line, prefix)) - return {} - # Don't validate suffix on last line. Emacs doesn't care, - # neither should we. - if i != len(lines)-1 and not line.endswith(suffix): - log.debug("emacs variables error: line '%s' " - "does not use proper suffix '%s'" - % (line, suffix)) - return {} - - # Parse out one emacs var per line. 
- continued_for = None - for line in lines[:-1]: # no var on the last line ("PREFIX End:") - if prefix: line = line[len(prefix):] # strip prefix - if suffix: line = line[:-len(suffix)] # strip suffix - line = line.strip() - if continued_for: - variable = continued_for - if line.endswith('\\'): - line = line[:-1].rstrip() - else: - continued_for = None - emacs_vars[variable] += ' ' + line - else: - try: - variable, value = line.split(':', 1) - except ValueError: - log.debug("local variables error: missing colon " - "in local variables entry: '%s'" % line) - continue - # Do NOT lowercase the variable name, because Emacs only - # allows "mode" (and not "Mode", "MoDe", etc.) in this block. - value = value.strip() - if value.endswith('\\'): - value = value[:-1].rstrip() - continued_for = variable - else: - continued_for = None - emacs_vars[variable] = value - - # Unquote values. - for var, val in emacs_vars.items(): - if len(val) > 1 and (val.startswith('"') and val.endswith('"') - or val.startswith('"') and val.endswith('"')): - emacs_vars[var] = val[1:-1] - - return emacs_vars - - # Cribbed from a post by Bart Lateur: - # - _detab_re = re.compile(r'(.*?)\t', re.M) - def _detab_sub(self, match): - g1 = match.group(1) - return g1 + (' ' * (self.tab_width - len(g1) % self.tab_width)) - def _detab(self, text): - r"""Remove (leading?) tabs from a file. - - >>> m = Markdown() - >>> m._detab("\tfoo") - ' foo' - >>> m._detab(" \tfoo") - ' foo' - >>> m._detab("\t foo") - ' foo' - >>> m._detab(" foo") - ' foo' - >>> m._detab(" foo\n\tbar\tblam") - ' foo\n bar blam' - """ - if '\t' not in text: - return text - return self._detab_re.subn(self._detab_sub, text)[0] - - _block_tags_a = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del' - _strict_tag_block_re = re.compile(r""" - ( # save in \1 - ^ # start of line (with re.M) - <(%s) # start tag = \2 - \b # word break - (.*\n)*? # any number of lines, minimally matching - # the matching end tag - [ \t]* # trailing spaces/tabs - (?=\n+|\Z) # followed by a newline or end of document - ) - """ % _block_tags_a, - re.X | re.M) - - _block_tags_b = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math' - _liberal_tag_block_re = re.compile(r""" - ( # save in \1 - ^ # start of line (with re.M) - <(%s) # start tag = \2 - \b # word break - (.*\n)*? # any number of lines, minimally matching - .* # the matching end tag - [ \t]* # trailing spaces/tabs - (?=\n+|\Z) # followed by a newline or end of document - ) - """ % _block_tags_b, - re.X | re.M) - - def _hash_html_block_sub(self, match, raw=False): - html = match.group(1) - if raw and self.safe_mode: - html = self._sanitize_html(html) - key = _hash_text(html) - self.html_blocks[key] = html - return "\n\n" + key + "\n\n" - - def _hash_html_blocks(self, text, raw=False): - """Hashify HTML blocks - - We only want to do this for block-level HTML tags, such as headers, - lists, and tables. That's because we still want to wrap

s around - "paragraphs" that are wrapped in non-block-level tags, such as anchors, - phrase emphasis, and spans. The list of tags we're looking for is - hard-coded. - - @param raw {boolean} indicates if these are raw HTML blocks in - the original source. It makes a difference in "safe" mode. - """ - if '<' not in text: - return text - - # Pass `raw` value into our calls to self._hash_html_block_sub. - hash_html_block_sub = _curry(self._hash_html_block_sub, raw=raw) - - # First, look for nested blocks, e.g.: - #

- #
- # tags for inner block must be indented. - #
- #
- # - # The outermost tags must start at the left margin for this to match, and - # the inner nested divs must be indented. - # We need to do this before the next, more liberal match, because the next - # match will start at the first `
` and stop at the first `
`. - text = self._strict_tag_block_re.sub(hash_html_block_sub, text) - - # Now match more liberally, simply from `\n` to `\n` - text = self._liberal_tag_block_re.sub(hash_html_block_sub, text) - - # Special case just for
. It was easier to make a special - # case than to make the other regex more complicated. - if "", start_idx) + 3 - except ValueError, ex: - break - - # Start position for next comment block search. - start = end_idx - - # Validate whitespace before comment. - if start_idx: - # - Up to `tab_width - 1` spaces before start_idx. - for i in range(self.tab_width - 1): - if text[start_idx - 1] != ' ': - break - start_idx -= 1 - if start_idx == 0: - break - # - Must be preceded by 2 newlines or hit the start of - # the document. - if start_idx == 0: - pass - elif start_idx == 1 and text[0] == '\n': - start_idx = 0 # to match minute detail of Markdown.pl regex - elif text[start_idx-2:start_idx] == '\n\n': - pass - else: - break - - # Validate whitespace after comment. - # - Any number of spaces and tabs. - while end_idx < len(text): - if text[end_idx] not in ' \t': - break - end_idx += 1 - # - Must be following by 2 newlines or hit end of text. - if text[end_idx:end_idx+2] not in ('', '\n', '\n\n'): - continue - - # Escape and hash (must match `_hash_html_block_sub`). - html = text[start_idx:end_idx] - if raw and self.safe_mode: - html = self._sanitize_html(html) - key = _hash_text(html) - self.html_blocks[key] = html - text = text[:start_idx] + "\n\n" + key + "\n\n" + text[end_idx:] - - if "xml" in self.extras: - # Treat XML processing instructions and namespaced one-liner - # tags as if they were block HTML tags. E.g., if standalone - # (i.e. are their own paragraph), the following do not get - # wrapped in a

tag: - # - # - # - _xml_oneliner_re = _xml_oneliner_re_from_tab_width(self.tab_width) - text = _xml_oneliner_re.sub(hash_html_block_sub, text) - - return text - - def _strip_link_definitions(self, text): - # Strips link definitions from text, stores the URLs and titles in - # hash references. - less_than_tab = self.tab_width - 1 - - # Link defs are in the form: - # [id]: url "optional title" - _link_def_re = re.compile(r""" - ^[ ]{0,%d}\[(.+)\]: # id = \1 - [ \t]* - \n? # maybe *one* newline - [ \t]* - ? # url = \2 - [ \t]* - (?: - \n? # maybe one newline - [ \t]* - (?<=\s) # lookbehind for whitespace - ['"(] - ([^\n]*) # title = \3 - ['")] - [ \t]* - )? # title is optional - (?:\n+|\Z) - """ % less_than_tab, re.X | re.M | re.U) - return _link_def_re.sub(self._extract_link_def_sub, text) - - def _extract_link_def_sub(self, match): - id, url, title = match.groups() - key = id.lower() # Link IDs are case-insensitive - self.urls[key] = self._encode_amps_and_angles(url) - if title: - self.titles[key] = title.replace('"', '"') - return "" - - def _extract_footnote_def_sub(self, match): - id, text = match.groups() - text = _dedent(text, skip_first_line=not text.startswith('\n')).strip() - normed_id = re.sub(r'\W', '-', id) - # Ensure footnote text ends with a couple newlines (for some - # block gamut matches). - self.footnotes[normed_id] = text + "\n\n" - return "" - - def _strip_footnote_definitions(self, text): - """A footnote definition looks like this: - - [^note-id]: Text of the note. - - May include one or more indented paragraphs. - - Where, - - The 'note-id' can be pretty much anything, though typically it - is the number of the footnote. - - The first paragraph may start on the next line, like so: - - [^note-id]: - Text of the note. - """ - less_than_tab = self.tab_width - 1 - footnote_def_re = re.compile(r''' - ^[ ]{0,%d}\[\^(.+)\]: # id = \1 - [ \t]* - ( # footnote text = \2 - # First line need not start with the spaces. - (?:\s*.*\n+) - (?: - (?:[ ]{%d} | \t) # Subsequent lines must be indented. - .*\n+ - )* - ) - # Lookahead for non-space at line-start, or end of doc. - (?:(?=^[ ]{0,%d}\S)|\Z) - ''' % (less_than_tab, self.tab_width, self.tab_width), - re.X | re.M) - return footnote_def_re.sub(self._extract_footnote_def_sub, text) - - - _hr_res = [ - re.compile(r"^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$", re.M), - re.compile(r"^[ ]{0,2}([ ]?\-[ ]?){3,}[ \t]*$", re.M), - re.compile(r"^[ ]{0,2}([ ]?\_[ ]?){3,}[ \t]*$", re.M), - ] - - def _run_block_gamut(self, text): - # These are all the transformations that form block-level - # tags like paragraphs, headers, and list items. - - text = self._do_headers(text) - - # Do Horizontal Rules: - hr = "\n tags around block-level tags. - text = self._hash_html_blocks(text) - - text = self._form_paragraphs(text) - - return text - - def _pyshell_block_sub(self, match): - lines = match.group(0).splitlines(0) - _dedentlines(lines) - indent = ' ' * self.tab_width - s = ('\n' # separate from possible cuddled paragraph - + indent + ('\n'+indent).join(lines) - + '\n\n') - return s - - def _prepare_pyshell_blocks(self, text): - """Ensure that Python interactive shell sessions are put in - code blocks -- even if not properly indented. 
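A sketch of the kind of run this targets when the "pyshell" extra is
enabled (the session content is illustrative):

    >>> 1 + 1
    2

followed by a blank line; the matched run is simply re-indented by one tab
width so that the normal code-block handling picks it up.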
- """ - if ">>>" not in text: - return text - - less_than_tab = self.tab_width - 1 - _pyshell_block_re = re.compile(r""" - ^([ ]{0,%d})>>>[ ].*\n # first line - ^(\1.*\S+.*\n)* # any number of subsequent lines - ^\n # ends with a blank line - """ % less_than_tab, re.M | re.X) - - return _pyshell_block_re.sub(self._pyshell_block_sub, text) - - def _run_span_gamut(self, text): - # These are all the transformations that occur *within* block-level - # tags like paragraphs, headers, and list items. - - text = self._do_code_spans(text) - - text = self._escape_special_chars(text) - - # Process anchor and image tags. - text = self._do_links(text) - - # Make links out of things like `` - # Must come after _do_links(), because you can use < and > - # delimiters in inline links like [this](). - text = self._do_auto_links(text) - - if "link-patterns" in self.extras: - text = self._do_link_patterns(text) - - text = self._encode_amps_and_angles(text) - - text = self._do_italics_and_bold(text) - - # Do hard breaks: - text = re.sub(r" {2,}\n", " - | - # auto-link (e.g., ) - <\w+[^>]*> - | - # comment - | - <\?.*?\?> # processing instruction - ) - """, re.X) - - def _escape_special_chars(self, text): - # Python markdown note: the HTML tokenization here differs from - # that in Markdown.pl, hence the behaviour for subtle cases can - # differ (I believe the tokenizer here does a better job because - # it isn't susceptible to unmatched '<' and '>' in HTML tags). - # Note, however, that '>' is not allowed in an auto-link URL - # here. - escaped = [] - is_html_markup = False - for token in self._sorta_html_tokenize_re.split(text): - if is_html_markup: - # Within tags/HTML-comments/auto-links, encode * and _ - # so they don't conflict with their use in Markdown for - # italics and strong. We're replacing each such - # character with its corresponding MD5 checksum value; - # this is likely overkill, but it should prevent us from - # colliding with the escape values by accident. - escaped.append(token.replace('*', g_escape_table['*']) - .replace('_', g_escape_table['_'])) - else: - escaped.append(self._encode_backslash_escapes(token)) - is_html_markup = not is_html_markup - return ''.join(escaped) - - def _hash_html_spans(self, text): - # Used for safe_mode. - - def _is_auto_link(s): - if ':' in s and self._auto_link_re.match(s): - return True - elif '@' in s and self._auto_email_link_re.match(s): - return True - return False - - tokens = [] - is_html_markup = False - for token in self._sorta_html_tokenize_re.split(text): - if is_html_markup and not _is_auto_link(token): - sanitized = self._sanitize_html(token) - key = _hash_text(sanitized) - self.html_spans[key] = sanitized - tokens.append(key) - else: - tokens.append(token) - is_html_markup = not is_html_markup - return ''.join(tokens) - - def _unhash_html_spans(self, text): - for key, sanitized in self.html_spans.items(): - text = text.replace(key, sanitized) - return text - - def _sanitize_html(self, s): - if self.safe_mode == "replace": - return self.html_removed_text - elif self.safe_mode == "escape": - replacements = [ - ('&', '&'), - ('<', '<'), - ('>', '>'), - ] - for before, after in replacements: - s = s.replace(before, after) - return s - else: - raise MarkdownError("invalid value for 'safe_mode': %r (must be " - "'escape' or 'replace')" % self.safe_mode) - - _tail_of_inline_link_re = re.compile(r''' - # Match tail of: [text](/url/) or [text](/url/ "title") - \( # literal paren - [ \t]* - (?P # \1 - <.*?> - | - .*? 
- ) - [ \t]* - ( # \2 - (['"]) # quote char = \3 - (?P.*?) - \3 # matching quote - )? # title is optional - \) - ''', re.X | re.S) - _tail_of_reference_link_re = re.compile(r''' - # Match tail of: [text][id] - [ ]? # one optional space - (?:\n[ ]*)? # one optional newline followed by spaces - \[ - (?P<id>.*?) - \] - ''', re.X | re.S) - - def _do_links(self, text): - """Turn Markdown link shortcuts into XHTML <a> and <img> tags. - - This is a combination of Markdown.pl's _DoAnchors() and - _DoImages(). They are done together because that simplified the - approach. It was necessary to use a different approach than - Markdown.pl because of the lack of atomic matching support in - Python's regex engine used in $g_nested_brackets. - """ - MAX_LINK_TEXT_SENTINEL = 3000 # markdown2 issue 24 - - # `anchor_allowed_pos` is used to support img links inside - # anchors, but not anchors inside anchors. An anchor's start - # pos must be `>= anchor_allowed_pos`. - anchor_allowed_pos = 0 - - curr_pos = 0 - while True: # Handle the next link. - # The next '[' is the start of: - # - an inline anchor: [text](url "title") - # - a reference anchor: [text][id] - # - an inline img: ![text](url "title") - # - a reference img: ![text][id] - # - a footnote ref: [^id] - # (Only if 'footnotes' extra enabled) - # - a footnote defn: [^id]: ... - # (Only if 'footnotes' extra enabled) These have already - # been stripped in _strip_footnote_definitions() so no - # need to watch for them. - # - a link definition: [id]: url "title" - # These have already been stripped in - # _strip_link_definitions() so no need to watch for them. - # - not markup: [...anything else... - try: - start_idx = text.index('[', curr_pos) - except ValueError: - break - text_length = len(text) - - # Find the matching closing ']'. - # Markdown.pl allows *matching* brackets in link text so we - # will here too. Markdown.pl *doesn't* currently allow - # matching brackets in img alt text -- we'll differ in that - # regard. - bracket_depth = 0 - for p in range(start_idx+1, min(start_idx+MAX_LINK_TEXT_SENTINEL, - text_length)): - ch = text[p] - if ch == ']': - bracket_depth -= 1 - if bracket_depth < 0: - break - elif ch == '[': - bracket_depth += 1 - else: - # Closing bracket not found within sentinel length. - # This isn't markup. - curr_pos = start_idx + 1 - continue - link_text = text[start_idx+1:p] - - # Possibly a footnote ref? - if "footnotes" in self.extras and link_text.startswith("^"): - normed_id = re.sub(r'\W', '-', link_text[1:]) - if normed_id in self.footnotes: - self.footnote_ids.append(normed_id) - result = '<sup class="footnote-ref" id="fnref-%s">' \ - '<a href="#fn-%s">%s</a></sup>' \ - % (normed_id, normed_id, len(self.footnote_ids)) - text = text[:start_idx] + result + text[p+1:] - else: - # This id isn't defined, leave the markup alone. - curr_pos = p+1 - continue - - # Now determine what this is by the remainder. - p += 1 - if p == text_length: - return text - - # Inline anchor or img? - if text[p] == '(': # attempt at perf improvement - match = self._tail_of_inline_link_re.match(text, p) - if match: - # Handle an inline anchor or img. - is_img = start_idx > 0 and text[start_idx-1] == "!" - if is_img: - start_idx -= 1 - - url, title = match.group("url"), match.group("title") - if url and url[0] == '<': - url = url[1:-1] # '<url>' -> 'url' - # We've got to encode these to avoid conflicting - # with italics/bold. 
- url = url.replace('*', g_escape_table['*']) \ - .replace('_', g_escape_table['_']) - if title: - title_str = ' title="%s"' \ - % title.replace('*', g_escape_table['*']) \ - .replace('_', g_escape_table['_']) \ - .replace('"', '"') - else: - title_str = '' - if is_img: - result = '<img src="%s" alt="%s"%s%s' \ - % (url, link_text.replace('"', '"'), - title_str, self.empty_element_suffix) - curr_pos = start_idx + len(result) - text = text[:start_idx] + result + text[match.end():] - elif start_idx >= anchor_allowed_pos: - result_head = '<a href="%s"%s>' % (url, title_str) - result = '%s%s</a>' % (result_head, link_text) - # <img> allowed from curr_pos on, <a> from - # anchor_allowed_pos on. - curr_pos = start_idx + len(result_head) - anchor_allowed_pos = start_idx + len(result) - text = text[:start_idx] + result + text[match.end():] - else: - # Anchor not allowed here. - curr_pos = start_idx + 1 - continue - - # Reference anchor or img? - else: - match = self._tail_of_reference_link_re.match(text, p) - if match: - # Handle a reference-style anchor or img. - is_img = start_idx > 0 and text[start_idx-1] == "!" - if is_img: - start_idx -= 1 - link_id = match.group("id").lower() - if not link_id: - link_id = link_text.lower() # for links like [this][] - if link_id in self.urls: - url = self.urls[link_id] - # We've got to encode these to avoid conflicting - # with italics/bold. - url = url.replace('*', g_escape_table['*']) \ - .replace('_', g_escape_table['_']) - title = self.titles.get(link_id) - if title: - title = title.replace('*', g_escape_table['*']) \ - .replace('_', g_escape_table['_']) - title_str = ' title="%s"' % title - else: - title_str = '' - if is_img: - result = '<img src="%s" alt="%s"%s%s' \ - % (url, link_text.replace('"', '"'), - title_str, self.empty_element_suffix) - curr_pos = start_idx + len(result) - text = text[:start_idx] + result + text[match.end():] - elif start_idx >= anchor_allowed_pos: - result = '<a href="%s"%s>%s</a>' \ - % (url, title_str, link_text) - result_head = '<a href="%s"%s>' % (url, title_str) - result = '%s%s</a>' % (result_head, link_text) - # <img> allowed from curr_pos on, <a> from - # anchor_allowed_pos on. - curr_pos = start_idx + len(result_head) - anchor_allowed_pos = start_idx + len(result) - text = text[:start_idx] + result + text[match.end():] - else: - # Anchor not allowed here. - curr_pos = start_idx + 1 - else: - # This id isn't defined, leave the markup alone. - curr_pos = match.end() - continue - - # Otherwise, it isn't markup. - curr_pos = start_idx + 1 - - return text - - - _setext_h_re = re.compile(r'^(.+)[ \t]*\n(=+|-+)[ \t]*\n+', re.M) - def _setext_h_sub(self, match): - n = {"=": 1, "-": 2}[match.group(2)[0]] - demote_headers = self.extras.get("demote-headers") - if demote_headers: - n = min(n + demote_headers, 6) - return "<h%d>%s</h%d>\n\n" \ - % (n, self._run_span_gamut(match.group(1)), n) - - _atx_h_re = re.compile(r''' - ^(\#{1,6}) # \1 = string of #'s - [ \t]* - (.+?) 
# \2 = Header text - [ \t]* - (?<!\\) # ensure not an escaped trailing '#' - \#* # optional closing #'s (not counted) - \n+ - ''', re.X | re.M) - def _atx_h_sub(self, match): - n = len(match.group(1)) - demote_headers = self.extras.get("demote-headers") - if demote_headers: - n = min(n + demote_headers, 6) - return "<h%d>%s</h%d>\n\n" \ - % (n, self._run_span_gamut(match.group(2)), n) - - def _do_headers(self, text): - # Setext-style headers: - # Header 1 - # ======== - # - # Header 2 - # -------- - text = self._setext_h_re.sub(self._setext_h_sub, text) - - # atx-style headers: - # # Header 1 - # ## Header 2 - # ## Header 2 with closing hashes ## - # ... - # ###### Header 6 - text = self._atx_h_re.sub(self._atx_h_sub, text) - - return text - - - _marker_ul_chars = '*+-' - _marker_any = r'(?:[%s]|\d+\.)' % _marker_ul_chars - _marker_ul = '(?:[%s])' % _marker_ul_chars - _marker_ol = r'(?:\d+\.)' - - def _list_sub(self, match): - lst = match.group(1) - lst_type = match.group(3) in self._marker_ul_chars and "ul" or "ol" - result = self._process_list_items(lst) - if self.list_level: - return "<%s>\n%s</%s>\n" % (lst_type, result, lst_type) - else: - return "<%s>\n%s</%s>\n\n" % (lst_type, result, lst_type) - - def _do_lists(self, text): - # Form HTML ordered (numbered) and unordered (bulleted) lists. - - for marker_pat in (self._marker_ul, self._marker_ol): - # Re-usable pattern to match any entire ul or ol list: - less_than_tab = self.tab_width - 1 - whole_list = r''' - ( # \1 = whole list - ( # \2 - [ ]{0,%d} - (%s) # \3 = first list item marker - [ \t]+ - ) - (?:.+?) - ( # \4 - \Z - | - \n{2,} - (?=\S) - (?! # Negative lookahead for another list item marker - [ \t]* - %s[ \t]+ - ) - ) - ) - ''' % (less_than_tab, marker_pat, marker_pat) - - # We use a different prefix before nested lists than top-level lists. - # See extended comment in _process_list_items(). - # - # Note: There's a bit of duplication here. My original implementation - # created a scalar regex pattern as the conditional result of the test on - # $g_list_level, and then only ran the $text =~ s{...}{...}egmx - # substitution once, using the scalar as the pattern. This worked, - # everywhere except when running under MT on my hosting account at Pair - # Networks. There, this caused all rebuilds to be killed by the reaper (or - # perhaps they crashed, but that seems incredibly unlikely given that the - # same script on the same server ran fine *except* under MT. I've spent - # more time trying to figure out why this is happening than I'd like to - # admit. My only guess, backed up by the fact that this workaround works, - # is that Perl optimizes the substition when it can figure out that the - # pattern will never change, and when this optimization isn't on, we run - # afoul of the reaper. Thus, the slightly redundant code to that uses two - # static s/// patterns rather than one conditional pattern. - - if self.list_level: - sub_list_re = re.compile("^"+whole_list, re.X | re.M | re.S) - text = sub_list_re.sub(self._list_sub, text) - else: - list_re = re.compile(r"(?:(?<=\n\n)|\A\n?)"+whole_list, - re.X | re.M | re.S) - text = list_re.sub(self._list_sub, text) - - return text - - _list_item_re = re.compile(r''' - (\n)? # leading line = \1 - (^[ \t]*) # leading whitespace = \2 - (%s) [ \t]+ # list marker = \3 - ((?:.+?) 
# list item text = \4 - (\n{1,2})) # eols = \5 - (?= \n* (\Z | \2 (%s) [ \t]+)) - ''' % (_marker_any, _marker_any), - re.M | re.X | re.S) - - _last_li_endswith_two_eols = False - def _list_item_sub(self, match): - item = match.group(4) - leading_line = match.group(1) - leading_space = match.group(2) - if leading_line or "\n\n" in item or self._last_li_endswith_two_eols: - item = self._run_block_gamut(self._outdent(item)) - else: - # Recursion for sub-lists: - item = self._do_lists(self._outdent(item)) - if item.endswith('\n'): - item = item[:-1] - item = self._run_span_gamut(item) - self._last_li_endswith_two_eols = (len(match.group(5)) == 2) - return "<li>%s</li>\n" % item - - def _process_list_items(self, list_str): - # Process the contents of a single ordered or unordered list, - # splitting it into individual list items. - - # The $g_list_level global keeps track of when we're inside a list. - # Each time we enter a list, we increment it; when we leave a list, - # we decrement. If it's zero, we're not in a list anymore. - # - # We do this because when we're not inside a list, we want to treat - # something like this: - # - # I recommend upgrading to version - # 8. Oops, now this line is treated - # as a sub-list. - # - # As a single paragraph, despite the fact that the second line starts - # with a digit-period-space sequence. - # - # Whereas when we're inside a list (or sub-list), that line will be - # treated as the start of a sub-list. What a kludge, huh? This is - # an aspect of Markdown's syntax that's hard to parse perfectly - # without resorting to mind-reading. Perhaps the solution is to - # change the syntax rules such that sub-lists must start with a - # starting cardinal number; e.g. "1." or "a.". - self.list_level += 1 - self._last_li_endswith_two_eols = False - list_str = list_str.rstrip('\n') + '\n' - list_str = self._list_item_re.sub(self._list_item_sub, list_str) - self.list_level -= 1 - return list_str - - def _get_pygments_lexer(self, lexer_name): - try: - from pygments import lexers, util - except ImportError: - return None - try: - return lexers.get_lexer_by_name(lexer_name) - except util.ClassNotFound: - return None - - def _color_with_pygments(self, codeblock, lexer, **formatter_opts): - import pygments - import pygments.formatters - - class HtmlCodeFormatter(pygments.formatters.HtmlFormatter): - def _wrap_code(self, inner): - """A function for use in a Pygments Formatter which - wraps in <code> tags. - """ - yield 0, "<code>" - for tup in inner: - yield tup - yield 0, "</code>" - - def wrap(self, source, outfile): - """Return the source with a code, pre, and div.""" - return self._wrap_div(self._wrap_pre(self._wrap_code(source))) - - formatter = HtmlCodeFormatter(cssclass="codehilite", **formatter_opts) - return pygments.highlight(codeblock, lexer, formatter) - - def _code_block_sub(self, match): - codeblock = match.group(1) - codeblock = self._outdent(codeblock) - codeblock = self._detab(codeblock) - codeblock = codeblock.lstrip('\n') # trim leading newlines - codeblock = codeblock.rstrip() # trim trailing whitespace - - if "code-color" in self.extras and codeblock.startswith(":::"): - lexer_name, rest = codeblock.split('\n', 1) - lexer_name = lexer_name[3:].strip() - lexer = self._get_pygments_lexer(lexer_name) - codeblock = rest.lstrip("\n") # Remove lexer declaration line. 
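# A sketch of the input this branch expects when the "code-color" extra is
# enabled (the lexer name "python" is illustrative). The ":::" line has just
# been stripped above; what remains is highlighted if Pygments knows the
# named lexer, otherwise the plain <pre><code> fallback below is used:
#
#     :::python
#     def hello():
#         return "hi"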
- if lexer: - formatter_opts = self.extras['code-color'] or {} - colored = self._color_with_pygments(codeblock, lexer, - **formatter_opts) - return "\n\n%s\n\n" % colored - - codeblock = self._encode_code(codeblock) - return "\n\n<pre><code>%s\n</code></pre>\n\n" % codeblock - - def _do_code_blocks(self, text): - """Process Markdown `<pre><code>` blocks.""" - code_block_re = re.compile(r''' - (?:\n\n|\A) - ( # $1 = the code block -- one or more lines, starting with a space/tab - (?: - (?:[ ]{%d} | \t) # Lines must start with a tab or a tab-width of spaces - .*\n+ - )+ - ) - ((?=^[ ]{0,%d}\S)|\Z) # Lookahead for non-space at line-start, or end of doc - ''' % (self.tab_width, self.tab_width), - re.M | re.X) - - return code_block_re.sub(self._code_block_sub, text) - - - # Rules for a code span: - # - backslash escapes are not interpreted in a code span - # - to include one or or a run of more backticks the delimiters must - # be a longer run of backticks - # - cannot start or end a code span with a backtick; pad with a - # space and that space will be removed in the emitted HTML - # See `test/tm-cases/escapes.text` for a number of edge-case - # examples. - _code_span_re = re.compile(r''' - (?<!\\) - (`+) # \1 = Opening run of ` - (?!`) # See Note A test/tm-cases/escapes.text - (.+?) # \2 = The code block - (?<!`) - \1 # Matching closer - (?!`) - ''', re.X | re.S) - - def _code_span_sub(self, match): - c = match.group(2).strip(" \t") - c = self._encode_code(c) - return "<code>%s</code>" % c - - def _do_code_spans(self, text): - # * Backtick quotes are used for <code></code> spans. - # - # * You can use multiple backticks as the delimiters if you want to - # include literal backticks in the code span. So, this input: - # - # Just type ``foo `bar` baz`` at the prompt. - # - # Will translate to: - # - # <p>Just type <code>foo `bar` baz</code> at the prompt.</p> - # - # There's no arbitrary limit to the number of backticks you - # can use as delimters. If you need three consecutive backticks - # in your code, use four for delimiters, etc. - # - # * You can use spaces to get literal backticks at the edges: - # - # ... type `` `bar` `` ... - # - # Turns to: - # - # ... type <code>`bar`</code> ... - return self._code_span_re.sub(self._code_span_sub, text) - - def _encode_code(self, text): - """Encode/escape certain characters inside Markdown code runs. - The point is that in code, these characters are literals, - and lose their special Markdown meanings. - """ - replacements = [ - # Encode all ampersands; HTML entities are not - # entities within a Markdown code span. 
- ('&', '&'), - # Do the angle bracket song and dance: - ('<', '<'), - ('>', '>'), - # Now, escape characters that are magic in Markdown: - ('*', g_escape_table['*']), - ('_', g_escape_table['_']), - ('{', g_escape_table['{']), - ('}', g_escape_table['}']), - ('[', g_escape_table['[']), - (']', g_escape_table[']']), - ('\\', g_escape_table['\\']), - ] - for before, after in replacements: - text = text.replace(before, after) - return text - - _strong_re = re.compile(r"(\*\*|__)(?=\S)(.+?[*_]*)(?<=\S)\1", re.S) - _em_re = re.compile(r"(\*|_)(?=\S)(.+?)(?<=\S)\1", re.S) - _code_friendly_strong_re = re.compile(r"\*\*(?=\S)(.+?[*_]*)(?<=\S)\*\*", re.S) - _code_friendly_em_re = re.compile(r"\*(?=\S)(.+?)(?<=\S)\*", re.S) - def _do_italics_and_bold(self, text): - # <strong> must go first: - if "code-friendly" in self.extras: - text = self._code_friendly_strong_re.sub(r"<strong>\1</strong>", text) - text = self._code_friendly_em_re.sub(r"<em>\1</em>", text) - else: - text = self._strong_re.sub(r"<strong>\2</strong>", text) - text = self._em_re.sub(r"<em>\2</em>", text) - return text - - - _block_quote_re = re.compile(r''' - ( # Wrap whole match in \1 - ( - ^[ \t]*>[ \t]? # '>' at the start of a line - .+\n # rest of the first line - (.+\n)* # subsequent consecutive lines - \n* # blanks - )+ - ) - ''', re.M | re.X) - _bq_one_level_re = re.compile('^[ \t]*>[ \t]?', re.M); - - _html_pre_block_re = re.compile(r'(\s*<pre>.+?</pre>)', re.S) - def _dedent_two_spaces_sub(self, match): - return re.sub(r'(?m)^ ', '', match.group(1)) - - def _block_quote_sub(self, match): - bq = match.group(1) - bq = self._bq_one_level_re.sub('', bq) # trim one level of quoting - bq = self._ws_only_line_re.sub('', bq) # trim whitespace-only lines - bq = self._run_block_gamut(bq) # recurse - - bq = re.sub('(?m)^', ' ', bq) - # These leading spaces screw with <pre> content, so we need to fix that: - bq = self._html_pre_block_re.sub(self._dedent_two_spaces_sub, bq) - - return "<blockquote>\n%s\n</blockquote>\n\n" % bq - - def _do_block_quotes(self, text): - if '>' not in text: - return text - return self._block_quote_re.sub(self._block_quote_sub, text) - - def _form_paragraphs(self, text): - # Strip leading and trailing lines: - text = text.strip('\n') - - # Wrap <p> tags. - grafs = re.split(r"\n{2,}", text) - for i, graf in enumerate(grafs): - if graf in self.html_blocks: - # Unhashify HTML blocks - grafs[i] = self.html_blocks[graf] - else: - # Wrap <p> tags. 
- graf = self._run_span_gamut(graf) - grafs[i] = "<p>" + graf.lstrip(" \t") + "</p>" - - return "\n\n".join(grafs) - - def _add_footnotes(self, text): - if self.footnotes: - footer = [ - '<div class="footnotes">', - '<hr' + self.empty_element_suffix, - '<ol>', - ] - for i, id in enumerate(self.footnote_ids): - if i != 0: - footer.append('') - footer.append('<li id="fn-%s">' % id) - footer.append(self._run_block_gamut(self.footnotes[id])) - backlink = ('<a href="#fnref-%s" ' - 'class="footnoteBackLink" ' - 'title="Jump back to footnote %d in the text.">' - '↩</a>' % (id, i+1)) - if footer[-1].endswith("</p>"): - footer[-1] = footer[-1][:-len("</p>")] \ - + ' ' + backlink + "</p>" - else: - footer.append("\n<p>%s</p>" % backlink) - footer.append('</li>') - footer.append('</ol>') - footer.append('</div>') - return text + '\n\n' + '\n'.join(footer) - else: - return text - - # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin: - # http://bumppo.net/projects/amputator/ - _ampersand_re = re.compile(r'&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)') - _naked_lt_re = re.compile(r'<(?![a-z/?\$!])', re.I) - _naked_gt_re = re.compile(r'''(?<![a-z?!/'"-])>''', re.I) - - def _encode_amps_and_angles(self, text): - # Smart processing for ampersands and angle brackets that need - # to be encoded. - text = self._ampersand_re.sub('&', text) - - # Encode naked <'s - text = self._naked_lt_re.sub('<', text) - - # Encode naked >'s - # Note: Other markdown implementations (e.g. Markdown.pl, PHP - # Markdown) don't do this. - text = self._naked_gt_re.sub('>', text) - return text - - def _encode_backslash_escapes(self, text): - for ch, escape in g_escape_table.items(): - text = text.replace("\\"+ch, escape) - return text - - _auto_link_re = re.compile(r'<((https?|ftp):[^\'">\s]+)>', re.I) - def _auto_link_sub(self, match): - g1 = match.group(1) - return '<a href="%s">%s</a>' % (g1, g1) - - _auto_email_link_re = re.compile(r""" - < - (?:mailto:)? - ( - [-.\w]+ - \@ - [-\w]+(\.[-\w]+)*\.[a-z]+ - ) - > - """, re.I | re.X | re.U) - def _auto_email_link_sub(self, match): - return self._encode_email_address( - self._unescape_special_chars(match.group(1))) - - def _do_auto_links(self, text): - text = self._auto_link_re.sub(self._auto_link_sub, text) - text = self._auto_email_link_re.sub(self._auto_email_link_sub, text) - return text - - def _encode_email_address(self, addr): - # Input: an email address, e.g. "foo@example.com" - # - # Output: the email address as a mailto link, with each character - # of the address encoded as either a decimal or hex entity, in - # the hopes of foiling most address harvesting spam bots. E.g.: - # - # <a href="mailto:foo@e - # xample.com">foo - # @example.com</a> - # - # Based on a filter by Matthew Wickline, posted to the BBEdit-Talk - # mailing list: <http://tinyurl.com/yu7ue> - chars = [_xml_encode_email_char_at_random(ch) - for ch in "mailto:" + addr] - # Strip the mailto: from the visible part. - addr = '<a href="%s">%s</a>' \ - % (''.join(chars), ''.join(chars[7:])) - return addr - - def _do_link_patterns(self, text): - """Caveat emptor: there isn't much guarding against link - patterns being formed inside other standard Markdown links, e.g. - inside a [link def][like this]. - - Dev Notes: *Could* consider prefixing regexes with a negative - lookbehind assertion to attempt to guard against this. 
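A sketch of the link_patterns argument this method consumes (the pattern
and URL template here are illustrative only):

    link_patterns=[
        (re.compile(r"RFC (\d+)"), r"http://www.ietf.org/rfc/rfc\1.txt"),
    ]

Each match of a pattern is replaced by an <a> tag whose href comes from
expanding the replacement template, or from calling it if it is callable.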
-        """
-        link_from_hash = {}
-        for regex, repl in self.link_patterns:
-            replacements = []
-            for match in regex.finditer(text):
-                if hasattr(repl, "__call__"):
-                    href = repl(match)
-                else:
-                    href = match.expand(repl)
-                replacements.append((match.span(), href))
-            for (start, end), href in reversed(replacements):
-                escaped_href = (
-                    href.replace('"', '&quot;')  # b/c of attr quote
-                        # To avoid markdown <em> and <strong>:
-                        .replace('*', g_escape_table['*'])
-                        .replace('_', g_escape_table['_']))
-                link = '<a href="%s">%s</a>' % (escaped_href, text[start:end])
-                hash = md5(link).hexdigest()
-                link_from_hash[hash] = link
-                text = text[:start] + hash + text[end:]
-        for hash, link in link_from_hash.items():
-            text = text.replace(hash, link)
-        return text
-
-    def _unescape_special_chars(self, text):
-        # Swap back in all the special characters we've hidden.
-        for ch, hash in g_escape_table.items():
-            text = text.replace(hash, ch)
-        return text
-
-    def _outdent(self, text):
-        # Remove one level of line-leading tabs or spaces
-        return self._outdent_re.sub('', text)
-
-
-class MarkdownWithExtras(Markdown):
-    """A markdowner class that enables most extras:
-
-    - footnotes
-    - code-color (only has effect if 'pygments' Python module on path)
-
-    These are not included:
-    - pyshell (specific to Python-related documenting)
-    - code-friendly (because it *disables* part of the syntax)
-    - link-patterns (because you need to specify some actual
-      link-patterns anyway)
-    """
-    extras = ["footnotes", "code-color"]
-
-
-#---- internal support functions
-
-# From http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52549
-def _curry(*args, **kwargs):
-    function, args = args[0], args[1:]
-    def result(*rest, **kwrest):
-        combined = kwargs.copy()
-        combined.update(kwrest)
-        return function(*args + rest, **combined)
-    return result
-
-# Recipe: regex_from_encoded_pattern (1.0)
-def _regex_from_encoded_pattern(s):
-    """'foo' -> re.compile(re.escape('foo'))
-       '/foo/' -> re.compile('foo')
-       '/foo/i' -> re.compile('foo', re.I)
-    """
-    if s.startswith('/') and s.rfind('/') != 0:
-        # Parse it: /PATTERN/FLAGS
-        idx = s.rfind('/')
-        pattern, flags_str = s[1:idx], s[idx+1:]
-        flag_from_char = {
-            "i": re.IGNORECASE,
-            "l": re.LOCALE,
-            "s": re.DOTALL,
-            "m": re.MULTILINE,
-            "u": re.UNICODE,
-        }
-        flags = 0
-        for char in flags_str:
-            try:
-                flags |= flag_from_char[char]
-            except KeyError:
-                raise ValueError("unsupported regex flag: '%s' in '%s' "
-                                 "(must be one of '%s')"
-                                 % (char, s, ''.join(flag_from_char.keys())))
-        return re.compile(s[1:idx], flags)
-    else: # not an encoded regex
-        return re.compile(re.escape(s))
-
-# Recipe: dedent (0.1.2)
-def _dedentlines(lines, tabsize=8, skip_first_line=False):
-    """_dedentlines(lines, tabsize=8, skip_first_line=False) -> dedented lines
-
-        "lines" is a list of lines to dedent.
-        "tabsize" is the tab width to use for indent width calculations.
-        "skip_first_line" is a boolean indicating if the first line should
-            be skipped for calculating the indent width and for dedenting.
-            This is sometimes useful for docstrings and similar.
-
-    Same as dedent() except operates on a sequence of lines. Note: the
-    lines list is modified **in-place**.
-    """
-    DEBUG = False
-    if DEBUG:
-        print "dedent: dedent(..., tabsize=%d, skip_first_line=%r)"\
-              % (tabsize, skip_first_line)
-    indents = []
-    margin = None
-    for i, line in enumerate(lines):
-        if i == 0 and skip_first_line: continue
-        indent = 0
-        for ch in line:
-            if ch == ' ':
-                indent += 1
-            elif ch == '\t':
-                indent += tabsize - (indent % tabsize)
-            elif ch in '\r\n':
-                continue # skip all-whitespace lines
-            else:
-                break
-        else:
-            continue # skip all-whitespace lines
-        if DEBUG: print "dedent: indent=%d: %r" % (indent, line)
-        if margin is None:
-            margin = indent
-        else:
-            margin = min(margin, indent)
-    if DEBUG: print "dedent: margin=%r" % margin
-
-    if margin is not None and margin > 0:
-        for i, line in enumerate(lines):
-            if i == 0 and skip_first_line: continue
-            removed = 0
-            for j, ch in enumerate(line):
-                if ch == ' ':
-                    removed += 1
-                elif ch == '\t':
-                    removed += tabsize - (removed % tabsize)
-                elif ch in '\r\n':
-                    if DEBUG: print "dedent: %r: EOL -> strip up to EOL" % line
-                    lines[i] = lines[i][j:]
-                    break
-                else:
-                    raise ValueError("unexpected non-whitespace char %r in "
-                                     "line %r while removing %d-space margin"
-                                     % (ch, line, margin))
-                if DEBUG:
-                    print "dedent: %r: %r -> removed %d/%d"\
-                          % (line, ch, removed, margin)
-                if removed == margin:
-                    lines[i] = lines[i][j+1:]
-                    break
-                elif removed > margin:
-                    lines[i] = ' '*(removed-margin) + lines[i][j+1:]
-                    break
-            else:
-                if removed:
-                    lines[i] = lines[i][removed:]
-    return lines
-
-def _dedent(text, tabsize=8, skip_first_line=False):
-    """_dedent(text, tabsize=8, skip_first_line=False) -> dedented text
-
-        "text" is the text to dedent.
-        "tabsize" is the tab width to use for indent width calculations.
-        "skip_first_line" is a boolean indicating if the first line should
-            be skipped for calculating the indent width and for dedenting.
-            This is sometimes useful for docstrings and similar.
-
-    textwrap.dedent(s), but don't expand tabs to spaces
-    """
-    lines = text.splitlines(1)
-    _dedentlines(lines, tabsize=tabsize, skip_first_line=skip_first_line)
-    return ''.join(lines)
-
-
-class _memoized(object):
-    """Decorator that caches a function's return value each time it is called.
-    If called later with the same arguments, the cached value is returned, and
-    not re-evaluated.
-
-    http://wiki.python.org/moin/PythonDecoratorLibrary
-    """
-    def __init__(self, func):
-        self.func = func
-        self.cache = {}
-    def __call__(self, *args):
-        try:
-            return self.cache[args]
-        except KeyError:
-            self.cache[args] = value = self.func(*args)
-            return value
-        except TypeError:
-            # uncachable -- for instance, passing a list as an argument.
-            # Better to not cache than to blow up entirely.
-            return self.func(*args)
-    def __repr__(self):
-        """Return the function's docstring."""
-        return self.func.__doc__
-
-
-def _xml_oneliner_re_from_tab_width(tab_width):
-    """Standalone XML processing instruction regex."""
-    return re.compile(r"""
-        (?:
-            (?<=\n\n)       # Starting after a blank line
-            |               # or
-            \A\n?           # the beginning of the doc
-        )
-        (                           # save in $1
-            [ ]{0,%d}
-            (?:
-                <\?\w+\b\s+.*?\?>   # XML processing instruction
-                |
-                <\w+:\w+\b\s+.*?/>  # namespaced single tag
-            )
-            [ \t]*
-            (?=\n{2,}|\Z)           # followed by a blank line or end of document
-        )
-        """ % (tab_width - 1), re.X)
-_xml_oneliner_re_from_tab_width = _memoized(_xml_oneliner_re_from_tab_width)
-
-def _hr_tag_re_from_tab_width(tab_width):
-    return re.compile(r"""
-        (?:
-            (?<=\n\n)       # Starting after a blank line
-            |               # or
-            \A\n?           # the beginning of the doc
-        )
-        (                       # save in \1
-            [ ]{0,%d}
-            <(hr)               # start tag = \2
-            \b                  # word break
-            ([^<>])*?           #
-            /?>                 # the matching end tag
-            [ \t]*
-            (?=\n{2,}|\Z)       # followed by a blank line or end of document
-        )
-        """ % (tab_width - 1), re.X)
-_hr_tag_re_from_tab_width = _memoized(_hr_tag_re_from_tab_width)
-
-
-def _xml_encode_email_char_at_random(ch):
-    r = random()
-    # Roughly 10% raw, 45% hex, 45% dec.
-    # '@' *must* be encoded. I [John Gruber] insist.
-    # Issue 26: '_' must be encoded.
-    if r > 0.9 and ch not in "@_":
-        return ch
-    elif r < 0.45:
-        # The [1:] is to drop leading '0': 0x63 -> x63
-        return '&#%s;' % hex(ord(ch))[1:]
-    else:
-        return '&#%s;' % ord(ch)
-
-def _hash_text(text):
-    return 'md5:'+md5(text.encode("utf-8")).hexdigest()
-
-
-#---- mainline
-
-class _NoReflowFormatter(optparse.IndentedHelpFormatter):
-    """An optparse formatter that does NOT reflow the description."""
-    def format_description(self, description):
-        return description or ""
-
-def _test():
-    import doctest
-    doctest.testmod()
-
-def main(argv=None):
-    if argv is None:
-        argv = sys.argv
-    if not logging.root.handlers:
-        logging.basicConfig()
-
-    usage = "usage: %prog [PATHS...]"
-    version = "%prog "+__version__
-    parser = optparse.OptionParser(prog="markdown2", usage=usage,
-        version=version, description=cmdln_desc,
-        formatter=_NoReflowFormatter())
-    parser.add_option("-v", "--verbose", dest="log_level",
-                      action="store_const", const=logging.DEBUG,
-                      help="more verbose output")
-    parser.add_option("--encoding",
-                      help="specify encoding of text content")
-    parser.add_option("--html4tags", action="store_true", default=False,
-                      help="use HTML 4 style for empty element tags")
-    parser.add_option("-s", "--safe", metavar="MODE", dest="safe_mode",
-                      help="sanitize literal HTML: 'escape' escapes "
-                           "HTML meta chars, 'replace' replaces with an "
-                           "[HTML_REMOVED] note")
-    parser.add_option("-x", "--extras", action="append",
-                      help="Turn on specific extra features (not part of "
-                           "the core Markdown spec). Supported values: "
-                           "'code-friendly' disables _/__ for emphasis; "
-                           "'code-color' adds code-block syntax coloring; "
-                           "'link-patterns' adds auto-linking based on patterns; "
-                           "'footnotes' adds the footnotes syntax;"
-                           "'xml' passes one-liner processing instructions and namespaced XML tags;"
-                           "'pyshell' to put unindented Python interactive shell sessions in a <code> block.")
-    parser.add_option("--use-file-vars",
-                      help="Look for and use Emacs-style 'markdown-extras' "
-                           "file var to turn on extras. See "
-                           "<http://code.google.com/p/python-markdown2/wiki/Extras>.")
-    parser.add_option("--link-patterns-file",
-                      help="path to a link pattern file")
-    parser.add_option("--self-test", action="store_true",
-                      help="run internal self-tests (some doctests)")
-    parser.add_option("--compare", action="store_true",
-                      help="run against Markdown.pl as well (for testing)")
-    parser.set_defaults(log_level=logging.INFO, compare=False,
-                        encoding="utf-8", safe_mode=None, use_file_vars=False)
-    opts, paths = parser.parse_args()
-    log.setLevel(opts.log_level)
-
-    if opts.self_test:
-        return _test()
-
-    if opts.extras:
-        extras = {}
-        for s in opts.extras:
-            splitter = re.compile("[,;: ]+")
-            for e in splitter.split(s):
-                if '=' in e:
-                    ename, earg = e.split('=', 1)
-                    try:
-                        earg = int(earg)
-                    except ValueError:
-                        pass
-                else:
-                    ename, earg = e, None
-                extras[ename] = earg
-    else:
-        extras = None
-
-    if opts.link_patterns_file:
-        link_patterns = []
-        f = open(opts.link_patterns_file)
-        try:
-            for i, line in enumerate(f.readlines()):
-                if not line.strip(): continue
-                if line.lstrip().startswith("#"): continue
-                try:
-                    pat, href = line.rstrip().rsplit(None, 1)
-                except ValueError:
-                    raise MarkdownError("%s:%d: invalid link pattern line: %r"
-                                        % (opts.link_patterns_file, i+1, line))
-                link_patterns.append(
-                    (_regex_from_encoded_pattern(pat), href))
-        finally:
-            f.close()
-    else:
-        link_patterns = None
-
-    from os.path import join, dirname, abspath, exists
-    markdown_pl = join(dirname(dirname(abspath(__file__))), "test",
-                       "Markdown.pl")
-    for path in paths:
-        if opts.compare:
-            print "==== Markdown.pl ===="
-            perl_cmd = 'perl %s "%s"' % (markdown_pl, path)
-            o = os.popen(perl_cmd)
-            perl_html = o.read()
-            o.close()
-            sys.stdout.write(perl_html)
-            print "==== markdown2.py ===="
-        html = markdown_path(path, encoding=opts.encoding,
-                             html4tags=opts.html4tags,
-                             safe_mode=opts.safe_mode,
-                             extras=extras, link_patterns=link_patterns,
-                             use_file_vars=opts.use_file_vars)
-        sys.stdout.write(
-            html.encode(sys.stdout.encoding or "utf-8", 'xmlcharrefreplace'))
-        if opts.compare:
-            test_dir = join(dirname(dirname(abspath(__file__))), "test")
-            if exists(join(test_dir, "test_markdown2.py")):
-                sys.path.insert(0, test_dir)
-                from test_markdown2 import norm_html_from_html
-                norm_html = norm_html_from_html(html)
-                norm_perl_html = norm_html_from_html(perl_html)
-            else:
-                norm_html = html
-                norm_perl_html = perl_html
-            print "==== match? %r ====" % (norm_perl_html == norm_html)
-
-
-if __name__ == "__main__":
-    sys.exit( main(sys.argv) )
-
-- 
cgit v1.2.3-70-g09d2