diff options
Diffstat (limited to 'lib/tagging/utils.py')
-rw-r--r-- | lib/tagging/utils.py | 263 |
1 files changed, 263 insertions, 0 deletions
diff --git a/lib/tagging/utils.py b/lib/tagging/utils.py new file mode 100644 index 0000000..e89bab0 --- /dev/null +++ b/lib/tagging/utils.py @@ -0,0 +1,263 @@ +""" +Tagging utilities - from user tag input parsing to tag cloud +calculation. +""" +import math +import types + +from django.db.models.query import QuerySet +from django.utils.encoding import force_unicode +from django.utils.translation import ugettext as _ + +# Python 2.3 compatibility +try: + set +except NameError: + from sets import Set as set + +def parse_tag_input(input): + """ + Parses tag input, with multiple word input being activated and + delineated by commas and double quotes. Quotes take precedence, so + they may contain commas. + + Returns a sorted list of unique tag names. + """ + if not input: + return [] + + input = force_unicode(input) + + # Special case - if there are no commas or double quotes in the + # input, we don't *do* a recall... I mean, we know we only need to + # split on spaces. + if u',' not in input and u'"' not in input: + words = list(set(split_strip(input, u' '))) + words.sort() + return words + + words = [] + buffer = [] + # Defer splitting of non-quoted sections until we know if there are + # any unquoted commas. + to_be_split = [] + saw_loose_comma = False + open_quote = False + i = iter(input) + try: + while 1: + c = i.next() + if c == u'"': + if buffer: + to_be_split.append(u''.join(buffer)) + buffer = [] + # Find the matching quote + open_quote = True + c = i.next() + while c != u'"': + buffer.append(c) + c = i.next() + if buffer: + word = u''.join(buffer).strip() + if word: + words.append(word) + buffer = [] + open_quote = False + else: + if not saw_loose_comma and c == u',': + saw_loose_comma = True + buffer.append(c) + except StopIteration: + # If we were parsing an open quote which was never closed treat + # the buffer as unquoted. + if buffer: + if open_quote and u',' in buffer: + saw_loose_comma = True + to_be_split.append(u''.join(buffer)) + if to_be_split: + if saw_loose_comma: + delimiter = u',' + else: + delimiter = u' ' + for chunk in to_be_split: + words.extend(split_strip(chunk, delimiter)) + words = list(set(words)) + words.sort() + return words + +def split_strip(input, delimiter=u','): + """ + Splits ``input`` on ``delimiter``, stripping each resulting string + and returning a list of non-empty strings. + """ + if not input: + return [] + + words = [w.strip() for w in input.split(delimiter)] + return [w for w in words if w] + +def edit_string_for_tags(tags): + """ + Given list of ``Tag`` instances, creates a string representation of + the list suitable for editing by the user, such that submitting the + given string representation back without changing it will give the + same list of tags. + + Tag names which contain commas will be double quoted. + + If any tag name which isn't being quoted contains whitespace, the + resulting string of tag names will be comma-delimited, otherwise + it will be space-delimited. + """ + names = [] + use_commas = False + for tag in tags: + name = tag.name + if u',' in name: + names.append('"%s"' % name) + continue + elif u' ' in name: + if not use_commas: + use_commas = True + names.append(name) + if use_commas: + glue = u', ' + else: + glue = u' ' + return glue.join(names) + +def get_queryset_and_model(queryset_or_model): + """ + Given a ``QuerySet`` or a ``Model``, returns a two-tuple of + (queryset, model). + + If a ``Model`` is given, the ``QuerySet`` returned will be created + using its default manager. + """ + try: + return queryset_or_model, queryset_or_model.model + except AttributeError: + return queryset_or_model._default_manager.all(), queryset_or_model + +def get_tag_list(tags): + """ + Utility function for accepting tag input in a flexible manner. + + If a ``Tag`` object is given, it will be returned in a list as + its single occupant. + + If given, the tag names in the following will be used to create a + ``Tag`` ``QuerySet``: + + * A string, which may contain multiple tag names. + * A list or tuple of strings corresponding to tag names. + * A list or tuple of integers corresponding to tag ids. + + If given, the following will be returned as-is: + + * A list or tuple of ``Tag`` objects. + * A ``Tag`` ``QuerySet``. + + """ + from tagging.models import Tag + if isinstance(tags, Tag): + return [tags] + elif isinstance(tags, QuerySet) and tags.model is Tag: + return tags + elif isinstance(tags, types.StringTypes): + return Tag.objects.filter(name__in=parse_tag_input(tags)) + elif isinstance(tags, (types.ListType, types.TupleType)): + if len(tags) == 0: + return tags + contents = set() + for item in tags: + if isinstance(item, types.StringTypes): + contents.add('string') + elif isinstance(item, Tag): + contents.add('tag') + elif isinstance(item, (types.IntType, types.LongType)): + contents.add('int') + if len(contents) == 1: + if 'string' in contents: + return Tag.objects.filter(name__in=[force_unicode(tag) \ + for tag in tags]) + elif 'tag' in contents: + return tags + elif 'int' in contents: + return Tag.objects.filter(id__in=tags) + else: + raise ValueError(_('If a list or tuple of tags is provided, they must all be tag names, Tag objects or Tag ids.')) + else: + raise ValueError(_('The tag input given was invalid.')) + +def get_tag(tag): + """ + Utility function for accepting single tag input in a flexible + manner. + + If a ``Tag`` object is given it will be returned as-is; if a + string or integer are given, they will be used to lookup the + appropriate ``Tag``. + + If no matching tag can be found, ``None`` will be returned. + """ + from tagging.models import Tag + if isinstance(tag, Tag): + return tag + + try: + if isinstance(tag, types.StringTypes): + return Tag.objects.get(name=tag) + elif isinstance(tag, (types.IntType, types.LongType)): + return Tag.objects.get(id=tag) + except Tag.DoesNotExist: + pass + + return None + +# Font size distribution algorithms +LOGARITHMIC, LINEAR = 1, 2 + +def _calculate_thresholds(min_weight, max_weight, steps): + delta = (max_weight - min_weight) / float(steps) + return [min_weight + i * delta for i in range(1, steps + 1)] + +def _calculate_tag_weight(weight, max_weight, distribution): + """ + Logarithmic tag weight calculation is based on code from the + `Tag Cloud`_ plugin for Mephisto, by Sven Fuchs. + + .. _`Tag Cloud`: http://www.artweb-design.de/projects/mephisto-plugin-tag-cloud + """ + if distribution == LINEAR or max_weight == 1: + return weight + elif distribution == LOGARITHMIC: + return math.log(weight) * max_weight / math.log(max_weight) + raise ValueError(_('Invalid distribution algorithm specified: %s.') % distribution) + +def calculate_cloud(tags, steps=4, distribution=LOGARITHMIC): + """ + Add a ``font_size`` attribute to each tag according to the + frequency of its use, as indicated by its ``count`` + attribute. + + ``steps`` defines the range of font sizes - ``font_size`` will + be an integer between 1 and ``steps`` (inclusive). + + ``distribution`` defines the type of font size distribution + algorithm which will be used - logarithmic or linear. It must be + one of ``tagging.utils.LOGARITHMIC`` or ``tagging.utils.LINEAR``. + """ + if len(tags) > 0: + counts = [tag.count for tag in tags] + min_weight = float(min(counts)) + max_weight = float(max(counts)) + thresholds = _calculate_thresholds(min_weight, max_weight, steps) + for tag in tags: + font_set = False + tag_weight = _calculate_tag_weight(tag.count, max_weight, distribution) + for i in range(steps): + if not font_set and tag_weight <= thresholds[i]: + tag.font_size = i + 1 + font_set = True + return tags |