From f520b80d69a9c51478a26f7c7e98a860e4e64c3d Mon Sep 17 00:00:00 2001
From: luxagraf
Date: Thu, 18 Feb 2016 09:35:45 -0500
Subject: added AMP template filter to replace img tags and strip out the disallowed HTML. Because somewhere to someone it makes sense to speed up pages by requiring javascript.

---
 app/builder/sanitizer.py                 | 84 ++++++++++++++++++++++++++++++++
 app/lib/templatetags/templatetags/amp.py | 68 ++++++++++++++++++++++++++
 design/sass/_writing_details.scss        | 18 +++++--
 design/templates/details/entry.amp       | 14 +----
 4 files changed, 167 insertions(+), 17 deletions(-)
 create mode 100644 app/builder/sanitizer.py
 create mode 100644 app/lib/templatetags/templatetags/amp.py

diff --git a/app/builder/sanitizer.py b/app/builder/sanitizer.py
new file mode 100644
index 0000000..8512f4f
--- /dev/null
+++ b/app/builder/sanitizer.py
@@ -0,0 +1,84 @@
+from bs4 import BeautifulSoup
+
+
+class Sanitizer(object):
+    """Remove blacklisted tags, attributes and link protocols from HTML.
+
+    Meant to be subclassed for each converter. The blacklists default to
+    empty and can be replaced per instance through the constructor.
+    """
+
+    blacklisted_tags = []
+    blacklisted_attributes = []
+    blacklisted_protocols = []
+
+    def __init__(self, tags=None, attributes=None, protocols=None):
+        if tags:
+            self.blacklisted_tags = tags
+        if attributes:
+            self.blacklisted_attributes = attributes
+        if protocols:
+            self.blacklisted_protocols = protocols
+
+    def strip(self, content=None):
+        """Strip HTML content to meet standards of output type.
+
+        Keyword arguments:
+        content -- subset of an HTML document. (ie. contents of a body tag)
+                   Falls back to ``self.content`` when a subclass sets it.
+
+        Returns the sanitized markup, or "" when there is nothing to strip.
+        """
+        if not content:
+            # The base class defines no ``content`` attribute, so a plain
+            # ``self.content`` raised AttributeError here.
+            content = getattr(self, "content", None)
+        if not content:
+            return ""
+
+        soup = BeautifulSoup(content, "lxml")
+        self.strip_tags(soup)
+        self.strip_attributes(soup)
+
+        # soup.body is None when the parsed document has no body element.
+        body = soup.body
+        return body.decode_contents() if body is not None else ""
+
+    def strip_tags(self, soup):
+        """Remove every blacklisted element, subtree included, from soup."""
+        if not self.blacklisted_tags:
+            return
+        for node in soup.find_all(self.blacklisted_tags):
+            node.extract()
+
+    def strip_attributes_extra(self, node):
+        """Hook for subclasses to strip additional attributes from node."""
+        pass
+
+    def strip_attributes(self, soup):
+        """Drop blacklisted attributes and blacklisted-protocol hrefs."""
+        if not (self.blacklisted_attributes or self.blacklisted_protocols):
+            return
+        if soup.body is None:
+            return
+
+        # Protocols are compared case-insensitively ("JavaScript:" matches).
+        blocked = set(p.lower() for p in self.blacklisted_protocols)
+
+        for node in soup.body.find_all(True):
+            if not node.attrs:
+                continue
+
+            for attr in self.blacklisted_attributes:
+                node.attrs.pop(attr, None)
+
+            self.strip_attributes_extra(node)
+
+            # Look href up again: the hook above may have removed it.
+            href = node.attrs.get('href')
+            if not href:
+                continue
+            # Without a colon the value is a relative URL, not a protocol.
+            protocol, colon, _ = href.partition(':')
+            if colon and protocol.strip().lower() in blocked:
+                del node['href']
diff --git a/app/lib/templatetags/templatetags/amp.py b/app/lib/templatetags/templatetags/amp.py
new file mode 100644
index 0000000..9c6f118
--- /dev/null
+++ b/app/lib/templatetags/templatetags/amp.py
@@ -0,0 +1,68 @@
+from django import template
+from PIL import Image
+from io import BytesIO
+try:
+    import Image
+    import ImageFile
+except ImportError:
+    try:
+        from PIL import Image
+        from PIL import ImageFile
+    except ImportError:
+        raise ImportError("Could not import the Python Imaging Library.")
+
+import requests
+from bs4 import BeautifulSoup
+from builder.sanitizer import Sanitizer
+
+register = template.Library()
+
+
+def remove_img_tags(text, timeout=10):
+    """Replace every img tag in text with an equivalent amp-img tag.
+
+    Each image is downloaded so its pixel size can be written to the
+    width and height attributes. An img tag that has no src, or whose
+    image cannot be fetched or decoded, is left in place unchanged.
+
+    Keyword arguments:
+    text -- markup to convert.
+    timeout -- seconds to wait for each image download.
+
+    Returns the prettified document as a string.
+    """
+    soup = BeautifulSoup(text, 'xml')
+    for img in soup.find_all('img'):
+        src = img.get('src')
+        if not src:
+            continue
+        try:
+            # Without a timeout a stalled host blocks rendering forever.
+            r = requests.get(src, timeout=timeout)
+            r.raise_for_status()
+            width, height = Image.open(BytesIO(r.content)).size
+        except (requests.RequestException, IOError):
+            # Best effort: one bad image must not break the whole page.
+            continue
+
+        attrs = {
+            'alt': img.get('alt', ''),
+            'width': width,
+            'height': height,
+            'src': src,
+        }
+        # srcset is optional; copy it only when the source tag has one.
+        if img.get('srcset'):
+            attrs['srcset'] = img['srcset']
+        img.replace_with(soup.new_tag("amp-img", **attrs))
+    return soup.prettify()
+
+
+def do_amp(text):
+    """Template filter: swap img tags for amp-img, then run the Sanitizer."""
+    # NOTE(review): Sanitizer() is created with empty blacklists, so strip()
+    # currently removes nothing -- pass the AMP-disallowed tags here.
+    bs = remove_img_tags(text)
+    return Sanitizer().strip(bs)
+
+register.filter('amp', do_amp)
diff --git a/design/sass/_writing_details.scss b/design/sass/_writing_details.scss
index 53b3aca..c7a57b0 100644
--- a/design/sass/_writing_details.scss
+++ b/design/sass/_writing_details.scss
@@ -153,20 +153,25 @@
 .footnote {
     @include constrain_narrow();
     margin: 1em auto 0 auto;
-    padding:0;
-    list-style-position:inside;
+    padding: 0;
     list-style-type: none;
-    &:before {
+    &:before, &:after {
+        @include faded_line_after;
         @include breakpoint(beta) {
-            @include faded_line_after;
             margin-bottom: 2em;
         }
     }
     p {
         font-size: 0.875em;
-        line-height: 1.4
+        line-height: 1.4;
     }
     hr {display: none;}
+    ol {
+        padding-left: 1em;
+        @include breakpoint(alpha) {
+            margin-left: 1em;
+        }
+    }
 }
 .dark .footnote:before {
     @include light_faded_line_after;
@@ -352,7 +357,8 @@ display: block;
 .comments--header {
     font-family: Helvetica Neue, Helvetica, sans-serif;
     line-height: 6em;
-    @include fontsize(24);
+    @include fontsize(16);
+    font-style: italic;
     &:before {
         @include faded_line_after;
         margin-top: 2em;
diff --git a/design/templates/details/entry.amp b/design/templates/details/entry.amp
index 6305d46..32b6d36 100644
--- a/design/templates/details/entry.amp
+++ b/design/templates/details/entry.amp
@@ -1,4 +1,5 @@
 {% load typogrify_tags %}
+{% load amp %}
@@ -45,7 +46,6 @@
     },
     "publisher": {
         "@type": "Person",
-        "name": "Jeremy Keith",
         "name": "Scott Gilbertson"
         "logo": {
             "@type": "ImageObject",
@@ -137,8 +137,6 @@ hr {
     border-bottom: 0.0625rem dotted #ccc;
 }
-
-
@@ -146,8 +144,7 @@ hr {
@@ -158,15 +155,10 @@ hr {
- {{object.body_html|safe|smartypants|widont}} + {{object.body_html|amp|safe}}
-- cgit v1.2.3