4 files changed, 225 insertions, 0 deletions
diff --git a/app/builder/__init__.py b/app/builder/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/app/builder/__init__.py
diff --git a/app/builder/base.py b/app/builder/base.py
new file mode 100644
index 0000000..0d2cb0f
--- /dev/null
+++ b/app/builder/base.py
@@ -0,0 +1,152 @@
+import os
+from math import ceil
+from decimal import Decimal
+from django.test.client import Client
+from django.template.loader import render_to_string
+from django.template import Context
+from django.urls import reverse
+from django.apps import apps
+from django.conf import settings
+from jsmin import jsmin
+
+
+class _FileWriter(object):
+    """Write a payload to ``<base_path><path><filename>.<ext>`` on construction.
+
+    Creates the directory when missing; path pieces are simply concatenated.
+    """
+    def __init__(self, path, text_object, ext='html', filename='index', base_path=settings.FLATFILES_ROOT):
+        self.path = '%s%s' % (base_path, path)
+        if not os.path.isdir(self.path):
+            os.makedirs(self.path)
+        self.write('%s%s.%s' % (self.path, filename, ext), text_object)
+
+    def write(self, fpath, text_object):
+        # The context manager closes the handle even if the write raises.
+        with open(fpath, 'wb') as f:
+            f.write(text_object)
+
+    def compress_js(self, filename, text_object):
+        """Minify UTF-8 encoded JS and write it as ``<filename>.min.js``."""
+        minified = jsmin(text_object.decode('utf-8')).encode('utf-8')
+        self.write('%s%s.min.js' % (self.path, filename), minified)
+
+
+class BuildNew():
+    """Bake the list and detail pages of one model to flat files."""
+
+    def __init__(self, model, app):
+        # NOTE(review): Django documents apps.get_model(app_label, model_name);
+        # the arguments are forwarded positionally, so confirm the caller order.
+        self.model = apps.get_model(model, app)
+        self.get_model_queryset()
+        self.client = Client()
+
+    def build(self):
+        self.build_list_view()
+        self.build_detail_view()
+
+    def get_model_queryset(self):
+        return self.model.objects.filter(status__exact=1)
+
+    def write_file(self, path, text_object, ext='html', filename='index'):
+        self.writer = _FileWriter(path, text_object, ext=ext, filename=filename)
+
+    def get_pages(self, qs, paginate_by):
+        """Return how many pages of ``paginate_by`` items ``qs`` fills."""
+        return int(ceil(Decimal(qs.count()) / Decimal(paginate_by)))
+
+    def build_list_view(self, base_path='', qs=None, paginate_by=10):
+        """
+        Archive Page builder that actually crawls the urls
+        because we need to be able to pass a request object to the template
+        """
+        # `is None` keeps an explicitly passed empty queryset and avoids
+        # evaluating the queryset just to test its truthiness.
+        if qs is None:
+            qs = self.get_model_queryset()
+        pages = self.get_pages(qs, paginate_by)
+        for page in range(pages):
+            # Multi-page archives live at <base_path><n>/, single pages at base_path.
+            path = '%s%s/' % (base_path, page + 1) if pages > 1 else base_path
+            print(path)
+            response = self.client.get(path, HTTP_HOST='127.0.0.1', follow=True)
+            if page == 0 and path != base_path:
+                # The first page doubles as the archive index.
+                self.write_file(base_path, response.content)
+            self.write_file(path, response.content)
+
+    def build_year_view(self, url, paginate_by=99999):
+        """Build a list archive for each year found in ``pub_date``."""
+        for year in self.model.objects.dates('pub_date', 'year'):
+            year = year.strftime('%Y')
+            qs = self.model.objects.filter(status__exact=1, pub_date__year=year)
+            self.build_list_view(
+                base_path=reverse(url, kwargs={'year': year}),
+                qs=qs,
+                paginate_by=paginate_by
+            )
+
+    def build_month_view(self, url, paginate_by=99999):
+        """Build a list archive for each month that has status-1 entries."""
+        for m in self.model.objects.dates('pub_date', 'month'):
+            year = m.strftime('%Y')
+            month = m.strftime('%m')
+            qs = self.model.objects.filter(
+                status__exact=1,
+                pub_date__year=year,
+                pub_date__month=month
+            )
+            if qs.exists():
+                self.build_list_view(
+                    base_path=reverse(url, kwargs={'year': year, 'month': month}),
+                    qs=qs,
+                    paginate_by=paginate_by
+                )
+
+    def build_detail_view(self):
+        '''
+        Grab all the blog posts, render them to a template
+        string and write that out to the filesystem
+        '''
+        for entry in self.get_model_queryset():
+            url = entry.get_absolute_url()
+            path, slug = os.path.split(url)
+            path = '%s/' % path
+            # write html
+            response = self.client.get(url)
+            self.write_file(path, response.content, filename=slug)
+            # write txt
+            response = self.client.get('%s.txt' % url)
+            self.write_file(path, response.content, ext='txt', filename=slug)
+
+    def build_feed(self, url_name):
+        """
+        Not called, but available for subclassing.
+        Fetches the reversed URL and writes it under its own name and extension.
+        """
+        url = reverse(url_name,)
+        path, slug = os.path.split(url)
+        slug, ext = os.path.splitext(slug)
+        response = self.client.get(url, HTTP_HOST='127.0.0.1')
+        self.write_file('%s/' % path, response.content, ext=ext.split(".")[-1], filename=slug)
+
+
+class BuildSitemap(BuildNew):
+    def build(self):
+        """Fetch /sitemap.xml with a fresh test client and write it as sitemap.xml."""
+        sitemap = Client().get('/sitemap.xml', HTTP_HOST='127.0.0.1')
+        self.write_file('', sitemap.content, ext='xml', filename='sitemap')
+
+
+class BuildPages(BuildNew):
+    def build(self):
+        """Render every ``pages.page`` object to ``<slug>.html`` and ``<slug>.txt``."""
+        for page in apps.get_model('pages', 'page').objects.all():
+            # render_to_string() expects a plain dict context, not a Context instance.
+            c = {'object': page, 'SITE_URL': settings.SITE_URL, 'MEDIA_URL': settings.BAKED_MEDIA_URL}
+            html = render_to_string(["details/%s.html" % page.slug, 'details/page.html'], c).encode('utf-8')
+            text = render_to_string('details/page.txt', c).encode('utf-8')
+            self.write_file('', html, 'html', page.slug)
+            # Write the plain-text rendering, not the HTML, to the .txt file.
+            self.write_file('', text, 'txt', page.slug)
diff --git a/app/builder/sanitizer.py b/app/builder/sanitizer.py
new file mode 100644
index 0000000..8512f4f
--- /dev/null
+++ b/app/builder/sanitizer.py
@@ -0,0 +1,60 @@
+from bs4 import BeautifulSoup
+
+
+class Sanitizer(object):
+    """Strip blacklisted tags, attributes and href protocols from HTML."""
+    blacklisted_tags = []
+    blacklisted_attributes = []
+    blacklisted_protocols = []
+
+    def __init__(self, tags=None, attributes=None, protocols=None):
+        if tags:
+            self.blacklisted_tags = tags
+        if attributes:
+            self.blacklisted_attributes = attributes
+        if protocols:
+            self.blacklisted_protocols = protocols
+
+    def strip(self, content=None):
+        """Strip HTML content to meet standards of output type.
+        Meant to be subclassed for each converter.
+
+        Keyword arguments:
+        content -- subset of an HTML document. (ie. contents of a body tag)
+        """
+        if not content:
+            # NOTE(review): the fallback is returned as-is, without sanitizing.
+            return getattr(self, 'content', content)
+        soup = BeautifulSoup(content, "lxml")
+        self.strip_tags(soup)
+        if soup.body is None:
+            # Nothing left to serialize (unparseable input or <body> blacklisted).
+            return ''
+        self.strip_attributes(soup)
+        return soup.body.decode_contents()
+
+    def strip_tags(self, soup):
+        if self.blacklisted_tags:
+            for tag in soup.find_all(self.blacklisted_tags):
+                tag.extract()
+
+    def strip_attributes_extra(self, node):
+        """Hook for subclasses; strip_attributes calls it per node with attributes."""
+        pass
+
+    def strip_attributes(self, soup):
+        if not (self.blacklisted_attributes or self.blacklisted_protocols):
+            return
+        # URL schemes are case-insensitive, so compare them in lower case.
+        protocols = set(p.lower() for p in self.blacklisted_protocols)
+        for node in soup.body.find_all(True):
+            if not node.attrs:
+                continue
+            for attr in self.blacklisted_attributes:
+                node.attrs.pop(attr, None)
+            self.strip_attributes_extra(node)
+            # Look href up again: it may have been removed just above. Only a
+            # value containing ':' carries a protocol at all.
+            scheme, colon, _rest = node.attrs.get('href', '').strip().partition(':')
+            if colon and scheme.lower() in protocols:
+                del node['href']
+\ No newline at end of file
diff --git a/app/builder/views.py b/app/builder/views.py
new file mode 100644
index 0000000..9d12aaa
--- /dev/null
+++ b/app/builder/views.py
@@ -0,0 +1,13 @@
+from django.shortcuts import render_to_response
+from django.template import RequestContext
+#from src.build import builder as src_builder
+from pages.build import builder as page_builder
+
+
+def do_build(request):
+    """Run the page builder when ``?id=pages`` and render admin/message.html."""
+    # NOTE(review): render_to_response was removed in Django 3.0 -- confirm target version.
+    if request.GET.get('id', '') != 'pages':
+        return render_to_response('admin/message.html', {})
+    page_builder()
+    return render_to_response('admin/message.html', {'message': 'Writing Pages to Disk'})