diff options
author | luxagraf <sng@luxagraf.net> | 2021-08-14 09:39:08 -0400 |
---|---|---|
committer | luxagraf <sng@luxagraf.net> | 2021-08-14 09:39:08 -0400 |
commit | 9327e01c60b114286f5a142552aae60843029a48 (patch) | |
tree | 7f353f4299d50013c3b90f7b749fc6ab211b32d4 /app/builder | |
parent | 0feb366b10cf422f12d18fa753e5d876120f4195 (diff) |
initial commit
Diffstat (limited to 'app/builder')
-rw-r--r-- | app/builder/__init__.py | 0 | ||||
-rw-r--r-- | app/builder/base.py | 482 | ||||
-rw-r--r-- | app/builder/sanitizer.py | 60 | ||||
-rw-r--r-- | app/builder/views.py | 41 |
4 files changed, 583 insertions, 0 deletions
# ---------------------------------------------------------------------------
# app/builder/__init__.py  (empty)
# ---------------------------------------------------------------------------

# ---------------------------------------------------------------------------
# app/builder/base.py
# ---------------------------------------------------------------------------
import os
from math import ceil
from decimal import Decimal
from django.contrib.sites.models import Site
from django.test.client import Client
from django.template.loader import render_to_string
from django.template import Context
from django.urls import reverse
from django.apps import apps
from django.conf import settings
from jsmin import jsmin


def _read_text(fpath, encoding='utf-8'):
    """Return the whole text of ``fpath``; the file is closed before returning."""
    with open(fpath, 'r', encoding=encoding) as source:
        return source.read()


class _FileWriter(object):
    """
    Given a path and text object; write the page to disc.

    The write happens in ``__init__``: constructing the object creates the
    target directory (if needed) and writes ``<dir><filename>.<ext>``.
    """
    def __init__(self, path, text_object, ext='html', filename='index', site='luxagraf.net'):
        """
        path        -- directory, appended verbatim to <PROJ_ROOT>/<site domain>
        text_object -- bytes to write (the file is opened in binary mode)
        ext         -- file extension, without the dot
        filename    -- file name, without extension
        site        -- a domain string or an already-loaded ``Site`` instance
        """
        # BuildNew.write_file hands over a Site instance, other callers may
        # pass a domain string; only hit the database for the latter.
        if not isinstance(site, Site):
            site = Site.objects.get(domain=site)
        base_path = os.path.join(settings.PROJ_ROOT, site.domain)
        self.path = '%s%s' % (base_path, path)
        os.makedirs(self.path, exist_ok=True)
        fpath = '%s%s.%s' % (self.path, filename, ext)
        self.write(fpath, text_object)

    def write(self, fpath, text_object):
        """Write the bytes ``text_object`` to ``fpath``, replacing any existing file."""
        with open(fpath, 'wb') as out:
            out.write(text_object)

    def compress_js(self, filename, text_object):
        """Minify UTF-8 encoded javascript and write it as ``<filename>.min.js``."""
        path = '%s%s.min.js' % (self.path, filename)
        compressed = jsmin(text_object.decode('utf-8')).encode('utf-8')
        self.write(path, compressed)


class BuildNew():
    """
    Bake a model's list and detail pages to flat files by requesting the
    site's own urls through the Django test client.
    """

    def __init__(self, model, app, site='luxagraf.net'):
        """
        ``model`` and ``app`` are handed to ``apps.get_model`` in that order.
        Django's signature is ``get_model(app_label, model_name)``, so despite
        its name ``model`` has to carry the app label and ``app`` the model
        name. The parameter names are kept for existing keyword callers.
        """
        self.site = Site.objects.get(domain=site)
        self.model = apps.get_model(model, app)
        # Result is unused here; call retained in case an override relies on it.
        self.get_model_queryset()
        self.client = Client()

    def build(self):
        """Build the paginated list pages, then every detail page."""
        self.build_list_view()
        self.build_detail_view()

    def get_model_queryset(self):
        """Return the objects whose ``status`` is exactly 1 (presumably "published")."""
        return self.model.objects.filter(status__exact=1)

    def write_file(self, path, text_object, ext='html', filename='index'):
        """Write ``text_object`` below this builder's site directory."""
        self.writer = _FileWriter(path, text_object, ext=ext, filename=filename, site=self.site)

    def get_pages(self, qs, paginate_by):
        """Return how many pages of ``paginate_by`` items ``qs`` fills (0 when empty)."""
        return int(ceil(Decimal(qs.count()) / Decimal(paginate_by)))

    def build_list_view(self, base_path='', qs=None, paginate_by=10):
        """
        Archive Page builder that actually crawls the urls
        because we need to be able to pass a request object to the template.

        With more than one page, page N is fetched from and written to
        ``<base_path>N/`` and page 1 is additionally written to ``base_path``.
        With a single page only ``base_path`` is fetched and written.
        """
        # `is None`, not truthiness: `not qs` would evaluate the queryset and
        # silently swap an *empty* queryset for the full default one.
        if qs is None:
            qs = self.get_model_queryset()
        pages = self.get_pages(qs, paginate_by)
        for page in range(pages):
            if pages > 1:
                path = '%s%s/' % (base_path, page + 1)
            else:
                path = base_path
            url = path
            print(path)
            response = self.client.get(url, HTTP_HOST='127.0.0.1', follow=True)
            # The first page doubles as the index of base_path; skip the
            # extra write when path already is base_path.
            if page == 0 and path != base_path:
                self.write_file(base_path, response.content)
            self.write_file(path, response.content)

    def build_year_view(self, url, paginate_by=99999):
        """Build one list view per year found in ``pub_date``; ``url`` is a url name taking ``year``."""
        years = self.get_model_queryset().dates('pub_date', 'year')
        for year in years:
            year = year.strftime('%Y')
            qs = self.model.objects.filter(
                status__exact=1,
                pub_date__year=year
            )
            self.build_list_view(
                base_path=reverse(url, kwargs={'year': year, }),
                qs=qs,
                paginate_by=paginate_by
            )

    def build_month_view(self, url, paginate_by=99999):
        """Build one list view per month found in ``pub_date``; ``url`` takes ``year`` and ``month``."""
        months = self.get_model_queryset().dates('pub_date', 'month')
        for m in months:
            year = m.strftime('%Y')
            month = m.strftime('%m')
            qs = self.model.objects.filter(
                status__exact=1,
                pub_date__year=year,
                pub_date__month=month
            )
            if qs.exists():
                self.build_list_view(
                    base_path=reverse(url, kwargs={'year': year, 'month': month}),
                    qs=qs,
                    paginate_by=paginate_by
                )

    def build_detail_view(self):
        '''
        Grab all the blog posts, render them to a template
        string and write that out to the filesystem.

        Each entry is fetched twice: once at its absolute url (written as
        .html) and once at ``<url>.txt`` (written as .txt).
        '''
        for entry in self.get_model_queryset():
            url = entry.get_absolute_url()
            path, slug = os.path.split(url)
            path = '%s/' % path
            # write html
            response = self.client.get(url)
            self.write_file(path, response.content, filename=slug)
            # write txt
            response = self.client.get('%s.txt' % url)
            self.write_file(path, response.content, ext='txt', filename=slug)

    def build_feed(self, url_name):
        """
        Not called, but available for subclassing.

        Reverses ``url_name``, fetches it and writes the response using the
        url's own file name and extension.
        """
        url = reverse(url_name,)
        path, slug = os.path.split(url)
        slug, ext = os.path.splitext(slug)
        response = self.client.get(url, HTTP_HOST='127.0.0.1')
        self.write_file('%s/' % path, response.content, ext=ext.split(".")[-1], filename=slug)


class Build():
    """
    Older builder base class: writes below ``settings.FLATFILES_ROOT``.

    ``build_archive_pages`` falls back to ``self.get_model_querset()`` (sic),
    which this base class does not define; subclasses either define it or
    always pass ``qs``.
    """

    def write_file(self, path, text_object, ext='html', filename='index'):
        """
        Given a path and object intended to be a webpage, write the page to
        disc. For ``ext == 'js'`` a minified ``<filename>.min.js`` is written
        next to the original.

        ``text_object`` must be bytes (UTF-8 encoded for javascript).
        """
        path = '%s%s' % (settings.FLATFILES_ROOT, path)
        os.makedirs(path, exist_ok=True)
        fpath = '%s%s.%s' % (path, filename, ext)
        with open(fpath, 'wb') as out:
            out.write(text_object)
        if ext == 'js':
            min_path = '%s%s.min.%s' % (path, filename, ext)
            compressed = jsmin(text_object.decode(encoding='UTF-8'))
            with open(min_path, 'wb') as js_file:
                js_file.write(compressed.encode('utf-8'))

    def build_archive_pages(self, qs=None, base_path='', paginate_by=10):
        """
        Archive Page builder that actually crawls the urls
        because we need to be able to pass a request object to the template.

        POSTs to ``/<base_path>N/`` for every page N and writes the response
        to ``<base_path>N/``; page 1 is also written to ``base_path`` itself.
        """
        if qs is None:
            qs = self.get_model_querset()
        c = Client()
        pages = int(ceil(Decimal(qs.count()) / Decimal(paginate_by)))
        # Same for every page, so built once outside the loop.
        page_url = base_path + '%d/'
        for page in range(pages):
            path = '%s%s/' % (base_path, page + 1)
            url = '/%s%s/' % (base_path, str(page + 1))
            response = c.post(url, {'page_url': page_url, 'page': int(page), 'builder': True}, HTTP_HOST='127.0.0.1')
            if page == 0:
                self.write_file(base_path, response.content)
            self.write_file(path, response.content)


class BuildAll(Build):
    """Run every legacy builder in sequence."""

    def build(self):
        BuildWriting().build()
        BuildPhotos().build()
        BuildProjects().build()
        BuildMap().build()
        BuildWritingFeed().build()
        BuildSitemap().build()
        BuildPages().build()
        # A trailing `p.write_files()` used to follow here; `p` was never
        # defined, so the method always ended in a NameError. Removed.


class BuildWriting(Build):
    """Bake the journal: detail pages, archives, homepage and 404 page."""

    def build(self):
        self.build_detail_pages()
        self.build_writing_archives()
        self.build_country_archive_pages()
        self.build_region_archive_pages()
        self.build_homepage()
        self.build_404()
        self.writing_year_archives()
        self.writing_month_archives()

    def get_model_querset(self):
        """Return the ``jrnl.entry`` objects with ``status == 1``."""
        model = apps.get_model('jrnl', 'entry')
        return model.objects.filter(status__exact=1)

    def build_detail_pages(self):
        '''
        Grab all the blog posts, render them to a template string and write
        that out to the filesystem, as .html and as .txt.
        '''
        for entry in self.get_model_querset():
            c = {
                'object': entry,
                'MEDIA_URL': settings.BAKED_MEDIA_URL,
                'IMAGES_URL': settings.BAKED_IMAGES_URL
            }
            path = '/jrnl/%s/' % (entry.pub_date.strftime("%Y/%m").lower())
            slug = '%s' % (entry.slug)
            t = render_to_string('details/entry.html', c).encode('utf-8')
            self.write_file(path, t, 'html', slug)
            s = render_to_string('details/entry.txt', c).encode('utf-8')
            self.write_file(path, s, 'txt', slug)

    def build_writing_archives(self):
        """Build the paginated archive of all entries under ``jrnl/``."""
        qs = self.get_model_querset()
        self.build_archive_pages(qs, 'jrnl/')

    def build_region_archive_pages(self):
        """Build a paginated archive under ``jrnl/<region slug>/`` for every region."""
        model = apps.get_model('locations', 'Region')
        blog = apps.get_model('jrnl', 'entry')
        for region in model.objects.all():
            qs = blog.objects.filter(status__exact=1, location__state__country__lux_region=region.id).order_by('-pub_date')
            path = 'jrnl/%s/' % (region.slug)
            self.build_archive_pages(qs, path)

    def build_country_archive_pages(self):
        """Build a paginated archive under ``jrnl/<country slug>/`` for every visited country."""
        model = apps.get_model('locations', 'Country')
        blog = apps.get_model('jrnl', 'entry')
        for country in model.objects.filter(visited=True):
            qs = blog.objects.filter(status__exact=1, location__state__country=country).order_by('-pub_date')
            path = 'jrnl/%s/' % (country.slug)
            self.build_archive_pages(qs, path)

    def writing_year_archives(self):
        """Render one date archive per year to ``jrnl/<year>/``."""
        entry = apps.get_model('jrnl', 'entry')
        years = entry.objects.dates('pub_date', 'year')
        for year in years:
            year = year.strftime('%Y')
            qs = entry.objects.filter(status__exact=1, pub_date__year=year).order_by('pub_date')
            # Plain dict: render_to_string rejects Context objects on Django >= 1.11.
            c = {'type': 'year', 'year': year, 'object_list': qs}
            t = render_to_string('archives/writing_date.html', c).encode('utf-8')
            fpath = 'jrnl/%s/' % (year)
            self.write_file(fpath, t)

    def writing_month_archives(self):
        """Render one date archive per month to ``jrnl/<year>/<month>/``."""
        entry = apps.get_model('jrnl', 'entry')
        months = entry.objects.dates('pub_date', 'month')
        for m in months:
            year = m.strftime('%Y')
            month = m.strftime('%m')
            month_full_name = m.strftime('%B')
            qs = entry.objects.filter(status__exact=1, pub_date__year=year,
                                      pub_date__month=month).order_by('pub_date')
            c = {'type': 'monthly', 'year': year, 'month': month_full_name, 'object_list': qs, }
            t = render_to_string('archives/writing_date.html', c).encode('utf-8')
            fpath = 'jrnl/%s/%s/' % (year, month)
            self.write_file(fpath, t)

    def build_homepage(self):
        """Render the homepage (template named by the pk=1 curator object) to the site root."""
        obj = apps.get_model('jrnl', 'homepagecurrator').objects.get(pk=1)
        recent = apps.get_model('jrnl', 'entry').objects.filter(status__exact=1)[:4]
        template = obj.template_name
        c = {'homepage': obj, 'recent': recent, 'MEDIA_URL': settings.BAKED_MEDIA_URL, 'IMAGES_URL': settings.BAKED_IMAGES_URL}
        t = render_to_string(template, c).encode('utf-8')
        self.write_file('', t)

    def build_404(self):
        """Render ``404.html`` with an empty context to ``404.html`` in the site root."""
        t = render_to_string('404.html', {}).encode('utf-8')
        self.write_file('', t, 'html', '404')


class BuildPhotos(Build):
    """Bake the photo gallery archive, gallery detail pages and their javascript."""

    def build(self):
        self.build_photo_archive_pages()
        self.build_detail_pages()
        self.build_js()

    def build_photo_archive_pages(self):
        """Build the gallery archive under ``photos/``, 18 galleries per page."""
        qs = apps.get_model('photos', 'PhotoGallery').objects.all()
        self.build_archive_pages(qs, 'photos/', 18)

    def build_detail_pages(self):
        """Render every gallery to ``photos/galleries/<set_slug>/``."""
        qs = apps.get_model('photos', 'PhotoGallery').objects.all()
        for photo in qs:
            c = {'object': photo, 'MEDIA_URL': settings.BAKED_MEDIA_URL,
                 'IMAGES_URL': settings.BAKED_IMAGES_URL}
            t = render_to_string('details/photo_galleries.html', c).encode('utf-8')
            path = 'photos/galleries/%s/' % (photo.set_slug)
            self.write_file(path, t)

    def build_js(self):
        """Concatenate leaflet-providers.js and photos.js into ``media/js/photos.js`` (+ .min.js)."""
        # Both sources are read as UTF-8, matching the UTF-8 encode below.
        leaflet_providers_js = _read_text('%sdesign/templates/js/leaflet-providers.js' % settings.PROJ_ROOT)
        photos_js = _read_text('%sapp/photos/photos.js' % settings.PROJ_ROOT)
        js = leaflet_providers_js + photos_js
        self.write_file('media/js/', js.encode('utf-8'), 'js', 'photos')


class BuildProjects(Build):
    """Bake the projects archive, project pages, gif pages, park data and javascript."""

    def build(self):
        self.build_project_archive()
        self.build_project_details()
        self.build_project_data()
        self.build_gifs()
        self.build_np_basejs()

    def get_projects(self):
        """Return a one-element list describing the project with pk=2 (slug and model name)."""
        proj = apps.get_model('projects', 'Project').objects.get(pk=2)
        return [{'slug': proj.slug, 'name': proj.model_name}]

    def build_project_archive(self):
        """Render the list of status==1 projects, newest first, to ``projects/``."""
        qs = apps.get_model('projects', 'Project').objects.filter(status__exact=1).order_by('-pub_date')
        c = {'object_list': qs, 'MEDIA_URL': settings.BAKED_MEDIA_URL,
             'IMAGES_URL': settings.BAKED_IMAGES_URL}
        t = render_to_string('archives/projects.html', c).encode('utf-8')
        self.write_file('projects/', t)

    def build_project_details(self):
        """
        Render each project from ``get_projects`` with the template and output
        directory named after the second "/"-separated part of its slug.
        """
        for proj in self.get_projects():
            model = apps.get_model('projects', proj['name'])
            if proj['name'] == 'NationalParks':
                qs = model.objects.filter(visited__exact=True).order_by("-date_visited_begin")
            else:
                qs = model.objects.filter(status__exact=1)
            c = {
                'object_list': qs,
                'MEDIA_URL': settings.BAKED_MEDIA_URL,
                'IMAGES_URL': settings.BAKED_IMAGES_URL
            }
            slug_leaf = proj['slug'].split("/")[1]
            t = render_to_string('details/%s.html' % (slug_leaf), c).encode('utf-8')
            path = 'projects/%s/' % (slug_leaf)
            self.write_file(path, t)

    # not sure how to handle projects really, the above doesn't work and
    # if I just keep writing if/else statements that gets messy, so I guess
    # functions it is.
    def build_gifs(self):
        """Render every AnimatedGif to ``projects/gifs/<slug>/``."""
        qs = apps.get_model('projects', 'AnimatedGif').objects.all()
        for gif in qs:
            c = {
                'object': gif,
                'MEDIA_URL': settings.BAKED_MEDIA_URL,
                'IMAGES_URL': settings.BAKED_IMAGES_URL
            }
            t = render_to_string('details/gifs.html', c).encode('utf-8')
            path = 'projects/gifs/%s/' % (gif.slug)
            self.write_file(path, t)

    def build_project_data(self):
        """Write ``mpoly.json`` of every visited park to ``projects/data/natparks/<id>.json``."""
        model = apps.get_model('projects', 'NationalParks')
        path = 'projects/data/natparks/'
        for park in model.objects.filter(visited__exact=True):
            park_json = park.mpoly.json
            self.write_file(path, park_json.encode('utf-8'), 'json', park.id)

    def build_np_basejs(self):
        """Concatenate leaflet-providers.js and natparks.js into ``media/js/natparks.js`` (+ .min.js)."""
        leaflet_providers_js = _read_text('%sdesign/templates/js/leaflet-providers.js' % settings.PROJ_ROOT)
        natparks_js = _read_text('%sapp/projects/natparks.js' % settings.PROJ_ROOT)
        js = leaflet_providers_js + natparks_js
        self.write_file('media/js/', js.encode('utf-8'), 'js', 'natparks')


class BuildSitemap(Build):
    """Fetch ``/sitemap.xml`` through the test client and write it to the site root."""

    def build(self):
        c = Client()
        response = c.get('/sitemap.xml', HTTP_HOST='127.0.0.1')
        self.write_file('', response.content, 'xml', 'sitemap')


class BuildWritingFeed(Build):
    """Render the 20 newest entries with ``feed.xml`` to ``rss/index.xml``."""

    def build(self):
        # NOTE(review): every other builder in this file loads entries from
        # the 'jrnl' app; 'blog' here may be a stale app label -- confirm.
        qs = apps.get_model('blog', 'entry').objects.filter(status__exact=1).order_by('-pub_date')[:20]
        c = {'object_list': qs, 'SITE_URL': settings.SITE_URL}
        t = render_to_string('feed.xml', c).encode('utf-8')
        fpath = '%s' % ('rss/',)
        self.write_file(fpath, t, 'xml')


class BuildPages(Build):
    """Render every page to ``<slug>.html`` and ``<slug>.txt`` in the site root."""

    def build(self):
        model = apps.get_model('pages', 'page')
        for page in model.objects.all():
            c = {'object': page, 'SITE_URL': settings.SITE_URL, 'MEDIA_URL': settings.BAKED_MEDIA_URL}
            # A page-specific template wins over the generic one when present.
            t = render_to_string(["details/%s.html" % page.slug, 'details/page.html'], c).encode('utf-8')
            s = render_to_string('details/page.txt', c).encode('utf-8')
            self.write_file('', t, 'html', page.slug)
            # The .txt file gets the text rendering; it previously received
            # the HTML (`t`) while `s` was rendered and thrown away.
            self.write_file('', s, 'txt', page.slug)


class BuildMap(Build):
    """Bake the map javascript (``media/js/mainmap.js``) and the map page (``map.html``)."""

    def build(self):
        qs = apps.get_model('jrnl', 'entry').objects.filter(status__exact=1)
        cl = apps.get_model('locations', 'Country').objects.filter(visited=True).exclude(name='default')
        rl = apps.get_model('locations', 'Region').objects.all()
        rtl = apps.get_model('locations', 'Route').objects.all()
        c = {
            'object_list': qs,
            'country_list': cl,
            'region_list': rl,
            'route_list': rtl,
            'MEDIA_URL': settings.BAKED_MEDIA_URL,
            'IMAGES_URL': settings.BAKED_IMAGES_URL
        }
        map_data_js = render_to_string('archives/map_data.html', c)
        leaflet_providers_js = _read_text('%sdesign/templates/js/leaflet-providers.js' % settings.PROJ_ROOT)
        js = leaflet_providers_js + map_data_js
        self.write_file('media/js/', js.encode('utf-8'), 'js', 'mainmap')
        c = {
            'country_list': cl,
            'region_list': rl,
            'route_list': rtl,
            'MEDIA_URL': settings.BAKED_MEDIA_URL,
            'IMAGES_URL': settings.BAKED_IMAGES_URL
        }
        t = render_to_string('archives/map.html', c).encode('utf-8')
        self.write_file('', t, "html", 'map')


# Back up entries to markdown text files which are then stored in dropbox and git
class EntryBak(Build):
    def get_model_querset(self):
        """Return the ``jrnl.entry`` objects with ``status == 1``."""
        model = apps.get_model('jrnl', 'entry')
        return model.objects.filter(status__exact=1)

    def write_file(self, path, text_object):
        """
        Write bytes to the full file path ``path``.

        Unlike ``Build.write_file`` this takes a complete path, creates no
        directories and adds no extension.
        """
        with open(path, 'wb') as out:
            out.write(text_object)

    def build_writing_bak(self):
        """Render every entry with ``details/entry-bak.txt`` to ``zbak/posts/<date>_<slug>.txt``."""
        for obj in self.get_model_querset():
            c = {'object': obj, 'MEDIA_URL': settings.BAKED_MEDIA_URL, 'IMAGES_URL': settings.BAKED_IMAGES_URL}
            path = "%szbak/posts/%s_%s.txt" % (settings.PROJ_ROOT, (obj.pub_date.strftime("%Y-%m-%d").lower()), obj.slug)
            t = render_to_string('details/entry-bak.txt', c).encode('utf-8')
            self.write_file(path, t)


class BuildBooks(Build):
    """Bake the book archive and one detail page per book under ``books/``."""

    def build(self):
        self.build_detail_pages()
        self.build_book_archive_pages()

    def build_book_archive_pages(self):
        """Build the book archive under ``books/``, 18 books per page."""
        qs = apps.get_model('books', 'Book').objects.all().order_by('-read_date').select_related()
        self.build_archive_pages(qs, 'books/', 18)

    def build_detail_pages(self):
        """Render every book to ``books/<slug>.html``."""
        qs = apps.get_model('books', 'Book').objects.all().order_by('-read_date').select_related()
        for book in qs:
            c = {'object': book, }
            t = render_to_string('details/book.html', c).encode('utf-8')
            path = 'books/'
            slug = '%s' % (book.slug)
            self.write_file(path, t, 'html', slug)


# ---------------------------------------------------------------------------
# app/builder/sanitizer.py
# ---------------------------------------------------------------------------
from bs4 import BeautifulSoup


class Sanitizer(object):
    """
    Remove blacklisted tags, attributes and link protocols from an HTML
    fragment. All three blacklists default to empty (nothing is removed).
    """
    blacklisted_tags = []
    blacklisted_attributes = []
    blacklisted_protocols = []

    def __init__(self, tags=None, attributes=None, protocols=None):
        # Falsy arguments leave the class-level (or subclass) defaults alone.
        if tags:
            self.blacklisted_tags = tags
        if attributes:
            self.blacklisted_attributes = attributes
        if protocols:
            self.blacklisted_protocols = protocols

    def strip(self, content=None):
        """Strip HTML content to meet standards of output type.
        Meant to be subclassed for each converter.

        Keyword arguments:
        content -- subset of an HTML document. (ie. contents of a body tag)

        Returns the sanitized inner HTML of the parsed body. When ``content``
        is empty, ``self.content`` is returned as-is if a subclass defines it,
        otherwise ``content`` itself.
        """
        if not content:
            # `self.content` is not defined by this class; reading it
            # unconditionally raised AttributeError.
            # NOTE(review): as in the original, the fallback is returned
            # without being sanitized -- confirm that is intended.
            return getattr(self, 'content', content)

        soup = BeautifulSoup(content, "lxml")
        self.strip_tags(soup)
        self.strip_attributes(soup)

        # No <body> (e.g. it was itself blacklisted): nothing left to return.
        if soup.body is None:
            return ''
        return soup.body.decode_contents()

    def strip_tags(self, soup):
        """Remove every blacklisted tag, together with its contents, from ``soup``."""
        if self.blacklisted_tags:
            for tag in soup.find_all(self.blacklisted_tags):
                tag.extract()

    def strip_attributes_extra(self, node):
        """Hook for subclasses; called once per tag that has attributes."""
        pass

    def strip_attributes(self, soup):
        """Remove blacklisted attributes and hrefs using a blacklisted protocol."""
        if not (self.blacklisted_attributes or self.blacklisted_protocols):
            return
        if soup.body is None:
            return

        # URL schemes are case-insensitive: compare lowercased so that
        # "JavaScript:" cannot slip past a "javascript" entry.
        blocked_protocols = {str(p).lower() for p in self.blacklisted_protocols}

        for node in soup.body.find_all(True):
            if not node.attrs:
                continue

            for attr in self.blacklisted_attributes:
                node.attrs.pop(attr, None)

            self.strip_attributes_extra(node)

            href = node.attrs.get('href')
            if isinstance(href, str):
                # Only a value that actually contains ":" carries a protocol;
                # a relative link such as "mailto" is left alone.
                protocol, colon, _rest = href.partition(':')
                if colon and protocol.strip().lower() in blocked_protocols:
                    del node['href']
# ---------------------------------------------------------------------------
# app/builder/views.py
# ---------------------------------------------------------------------------
from django.shortcuts import render
from django.template import RequestContext
#from builder.base import BuildWriting, BuildWritingFeed, BuildMap, BuildPhotos, BuildProjects, BuildSitemap
#from jrnl.build import archive_builder, detail_builder, home_builder, rss_builder, map_builder
from books.build import builder as book_builder
from pages.build import BuildPages, BuildHome
from posts.build import BuildJrnl, BuildFieldNotes, BuildSrc, BuildGuide
#from lttr.build import lttr_builder


def do_build(request):
    """
    Run the build job named by the ``id`` query parameter and render
    ``admin/message.html`` with a message describing it.

    An unknown or missing ``id`` runs nothing and renders the template with
    an empty context.
    """
    def _jrnl_details():
        jrnl = BuildJrnl("posts", "post")
        jrnl.build_latest()
        jrnl.build_detail_view()

    def _jrnl_archives():
        BuildJrnl("posts", "post").build_arc()

    def _jrnl_feed():
        BuildJrnl("posts", "post").build_feed("jrnl:feed")

    def _homepage():
        BuildHome("pages", "homepage").build()

    def _pages():
        BuildPages("pages", "page", 'luxagraf.net').build()

    # section id -> (message shown to the user, job to run or None)
    jobs = {
        'builddetails': ('Writing Jrnl Permalinks to Disk', _jrnl_details),
        'writingarchives': ('Writing Jrnl Archives to Disk', _jrnl_archives),
        'buildrss': ('Writing RSS to Disk', _jrnl_feed),
        'homepage': ('Writing index to Disk', _homepage),
        'pages': ('Writing Pages to Disk', _pages),
        # the newsletter builder is currently disabled:
        #lttr_builder()
        'lttr_archive': ('Writing newsletter archives to Disk', None),
        'buildbooks': ('Writing Book Pages to Disk', book_builder),
    }

    context = {}
    job = jobs.get(request.GET.get('id', ''))
    if job is not None:
        message, run = job
        context = {'message': message}
        if run is not None:
            run()
    return render(request, 'admin/message.html', context)