diff options
Diffstat (limited to 'app/blog/parse.py')
-rwxr-xr-x | app/blog/parse.py | 56 |
1 files changed, 0 insertions, 56 deletions
#!/usr/bin/python
"""Helpers for syncing flat-file blog posts with the ``Entry`` model.

``parse_file`` reads one post made of a ``key: value`` header followed by a
``---`` line and the Markdown body.  ``crawl_dir`` walks
``settings.POSTS_DIR`` and reports, for every post modified since the
previous run, whether a matching ``Entry`` row exists.

Usage from a Django shell::

    from blog.parse import *
    crawl_dir()
"""
import os
import datetime
from os.path import abspath, dirname

# Line that closes the header section of a post.
_DELIMITER = '---'

# Directory entries that are never posts (an additional check skips editor
# backups ending in "~").
_SKIP_PREFIXES = ('README', 'updategithub.php', '.')

# Marker file kept next to this module; its mtime records the previous run.
_LAST_RUN_PATH = os.path.join(abspath(dirname(__file__)), 'last_run')


def parse_file(filepath):
    """Parse one post file into a dict of header fields plus the body.

    The first line of the file is skipped unconditionally.  The following
    lines are read as ``key: value`` pairs (split on the first colon, both
    sides stripped) until a line consisting of ``---`` is reached.
    Everything after that line is the body.

    Args:
        filepath: Path of the post file to read.

    Returns:
        A dict mapping each header key to its value, plus the key
        ``"body_markdown"``.  The body is built from the remaining lines,
        each stripped of surrounding whitespace and preceded by a newline,
        so a non-empty body starts with ``"\\n"`` and an empty one is ``""``.

    Raises:
        ValueError: If a non-blank header line has no colon, or if no
            closing ``---`` line is found.
        IOError/OSError: If the file cannot be opened or read.
    """
    with open(filepath) as handle:
        lines = handle.read().splitlines()

    data = {}
    body_start = None
    for index, line in enumerate(lines[1:], 1):
        stripped = line.strip()
        if stripped == _DELIMITER:
            body_start = index + 1
            break
        if not stripped:
            # Blank lines inside the header carry no data.
            continue
        key, separator, value = line.partition(':')
        if not separator:
            raise ValueError(
                "%s:%d: expected 'key: value', got %r"
                % (filepath, index + 1, line))
        data[key.strip()] = value.strip()

    if body_start is None:
        raise ValueError(
            "%s: header is not terminated by a '---' line" % filepath)

    # Slice by line position rather than splitting the text on '---', so a
    # horizontal rule or dash run inside the body does not truncate it.
    # NOTE(review): stripping every body line also removes Markdown
    # indentation; kept because existing callers receive stripped lines.
    data["body_markdown"] = "".join(
        "\n" + line.strip() for line in lines[body_start:])
    return data


# TODO: query the database by title and date; when no entry exists the post
# is new and should be added and published.
# TODO: edits to existing posts still need handling.  That means relying on
# the file's last-modified time, which is notoriously inaccurate.


def crawl_dir():
    """Report which recently modified posts have a matching ``Entry`` row.

    Lists ``settings.POSTS_DIR``, skipping names that start with ``README``,
    ``updategithub.php`` or ``.``, names that end with ``~``, and anything
    that is not a regular file.  Each remaining file whose mtime is newer
    than the mtime of the ``last_run`` marker is parsed, and ``Entry`` is
    looked up by title and publication date; the outcome is printed.
    Finally the marker file is rewritten with the current time.

    A missing marker file is treated as "never run", so every post is
    considered modified.

    Raises:
        ValueError: Propagated from ``parse_file`` for a malformed post.
        KeyError: If a post has no ``pub_date`` or ``title`` header.
    """
    # Imported here so that importing this module (and using ``parse_file``)
    # does not require configured Django settings.
    from dateutil.parser import parse as dateparser
    from django.conf import settings
    from django.core.exceptions import ObjectDoesNotExist
    from blog.models import Entry

    file_root = settings.POSTS_DIR

    # The marker is only rewritten after the loop, so read its mtime once.
    try:
        last_run = datetime.datetime.fromtimestamp(
            os.path.getmtime(_LAST_RUN_PATH))
    except OSError:
        last_run = datetime.datetime.min

    for name in os.listdir(file_root):
        if name.startswith(_SKIP_PREFIXES) or name.endswith('~'):
            continue
        fpath = os.path.join(file_root, name)
        if not os.path.isfile(fpath):
            continue
        last_mod = datetime.datetime.fromtimestamp(os.path.getmtime(fpath))
        if last_mod <= last_run:
            continue

        print("needs an update")
        data = parse_file(fpath)
        date = dateparser(data['pub_date'])
        try:
            row = Entry.objects.get(title=str(data['title']), pub_date=date)
        except ObjectDoesNotExist:
            print(data['title'] + str(date) + " = not found")
        else:
            # Single formatted string so output is the same on Python 2 and 3.
            print("%s %s" % (row.title, date))

    with open(_LAST_RUN_PATH, 'w') as last_mod_dump:
        last_mod_dump.write(str(datetime.datetime.now()) + "\n")