diff options
Diffstat (limited to 'app/blog')
-rwxr-xr-x | app/blog/parse.py | 52 |
1 files changed, 52 insertions, 0 deletions
#!/usr/bin/python
"""Find blog post files that changed since the last crawl and look them up.

Typical use from a Django shell::

    from blog.parse import *
    crawl_dir()
"""
import datetime
import os
from os.path import abspath, dirname

from dateutil.parser import parse as dateparser
from django.conf import settings
from django.core.exceptions import ObjectDoesNotExist

from blog.models import Entry

# Directory entries that are never posts: repository housekeeping files and
# hidden files (prefix match), plus editor backup files (suffix match).
_SKIP_PREFIXES = ('README', 'updategithub.php', '.')
_SKIP_SUFFIX = '~'


def parse_file(filepath):
    """Parse a post file into a dict of header fields plus its body.

    The first line of the file is skipped unconditionally (presumably the
    opening '---' of the header -- confirm against the post format).  The
    following lines are read as ``key: value`` pairs until a line consisting
    of '---' is reached; everything after that line is the body.

    Args:
        filepath: path of the post file to read.

    Returns:
        A dict mapping each stripped header key to its stripped value, plus
        ``"body_markdown"`` holding the body text.  Each body line is
        stripped and prefixed with a newline, so a non-empty body starts
        with ``"\\n"`` and an empty body is ``""``.

    Raises:
        ValueError: if no closing '---' line follows the header.
        IOError/OSError: if the file cannot be opened or read.
    """
    # Context manager so the handle is closed even if reading fails.
    with open(filepath) as post:
        raw = post.read().splitlines()

    data = {}
    body_start = None
    for index, line in enumerate(raw[1:], 1):
        if line.strip() == '---':
            body_start = index + 1
            break
        # partition() never raises, so blank or colon-less header lines are
        # ignored instead of crashing the whole parse.
        key, sep, value = line.partition(':')
        if sep:
            data[key.strip()] = value.strip()

    if body_start is None:
        raise ValueError("%s: no closing '---' line after the header" % filepath)

    # Take every line after the delimiter, so a '---' inside the post (e.g.
    # a Markdown horizontal rule) no longer truncates the body.
    # NOTE(review): stripping each line is kept from the original, but it
    # drops indentation that Markdown treats as significant -- confirm
    # whether that is intended.
    data["body_markdown"] = "".join("\n" + line.strip() for line in raw[body_start:])
    return data


# Design notes:
#   Still needed: a function that queries the database for the title and
#   date.  If there is no matching entry the post is new, so it should be
#   added and published.
#   Edits are the open problem: detecting them means comparing against the
#   file's last-modified time, which is notoriously inaccurate.


def _last_run_path():
    """Return the path of the marker file whose mtime records the last crawl."""
    return abspath(dirname(__file__)) + '/last_run'


def crawl_dir():
    """Report posts in ``settings.POSTS_DIR`` modified since the last crawl.

    Each regular file that is not on the skip list and whose modification
    time is newer than the ``last_run`` marker file (stored next to this
    module) is parsed and looked up as an ``Entry`` by title and publication
    date.  The outcome is printed; nothing is written to the database.
    Afterwards the marker file is rewritten so that its modification time
    records this run.

    Raises:
        KeyError: if a modified post lacks a ``pub_date`` or ``title`` header.
        ValueError: if a modified post has no closing '---' header line.
    """
    file_root = settings.POSTS_DIR
    marker = _last_run_path()

    # The marker's mtime does not change while we scan, so read it once.
    try:
        last_run = datetime.datetime.fromtimestamp(os.path.getmtime(marker))
    except OSError:
        # First run: no marker yet, so treat every post as modified.
        last_run = datetime.datetime.min

    for name in os.listdir(file_root):
        if name.startswith(_SKIP_PREFIXES) or name.endswith(_SKIP_SUFFIX):
            continue
        fpath = os.path.join(file_root, name)
        if not os.path.isfile(fpath):
            # Sub-directories cannot be parsed as posts.
            continue

        last_mod = datetime.datetime.fromtimestamp(os.path.getmtime(fpath))
        if last_mod <= last_run:
            continue

        print("needs an update")
        data = parse_file(fpath)
        date = dateparser(data['pub_date'])
        try:
            row = Entry.objects.get(title=str(data['title']), pub_date=date)
        except ObjectDoesNotExist:
            print(data['title'] + str(date) + " = not found")
        else:
            print("%s %s" % (row.title, date))

    # Only the marker's mtime is consulted on the next run; the timestamp
    # written inside is for human inspection.
    with open(marker, 'w') as last_mod_dump:
        last_mod_dump.write(str(datetime.datetime.now()) + "\n")