#!/usr/bin/python import os import datetime from dateutil.parser import parse as dateparser from os.path import abspath, dirname from django.core.exceptions import ObjectDoesNotExist from blog.models import Entry from django.conf import settings def parse_file(filepath): data = {} contents = open(filepath).read() raw = contents.splitlines() for line in raw[1:]: if line == '---': break else: k, v = line.split(':', 1) data[k.strip()] = v.strip() body = "\n".join(line.strip() for line in raw[1:]) data["body_markdown"] = body.split('---')[1] return data """ now I need a function to query the db for the title and date if there's no entry then it's new and we add it and publish What about edits though? Crap, edits. That means we need to check lastmod and that's notoriously inaccurate. damn. from blog.parse import * crawl_dir() """ def crawl_dir(): file_root = settings.POSTS_DIR file_list = os.listdir(file_root) file_list = filter(lambda item: not (item.startswith('README') or item.startswith('updategithub.php') or item.startswith('.') or item.endswith('~')), file_list) for f in file_list: fpath = file_root + "/" + f last_mod = datetime.datetime.fromtimestamp(os.path.getmtime(fpath)) last_run = datetime.datetime.fromtimestamp(os.path.getmtime(abspath(dirname(__file__)) + '/last_run')) if last_mod > last_run: print "needs an update" data = parse_file(fpath) date = dateparser(data['pub_date']) try: row = Entry.objects.get(title=str(data['title']), pub_date=date) print row.title, date except ObjectDoesNotExist: print data['title'] + str(date) + " = not found" last_mod_dump = open(abspath(dirname(__file__)) + '/last_run', 'w') print last_mod_dump print >> last_mod_dump, str(datetime.datetime.now())