summaryrefslogtreecommitdiff
path: root/app/blog
diff options
context:
space:
mode:
Diffstat (limited to 'app/blog')
-rwxr-xr-x app/blog/parse.py 52
1 files changed, 52 insertions, 0 deletions
diff --git a/app/blog/parse.py b/app/blog/parse.py
new file mode 100755
index 0000000..27f5165
--- /dev/null
+++ b/app/blog/parse.py
@@ -0,0 +1,52 @@
+#!/usr/bin/python
+import os, datetime
+from dateutil.parser import parse as dateparser
+from os.path import abspath,dirname
+from django.core.exceptions import ObjectDoesNotExist
+from blog.models import Entry
+
+from django.conf import settings
+
def parse_file(filepath):
    """Parse a post file into a dict of header fields plus its body.

    The first line of the file is skipped (presumably the opening ``---``
    of the header block -- it is not checked). Every following line up to
    the next ``---`` line is read as ``key: value`` and stored with both
    sides stripped; only the first ``:`` separates key from value, so
    values may themselves contain colons. Everything after that closing
    ``---`` line is the body.

    Args:
        filepath: path of the post file to read.

    Returns:
        dict mapping each header key to its value, plus the key
        ``"body_markdown"`` holding the body text. Each body line is
        stripped and preceded by a newline, so a non-empty body starts
        with ``"\\n"`` and an absent body is ``""``.

    Raises:
        ValueError: if a non-blank header line has no ``:``, or if no
            closing ``---`` line is found.
        IOError/OSError: if the file cannot be opened or read.
    """
    # Use a context manager so the handle is closed even if reading fails.
    with open(filepath) as handle:
        lines = handle.read().splitlines()

    data = {}
    body_start = None
    for index, line in enumerate(lines[1:], 1):
        stripped = line.strip()
        if stripped == '---':
            body_start = index + 1
            break
        if not stripped:
            # Tolerate blank lines inside the header block.
            continue
        key, value = line.split(':', 1)
        data[key.strip()] = value.strip()

    if body_start is None:
        raise ValueError("%s: no closing '---' line after the header" % filepath)

    # Take the body by line position rather than by splitting the text on
    # '---': a '---' inside the post (e.g. a Markdown horizontal rule) or
    # inside a header value must not cut the body short.
    # Each line is prefixed with "\n" to keep the historical output shape,
    # where the body began with the newline that followed the delimiter.
    data["body_markdown"] = "".join(
        "\n" + line.strip() for line in lines[body_start:]
    )
    return data
+
+"""
+ TODO: add a function that queries the db for each post's title and date.
+ If there is no matching entry, the post is new, so we add it and publish it.
+ What about edits, though? Detecting edits means checking each file's
+ last-modified time, and that is notoriously inaccurate.
+
+ Usage:
+from blog.parse import *
+crawl_dir()
+
+"""
def crawl_dir():
    """Report which post files changed since the previous run.

    Lists ``settings.POSTS_DIR``, ignoring names that start with
    ``README``, ``updategithub.php`` or ``.`` and names that end with
    ``~``. A file counts as changed when its modification time is later
    than that of the ``last_run`` stamp file stored next to this module.

    For each changed file the header is parsed with ``parse_file`` and an
    ``Entry`` with the same title and ``pub_date`` is looked up; the
    outcome is printed. Nothing is written to the database.

    Once every file has been examined, the stamp file is rewritten so its
    modification time marks this run.
    """
    posts_dir = settings.POSTS_DIR
    stamp_path = os.path.join(abspath(dirname(__file__)), 'last_run')

    # Read the previous run time once, up front. Every file must be
    # compared against the same reference; re-reading the stamp inside the
    # loop is wasted work and would skip files if the stamp were refreshed
    # part-way through the run.
    try:
        last_run = datetime.datetime.fromtimestamp(os.path.getmtime(stamp_path))
    except OSError:
        # No stamp file yet (first run): treat every post as changed.
        last_run = datetime.datetime.min

    skipped_prefixes = ('README', 'updategithub.php', '.')
    names = [
        name for name in os.listdir(posts_dir)
        if not (name.startswith(skipped_prefixes) or name.endswith('~'))
    ]

    for name in names:
        fpath = os.path.join(posts_dir, name)
        last_mod = datetime.datetime.fromtimestamp(os.path.getmtime(fpath))
        if last_mod <= last_run:
            continue

        print("needs an update")
        data = parse_file(fpath)
        date = dateparser(data['pub_date'])
        try:
            row = Entry.objects.get(title=str(data['title']), pub_date=date)
        except ObjectDoesNotExist:
            print(data['title'] + str(date) + " = not found")
        else:
            print("%s %s" % (row.title, date))

    # Only the stamp file's mtime is read back; the timestamp text is
    # written for humans. The context manager flushes and closes the file.
    with open(stamp_path, 'w') as stamp:
        stamp.write(str(datetime.datetime.now()) + "\n")