blob: a1e30565e0c1ca7855b564f75438ddcf88fa539e (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
|
#!/usr/bin/python
import os
import datetime
from dateutil.parser import parse as dateparser
from os.path import abspath, dirname
from django.core.exceptions import ObjectDoesNotExist
from blog.models import Entry
from django.conf import settings
def parse_file(filepath):
    """Parse a post file into a dict of header fields plus its body.

    The expected layout is::

        ---
        title: Some title
        pub_date: 2011-01-01
        ---
        Markdown body...

    The first line is skipped without being inspected. Each following line,
    up to the first line that is exactly ``---``, is split on its first colon
    into a key and a value (both stripped of surrounding whitespace). Blank
    header lines are ignored. Everything after that ``---`` line is stored
    under the ``"body_markdown"`` key.

    Args:
        filepath: path of the post file to read.

    Returns:
        A dict mapping each header key to its value, plus ``"body_markdown"``.

    Raises:
        ValueError: if a non-blank header line contains no colon, or if the
            closing ``---`` line is missing.
    """
    # Read everything up front so the handle is closed before parsing.
    with open(filepath) as post:
        raw = post.read().splitlines()

    data = {}
    body_start = None
    for index, line in enumerate(raw[1:], 1):
        if line == '---':
            body_start = index + 1
            break
        if not line.strip():
            continue
        k, v = line.split(':', 1)
        data[k.strip()] = v.strip()

    if body_start is None:
        raise ValueError("%s: no closing '---' line after the header" % filepath)

    # Slice by line index instead of splitting the text on '---', so a
    # horizontal rule (or any other '---') inside the body is preserved.
    # Every body line is stripped and preceded by a newline, so a non-empty
    # body starts with "\n".
    # NOTE(review): stripping each line also removes leading indentation,
    # which Markdown can treat as significant -- confirm this is intended.
    data["body_markdown"] = "".join("\n" + line.strip() for line in raw[body_start:])
    return data
"""
Plan: query the db for each post's title and date. If there is no
matching entry, the post is new, so we add it and publish it.
Edits are harder: detecting them means checking the file's
last-modified time, which is notoriously inaccurate.

Usage:
    from blog.parse import *
    crawl_dir()
"""
def crawl_dir():
    """Report which post files changed since the previous crawl.

    Lists ``settings.POSTS_DIR``, skipping names that start with ``README``,
    ``updategithub.php`` or ``.``, names that end with ``~``, and anything
    that is not a regular file. Each remaining file whose modification time
    is newer than that of the ``last_run`` marker file (stored next to this
    module) is parsed with ``parse_file`` and looked up as an ``Entry`` by
    title and publication date; the outcome of the lookup is printed.

    Afterwards the marker file is rewritten with the current time, which
    moves its modification time forward for the next crawl. If the marker
    does not exist yet, every post is treated as changed.

    Output uses single-argument ``print(...)`` calls, which behave the same
    under the print statement and the print function.
    """
    posts_dir = settings.POSTS_DIR
    marker_path = os.path.join(abspath(dirname(__file__)), 'last_run')

    # The marker's mtime does not change during the loop, so read it once.
    # Raw epoch timestamps are compared rather than naive local datetimes,
    # because local wall-clock time can run backwards across a DST change.
    try:
        last_run = os.path.getmtime(marker_path)
    except OSError:
        # No marker yet (first run): consider every post changed.
        last_run = None

    skip_prefixes = ('README', 'updategithub.php', '.')
    for name in os.listdir(posts_dir):
        if name.startswith(skip_prefixes) or name.endswith('~'):
            continue
        fpath = os.path.join(posts_dir, name)
        if not os.path.isfile(fpath):
            # parse_file can only read regular files.
            continue
        if last_run is not None and os.path.getmtime(fpath) <= last_run:
            continue

        print("needs an update")
        data = parse_file(fpath)
        date = dateparser(data['pub_date'])
        try:
            row = Entry.objects.get(title=str(data['title']), pub_date=date)
        except ObjectDoesNotExist:
            print(data['title'] + str(date) + " = not found")
        else:
            print("%s %s" % (row.title, date))

    # Rewriting the marker updates its mtime; the context manager guarantees
    # the handle is flushed and closed.
    with open(marker_path, 'w') as last_mod_dump:
        print(last_mod_dump)
        last_mod_dump.write(str(datetime.datetime.now()) + "\n")
|