app/books/kindleparser.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57

"""
Import Kindle clippings into the Django database.

The Kindle "My Clippings.txt" file is first converted to JSON with
klip: https://www.npmjs.org/package/klip
This script then parses that JSON and tries to load it into the Django
database.
"""
import json
import datetime
from django.core.exceptions import ObjectDoesNotExist
from books.models import Book, BookHighlights

def _int_or_zero(clip, key):
    """Return ``int(clip[key])``, or 0 if the key is absent or not an integer."""
    try:
        return int(clip[key])
    except (KeyError, TypeError, ValueError, OverflowError):
        # Missing key, a None/list value, or a string/float that int() rejects.
        return 0


def parse_kindle_clippings(path):
    """Load the clippings from a klip JSON export into the database.

    The JSON document at ``path`` is expected to be a list of items, each
    with a ``'clippings'`` list. Every clipping whose ``'type'`` is not
    ``"Bookmark"`` is stored as a ``BookHighlights`` row attached to the
    ``Book`` whose title matches the clipping's ``'title'``; the book is
    created on first sight, using the clipping date as its ``read_date``.
    A highlight is only created when no highlight with the same book and
    ``date_added`` exists yet, so re-importing the same file adds nothing.

    Args:
        path: Filesystem path of the JSON file to import.

    Returns:
        None. The function works by writing to the database.

    Raises:
        KeyError: If a clipping lacks ``'type'``, ``'title'`` or ``'date'``.
        ValueError: If the file is not valid JSON, or a clipping date does
            not match ``%Y-%m-%dT%H:%M:%S.000Z``.
    """
    # The context manager closes the file even when json.load() raises.
    with open(path) as json_file:
        data = json.load(json_file)

    for item in data:
        for clip in item['clippings']:
            if clip["type"] == "Bookmark":
                continue

            title = clip['title']
            # NOTE(review): the trailing "Z" suggests UTC, but the parsed
            # value is a naive datetime -- confirm how the model stores it.
            clip_date = datetime.datetime.strptime(
                clip['date'], "%Y-%m-%dT%H:%M:%S.000Z")

            # Look the book up by title only; the remaining fields are used
            # solely when the book has to be created.
            book, _ = Book.objects.get_or_create(
                title=title,
                defaults={
                    'author_name': clip.get('author', ''),
                    'read_date': clip_date,
                },
            )

            # A highlight is identified by its book and timestamp; page,
            # location and text are only written for new rows.
            BookHighlights.objects.get_or_create(
                book=book,
                date_added=clip_date,
                defaults={
                    'page': _int_or_zero(clip, 'pageRange'),
                    'location': _int_or_zero(clip, 'locationRange'),
                    'body_markdown': clip.get('content', ""),
                },
            )