summaryrefslogtreecommitdiff
path: root/app/books/kindleparser.py
blob: c56499e4ab4cb53ccf57e394d9c4c058e71ea61d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
"""
kindle My Clippings.txt parsed to json by:
klip: https://www.npmjs.org/package/klip
This script then parses that json and tries to load it into the django database
"""
import json
import datetime
from django.core.exceptions import ObjectDoesNotExist, MultipleObjectsReturned
from books.models import Book, BookHighlight


class KindleClippingsParser(object):
    """Read a Kindle ``My Clippings.txt`` file and register its books.

    The file is a sequence of clippings, each terminated by a line of ten
    equals signs.  A clipping looks like::

        Book Title (Author Name)
        - Your Highlight on page 5 | Added on Monday, November 23, 2020 11:22:41 PM
        <blank line>
        The highlighted text

    ``parse`` walks the clippings and calls ``create_highlight`` once for
    every run of consecutive clippings that share a title.
    """

    # Kindle terminates every clipping with this line.
    _SEPARATOR = "=========="

    # Prefixes of the metadata line and the position field each one fills.
    _POSITION_PREFIXES = (
        ("- Your Highlight on page ", "page"),
        ("- Your Highlight on Location ", "location"),
        ("- Your Note on page ", "page"),
        ("- Your Note on Location ", "location"),
    )

    # Matches e.g. "November 23, 2020 11:22:41 PM".  The hour must be parsed
    # with %I: strptime ignores %p when the hour is parsed with %H, which
    # would turn every PM timestamp into its AM counterpart.
    _DATE_FORMAT = '%B %d, %Y %I:%M:%S %p'

    def __init__(self, path):
        """Remember ``path``, the location of the clippings file."""
        self.path = path

    def parse(self):
        """Parse the clippings file and create a book for every new title.

        Clippings without a body (e.g. bookmarks) or without a title line
        are skipped.  ``create_highlight`` is only called when the title
        differs from the title of the previously processed clipping.

        Raises:
            IndexError: if a metadata line has no "| Added on <day>, <date>"
                section.
            ValueError: if the date does not match the expected format.
        """
        # Kindle writes the file as UTF-8 (often with a BOM); do not depend
        # on the platform's default encoding.
        with open(self.path, 'r', encoding='utf-8') as f:
            chunks = f.read().split(self._SEPARATOR)

        old_title = ''
        for chunk in chunks:
            clipping = self._parse_clipping(chunk)
            if clipping is None:
                continue
            title = clipping['title']
            if old_title != title:
                self.create_highlight(title, clipping['date'])
            old_title = title

    def _parse_clipping(self, chunk):
        """Turn the text between two separators into a dict, or ``None``.

        The returned dict has the keys ``title``, ``author``, ``page``,
        ``location``, ``date`` and ``body``.  ``None`` means the chunk is
        not a usable clipping (too short, empty body, or empty title).
        """
        # Every chunk but the first starts with the newline that ended the
        # separator line.  Strip it instead of blindly dropping the first
        # line, which would discard the title of the very first clipping.
        lines = chunk.lstrip("\n").split("\n")
        # Layout: title, metadata, blank line, body -> at least four lines.
        if len(lines) < 4 or lines[3] == "":
            return None

        title_line = lines[0]
        if title_line.startswith("\ufeff"):
            title_line = title_line[1:]
        if not title_line:
            return None

        title, author = self._split_title_author(title_line)
        page, location = self._parse_position(lines[1])

        # "... | Added on Monday, November 23, 2020 11:22:41 PM": drop the
        # weekday, keep everything after the first comma.
        date_text = lines[1].split("| Added on ")[1].split(",", 1)[1]
        date = datetime.datetime.strptime(date_text.strip(), self._DATE_FORMAT)

        return {
            'title': title,
            'author': author,
            'page': page,
            'location': location,
            'date': date,
            'body': lines[3],
        }

    @staticmethod
    def _split_title_author(title_line):
        """Split ``"Title (Author)"`` into ``(title, author)``.

        The title keeps whatever precedes the first "(" unchanged (including
        a trailing space).  Without parentheses the whole line is used for
        both values.
        """
        parts = title_line.split("(")
        if len(parts) > 1:
            return parts[0], parts[1].split(")")[0]
        return title_line, title_line

    def _parse_position(self, meta_line):
        """Return ``(page, location)`` read from the metadata line.

        Only the first matching prefix is used; both values stay ``None``
        when the line matches none of the known prefixes.
        """
        page = None
        location = None
        for prefix, kind in self._POSITION_PREFIXES:
            pieces = meta_line.split(prefix)
            if len(pieces) < 2:
                continue
            value = pieces[1].split(" |")[0]
            if kind == "page":
                page = value
            else:
                location = value
            break
        return page, location

    def create_highlight(self, title, date):
        """Make sure a ``Book`` exists for the Kindle title ``title``.

        Lookup order:

        1. a book whose ``kindle_title`` equals ``title``;
        2. a single book whose ``title`` contains the first three words of
           ``title`` (case-insensitive);
        3. otherwise a new book is created with ``read_date=date`` and a
           placeholder body.

        Despite its name, this method does not create ``BookHighlight`` rows.
        """
        print(title)
        try:
            book = Book.objects.get(kindle_title=title)
            print("success")
        except ObjectDoesNotExist:
            try:
                search_title = " ".join(title.split(" ")[:3])
                book = Book.objects.get(title__icontains=search_title)
                print(book)
            except (ObjectDoesNotExist, MultipleObjectsReturned):
                # Only kindle_title identifies the book; the remaining
                # fields are just initial values for a newly created row.
                book, created = Book.objects.get_or_create(
                    kindle_title=title,
                    defaults={
                        'title': title,
                        'read_date': date,
                        'body_markdown': 'tk',
                    },
                )
                if created:
                    print(book)
        # TODO: persist the individual highlights.  An earlier draft looked
        # up an existing BookHighlight for the book and, when none existed,
        # called BookHighlight.objects.get_or_create(book=..., page=...,
        # location=..., date_added=..., body_markdown=...).