1 files changed, 78 insertions, 36 deletions
diff --git a/app/books/kindleparser.py b/app/books/kindleparser.py
index 44a4686..c56499e 100644
--- a/app/books/kindleparser.py
+++ b/app/books/kindleparser.py
@@ -5,48 +5,89 @@ This script then parses that json and tries to load it into the django database
 """
 import json
 import datetime
-from django.core.exceptions import ObjectDoesNotExist
+from django.core.exceptions import ObjectDoesNotExist, MultipleObjectsReturned
 from books.models import Book, BookHighlight
 
 
-def parse_kindle_clippings(path):
-    json_data = open(path)
-    data = json.load(json_data)
-    for item in data:
-        for clip in item['clippings']:
-            if clip["type"] != "Bookmark":
-                book = clip['title']
-                clip_date = datetime.datetime.strptime(clip['date'], "%Y-%m-%dT%H:%M:%S.000Z")
-                try:
-                    body_markdown = clip['content']
-                except KeyError:
-                    body_markdown = ""
-                try:
-                    location = clip['locationRange']
-                except:
-                    location = 0
-                try:
-                    page = int(clip['pageRange'][0])
-                except:
-                    page = 0
-                try:
-                    author_name = clip['author']
-                except KeyError:
-                    author_name = ''
-                try:
-                    #see if we already have this book:
-                    row = Book.objects.get(title=clip['title'])
-                except ObjectDoesNotExist:
-                    b, created = Book.objects.get_or_create(
-                        title=clip['title'],
-                        author_name=author_name,
-                        read_date=clip_date
-                    )
+class KindleClippingsParser(object):
+
+    def __init__(self, path):
+        self.path = path  # clippings file location; parse() passes it to open()
+
+    def parse(self):
+        """Read the clippings file and hand each new title to create_highlight().
+
+        Entries are separated by "==========". For each entry with a body,
+        the title and "Added on" timestamp are extracted; create_highlight()
+        is called when the title differs from the previous entry's title.
+
+        Raises IndexError/ValueError if an entry's "Added on" stamp is
+        missing or not in the expected format.
+        """
+        with open(self.path, 'r', encoding='utf-8') as f:
+            old_title = ''
+            for highlight in f.read().split("=========="):
+                page = None
+                location = None
+                # Drop whatever precedes the entry's first newline
+                lines = highlight.split("\n")[1:]
+                # Need a title line and a body line (lines[3]); skip otherwise
+                if len(lines) < 4 or not lines[0] or lines[3] == "":
+                    continue
+                # A byte-order mark may prefix the title line
+                raw_title = lines[0].lstrip("\ufeff")
                 try:
+                    titleauth = raw_title.split("(")
+                    title = titleauth[0]
+                    author = titleauth[1].split(")")[0]
+                except IndexError:
+                    # No "(author)" part: fall back to the raw line itself
+                    title = raw_title
+                    author = raw_title
+                meta = lines[1]
+                try:
+                    page = meta.split("- Your Highlight on page ")[1].split(" |")[0]
+                except IndexError:
+                    try:
+                        location = meta.split("- Your Highlight on Location ")[1].split(" |")[0]
+                    except IndexError:
+                        # Notes without a page number leave page as None
+                        page = meta.partition("- Your Note on page ")[2].split(" |")[0] or None
+                date = meta.split("| Added on ")[1].split(",", 1)[1]
+                # e.g. "November 23, 2020 11:22:41 PM"; %p only applies with %I
+                date_time_obj = datetime.datetime.strptime(date.strip(), '%B %d, %Y %I:%M:%S %p')
+                if old_title != title:
+                    self.create_highlight(title, date_time_obj)
+                old_title = title
+
+    def create_highlight(self, title, date):
+        print(title)
+        try:
+            book = Book.objects.get(kindle_title=title)
+            print("success")
+        except ObjectDoesNotExist:
+            try:
+                search_title = " ".join(t for t in title.split(" ")[:3])
+                book = Book.objects.get(title__icontains=search_title)
+                print(book)
+            except(ObjectDoesNotExist, MultipleObjectsReturned):
+                book, created = Book.objects.get_or_create(
+                    kindle_title=title,
+                    title=title,
+                    read_date=date,
+                    body_markdown = 'tk',
+                )
+                if created:
+                    print(book)
+        """
+        book, created = Book.objects.get_or_create(
+            kindle_title=title,
+            author_name=author_name,
+        )
+        try:
                     #see if we already this highlight
-                    bh = BookHighlight.objects.get(
+                    h = BookHighlight.objects.get(
                         book__title=clip['title'],
-                        date_added=clip_date
                     )
                     #print(bh.book.title)
                     print(location)
@@ -66,3 +107,4 @@ def parse_kindle_clippings(path):
                         date_added=clip_date,
                         body_markdown=body_markdown
                     )
+                    """