summary refs log tree commit diff
path: root/app/books/kindleparser.py
diff options
context:
space:
mode:
Diffstat (limited to 'app/books/kindleparser.py')
-rw-r--r-- app/books/kindleparser.py 114
1 files changed, 78 insertions, 36 deletions
diff --git a/app/books/kindleparser.py b/app/books/kindleparser.py
index 44a4686..c56499e 100644
--- a/app/books/kindleparser.py
+++ b/app/books/kindleparser.py
@@ -5,48 +5,89 @@ This script then parses that json and tries to load it into the django database
"""
import json
import datetime
-from django.core.exceptions import ObjectDoesNotExist
+from django.core.exceptions import ObjectDoesNotExist, MultipleObjectsReturned
from books.models import Book, BookHighlight
-def parse_kindle_clippings(path):
- json_data = open(path)
- data = json.load(json_data)
- for item in data:
- for clip in item['clippings']:
- if clip["type"] != "Bookmark":
- book = clip['title']
- clip_date = datetime.datetime.strptime(clip['date'], "%Y-%m-%dT%H:%M:%S.000Z")
- try:
- body_markdown = clip['content']
- except KeyError:
- body_markdown = ""
- try:
- location = clip['locationRange']
- except:
- location = 0
- try:
- page = int(clip['pageRange'][0])
- except:
- page = 0
- try:
- author_name = clip['author']
- except KeyError:
- author_name = ''
- try:
- #see if we already have this book:
- row = Book.objects.get(title=clip['title'])
- except ObjectDoesNotExist:
- b, created = Book.objects.get_or_create(
- title=clip['title'],
- author_name=author_name,
- read_date=clip_date
- )
+class KindleClippingsParser(object):
+
+ def __init__(self, path):
+ self.path = path
+
+ def parse(self):
+ with open(self.path, 'r') as f:
+ # Individual highlights within clippings are separated by ==========
+ old_title = ''
+ for highlight in f.read().split("=========="):
+ page = None
+ location = None
+ # For each highlight, we split it into the lines
+ lines = highlight.split("\n")[1:]
+ # Don't try to write if we have no body
+ if len(lines) < 3 or lines[3] == "":
+ continue
+ titleauth = lines[0]
+ if titleauth[0] == "\ufeff":
+ titleauth = titleauth[1:]
try:
+ titleauth = titleauth.split("(")
+ title = titleauth[0]
+ author = titleauth[1].split(")")[0]
+ except IndexError:
+ title = str(titleauth)
+ author = str(titleauth)
+ try:
+ page = lines[1].split("- Your Highlight on page ")[1].split(" |")[0]
+ except IndexError:
+ try:
+ location = lines[1].split("- Your Highlight on Location ")[1].split(" |")[0]
+ except IndexError:
+ page = lines[1].split("- Your Note on page ")[1].split(" |")[0]
+ date = lines[1]
+ date = date.split("| Added on ")[1].split(",", 1)[1]
+ # date string looks like: "November 23, 2020 11:22:41 PM"
+ date_time_obj = datetime.datetime.strptime(date.strip(), '%B %d, %Y %H:%M:%S %p')
+ #print(date, date_time_obj)
+ #print(title)
+ #print(author)
+ #if page:
+ # print(page)
+ #if location:
+ # print(location)
+ #print(lines[3])
+ #print("---------------")
+ if old_title != title:
+ self.create_highlight(title, date_time_obj)
+ old_title = title
+
+ def create_highlight(self, title, date):
+ print(title)
+ try:
+ book = Book.objects.get(kindle_title=title)
+ print("success")
+ except ObjectDoesNotExist:
+ try:
+ search_title = " ".join(t for t in title.split(" ")[:3])
+ book = Book.objects.get(title__icontains=search_title)
+ print(book)
+ except(ObjectDoesNotExist, MultipleObjectsReturned):
+ book, created = Book.objects.get_or_create(
+ kindle_title=title,
+ title=title,
+ read_date=date,
+ body_markdown = 'tk',
+ )
+ if created:
+ print(book)
+ """
+ book, created = Book.objects.get_or_create(
+ kindle_title=title,
+ author_name=author_name,
+ )
+ try:
#see if we already this highlight
- bh = BookHighlight.objects.get(
+ h = BookHighlight.objects.get(
book__title=clip['title'],
- date_added=clip_date
)
#print(bh.book.title)
print(location)
@@ -66,3 +107,4 @@ def parse_kindle_clippings(path):
date_added=clip_date,
body_markdown=body_markdown
)
+ """