""" kindle My Clippings.txt parsed to json by: klip: https://www.npmjs.org/package/klip This script then parses that json and tries to load it into the django database """ import json import datetime from django.core.exceptions import ObjectDoesNotExist, MultipleObjectsReturned from books.models import Book, BookHighlight class KindleClippingsParser(object): def __init__(self, path): self.path = path def parse(self): with open(self.path, 'r') as f: # Individual highlights within clippings are separated by ========== for highlight in f.read().split("=========="): page = None location = None # For each highlight, we split it into the lines lines = highlight.split("\n")[1:] # Don't try to write if we have no body if len(lines) < 3 or lines[3] == "": continue titleauth = lines[0] if titleauth[0] == "\ufeff": titleauth = titleauth[1:] try: titleauth = titleauth.split("(") title = titleauth[0] author = titleauth[1].split(")")[0] except IndexError: title = str(titleauth) author = str(titleauth) try: page = lines[1].split("- Your Highlight on page ")[1].split(" |")[0] except IndexError: try: page = lines[1].split("- Your Highlight on Location ")[1].split(" |")[0] except IndexError: page = lines[1].split("- Your Note on page ")[1].split(" |")[0] date = lines[1] date = date.split("| Added on ")[1].split(",", 1)[1] # date string looks like: "November 23, 2020 11:22:41 PM" date_time_obj = datetime.datetime.strptime(date.strip(), '%B %d, %Y %H:%M:%S %p') self.create_highlight(title, date_time_obj, page, lines[3]) def create_highlight(self, title, date, page, clip): print(title) try: book = Book.objects.get(kindle_title=title) print("success") except ObjectDoesNotExist: try: search_title = " ".join(t for t in title.split(" ")[:3]) book = Book.objects.get(title__icontains=search_title) print(book) except(ObjectDoesNotExist, MultipleObjectsReturned): book, created = Book.objects.get_or_create( kindle_title=title, title=title, read_date=date, body_markdown = 'tk', ) if created: print(book) try: #see if we already this highlight h = BookHighlight.objects.get( book__title=title, date_highlighted=date, ) #if we don't create a new book highlight except ObjectDoesNotExist: print("hightlight: %s" % book.title) print("page : %s" % page) print("on date: %s" % date) print("quote: %s" % clip) print("--------------") bh, created = BookHighlight.objects.get_or_create( book=book, page=page, date_highlighted=date, body_markdown=clip )