summary | refs | log | tree | commit | diff
path: root/app/books
diff options
context:
space:
mode:
Diffstat (limited to 'app/books')
-rw-r--r-- app/books/admin.py 2
-rw-r--r-- app/books/kindleparser.py 114
-rw-r--r-- app/books/migrations/0012_auto_20201218_0946.py 23
-rw-r--r-- app/books/migrations/0013_auto_20201218_0949.py 18
-rw-r--r-- app/books/models.py 8
5 files changed, 125 insertions, 40 deletions
diff --git a/app/books/admin.py b/app/books/admin.py
index 8d94651..f488fe5 100644
--- a/app/books/admin.py
+++ b/app/books/admin.py
@@ -14,4 +14,4 @@ class BookAdmin(admin.ModelAdmin):
@admin.register(BookHighlight)
class BookHighlightAdmin(admin.ModelAdmin):
- list_display = ('book', 'page', 'date_added')
+ list_display = ('book', 'page', 'date_highlighted')
diff --git a/app/books/kindleparser.py b/app/books/kindleparser.py
index 44a4686..c56499e 100644
--- a/app/books/kindleparser.py
+++ b/app/books/kindleparser.py
@@ -5,48 +5,89 @@ This script then parses that json and tries to load it into the django database
"""
import json
import datetime
-from django.core.exceptions import ObjectDoesNotExist
+from django.core.exceptions import ObjectDoesNotExist, MultipleObjectsReturned
from books.models import Book, BookHighlight
-def parse_kindle_clippings(path):
- json_data = open(path)
- data = json.load(json_data)
- for item in data:
- for clip in item['clippings']:
- if clip["type"] != "Bookmark":
- book = clip['title']
- clip_date = datetime.datetime.strptime(clip['date'], "%Y-%m-%dT%H:%M:%S.000Z")
- try:
- body_markdown = clip['content']
- except KeyError:
- body_markdown = ""
- try:
- location = clip['locationRange']
- except:
- location = 0
- try:
- page = int(clip['pageRange'][0])
- except:
- page = 0
- try:
- author_name = clip['author']
- except KeyError:
- author_name = ''
- try:
- #see if we already have this book:
- row = Book.objects.get(title=clip['title'])
- except ObjectDoesNotExist:
- b, created = Book.objects.get_or_create(
- title=clip['title'],
- author_name=author_name,
- read_date=clip_date
- )
+class KindleClippingsParser(object):
+
+ def __init__(self, path):
+ """Store the path of the clippings file; the file is only opened in parse()."""
+ self.path = path
+
+    def parse(self):
+        """Scan the clippings file and pass each newly seen title to create_highlight.
+
+        The file at ``self.path`` is split on the ``==========`` separator.
+        For every entry that has a body line, the title line, the
+        page/location and the "Added on" timestamp are extracted, and
+        ``self.create_highlight(title, date)`` is called whenever the title
+        differs from the title of the previously processed entry.
+
+        Raises:
+            IndexError: if an entry's info line has no "| Added on " part or
+                no comma after the weekday.
+            ValueError: if the timestamp does not look like
+                "November 23, 2020 11:22:41 PM".
+        """
+        with open(self.path, 'r') as f:
+            # Individual highlights within clippings are separated by ==========
+            entries = f.read().split("==========")
+        old_title = ''
+        for highlight in entries:
+            # For each highlight, we split it into the lines
+            lines = highlight.split("\n")
+            # An entry that follows a separator begins with the (empty) rest of
+            # the separator line.  The first entry in the file does not, so
+            # only drop the leading line when it is blank; dropping it
+            # unconditionally discards the first entry's title.
+            if not lines[0].strip():
+                lines = lines[1:]
+            # Don't try to write if we have no body.  A usable entry has at
+            # least four lines: title, info, blank, body.
+            if len(lines) < 4 or lines[3] == "":
+                continue
+            title, author = self._split_title_author(lines[0])
+            info = lines[1]
+            # page/location/author are extracted for the highlight import
+            # but are not passed on yet.
+            page = self._info_field(info, "- Your Highlight on page ")
+            location = None
+            if page is None:
+                location = self._info_field(info, "- Your Highlight on Location ")
+                if location is None:
+                    page = self._info_field(info, "- Your Note on page ")
+            date = info.split("| Added on ")[1].split(",", 1)[1]
+            # date string looks like: "November 23, 2020 11:22:41 PM"
+            # %p only affects the parsed hour together with %I (12-hour
+            # clock); with %H every PM time was read as AM.
+            date_time_obj = datetime.datetime.strptime(date.strip(), '%B %d, %Y %I:%M:%S %p')
+            if old_title != title:
+                self.create_highlight(title, date_time_obj)
+                old_title = title
+
+    @staticmethod
+    def _split_title_author(titleauth):
+        """Split a "Title (Author)" line into (title, author).
+
+        A single leading byte-order mark is removed first.  When the line has
+        no "(", the whole line is returned for both values.
+        """
+        if titleauth.startswith("\ufeff"):
+            titleauth = titleauth[1:]
+        parts = titleauth.split("(")
+        if len(parts) < 2:
+            return titleauth, titleauth
+        return parts[0], parts[1].split(")")[0]
+
+    @staticmethod
+    def _info_field(info, prefix):
+        """Return the text between prefix and the next " |" in info, or None if prefix is absent."""
+        parts = info.split(prefix)
+        if len(parts) < 2:
+            return None
+        return parts[1].split(" |")[0]
+
+ def create_highlight(self, title, date):
+ # Find the Book that belongs to a Kindle title, creating one if nothing matches.
+ # title is compared against Book.kindle_title; date is only used as read_date
+ # when a new Book has to be created.
+ # NOTE(review): despite the name, the code visible here never creates a
+ # BookHighlight.
+ print(title)
+ try:
+ book = Book.objects.get(kindle_title=title)
+ print("success")
+ except ObjectDoesNotExist:
+ # NOTE(review): MultipleObjectsReturned from the kindle_title lookup above
+ # is not caught by this handler.
+ try:
+ # Fall back to a case-insensitive containment match on the first three
+ # space-separated words of the Kindle title.
+ search_title = " ".join(t for t in title.split(" ")[:3])
+ book = Book.objects.get(title__icontains=search_title)
+ print(book)
+ except(ObjectDoesNotExist, MultipleObjectsReturned):
+ # No match, or more than one match: get or create a Book keyed on these values.
+ book, created = Book.objects.get_or_create(
+ kindle_title=title,
+ title=title,
+ read_date=date,
+ body_markdown = 'tk',
+ )
+ # NOTE(review): `created` is only assigned by the get_or_create call above.
+ # Indentation is lost in this view; confirm this check sits inside that
+ # handler, otherwise it raises NameError whenever a book was found.
+ if created:
+ print(book)
+ """
+ book, created = Book.objects.get_or_create(
+ kindle_title=title,
+ author_name=author_name,
+ )
+ try:
#see if we already this highlight
- bh = BookHighlight.objects.get(
+ h = BookHighlight.objects.get(
book__title=clip['title'],
- date_added=clip_date
)
#print(bh.book.title)
print(location)
@@ -66,3 +107,4 @@ def parse_kindle_clippings(path):
date_added=clip_date,
body_markdown=body_markdown
)
+ """
diff --git a/app/books/migrations/0012_auto_20201218_0946.py b/app/books/migrations/0012_auto_20201218_0946.py
new file mode 100644
index 0000000..90119ac
--- /dev/null
+++ b/app/books/migrations/0012_auto_20201218_0946.py
@@ -0,0 +1,23 @@
+# Generated by Django 3.1 on 2020-12-18 09:46
+
+from django.db import migrations, models
+
+
+# Adds two columns: Book.kindle_title (CharField, max_length=200, blank and
+# null allowed) and BookHighlight.date_highlighted (DateTimeField, null allowed).
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ('books', '0011_auto_20200205_1617'),
+ ]
+
+ operations = [
+ migrations.AddField(
+ model_name='book',
+ name='kindle_title',
+ field=models.CharField(blank=True, max_length=200, null=True),
+ ),
+ migrations.AddField(
+ model_name='bookhighlight',
+ name='date_highlighted',
+ field=models.DateTimeField(null=True),
+ ),
+ ]
diff --git a/app/books/migrations/0013_auto_20201218_0949.py b/app/books/migrations/0013_auto_20201218_0949.py
new file mode 100644
index 0000000..93c6b07
--- /dev/null
+++ b/app/books/migrations/0013_auto_20201218_0949.py
@@ -0,0 +1,18 @@
+# Generated by Django 3.1 on 2020-12-18 09:49
+
+from django.db import migrations, models
+
+
+# Changes BookHighlight.date_highlighted from the nullable field added in
+# migration 0012 to a plain DateTimeField() (no null=True, no default).
+# NOTE(review): rows that existed before 0012 hold NULL in this column; confirm
+# they are backfilled before this migration runs.
+# NOTE(review): models.py in this diff also renames date_added to date_created;
+# no operation for that appears in migrations 0012 or 0013 - confirm a later
+# migration covers it.
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ('books', '0012_auto_20201218_0946'),
+ ]
+
+ operations = [
+ migrations.AlterField(
+ model_name='bookhighlight',
+ name='date_highlighted',
+ field=models.DateTimeField(),
+ ),
+ ]
diff --git a/app/books/models.py b/app/books/models.py
index a9d7cad..2ef9716 100644
--- a/app/books/models.py
+++ b/app/books/models.py
@@ -20,6 +20,7 @@ def get_upload_path(self, filename):
class Book(models.Model):
title = models.CharField(max_length=200)
+ kindle_title = models.CharField(max_length=200, blank=True, null=True)
author_name = models.CharField(max_length=200)
slug = models.CharField(max_length=50)
read_date = models.DateTimeField()
@@ -92,7 +93,7 @@ class Book(models.Model):
super(Book, self).save()
-@receiver(post_save, sender=Book)
+#@receiver(post_save, sender=Book)
def post_save_events(sender, update_fields, created, instance, **kwargs):
if instance.image:
base_path = "%s/%s/" % (settings.MEDIA_ROOT, "/".join(str(i) for i in instance.image.name.split('/')[:-1]))
@@ -105,12 +106,13 @@ def post_save_events(sender, update_fields, created, instance, **kwargs):
class BookHighlight(models.Model):
book = models.ForeignKey(Book, on_delete=models.CASCADE, blank=True)
page = models.PositiveSmallIntegerField()
- date_added = models.DateTimeField(blank=True, auto_now_add=True)
+ date_created = models.DateTimeField(blank=True, auto_now_add=True)
+ date_highlighted = models.DateTimeField()
body_markdown = models.TextField()
body_html = models.TextField(blank=True)
class Meta:
- ordering = ('-date_added', '-page',)
+ ordering = ('-date_highlighted', '-page',)  # most recently highlighted first, then highest page
- get_latest_by = 'date_added'
+ get_latest_by = 'date_highlighted'  # 'date_added' is removed from this model by this change; keep in step with ordering
def __str__(self):