diff options
Diffstat (limited to 'app/books')
-rw-r--r-- | app/books/admin.py | 2 | ||||
-rw-r--r-- | app/books/kindleparser.py | 114 | ||||
-rw-r--r-- | app/books/migrations/0012_auto_20201218_0946.py | 23 | ||||
-rw-r--r-- | app/books/migrations/0013_auto_20201218_0949.py | 18 | ||||
-rw-r--r-- | app/books/models.py | 8 |
5 files changed, 125 insertions, 40 deletions
diff --git a/app/books/admin.py b/app/books/admin.py index 8d94651..f488fe5 100644 --- a/app/books/admin.py +++ b/app/books/admin.py @@ -14,4 +14,4 @@ class BookAdmin(admin.ModelAdmin): @admin.register(BookHighlight) class BookHighlightAdmin(admin.ModelAdmin): - list_display = ('book', 'page', 'date_added') + list_display = ('book', 'page', 'date_highlighted') diff --git a/app/books/kindleparser.py b/app/books/kindleparser.py index 44a4686..c56499e 100644 --- a/app/books/kindleparser.py +++ b/app/books/kindleparser.py @@ -5,48 +5,89 @@ This script then parses that json and tries to load it into the django database """ import json import datetime -from django.core.exceptions import ObjectDoesNotExist +from django.core.exceptions import ObjectDoesNotExist, MultipleObjectsReturned from books.models import Book, BookHighlight -def parse_kindle_clippings(path): - json_data = open(path) - data = json.load(json_data) - for item in data: - for clip in item['clippings']: - if clip["type"] != "Bookmark": - book = clip['title'] - clip_date = datetime.datetime.strptime(clip['date'], "%Y-%m-%dT%H:%M:%S.000Z") - try: - body_markdown = clip['content'] - except KeyError: - body_markdown = "" - try: - location = clip['locationRange'] - except: - location = 0 - try: - page = int(clip['pageRange'][0]) - except: - page = 0 - try: - author_name = clip['author'] - except KeyError: - author_name = '' - try: - #see if we already have this book: - row = Book.objects.get(title=clip['title']) - except ObjectDoesNotExist: - b, created = Book.objects.get_or_create( - title=clip['title'], - author_name=author_name, - read_date=clip_date - ) +class KindleClippingsParser(object): + + def __init__(self, path): + self.path = path + + def parse(self): + with open(self.path, 'r') as f: + # Individual highlights within clippings are separated by ========== + old_title = '' + for highlight in f.read().split("=========="): + page = None + location = None + # For each highlight, we split it into the lines + lines = highlight.split("\n")[1:] + # Don't try to write if we have no body + if len(lines) < 3 or lines[3] == "": + continue + titleauth = lines[0] + if titleauth[0] == "\ufeff": + titleauth = titleauth[1:] try: + titleauth = titleauth.split("(") + title = titleauth[0] + author = titleauth[1].split(")")[0] + except IndexError: + title = str(titleauth) + author = str(titleauth) + try: + page = lines[1].split("- Your Highlight on page ")[1].split(" |")[0] + except IndexError: + try: + location = lines[1].split("- Your Highlight on Location ")[1].split(" |")[0] + except IndexError: + page = lines[1].split("- Your Note on page ")[1].split(" |")[0] + date = lines[1] + date = date.split("| Added on ")[1].split(",", 1)[1] + # date string looks like: "November 23, 2020 11:22:41 PM" + date_time_obj = datetime.datetime.strptime(date.strip(), '%B %d, %Y %H:%M:%S %p') + #print(date, date_time_obj) + #print(title) + #print(author) + #if page: + # print(page) + #if location: + # print(location) + #print(lines[3]) + #print("---------------") + if old_title != title: + self.create_highlight(title, date_time_obj) + old_title = title + + def create_highlight(self, title, date): + print(title) + try: + book = Book.objects.get(kindle_title=title) + print("success") + except ObjectDoesNotExist: + try: + search_title = " ".join(t for t in title.split(" ")[:3]) + book = Book.objects.get(title__icontains=search_title) + print(book) + except(ObjectDoesNotExist, MultipleObjectsReturned): + book, created = Book.objects.get_or_create( + kindle_title=title, + title=title, + read_date=date, + body_markdown = 'tk', + ) + if created: + print(book) + """ + book, created = Book.objects.get_or_create( + kindle_title=title, + author_name=author_name, + ) + try: #see if we already this highlight - bh = BookHighlight.objects.get( + h = BookHighlight.objects.get( book__title=clip['title'], - date_added=clip_date ) #print(bh.book.title) print(location) @@ -66,3 +107,4 @@ def parse_kindle_clippings(path): date_added=clip_date, body_markdown=body_markdown ) + """ diff --git a/app/books/migrations/0012_auto_20201218_0946.py b/app/books/migrations/0012_auto_20201218_0946.py new file mode 100644 index 0000000..90119ac --- /dev/null +++ b/app/books/migrations/0012_auto_20201218_0946.py @@ -0,0 +1,23 @@ +# Generated by Django 3.1 on 2020-12-18 09:46 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('books', '0011_auto_20200205_1617'), + ] + + operations = [ + migrations.AddField( + model_name='book', + name='kindle_title', + field=models.CharField(blank=True, max_length=200, null=True), + ), + migrations.AddField( + model_name='bookhighlight', + name='date_highlighted', + field=models.DateTimeField(null=True), + ), + ] diff --git a/app/books/migrations/0013_auto_20201218_0949.py b/app/books/migrations/0013_auto_20201218_0949.py new file mode 100644 index 0000000..93c6b07 --- /dev/null +++ b/app/books/migrations/0013_auto_20201218_0949.py @@ -0,0 +1,18 @@ +# Generated by Django 3.1 on 2020-12-18 09:49 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('books', '0012_auto_20201218_0946'), + ] + + operations = [ + migrations.AlterField( + model_name='bookhighlight', + name='date_highlighted', + field=models.DateTimeField(), + ), + ] diff --git a/app/books/models.py b/app/books/models.py index a9d7cad..2ef9716 100644 --- a/app/books/models.py +++ b/app/books/models.py @@ -20,6 +20,7 @@ def get_upload_path(self, filename): class Book(models.Model): title = models.CharField(max_length=200) + kindle_title = models.CharField(max_length=200, blank=True, null=True) author_name = models.CharField(max_length=200) slug = models.CharField(max_length=50) read_date = models.DateTimeField() @@ -92,7 +93,7 @@ class Book(models.Model): super(Book, self).save() -@receiver(post_save, sender=Book) +#@receiver(post_save, sender=Book) def post_save_events(sender, update_fields, created, instance, **kwargs): if instance.image: base_path = "%s/%s/" % (settings.MEDIA_ROOT, "/".join(str(i) for i in instance.image.name.split('/')[:-1])) @@ -105,12 +106,13 @@ def post_save_events(sender, update_fields, created, instance, **kwargs): class BookHighlight(models.Model): book = models.ForeignKey(Book, on_delete=models.CASCADE, blank=True) page = models.PositiveSmallIntegerField() - date_added = models.DateTimeField(blank=True, auto_now_add=True) + date_created = models.DateTimeField(blank=True, auto_now_add=True) + date_highlighted = models.DateTimeField() body_markdown = models.TextField() body_html = models.TextField(blank=True) class Meta: - ordering = ('-date_added', '-page',) + ordering = ('-date_highlighted', '-page',) get_latest_by = 'date_added' def __str__(self): |