Diffstat (limited to 'app')
-rw-r--r--  app/deals/admin.py | 27
-rw-r--r--  app/deals/models.py | 140
-rw-r--r--  app/deals/urls.py | 10
-rw-r--r--  app/deals/views.py | 9
-rw-r--r--  app/posts/admin.py | 34
-rw-r--r--  app/posts/migrations/0001_initial.py | 33
-rw-r--r--  app/posts/migrations/0002_alter_post_body.py | 18
-rw-r--r--  app/posts/migrations/0003_alter_post_date_last_pub.py | 18
-rw-r--r--  app/posts/migrations/0004_alter_post_update_frequency.py | 18
-rw-r--r--  app/posts/migrations/0005_post_template_type.py | 18
-rw-r--r--  app/posts/migrations/0006_alter_post_post_type.py | 18
-rw-r--r--  app/posts/migrations/0007_alter_post_title.py | 18
-rw-r--r--  app/posts/migrations/0008_post_needs_update_alter_post_products.py | 24
-rw-r--r--  app/posts/migrations/0009_note.py | 29
-rw-r--r--  app/posts/migrations/__init__.py | 0
-rw-r--r--  app/posts/models.py | 141
-rw-r--r--  app/posts/utils.py | 928
-rw-r--r--  app/products/admin.py | 28
-rw-r--r--  app/products/migrations/0001_initial.py | 68
-rw-r--r--  app/products/migrations/__init__.py | 0
-rw-r--r--  app/products/models.py | 87
-rw-r--r--  app/products/static/product-loader.js | 10
-rw-r--r--  app/products/templates/products/snippet.html | 41
-rw-r--r--  app/products/views.py | 15
-rw-r--r--  app/utils/templates/utils/clipboardtomarkdown.html | 79
-rw-r--r--  app/utils/urls.py | 5
26 files changed, 1803 insertions, 13 deletions
diff --git a/app/deals/admin.py b/app/deals/admin.py
index 9e37d67..3686309 100644
--- a/app/deals/admin.py
+++ b/app/deals/admin.py
@@ -4,12 +4,12 @@ from django.contrib.gis.admin import OSMGeoAdmin
from django.contrib.contenttypes.admin import GenericStackedInline
from django_admin_listfilter_dropdown.filters import DropdownFilter, RelatedDropdownFilter, ChoiceDropdownFilter
-from .models import Deal, Brand
+from .models import Deal, Brand, MyDeal
@admin.register(Deal)
class DealAdmin(OSMGeoAdmin):
- list_display = ('title', 'brand', 'category', 'deal_price', 'original_price', 'discount_percent', 'promo_type', 'amazon_link', 'search_ccc', 'search_wired', 'get_airtable_code')
+ list_display = ('asin', 'title', 'brand', 'category', 'deal_price', 'original_price', 'discount_percent', 'promo_type', 'amazon_link', 'search_ccc', 'search_wired', 'get_airtable_code')
search_fields = ['brand__name', 'title', ]
list_filter = ('category', 'promo_type', ('brand_str', DropdownFilter),)
@@ -19,3 +19,26 @@ class BrandAdmin(OSMGeoAdmin):
list_display = ('name',)
search_fields = ['name' ]
+
+@admin.register(MyDeal)
+class MyDealAdmin(OSMGeoAdmin):
+ list_display = ('title', 'blurb', 'amazon_link', 'search_ccc', 'search_wired')
+ search_fields = ['title', ]
+ list_filter = ()
+ fieldsets = (
+ ('Base', {
+ 'fields': (
+ 'title',
+ 'blurb',
+ ('retailer', 'asin'),
+ 'url',
+ 'body',
+ ),
+ 'classes': (
+ 'show',
+ 'extrapretty',
+ 'wide'
+ )
+ }
+ ),
+ )
diff --git a/app/deals/models.py b/app/deals/models.py
index 0d58367..a4fd875 100644
--- a/app/deals/models.py
+++ b/app/deals/models.py
@@ -61,6 +61,41 @@ class Deal(models.Model):
return format_html("<a target='_blank' href='https://camelcamelcamel.com/product/%s'>Deal?</a>" % (self.asin))
admin_link.short_description = 'Link'
+
+
+class MyDeal(models.Model):
+ title = models.CharField(max_length=200, blank=True, null=True)
+ lookup_title = models.CharField(max_length=200, blank=True, null=True)
+ asin = models.CharField(max_length=200, blank=True, null=True)
+ brand = models.CharField(max_length=200, blank=True, null=True)
+ retailer = models.CharField(max_length=200, blank=True, null=True)
+ secondary_retailer = models.CharField(max_length=200, blank=True, null=True)
+ tertiary_retailer = models.CharField(max_length=200, blank=True, null=True)
+ url = models.CharField(max_length=200)
+ secondary_url = models.CharField(max_length=200, blank=True, null=True)
+ tertiary_url = models.CharField(max_length=200, blank=True, null=True)
+ body = models.TextField(null=True, blank=True)
+ blurb = models.TextField(null=True)
+
+ class Meta:
+ ordering = ('title',)
+
+ def __str__(self):
+ return self.title
+
+ def amazon_link(self):
+ return format_html('<a target="_blank" href="{}">{}</a>', self.url, self.asin)
+ amazon_link.short_description = 'Link'
+
+ def search_wired(self):
+ term = "https://www.google.com/search?q=%s" % (str(self.lookup_title))
+ term = term + "+site%3Awired.com"
+ return format_html("<a target='_blank' href='{}'>wired search</a>", term)
+ search_wired.short_description = 'Link'
+
+ def search_ccc(self):
+ return format_html("<a target='_blank' href='https://camelcamelcamel.com/product/{}'>Deal?</a>", self.asin)
+ search_ccc.short_description = 'Link'
"""
@@ -145,17 +180,16 @@ with open(path) as f:
_, created = Deal.objects.get_or_create(
print(row)
asin=row[0],
- cateogry=row[1],
- brand=row[2],
- title=row[3],
- prime_only=row[4],
- promo_type=row[5],
- deal_price= row[9],
- original_price = row[10],
- discount_percent=row[11],
- url=row[12],
+ category="New",
+ brand=row[7],
+ title=row[18],
+ prime_only=row[19],
+ promo_type=row[21],
+ deal_price= row[29],
+ original_price = row[31],
+ discount_percent=row[32],
+ url=row[34],
)
- print(row[0], row[1], row[2], row[4],row[5],row[6],row[10],row[13],row[14])
title, prime_only, promo_type, deal_price, discount_percent, url)
@@ -174,3 +208,89 @@ with open(path) as f:
# creates a tuple of the new object or
# current object and a boolean of if it was created
"""
+
+"""
+
+parse airtable csv:
+
+import tldextract, csv
+path = ''
+
+with open(path) as f:
+ reader = csv.reader(f)
+ count = 0
+ for row in reader:
+ if count > 0:
+ domain = tldextract.extract(row[2])
+ retailer = domain.domain.capitalize()
+ d = '#### [%s](%s)\n\n+++button-group\n\n[%s](%s "%s"){: target="_blank"}\n\n+++\n\n%s\n\n' %(str(row[0]).strip(), row[2], retailer, row[2], retailer, row[4])
+ with open('out.txt', 'a') as the_file:
+ the_file.write(d)
+ print(d)
+ count = count+1
+
+
+import tldextract, csv
+path = ''
+
+with open(path) as f:
+ reader = csv.reader(f)
+ count = 0
+ for row in reader:
+ if count > 0:
+ domain = tldextract.extract(row[2])
+ retailer = domain.domain.capitalize()
+ title = str(row[0].strip())
+ try:
+ stitle = title.split('for')[0]
+ except:
+ stitle=title
+ try:
+ asin = str(row[2]).split("/dp/")[1]
+ except:
+ asin = ''
+ d, created = MyDeal.objects.get_or_create(
+ title = str(row[0].strip()),
+ lookup_title = stitle,
+ asin = asin,
+ retailer = retailer,
+ url = str(row[2].strip())
+ )
+ if not created:
+ d.body = row[4]
+ d.save()
+ count = count+1
+
+
+
+
++++button-group
+
+[Amazon]({{object.url}} "Amazon"){: target="_blank"}
+
++++
+"""
+
+"""
+asin = 3
+deal_type = 4
+name = 6
+url = 7
+deal price = 9
+price = 8
+percent_discount_str = 10
+prime only = 11
+
+
+
+
+from selenium import webdriver
+from selenium.webdriver.common.keys import Keys
+from selenium.webdriver.common.by import By
+
+driver = webdriver.Firefox()
+driver.get("http://www.python.org")
+
+
+
+"""
diff --git a/app/deals/urls.py b/app/deals/urls.py
index 2a9bf4a..8ca1fca 100644
--- a/app/deals/urls.py
+++ b/app/deals/urls.py
@@ -6,6 +6,16 @@ app_name = "deals"
urlpatterns = [
path(
+ r'mydeals',
+ views.MyDealListView.as_view(),
+ name="mydeals"
+ ),
+ path(
+ r'mydealsnew',
+ views.MyDealLapListView.as_view(),
+ name="mydealsnew"
+ ),
+ path(
r'<str:pk>',
views.DealDetailView.as_view(),
name="detail"
diff --git a/app/deals/views.py b/app/deals/views.py
index c615bd8..10b02c9 100644
--- a/app/deals/views.py
+++ b/app/deals/views.py
@@ -1,7 +1,14 @@
from django.views.generic import DetailView, ListView
-from .models import Deal
+from .models import Deal, MyDeal
class DealDetailView(DetailView):
model = Deal
+
+class MyDealListView(ListView):
+ model = MyDeal
+
+class MyDealLapListView(ListView):
+ model = MyDeal
+ queryset = MyDeal.objects.filter(asin="newbest")
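+
+# Neither list view sets template_name, so Django's ListView falls back to the
+# default template deals/mydeal_list.html and exposes the results to the
+# template as "object_list" (and "mydeal_list").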
diff --git a/app/posts/admin.py b/app/posts/admin.py
new file mode 100644
index 0000000..a4e29b8
--- /dev/null
+++ b/app/posts/admin.py
@@ -0,0 +1,34 @@
+from django.contrib import admin
+
+from .models import Post, Note
+from utils.widgets import AdminImageWidget, LGEntryForm
+
+from django.contrib.admin import SimpleListFilter
+
+
+@admin.register(Post)
+class PostAdmin(admin.ModelAdmin):
+ form = LGEntryForm
+ list_display = ('title', 'admin_url', 'author', 'date_last_pub', 'post_type', 'update_frequency', 'needs_update', 'days_overdue')
+ search_fields = ['title']
+ list_filter = ['needs_update', 'author', 'post_type']
+
+ class Media:
+ js = ('image-loader.js', 'next-prev-links.js')
+ css = {
+ "all": ("my_styles.css",)
+ }
+
+
+@admin.register(Note)
+class NoteAdmin(admin.ModelAdmin):
+ form = LGEntryForm
+ list_display = ('date_created', 'title', 'post')
+ search_fields = ['title']
+ list_filter = ['date_created']
+
+ class Media:
+ js = ('image-loader.js', 'next-prev-links.js')
+ css = {
+ "all": ("my_styles.css",)
+ }
diff --git a/app/posts/migrations/0001_initial.py b/app/posts/migrations/0001_initial.py
new file mode 100644
index 0000000..87a9c95
--- /dev/null
+++ b/app/posts/migrations/0001_initial.py
@@ -0,0 +1,33 @@
+# Generated by Django 4.2.2 on 2023-07-10 18:02
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+ initial = True
+
+ dependencies = [
+ ('products', '__first__'),
+ ]
+
+ operations = [
+ migrations.CreateModel(
+ name='Post',
+ fields=[
+ ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+ ('title', models.TextField(blank=True)),
+ ('body', models.TextField()),
+ ('url', models.CharField(blank=True, max_length=512, null=True)),
+ ('date_last_pub', models.DateTimeField()),
+ ('guid', models.CharField(blank=True, db_index=True, max_length=512, null=True)),
+ ('author', models.CharField(blank=True, max_length=255, null=True)),
+ ('post_type', models.IntegerField(choices=[(0, 'review'), (1, 'guide'), (2, 'gallery'), (3, 'how-to')], default=1)),
+ ('update_frequency', models.IntegerField(help_text='In days')),
+ ('products', models.ManyToManyField(to='products.productlink')),
+ ],
+ options={
+ 'ordering': ('date_last_pub',),
+ },
+ ),
+ ]
diff --git a/app/posts/migrations/0002_alter_post_body.py b/app/posts/migrations/0002_alter_post_body.py
new file mode 100644
index 0000000..2a33e58
--- /dev/null
+++ b/app/posts/migrations/0002_alter_post_body.py
@@ -0,0 +1,18 @@
+# Generated by Django 4.2.2 on 2023-07-10 18:42
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ('posts', '0001_initial'),
+ ]
+
+ operations = [
+ migrations.AlterField(
+ model_name='post',
+ name='body',
+ field=models.TextField(blank=True, null=True),
+ ),
+ ]
diff --git a/app/posts/migrations/0003_alter_post_date_last_pub.py b/app/posts/migrations/0003_alter_post_date_last_pub.py
new file mode 100644
index 0000000..f19c142
--- /dev/null
+++ b/app/posts/migrations/0003_alter_post_date_last_pub.py
@@ -0,0 +1,18 @@
+# Generated by Django 4.2.2 on 2023-07-10 18:59
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ('posts', '0002_alter_post_body'),
+ ]
+
+ operations = [
+ migrations.AlterField(
+ model_name='post',
+ name='date_last_pub',
+ field=models.DateField(),
+ ),
+ ]
diff --git a/app/posts/migrations/0004_alter_post_update_frequency.py b/app/posts/migrations/0004_alter_post_update_frequency.py
new file mode 100644
index 0000000..ec176b7
--- /dev/null
+++ b/app/posts/migrations/0004_alter_post_update_frequency.py
@@ -0,0 +1,18 @@
+# Generated by Django 4.2.2 on 2023-07-10 19:11
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ('posts', '0003_alter_post_date_last_pub'),
+ ]
+
+ operations = [
+ migrations.AlterField(
+ model_name='post',
+ name='update_frequency',
+ field=models.BigIntegerField(help_text='In days'),
+ ),
+ ]
diff --git a/app/posts/migrations/0005_post_template_type.py b/app/posts/migrations/0005_post_template_type.py
new file mode 100644
index 0000000..2eef54b
--- /dev/null
+++ b/app/posts/migrations/0005_post_template_type.py
@@ -0,0 +1,18 @@
+# Generated by Django 4.2.2 on 2023-07-10 19:21
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ('posts', '0004_alter_post_update_frequency'),
+ ]
+
+ operations = [
+ migrations.AddField(
+ model_name='post',
+ name='template_type',
+ field=models.IntegerField(choices=[(0, 'story'), (1, 'gallery')], default=0),
+ ),
+ ]
diff --git a/app/posts/migrations/0006_alter_post_post_type.py b/app/posts/migrations/0006_alter_post_post_type.py
new file mode 100644
index 0000000..93985c7
--- /dev/null
+++ b/app/posts/migrations/0006_alter_post_post_type.py
@@ -0,0 +1,18 @@
+# Generated by Django 4.2.2 on 2023-07-10 19:21
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ('posts', '0005_post_template_type'),
+ ]
+
+ operations = [
+ migrations.AlterField(
+ model_name='post',
+ name='post_type',
+ field=models.IntegerField(choices=[(0, 'review'), (1, 'guide'), (2, 'how-to')], default=1),
+ ),
+ ]
diff --git a/app/posts/migrations/0007_alter_post_title.py b/app/posts/migrations/0007_alter_post_title.py
new file mode 100644
index 0000000..a838347
--- /dev/null
+++ b/app/posts/migrations/0007_alter_post_title.py
@@ -0,0 +1,18 @@
+# Generated by Django 4.2.2 on 2023-07-10 19:25
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ('posts', '0006_alter_post_post_type'),
+ ]
+
+ operations = [
+ migrations.AlterField(
+ model_name='post',
+ name='title',
+ field=models.CharField(blank=True, max_length=512, null=True),
+ ),
+ ]
diff --git a/app/posts/migrations/0008_post_needs_update_alter_post_products.py b/app/posts/migrations/0008_post_needs_update_alter_post_products.py
new file mode 100644
index 0000000..f40f62e
--- /dev/null
+++ b/app/posts/migrations/0008_post_needs_update_alter_post_products.py
@@ -0,0 +1,24 @@
+# Generated by Django 4.2.2 on 2023-07-12 21:44
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ('products', '0001_initial'),
+ ('posts', '0007_alter_post_title'),
+ ]
+
+ operations = [
+ migrations.AddField(
+ model_name='post',
+ name='needs_update',
+ field=models.BooleanField(default=False),
+ ),
+ migrations.AlterField(
+ model_name='post',
+ name='products',
+ field=models.ManyToManyField(blank=True, null=True, to='products.productlink'),
+ ),
+ ]
diff --git a/app/posts/migrations/0009_note.py b/app/posts/migrations/0009_note.py
new file mode 100644
index 0000000..ecd6473
--- /dev/null
+++ b/app/posts/migrations/0009_note.py
@@ -0,0 +1,29 @@
+# Generated by Django 4.2.2 on 2023-07-14 19:38
+
+from django.db import migrations, models
+import django.db.models.deletion
+import django.utils.timezone
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ('posts', '0008_post_needs_update_alter_post_products'),
+ ]
+
+ operations = [
+ migrations.CreateModel(
+ name='Note',
+ fields=[
+ ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+ ('title', models.CharField(max_length=400)),
+ ('url', models.CharField(max_length=400)),
+ ('body_markdown', models.TextField(blank=True, null=True)),
+ ('date_created', models.DateTimeField(default=django.utils.timezone.now)),
+ ('post', models.ForeignKey(null=True, on_delete=django.db.models.deletion.CASCADE, to='posts.post')),
+ ],
+ options={
+ 'ordering': ('date_created',),
+ },
+ ),
+ ]
diff --git a/app/posts/migrations/__init__.py b/app/posts/migrations/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/app/posts/migrations/__init__.py
diff --git a/app/posts/models.py b/app/posts/models.py
new file mode 100644
index 0000000..a19a50f
--- /dev/null
+++ b/app/posts/models.py
@@ -0,0 +1,141 @@
+from django.db import models
+from django.utils.html import format_html, format_html_join
+from django.utils import timezone
+
+from products.models import ProductLink
+
+"""
+class Feed(models.Model):
+ name = models.CharField(max_length=255)
+ feed_url = models.CharField(max_length=512)
+ slug = models.CharField(max_length=50)
+ last_polled = models.DateTimeField(blank=True, null=True)
+ due_poll = models.DateTimeField(default=datetime.datetime(1900, 1, 1)) # default to distant past to put new sources to front of queue
+ etag = models.CharField(max_length=255, blank=True, null=True)
+ last_modified = models.CharField(max_length=255, blank=True, null=True) # just pass this back and forward between server and me , no need to parse
+ last_result = models.CharField(max_length=255,blank=True,null=True)
+ interval = models.PositiveIntegerField(default=400)
+ last_success = models.DateTimeField(blank=True, null=True)
+ last_change = models.DateTimeField(blank=True, null=True)
+ live = models.BooleanField(default=True)
+ status_code = models.PositiveIntegerField(default=0)
+ last_302_url = models.CharField(max_length=512, null=True, blank=True)
+ last_302_start = models.DateTimeField(null=True, blank=True)
+
+ def __str__(self):
+ return self.name
+"""
+
+
+class PostType(models.IntegerChoices):
+ REVIEW = 0, ('review')
+ GUIDE = 1, ('guide')
+ HOWTO = 2, ('how-to')
+
+
+class TemplateType(models.IntegerChoices):
+ STORY = 0, ('story')
+ GALLERY = 1, ('gallery')
+
+
+class Post(models.Model):
+ # an entry in a feed
+ title = models.CharField(max_length=512, blank=True, null=True)
+ body = models.TextField(blank=True, null=True)
+ url = models.CharField(max_length=512, blank=True, null=True)
+ date_last_pub = models.DateField()
+ guid = models.CharField(max_length=512, blank=True, null=True, db_index=True)
+ author = models.CharField(max_length=255, blank=True, null=True)
+ post_type = models.IntegerField(choices=PostType.choices, default=PostType.GUIDE)
+ template_type = models.IntegerField(choices=TemplateType.choices, default=TemplateType.STORY)
+ update_frequency = models.BigIntegerField(help_text="In days")
+ products = models.ManyToManyField(ProductLink, blank=True, null=True)
+ needs_update = models.BooleanField(default=False)
+
+ class Meta:
+ ordering = ('date_last_pub',)
+
+ def __str__(self):
+ return self.title
+
+ def time_since_update(self):
+ td = timezone.localdate() - self.date_last_pub
+ return td.days
+
+ #def get_needs_update(self):
+ # if self.time_since_update() > self.update_frequency:
+ # return True
+ # else:
+ # return False
+
+ def days_overdue(self):
+ if self.needs_update == True:
+ return self.time_since_update() - self.update_frequency
+ else:
+ return ''
+
+ def admin_url(self):
+ return format_html('<a target="_blank" href="{}">{}</a>', self.url, self.url)
+ admin_url.short_description = 'Link'
+
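+ # needs_update is recomputed from date_last_pub and update_frequency every time
+ # the post is saved, so the flag only refreshes when a Post row is written.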
+ def save(self, *args, **kwargs):
+ td = timezone.localdate() - self.date_last_pub
+ if td.days > self.update_frequency:
+ self.needs_update = True
+ else:
+ self.needs_update = False
+ super(Post, self).save(*args, **kwargs)
+
+
+class Note(models.Model):
+ title = models.CharField(max_length=400)
+ url = models.CharField(max_length=400)
+ body_markdown = models.TextField(blank=True, null=True)
+ date_created = models.DateTimeField(default=timezone.now)
+ post = models.ForeignKey(Post, on_delete=models.CASCADE, null=True)
+
+ class Meta:
+ ordering = ('date_created',)
+
+ def __str__(self):
+ return self.title
+
+
+#URL,This Article,Type,Lead,Previous Leads,Other Testers,Notes/Docs,Last Pub Date,Update Next,Months Since Update,Update Frequency (Months),Updates per year,Prev. Updates,"18 Mo Traffic Trend
+'''
+row[0] #url
+row[1] #title
+row[2] #post_type
+row[3] #author
+row[7] #date_last_pub
+row[10] #update_frequency
+
+
+with open(path) as f:
+ reader = csv.reader(f)
+ count = 0
+ for row in reader:
+ if count > 1:
+ if row[2] == "Deals":
+ # don't care about deals posts
+ continue
+ elif row[2] == "Buying Guide":
+ gtype = PostType.GUIDE
+ else:
+ gtype = PostType.HOWTO
+ if row[10] == "Retired":
+ continue
+ else:
+ print(int(row[10]))
+ print(gtype)
+ d = datetime.strptime(row[7], '%m/%d/%Y')
+ post, created = Post.objects.get_or_create(
+ title = str(row[1]).strip(),
+ url = str(row[0]).strip(),
+ date_last_pub = d,
+ author = str(row[3]).strip(),
+ post_type = gtype,
+ update_frequency = int(row[10])*30,
+ )
+
+'''
diff --git a/app/posts/utils.py b/app/posts/utils.py
new file mode 100644
index 0000000..915721c
--- /dev/null
+++ b/app/posts/utils.py
@@ -0,0 +1,928 @@
+import csv
+import datetime
+import hashlib
+import json
+import logging
+import time
+from random import choice
+
+import requests
+from bs4 import BeautifulSoup
+
+from django.conf import settings
+from django.db.models import Q
+from django.utils import timezone
+
+# NOTE: this module appears to have been copied from a separate feed-reader
+# project and still references names that are not defined or imported here
+# (NullOutput, Source, Post, Enclosure, WebProxy, import_feed, fix_relative,
+# _customize_sanitizer, "parser" (presumably feedparser), and pyrfc3339).
+# Those need to be wired up before these functions will run.
+
+def get_agent(source_feed):
+
+ if source_feed.is_cloudflare:
+ agent = random_user_agent()
+ logging.error("using agent: {}".format(agent))
+ else:
+ agent = "{user_agent} (+{server}; Updater; {subs} subscribers)".format(user_agent=settings.FEEDS_USER_AGENT, server=settings.FEEDS_SERVER, subs=source_feed.num_subs)
+
+ return agent
+
+def random_user_agent():
+
+ return choice([
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.1.1 Safari/605.1.15",
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36 Edge/14.14393",
+ "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729)",
+ "Mozilla/5.0 (iPad; CPU OS 8_4_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12H321 Safari/600.1.4",
+ "Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1",
+ "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
+ "Mozilla/5.0 (Linux; Android 5.0; SAMSUNG SM-N900 Build/LRX21V) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/2.1 Chrome/34.0.1847.76 Mobile Safari/537.36",
+ "Mozilla/5.0 (Linux; Android 6.0.1; SAMSUNG SM-G570Y Build/MMB29K) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/4.0 Chrome/44.0.2403.133 Mobile Safari/537.36",
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:53.0) Gecko/20100101 Firefox/53.0"
+ ])
+
+
+def update_feeds(max_feeds=3, output=NullOutput()):
+
+
+ todo = Source.objects.filter(Q(due_poll__lt = timezone.now()) & Q(live = True))
+
+
+ output.write("Queue size is {}".format(todo.count()))
+
+ sources = todo.order_by("due_poll")[:max_feeds]
+
+ output.write("\nProcessing %d\n\n" % sources.count())
+
+
+ for src in sources:
+ read_feed(src, output)
+
+ # kill shit proxies
+
+ WebProxy.objects.filter(address='X').delete()
+
+
+def read_feed(source_feed, output=NullOutput()):
+
+ old_interval = source_feed.interval
+
+
+ was302 = False
+
+ output.write("\n------------------------------\n")
+
+ source_feed.last_polled = timezone.now()
+
+ agent = get_agent(source_feed)
+
+ headers = { "User-Agent": agent } #identify ourselves
+
+
+
+
+ proxies = {}
+ proxy = None
+
+ feed_url = source_feed.feed_url
+ if source_feed.is_cloudflare : # Fuck you !
+
+
+ if settings.FEEDS_CLOUDFLARE_WORKER:
+ feed_url = "{}/read/?target={}".format(settings.FEEDS_CLOUDFLARE_WORKER, feed_url)
+ else:
+ try:
+ proxy = get_proxy(output)
+
+ if proxy.address != "X":
+
+ proxies = {
+ 'http': proxy.address,
+ 'https': proxy.address,
+ }
+ except:
+ pass
+
+
+ if source_feed.etag:
+ headers["If-None-Match"] = str(source_feed.etag)
+ if source_feed.last_modified:
+ headers["If-Modified-Since"] = str(source_feed.last_modified)
+
+ output.write("\nFetching %s" % feed_url)
+
+ ret = None
+ try:
+ ret = requests.get(feed_url, headers=headers, verify=False, allow_redirects=False, timeout=20, proxies=proxies)
+ source_feed.status_code = ret.status_code
+ source_feed.last_result = "Unhandled Case"
+ output.write(str(ret))
+ except Exception as ex:
+ source_feed.last_result = ("Fetch error:" + str(ex))[:255]
+ source_feed.status_code = 0
+ output.write("\nFetch error: " + str(ex))
+
+
+ if proxy:
+ source_feed.last_result = "Proxy failed. Next retry will use new proxy"
+ source_feed.status_code = 1 # this will stop us increasing the interval
+
+ output.write("\nBurning the proxy.")
+ proxy.delete()
+ source_feed.interval /= 2
+
+
+
+ if ret is None and source_feed.status_code == 1: # er ??
+ pass
+ elif ret == None or source_feed.status_code == 0:
+ source_feed.interval += 120
+ elif ret.status_code < 200 or ret.status_code >= 500:
+ #errors, impossible return codes
+ source_feed.interval += 120
+ source_feed.last_result = "Server error fetching feed (%d)" % ret.status_code
+ elif ret.status_code == 404:
+ #not found
+ source_feed.interval += 120
+ source_feed.last_result = "The feed could not be found"
+ elif ret.status_code == 403 or ret.status_code == 410: #Forbidden or gone
+
+ if "Cloudflare" in ret.text or ("Server" in ret.headers and "cloudflare" in ret.headers["Server"]):
+
+ if source_feed.is_cloudflare and proxy is not None:
+ # we are already proxied - this proxy on cloudflare's shit list too?
+ proxy.delete()
+ output.write("\nProxy seemed to also be blocked, burning")
+ source_feed.interval /= 2
+ source_feed.last_result = "Proxy kind of worked but still got cloudflared."
+ else:
+ source_feed.is_cloudflare = True
+ source_feed.last_result = "Blocked by Cloudflare (grr)"
+ else:
+ source_feed.last_result = "Feed is no longer accessible."
+ source_feed.live = False
+
+
+ elif ret.status_code >= 400 and ret.status_code < 500:
+ #treat as bad request
+ source_feed.live = False
+ source_feed.last_result = "Bad request (%d)" % ret.status_code
+ elif ret.status_code == 304:
+ #not modified
+ source_feed.interval += 10
+ source_feed.last_result = "Not modified"
+ source_feed.last_success = timezone.now()
+
+ if source_feed.last_success and (timezone.now() - source_feed.last_success).days > 7:
+ source_feed.last_result = "Clearing etag/last modified due to lack of changes"
+ source_feed.etag = None
+ source_feed.last_modified = None
+
+
+
+ elif ret.status_code == 301 or ret.status_code == 308: #permanent redirect
+ new_url = ""
+ try:
+ if "Location" in ret.headers:
+ new_url = ret.headers["Location"]
+
+ if new_url[0] == "/":
+ #find the domain from the feed
+
+ base = "/".join(source_feed.feed_url.split("/")[:3])
+
+
+ new_url = base + new_url
+
+
+ source_feed.feed_url = new_url
+ source_feed.last_result = "Moved"
+ source_feed.save(update_fields=["feed_url", "last_result"])
+
+
+ else:
+ source_feed.last_result = "Feed has moved but no location provided"
+ except Exception as ex:
+ output.write("\nError redirecting.")
+ source_feed.last_result = ("Error redirecting feed to " + new_url)[:255]
+ pass
+ elif ret.status_code == 302 or ret.status_code == 303 or ret.status_code == 307: #Temporary redirect
+ new_url = ""
+ was302 = True
+ try:
+ new_url = ret.headers["Location"]
+
+ if new_url[0] == "/":
+ #find the domain from the feed
+ start = source_feed.feed_url[:8]
+ end = source_feed.feed_url[8:]
+ if end.find("/") >= 0:
+ end = end[:end.find("/")]
+
+ new_url = start + end + new_url
+
+
+ ret = requests.get(new_url, headers=headers, allow_redirects=True, timeout=20, verify=False)
+ source_feed.status_code = ret.status_code
+ source_feed.last_result = ("Temporary Redirect to " + new_url)[:255]
+
+ if source_feed.last_302_url == new_url:
+ #this is where we 302'd to last time
+ td = timezone.now() - source_feed.last_302_start
+ if td.days > 60:
+ source_feed.feed_url = new_url
+ source_feed.last_302_url = " "
+ source_feed.last_302_start = None
+ source_feed.last_result = ("Permanent Redirect to " + new_url)[:255]
+
+ source_feed.save(update_fields=["feed_url", "last_result", "last_302_url", "last_302_start"])
+
+
+
+ else:
+ source_feed.last_result = ("Temporary Redirect to " + new_url + " since " + source_feed.last_302_start.strftime("%d %B"))[:255]
+
+ else:
+ source_feed.last_302_url = new_url
+ source_feed.last_302_start = timezone.now()
+
+ source_feed.last_result = ("Temporary Redirect to " + new_url + " since " + source_feed.last_302_start.strftime("%d %B"))[:255]
+
+
+ except Exception as ex:
+ source_feed.last_result = ("Failed Redirection to " + new_url + " " + str(ex))[:255]
+ source_feed.interval += 60
+
+ #NOT ELIF, WE HAVE TO START THE IF AGAIN TO COPE WITH 302
+ if ret and ret.status_code >= 200 and ret.status_code < 300: #now we are not following redirects 302,303 and so forth are going to fail here, but what the hell :)
+
+ # great!
+ ok = True
+ changed = False
+
+
+ if was302:
+ source_feed.etag = None
+ source_feed.last_modified = None
+ else:
+ try:
+ source_feed.etag = ret.headers["etag"]
+ except Exception as ex:
+ source_feed.etag = None
+ try:
+ source_feed.last_modified = ret.headers["Last-Modified"]
+ except Exception as ex:
+ source_feed.last_modified = None
+
+ output.write("\netag:%s\nLast Mod:%s\n\n" % (source_feed.etag,source_feed.last_modified))
+
+
+ content_type = "Not Set"
+ if "Content-Type" in ret.headers:
+ content_type = ret.headers["Content-Type"]
+
+ (ok,changed) = import_feed(source_feed=source_feed, feed_body=ret.content, content_type=content_type, output=output)
+
+ if ok and changed:
+ source_feed.interval /= 2
+ source_feed.last_result = " OK (updated)" #and temporary redirects
+ source_feed.last_change = timezone.now()
+
+ elif ok:
+ source_feed.last_result = " OK"
+ source_feed.interval += 20 # we slow down feeds a little more that don't send headers we can use
+ else: #not OK
+ source_feed.interval += 120
+
+ if source_feed.interval < 60:
+ source_feed.interval = 60 # no less than 1 hour
+ if source_feed.interval > (60 * 24):
+ source_feed.interval = (60 * 24) # no more than a day
+
+ output.write("\nUpdating source_feed.interval from %d to %d\n" % (old_interval, source_feed.interval))
+ td = datetime.timedelta(minutes=source_feed.interval)
+ source_feed.due_poll = timezone.now() + td
+ source_feed.save(update_fields=[
+ "due_poll", "interval", "last_result",
+ "last_modified", "etag", "last_302_start",
+ "last_302_url", "last_success", "live",
+ "status_code", "max_index", "is_cloudflare",
+ "last_change",
+ ])
+
+
+def parse_feed_xml_draft(feed_content):
+ # Draft sketch: parse an RSS body with BeautifulSoup.
+ # The body can come from e.g. requests.get('https://news.ycombinator.com/rss').content
+ soup = BeautifulSoup(feed_content, features='xml')
+ posts = soup.findAll('item')
+ for post in posts:
+ title = post.find('title').text
+ link = post.find('link').text
+ date = post.find('pubDate').text
+ for category in post.findAll('category'):
+ if category.text in text_list:  # text_list: categories that map to a post type, still to be defined
+ #assign post type
+ print(title, link, date, "-----------------\r\r")
+ #create a new post if it doesn't exist:
+ p, created = Post.objects.get_or_create(
+ title = post.find('title').text,
+ url = post.find('link').text,
+ pub_date = post.find('pubDate').text,
+ author = post.find('dc:creator').text,
+ last_update = timezone.now(),
+ # post_type still needs to be derived from the categories above
+ )
+ #crawl here to get links from post
+ # then get or create products for each thing in the post
+ #product, created = Post.objects.get_or_create (
+ # title = post.find('title').text
+ # url = post.find('link').text
+ # pub_date = post.find('pubDate').text
+ # author = post.find('dc:creator').text
+ # last_update = timezone.now()
+ # post_type = # need to parse categories to get posttype
+ #)
+
+
+def parse_feed_xml(source_feed, feed_content, output):
+
+ ok = True
+ changed = False
+
+ if source_feed.posts.all().count() == 0:
+ is_first = True
+ else:
+ is_first = False
+
+ #output.write(ret.content)
+ try:
+
+ _customize_sanitizer(parser)
+ f = parser.parse(feed_content) #need to start checking feed parser errors here
+ entries = f['entries']
+ if len(entries):
+ source_feed.last_success = timezone.now() #in case we start auto unsubscribing long dead feeds
+ else:
+ source_feed.last_result = "Feed is empty"
+ ok = False
+
+ except Exception as ex:
+ source_feed.last_result = "Feed Parse Error"
+ entries = []
+ ok = False
+
+ source_feed.save(update_fields=["last_success", "last_result"])
+
+ if ok:
+ try:
+ source_feed.name = f.feed.title
+ source_feed.save(update_fields=["name"])
+ except Exception as ex:
+ output.write("\nUpdate name error:" + str(ex))
+ pass
+
+ try:
+ source_feed.site_url = f.feed.link
+ source_feed.save(update_fields=["site_url"])
+ except Exception as ex:
+ pass
+
+
+ try:
+ source_feed.image_url = f.feed.image.href
+ source_feed.save(update_fields=["image_url"])
+ except:
+ pass
+
+
+ # either of these is fine, prefer description over summary
+ # also feedparser will give us itunes:summary etc if there
+ try:
+ source_feed.description = f.feed.summary
+ except:
+ pass
+
+ try:
+ source_feed.description = f.feed.description
+ except:
+ pass
+
+ try:
+ source_feed.save(update_fields=["description"])
+ except:
+ pass
+
+
+ #output.write(entries)
+ entries.reverse() # Entries are typically in reverse chronological order - put them in right order
+ for e in entries:
+
+
+ # we are going to take the longest
+ body = ""
+
+ if hasattr(e, "content"):
+ for c in e.content:
+ if len(c.value) > len(body):
+ body = c.value
+
+ if hasattr(e, "summary"):
+ if len(e.summary) > len(body):
+ body = e.summary
+
+ if hasattr(e, "summary_detail"):
+ if len(e.summary_detail.value) > len(body):
+ body = e.summary_detail.value
+
+ if hasattr(e, "description"):
+ if len(e.description) > len(body):
+ body = e.description
+
+
+ body = fix_relative(body, source_feed.site_url)
+
+ try:
+ guid = e.guid
+ except Exception as ex:
+ try:
+ guid = e.link
+ except Exception as ex:
+ m = hashlib.md5()
+ m.update(body.encode("utf-8"))
+ guid = m.hexdigest()
+
+ try:
+ p = Post.objects.filter(source=source_feed).filter(guid=guid)[0]
+ output.write("EXISTING " + guid + "\n")
+
+ except Exception as ex:
+ output.write("NEW " + guid + "\n")
+ p = Post(index=0, body=" ", title="", guid=guid)
+ p.found = timezone.now()
+ changed = True
+
+
+ try:
+ p.created = datetime.datetime.fromtimestamp(time.mktime(e.published_parsed)).replace(tzinfo=timezone.utc)
+ except Exception as ex2:
+ try:
+ p.created = datetime.datetime.fromtimestamp(time.mktime(e.updated_parsed)).replace(tzinfo=timezone.utc)
+ except Exception as ex3:
+ output.write("CREATED ERROR:" + str(ex3))
+ p.created = timezone.now()
+
+
+ p.source = source_feed
+ p.save()
+
+ try:
+ p.title = e.title
+ p.save(update_fields=["title"])
+ except Exception as ex:
+ output.write("Title error:" + str(ex))
+
+ try:
+ p.link = e.link
+ p.save(update_fields=["link"])
+ except Exception as ex:
+ output.write("Link error:" + str(ex))
+
+ try:
+ p.image_url = e.image.href
+ p.save(update_fields=["image_url"])
+ except:
+ pass
+
+
+
+ try:
+ p.author = e.author
+ p.save(update_fields=["author"])
+ except Exception as ex:
+ p.author = ""
+
+
+
+ try:
+ p.body = body
+ p.save(update_fields=["body"])
+ # output.write(p.body)
+ except Exception as ex:
+ output.write(str(ex))
+ output.write(p.body)
+
+
+ try:
+ seen_files = []
+
+ post_files = e["enclosures"]
+ non_dupes = []
+
+ # find any files in media_content that aren't already declared as enclosures
+ if "media_content" in e:
+ for ee in e["media_content"]:
+ found = False
+ for ff in post_files:
+ if ff["href"] == ee["url"]:
+ found = True
+ break
+ if not found:
+ non_dupes.append(ee)
+
+ post_files += non_dupes
+
+
+ for ee in list(p.enclosures.all()):
+ # check existing enclosure is still there
+ found_enclosure = False
+ for pe in post_files:
+
+ href = "href"
+ if href not in pe:
+ href = "url"
+
+ length = "length"
+ if length not in pe:
+ length = "filesize"
+
+
+ if pe["href"] == ee.href and ee.href not in seen_files:
+ found_enclosure = True
+
+ try:
+ ee.length = int(pe[length])
+ except:
+ ee.length = 0
+
+ try:
+ type = pe["type"]
+ except:
+ type = "audio/mpeg" # we are assuming podcasts here but that's probably not safe
+
+ ee.type = type
+ ee.save()
+ break
+ if not found_enclosure:
+ ee.delete()
+ seen_files.append(ee.href)
+
+ for pe in post_files:
+
+ href = "href"
+ if href not in pe:
+ href = "url"
+
+ length = "length"
+ if length not in pe:
+ length = "filesize"
+
+ try:
+ if pe[href] not in seen_files:
+
+ try:
+ length = int(pe[length])
+ except:
+ length = 0
+
+ try:
+ type = pe["type"]
+ except:
+ type = "audio/mpeg"
+
+ ee = Enclosure(post=p, href=pe[href], length=length, type=type)
+ ee.save()
+ except Exception as ex:
+ pass
+ except Exception as ex:
+ if output:
+ output.write("No enclosures - " + str(ex))
+
+
+ if is_first and source_feed.posts.all().count() > 0:
+ # If this is the first time we have parsed this
+ # then see if it's paginated and go back through its history
+ agent = get_agent(source_feed)
+ headers = { "User-Agent": agent } #identify ourselves
+ keep_going = True
+ while keep_going:
+ keep_going = False # assume we're stopping unless we find a next link
+ if hasattr(f.feed, 'links'):
+ for link in f.feed.links:
+ if 'rel' in link and link['rel'] == "next":
+ ret = requests.get(link['href'], headers=headers, verify=False, allow_redirects=True, timeout=20)
+ (pok, pchanged) = parse_feed_xml(source_feed, ret.content, output)
+ # print(link['href'])
+ # print((pok, pchanged))
+ f = parser.parse(ret.content) # rebase the loop on this feed version
+ keep_going = True
+
+
+ return (ok,changed)
+
+
+def parse_feed_json(source_feed, feed_content, output):
+
+ ok = True
+ changed = False
+
+ try:
+ f = json.loads(feed_content)
+ entries = f['items']
+ if len(entries):
+ source_feed.last_success = timezone.now() #in case we start auto unsubscribing long dead feeds
+ else:
+ source_feed.last_result = "Feed is empty"
+ source_feed.interval += 120
+ ok = False
+
+ source_feed.save(update_fields=["last_success", "last_result"])
+
+
+ except Exception as ex:
+ source_feed.last_result = "Feed Parse Error"
+ entries = []
+ source_feed.interval += 120
+ ok = False
+
+ if ok:
+
+
+ if "expired" in f and f["expired"]:
+ # This feed says it is done
+ # TODO: permanently disable
+ # for now source_feed.interval to max
+ source_feed.interval = (24*3*60)
+ source_feed.last_result = "This feed has expired"
+ return (False, False)
+
+ try:
+ source_feed.site_url = f["home_page_url"]
+ source_feed.name = f["title"]
+
+ source_feed.save(update_fields=["site_url", "name"])
+
+ except Exception as ex:
+ pass
+
+
+ try:
+ if "description" in f:
+ _customize_sanitizer(parser)
+ source_feed.description = parser.sanitizer._sanitize_html(f["description"], "utf-8", 'text/html')
+ source_feed.save(update_fields=["description"])
+ except Exception as ex:
+ pass
+
+ try:
+ _customize_sanitizer(parser)
+ source_feed.name = parser.sanitizer._sanitize_html(source_feed.name, "utf-8", 'text/html')
+ source_feed.save(update_fields=["name"])
+
+ except Exception as ex:
+ pass
+
+ try:
+ if "icon" in f:
+ source_feed.image_url = f["icon"]
+ source_feed.save(update_fields=["image_url"])
+ except Exception as ex:
+ pass
+
+ #output.write(entries)
+ entries.reverse() # Entries are typically in reverse chronological order - put them in right order
+ for e in entries:
+ body = " "
+ if "content_text" in e:
+ body = e["content_text"]
+ if "content_html" in e:
+ body = e["content_html"] # prefer html over text
+
+ body = fix_relative(body,source_feed.site_url)
+
+
+
+ try:
+ guid = e["id"]
+ except Exception as ex:
+ try:
+ guid = e["url"]
+ except Exception as ex:
+ m = hashlib.md5()
+ m.update(body.encode("utf-8"))
+ guid = m.hexdigest()
+
+ try:
+ p = Post.objects.filter(source=source_feed).filter(guid=guid)[0]
+ output.write("EXISTING " + guid + "\n")
+
+ except Exception as ex:
+ output.write("NEW " + guid + "\n")
+ p = Post(index=0, body=' ')
+ p.found = timezone.now()
+ changed = True
+ p.source = source_feed
+
+ try:
+ title = e["title"]
+ except Exception as ex:
+ title = ""
+
+ # borrow the RSS parser's sanitizer
+ _customize_sanitizer(parser)
+ body = parser.sanitizer._sanitize_html(body, "utf-8", 'text/html') # TODO: validate charset ??
+ _customize_sanitizer(parser)
+ title = parser.sanitizer._sanitize_html(title, "utf-8", 'text/html') # TODO: validate charset ??
+ # no other fields are ever marked as |safe in the templates
+
+ if "banner_image" in e:
+ p.image_url = e["banner_image"]
+
+ if "image" in e:
+ p.image_url = e["image"]
+
+
+ try:
+ p.link = e["url"]
+ except Exception as ex:
+ p.link = ''
+
+ p.title = title
+
+ try:
+ p.created = pyrfc3339.parse(e["date_published"])
+ except Exception as ex:
+ output.write("CREATED ERROR")
+ p.created = timezone.now()
+
+
+ p.guid = guid
+ try:
+ p.author = e["author"]
+ except Exception as ex:
+ p.author = ""
+
+ p.save()
+
+
+ try:
+ seen_files = []
+ for ee in list(p.enclosures.all()):
+ # check existing enclosure is still there
+ found_enclosure = False
+ if "attachments" in e:
+ for pe in e["attachments"]:
+
+ if pe["url"] == ee.href and ee.href not in seen_files:
+ found_enclosure = True
+
+ try:
+ ee.length = int(pe["size_in_bytes"])
+ except:
+ ee.length = 0
+
+ try:
+ type = pe["mime_type"]
+ except:
+ type = "audio/mpeg" # we are assuming podcasts here but that's probably not safe
+
+ ee.type = type
+ ee.save()
+ break
+ if not found_enclosure:
+ ee.delete()
+ seen_files.append(ee.href)
+
+ if "attachments" in e:
+ for pe in e["attachments"]:
+
+ try:
+ if pe["url"] not in seen_files:
+
+ try:
+ length = int(pe["size_in_bytes"])
+ except:
+ length = 0
+
+ try:
+ type = pe["mime_type"]
+ except:
+ type = "audio/mpeg"
+
+ ee = Enclosure(post=p , href=pe["url"], length=length, type=type)
+ ee.save()
+ except Exception as ex:
+ pass
+ except Exception as ex:
+ if output:
+ output.write("No enclosures - " + str(ex))
+
+ try:
+ p.body = body
+ p.save()
+ # output.write(p.body)
+ except Exception as ex:
+ output.write(str(ex))
+ output.write(p.body)
+
+ return (ok,changed)
+
+
+def test_feed(source, cache=False, output=NullOutput()):
+
+
+ headers = { "User-Agent": get_agent(source) } #identify ourselves and also stop our requests getting picked up by any cache
+
+ if cache:
+ if source.etag:
+ headers["If-None-Match"] = str(source.etag)
+ if source.last_modified:
+ headers["If-Modified-Since"] = str(source.last_modified)
+ else:
+ headers["Cache-Control"] = "no-cache,max-age=0"
+ headers["Pragma"] = "no-cache"
+
+ output.write("\n" + str(headers))
+
+ ret = requests.get(source.feed_url, headers=headers, allow_redirects=False, verify=False, timeout=20)
+
+ output.write("\n\n")
+
+ output.write(str(ret))
+
+ output.write("\n\n")
+
+ output.write(ret.text)
+
+
+def get_proxy(out=NullOutput()):
+
+ p = WebProxy.objects.first()
+
+ if p is None:
+ find_proxies(out)
+ p = WebProxy.objects.first()
+
+ out.write("Proxy: {}".format(str(p)))
+
+ return p
+
+
+
+def find_proxies(out=NullOutput()):
+
+
+ out.write("\nLooking for proxies\n")
+
+ try:
+ req = requests.get("https://raw.githubusercontent.com/clarketm/proxy-list/master/proxy-list.txt", timeout=30)
+ if req.status_code == 200:
+ list = req.text
+
+ list = list.split("\n")
+
+ # remove header
+ list = list[4:]
+
+ for item in list:
+ if ":" in item:
+ item = item.split(" ")[0]
+ WebProxy(address=item).save()
+
+
+
+ except Exception as ex:
+ logging.error("Proxy scrape error: {}".format(str(ex)))
+ out.write("Proxy scrape error: {}\n".format(str(ex)))
+
+ if WebProxy.objects.count() == 0:
+ # something went wrong.
+ # to stop infinite loops we will insert duff proxies now
+ for i in range(20):
+ WebProxy(address="X").save()
+ out.write("No proxies found.\n")
+
+
+def import_master_guides(path):
+ """
+ Takes a CSV dump of Jeff's sheet and puts it in the database
+ row[0] #url
+ row[1] #title
+ row[2] #post_type
+ row[3] #author
+ row[7] #date_last_pub
+ row[10] #update_frequency
+ """
+ with open(path) as f:
+ reader = csv.reader(f)
+ count = 0
+ for row in reader:
+ if count > 1:
+ if row[2] == "Deals":
+ continue
+ elif row[2] == "Buying Guide":
+ gtype = PostType.GUIDE
+ else:
+ gtype = PostType.HOWTO
+ if row[10] == "Retired":
+ continue
+ else:
+ up = int(row[10])*30
+ print(row[10])
+ d = datetime.datetime.strptime(row[7], '%m/%d/%Y')
+ post, created = Post.objects.get_or_create(
+ title = str(row[1]).strip(),
+ url = str(row[0]).strip(),
+ date_last_pub = d,
+ author = str(row[3]).strip(),
+ post_type = gtype,
+ update_frequency = up
+ )
+ count = count+1
+
+
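+# Example usage, as a sketch (it assumes "from posts.models import Post, PostType"
+# has been added to this module, since import_master_guides references both names):
+#
+#   $ python manage.py shell
+#   >>> from posts.utils import import_master_guides
+#   >>> import_master_guides('/path/to/master-guides-export.csv')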
diff --git a/app/products/admin.py b/app/products/admin.py
new file mode 100644
index 0000000..a25b84e
--- /dev/null
+++ b/app/products/admin.py
@@ -0,0 +1,28 @@
+from django.contrib import admin
+
+from .models import Brand, Product
+from utils.widgets import AdminImageWidget, LGEntryForm
+
+
+@admin.register(Product)
+class ProductAdmin(admin.ModelAdmin):
+ form = LGEntryForm
+ list_display = ('name', 'admin_thumbnail', 'rating', 'date_created')
+ search_fields = ['name', 'body_markdown']
+ list_filter = ('rating', 'date_created')
+
+ class Media:
+ js = ('image-loader.js', 'next-prev-links.js')
+ css = {
+ "all": ("my_styles.css",)
+ }
+
+
+@admin.register(Brand)
+class BrandAdmin(admin.ModelAdmin):
+ list_display = ('name', )
+ search_fields = ['name',]
+ list_filter = ('date_created',)
+
+ class Media:
+ js = ('next-prev-links.js',)
diff --git a/app/products/migrations/0001_initial.py b/app/products/migrations/0001_initial.py
new file mode 100644
index 0000000..028bc04
--- /dev/null
+++ b/app/products/migrations/0001_initial.py
@@ -0,0 +1,68 @@
+# Generated by Django 4.2.2 on 2023-07-10 18:02
+
+from django.db import migrations, models
+import django.db.models.deletion
+
+
+class Migration(migrations.Migration):
+
+ initial = True
+
+ dependencies = [
+ ]
+
+ operations = [
+ migrations.CreateModel(
+ name='Brand',
+ fields=[
+ ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+ ('name', models.CharField(max_length=200)),
+ ('slug', models.CharField(max_length=50)),
+ ('date_created', models.DateTimeField(auto_now_add=True)),
+ ],
+ ),
+ migrations.CreateModel(
+ name='Retailer',
+ fields=[
+ ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+ ('name', models.CharField(max_length=200)),
+ ('slug', models.CharField(max_length=50)),
+ ('date_created', models.DateTimeField(auto_now_add=True)),
+ ],
+ ),
+ migrations.CreateModel(
+ name='ProductLink',
+ fields=[
+ ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+ ('url', models.CharField(blank=True, max_length=255, null=True)),
+ ('date_last_checked', models.DateTimeField(blank=True, null=True)),
+ ('date_created', models.DateTimeField(auto_now_add=True)),
+ ('is_active', models.BooleanField(default=True)),
+ ('product', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, to='products.brand')),
+ ('retailer', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, to='products.retailer')),
+ ],
+ options={
+ 'ordering': ('date_last_checked',),
+ },
+ ),
+ migrations.CreateModel(
+ name='Product',
+ fields=[
+ ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+ ('name', models.CharField(max_length=200)),
+ ('wired_name', models.CharField(max_length=200)),
+ ('normalized_name', models.CharField(max_length=200)),
+ ('slug', models.CharField(max_length=250)),
+ ('date_created', models.DateTimeField()),
+ ('body_markdown', models.TextField(blank=True)),
+ ('body_html', models.TextField(blank=True, null=True)),
+ ('wired_price', models.IntegerField()),
+ ('lowest_price_ever', models.IntegerField(null=True)),
+ ('rating', models.IntegerField()),
+ ('brand', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, to='products.brand')),
+ ],
+ options={
+ 'ordering': ('-date_created',),
+ },
+ ),
+ ]
diff --git a/app/products/migrations/__init__.py b/app/products/migrations/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/app/products/migrations/__init__.py
diff --git a/app/products/models.py b/app/products/models.py
new file mode 100644
index 0000000..58a0264
--- /dev/null
+++ b/app/products/models.py
@@ -0,0 +1,87 @@
+import os
+from PIL import Image
+from django.db import models
+from django.db.models.signals import post_save
+from django.contrib.sitemaps import Sitemap
+from django.dispatch import receiver
+from django.urls import reverse
+from django.apps import apps
+from django.utils.html import format_html
+from django.conf import settings
+from django.template.defaultfilters import slugify
+
+
+class Retailer(models.Model):
+ name = models.CharField(max_length=200)
+ slug = models.CharField(max_length=50)
+ date_created = models.DateTimeField(auto_now_add=True)
+
+ def __str__(self):
+ return self.name
+
+
+class Brand(models.Model):
+ name = models.CharField(max_length=200)
+ slug = models.CharField(max_length=50)
+ date_created = models.DateTimeField(auto_now_add=True)
+
+ def __str__(self):
+ return self.name
+
+
+class Product(models.Model):
+ name = models.CharField(max_length=200)
+ wired_name = models.CharField(max_length=200)
+ normalized_name = models.CharField(max_length=200)
+ brand = models.ForeignKey(Brand, null=True, on_delete=models.SET_NULL)
+ slug = models.CharField(max_length=250)
+ date_created = models.DateTimeField()
+ body_markdown = models.TextField(blank=True)
+ body_html = models.TextField(null=True, blank=True)
+ wired_price = models.IntegerField()
+ lowest_price_ever = models.IntegerField(null=True)
+ rating = models.IntegerField()
+
+ class Meta:
+ ordering = ('-date_created',)
+
+ def __str__(self):
+ return self.name
+
+ @property
+ def get_previous_admin_url(self):
+ n = self.get_previous_by_date_created()
+ return reverse('admin:%s_%s_change' % (self._meta.app_label, self._meta.model_name), args=[n.id])
+
+ @property
+ def get_next_admin_url(self):
+ model = apps.get_model(app_label=self._meta.app_label, model_name=self._meta.model_name)
+ try:
+ return reverse('admin:%s_%s_change' % (self._meta.app_label, self._meta.model_name), args=[self.get_next_by_date_created().pk])
+ except model.DoesNotExist:
+ return ''
+
+ def admin_thumbnail(self):
+ return format_html('<img src="%s" width="100" style="width:100px" />' % (self.featured_image.get_thumbnail_url()))
+ admin_thumbnail.short_description = 'Thumbnail'
+
+ def get_full_name(self):
+ return "%s %s" % (self.brand.name, self.name)
+
+ def save(self, *args, **kwargs):
+ super(Product, self).save()
+
+
+class ProductLink(models.Model):
+ retailer = models.ForeignKey(Retailer, null=True, on_delete=models.SET_NULL)
+ url = models.CharField(max_length=255, blank=True, null=True)
+ date_last_checked = models.DateTimeField(blank=True, null=True)
+ date_created = models.DateTimeField(auto_now_add=True)
+ product = models.ForeignKey(Brand, null=True, on_delete=models.SET_NULL)
+ is_active = models.BooleanField(default=True)
+
+ class Meta:
+ ordering = ('date_last_checked',)
+
+ def __str__(self):
+ return self.url
diff --git a/app/products/static/product-loader.js b/app/products/static/product-loader.js
new file mode 100644
index 0000000..6d04b61
--- /dev/null
+++ b/app/products/static/product-loader.js
@@ -0,0 +1,10 @@
+function add_products(){
+ var el = document.getElementById("images_frame");
+ if (el){
+ var iframe='<iframe frameborder="0" style="border: #dddddd 1px solid;margin-left: 20px;width:330px; height:720px;" src="/luxproduct/insert/?textarea='+el.id+'"></iframe>';
+ el.insertAdjacentHTML('afterend', iframe);
+ }
+}
+document.addEventListener("DOMContentLoaded", function(event) {
+ add_products();
+});
diff --git a/app/products/templates/products/snippet.html b/app/products/templates/products/snippet.html
new file mode 100644
index 0000000..3fc9f6f
--- /dev/null
+++ b/app/products/templates/products/snippet.html
@@ -0,0 +1,41 @@
+{% load get_image_by_size %}
+{% load get_image_width %}
+{% with image=object.featured_image %}
+<div itemscope itemtype="http://schema.org/Product" class="product-card">
+ <meta itemprop="brand" content="{{object.brand.name}}" />
+ <figure itemscope itemtype="http://schema.org/ImageObject" class="picfull">
+ <a href="{% get_image_by_size image 'original' %}" title="view larger image">
+ <img class="u-photo" itemprop="contentUrl" sizes="(max-width: 750px) 100vw, (min-width: 751) 750px" srcset="{% for size in image.sizes.all%}{% get_image_by_size image size.name %} {{size.width}}w{% if forloop.last%}"{%else%}, {%endif%}{%endfor%}{% for size in image.sizes.all%}{%if not forloop.first and not forloop.last%} src="{% get_image_by_size image size.name%}"{%endif%}{%endfor%} alt="{{image.alt}} photographed by {% if image.photo_credit_source %}{{image.photo_credit_source}}{%else%}luxagraf{%endif%}" >
+ </a>
+ <figcaption>{% if image.caption %}{{image.caption|safe}}{% endif %}{% if image.photo_credit_source %}{%if image.caption %} | {%endif%}image: {% if image.photo_credit_url %}<a href="{{image.photo_credit_url}}" itemprop="author">{%endif%}{{image.photo_credit_source|lower}}{% if image.photo_credit_url %}</a>{%endif%}{%endif%}
+ </figcaption>
+ </figure>
+ <div class="buy-btn-wrapper">
+ <h4 class="product-header" itemprop="name">{{object.get_full_name}}</h4>
+ <h5 class="product-link" itemprop="offers" itemscope itemtype="http://schema.org/Offer">
+ <a href="{{object.primary_offer_url}}" title="buy the {{object.get_full_name}} for ${{object.primary_offer_price}} from {{object.primary_offer_retailer.get_primary_offer_retailer_display}}" itemprop="url" rel="nofollow">
+ Buy Now ({{object.get_primary_offer_retailer_display}}
+ <span itemprop="priceCurrency" content="USD">$</span><span itemprop="price" content="{{object.primary_offer_price}}">{{object.primary_offer_price}}</span>)
+ </a>
+ <link itemprop="availability" href="http://schema.org/InStock" />
+ </h5>{% if object.secondary_offer_url %}
+ <h5 class="product-link" itemprop="offers" itemscope itemtype="http://schema.org/Offer">
+ <a href="{{object.secondary_offer_url}}" title="buy the {{object.get_full_name}} for ${{object.secondary_offer_price}} from {{object.secondary_offer_retailer.get_secondary_offer_retailer_display}}" itemprop="url">
+ Buy Now ({{object.get_secondary_offer_retailer_display}}
+ <span itemprop="priceCurrency" content="USD">$</span><span itemprop="price" content="{{object.secondary_offer_price}}">{{object.secondary_offer_price}}</span>)
+ </a>
+ <link itemprop="availability" href="http://schema.org/InStock" />
+ </h5>{% endif %}
+ </div>
+ <span itemprop="review" itemscope itemtype="http://schema.org/Review">
+ <meta itemprop="name" content="{{object.get_full_name}}" />
+ <meta itemprop="author" content="Scott Gilbertson" />
+ <meta itemprop="datePublished" content="{{object.pub_date}}" />
+ <span itemprop="reviewRating" itemscope itemtype="http://schema.org/Rating">
+ <meta itemprop="worstRating" content = "1"/>
+ <meta itemprop="ratingValue" content="{{object.rating}}" />
+ <meta itemprop="bestRating" content="10" />
+ </span>
+ <meta itemprop="description" content="{{object.body_markdown}}" />
+ </span>
+</div>
+{% endwith %}
diff --git a/app/products/views.py b/app/products/views.py
new file mode 100644
index 0000000..30be542
--- /dev/null
+++ b/app/products/views.py
@@ -0,0 +1,15 @@
+from django.shortcuts import render
+from .models import Product
+
+
+def insert_products(request):
+ """
+ The view that handles the admin insert products feature
+ """
+ object_list = Product.objects.all()
+ #object_list = sorted(
+ # chain(images, videos, audio),
+ # key=lambda instance: instance.pub_date,
+ # reverse=True
+ #)
+ return render(request, 'admin/insert_products.html', {'object_list': object_list, 'textarea_id': request.GET['textarea']})
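+
+# insert_products is the view that product-loader.js embeds in an iframe in the
+# admin (src="/luxproduct/insert/?textarea=<element id>"), so the "textarea" GET
+# parameter is required; without it the request.GET lookup above will error.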
diff --git a/app/utils/templates/utils/clipboardtomarkdown.html b/app/utils/templates/utils/clipboardtomarkdown.html
new file mode 100644
index 0000000..50ac63b
--- /dev/null
+++ b/app/utils/templates/utils/clipboardtomarkdown.html
@@ -0,0 +1,79 @@
+<!DOCTYPE html>
+<html>
+ <head>
+ <title>Paste to Markdown</title>
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+ <meta charset="utf-8">
+ <meta content="text/html; charset=utf-8" http-equiv="Content-Type">
+ <link href="favicon.ico" rel="icon" type="image/x-icon">
+ <link rel="stylesheet" href="bootstrap.css">
+ <script src="to-markdown.js"></script>
+ <script src="clipboard2markdown.js"></script>
+ <style>
+ h2 {
+ font-size: 22px;
+ font-style: italic;
+ }
+ body {
+ background-attachment: fixed;
+ background-image: url('background.svg');
+ background-position: right 10% bottom 20%;
+ background-repeat: no-repeat;
+ background-size: 20em;
+ margin-bottom: 10vh;
+ margin-top: 10vh;
+ }
+ #pastebin {
+ opacity: 0.01;
+ width: 100%;
+ height: 1px;
+ overflow: hidden;
+ }
+ #output {
+ border: none;
+ background: transparent;
+ font-family: Consolas, "Liberation Mono", Courier, monospace;
+ font-size: 13px;
+ line-height: 19px;
+ min-height: 78vh;
+ overflow: auto;
+ padding: 6px 10px;
+ resize: none;
+ width: 100%;
+ }
+ @media (prefers-color-scheme: dark) {
+ body {
+ background-image: url('background-dark.svg');
+ background-color: #222;
+ color:white;
+ }
+ code {
+ color: #ff7a9c;
+ background-color: #0e0609;
+ }
+ a {
+ color: #8fcbff;
+ }
+ }
+ </style>
+ </head>
+ <body>
+ <div class="container">
+ <section id="info">
+ <h1>Paste to Markdown</h1>
+ <h2>Instructions</h2>
+ <ol>
+ <li>Find the text to convert to Markdown (<i>e.g.</i>, in another browser tab)</li>
+ <li>Copy it to the clipboard (<code>Ctrl+C</code>, or <code>&#8984;+C</code> on Mac)</li>
+ <li>Paste it into this window (<code>Ctrl+V</code>, or <code>&#8984;+V</code> on Mac)</li>
+ <li>The converted Markdown will appear!</li>
+ </ol>
+ <p>The conversion is carried out by <a href="https://github.com/domchristie/to-markdown">to-markdown</a>, a Markdown converter written in JavaScript and running locally in the browser.</p>
+ </section>
+ <div contenteditable="true" id="pastebin"></div>
+ <section class="hidden" id="wrapper">
+ <textarea id="output"></textarea>
+ </section>
+ </div>
+ </body>
+</html>
diff --git a/app/utils/urls.py b/app/utils/urls.py
index 7c37c5d..6ffa29f 100644
--- a/app/utils/urls.py
+++ b/app/utils/urls.py
@@ -9,4 +9,9 @@ urlpatterns = [
views.nav_json,
name="admin_links"
),
+ path(
+ r'clipboardtomarkdown',
+ TemplateView.as_view(template_name="utils/clipboardtomarkdown.html"),
+ name="tomarkdown"
+ ),
]