summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorluxagraf <sng@luxagraf.net>2023-08-10 16:12:10 -0500
committerluxagraf <sng@luxagraf.net>2023-08-10 16:12:10 -0500
commit0b689714d9580ad4a0e4f4399df70ea8d5448040 (patch)
treec3974d5f4877a401c7e38520bba33949e0b02c1d
parent15f3969829705ee78ba20fb0fd5201e84112b7e9 (diff)
posts: added updating feature to automatically update post pub dates
from wired RSS feed.
-rw-r--r--app/posts/management/__init__.py0
-rw-r--r--app/posts/management/commands/__init__.py0
-rw-r--r--app/posts/management/commands/rss_updater.py42
-rw-r--r--app/posts/models.py22
-rw-r--r--app/posts/views.py8
5 files changed, 50 insertions, 22 deletions
diff --git a/app/posts/management/__init__.py b/app/posts/management/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/app/posts/management/__init__.py
diff --git a/app/posts/management/commands/__init__.py b/app/posts/management/commands/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/app/posts/management/commands/__init__.py
diff --git a/app/posts/management/commands/rss_updater.py b/app/posts/management/commands/rss_updater.py
new file mode 100644
index 0000000..7ac26e6
--- /dev/null
+++ b/app/posts/management/commands/rss_updater.py
@@ -0,0 +1,42 @@
+from django.core.management.base import BaseCommand, CommandError
+
+import datetime
+import feedparser
+from urllib.parse import urlparse
+
+from posts.models import PostStatus, Post
+
+"""
+run from a cronscript that looks like this:
+
+*/1 * * * * cd /home/lxf/sites/wired.luxagraf.net && source /home/lxf/sites/wired.luxagraf.net/venv/bin/activate && /home/lxf/sites/wired.luxagraf.net/venv/bin/python /home/lxf/sites/wired.luxagraf.net/manage.py rss_updater --settings=config.settings
+"""
+
+class Command(BaseCommand):
+    """Sync publication dates of stored posts from the Wired commerce RSS feed.
+
+    Each feed entry whose URL path starts with ``/story/`` or ``/gallery/``
+    and that is not tagged "Deals" is looked up by ``Post.url``. A matching
+    post gets ``date_last_pub`` set from the entry's published date and its
+    status set to ``PostStatus.PUBLISHED``.
+    """
+
+    help = "Update all published posts"
+
+    FEED_URL = "https://www.wired.com/feed/tag/commerce/latest/rss"
+    # First URL path segments that identify entries worth syncing.
+    STORY_TYPES = ("story", "gallery")
+    # Format of the feed's <pubDate>, e.g. "Thu, 10 Aug 2023 16:12:10 -0500".
+    PUBLISHED_FORMAT = '%a, %d %b %Y %H:%M:%S %z'
+
+    @staticmethod
+    def is_deal(tags):
+        """Return True if any tag in *tags* has the term "Deals".
+
+        Declared static so that ``self.is_deal(tags)`` receives only the
+        tag list. ``tags`` may be None or empty, in which case the result
+        is False.
+        """
+        return any(tag.get('term') == "Deals" for tag in tags or ())
+
+    def handle(self, *args, **options):
+        """Fetch the feed and update every stored post that matches an entry.
+
+        Entries that are filtered out or have no matching post are skipped
+        silently. Entries that match but cannot be processed (duplicate
+        posts, unparsable date) are reported on stderr and skipped, so one
+        bad entry does not stop the rest of the run.
+        """
+        feed = feedparser.parse(self.FEED_URL)
+        for item in feed.entries:
+            link = item.get('link')
+            if not link:
+                continue
+
+            # First path segment of the link, e.g. "story" in "/story/<slug>/".
+            segments = urlparse(link).path.split('/')
+            story_type = segments[1] if len(segments) > 1 else ''
+            if story_type not in self.STORY_TYPES:
+                continue
+            if self.is_deal(item.get('tags')):
+                continue
+
+            try:
+                post = Post.objects.get(url=link)
+            except Post.DoesNotExist:
+                # No stored post with this URL; nothing to update.
+                continue
+            except Post.MultipleObjectsReturned:
+                self.stderr.write(
+                    'Skipped "%s": more than one post has this url' % link
+                )
+                continue
+
+            try:
+                published = datetime.datetime.strptime(
+                    item.get('published', ''), self.PUBLISHED_FORMAT
+                )
+            except (TypeError, ValueError):
+                self.stderr.write(
+                    'Skipped "%s": could not parse published date' % link
+                )
+                continue
+
+            post.date_last_pub = published.date()
+            post.post_status = PostStatus.PUBLISHED
+            post.save()
+            self.stdout.write(
+                self.style.SUCCESS('Successfully updated post "%s"' % post.title)
+            )
+
+
diff --git a/app/posts/models.py b/app/posts/models.py
index f42956e..b518554 100644
--- a/app/posts/models.py
+++ b/app/posts/models.py
@@ -6,28 +6,6 @@ import settings
from products.models import ProductLink
-"""
-class Feed(models.Model):
- name = models.CharField(max_length=255)
- feed_url = models.CharField(max_length=512)
- slug = models.CharField(max_length=50)
- last_polled = models.DateTimeField(blank=True, null=True)
- due_poll = models.DateTimeField(default=datetime.datetime(1900, 1, 1)) # default to distant past to put new sources to front of queue
- etag = models.CharField(max_length=255, blank=True, null=True)
- last_modified = models.CharField(max_length=255, blank=True, null=True) # just pass this back and forward between server and me , no need to parse
- last_result = models.CharField(max_length=255,blank=True,null=True)
- interval = models.PositiveIntegerField(default=400)
- last_success = models.DateTimeField(blank=True, null=True)
- last_change = models.DateTimeField(blank=True, null=True)
- live = models.BooleanField(default=True)
- status_code = models.PositiveIntegerField(default=0)
- last_302_url = models.CharField(max_length=512, null=True, blank=True)
- last_302_start = models.DateTimeField(null=True, blank=True)
-
- def __str__(self):
- return self.name
-"""
-
class PostType(models.IntegerChoices):
REVIEW = 0, ('review')
diff --git a/app/posts/views.py b/app/posts/views.py
index 25a8074..29796c4 100644
--- a/app/posts/views.py
+++ b/app/posts/views.py
@@ -8,6 +8,14 @@ from .models import Post
from .forms import PostUpdateForm
from notes.models import Note
+"""
+TODO:
+ crawl the RSS feed and compare url to stored url
+ if it matches update the pub_date.
+ Then scrape the HTML.
+ Then parse the scraped html for aff links and create products from them
+"""
+
class PostListView(LoginRequiredMixin, ListView):
model = Post
template_name = 'posts/post_table.html'