diff options
author | luxagraf <sng@luxagraf.net> | 2023-08-10 16:12:10 -0500 |
---|---|---|
committer | luxagraf <sng@luxagraf.net> | 2023-08-10 16:12:10 -0500 |
commit | 0b689714d9580ad4a0e4f4399df70ea8d5448040 (patch) | |
tree | c3974d5f4877a401c7e38520bba33949e0b02c1d | |
parent | 15f3969829705ee78ba20fb0fd5201e84112b7e9 (diff) |
posts: added updating feature to automatically update post pub dates
from wired RSS feed.
-rw-r--r-- | app/posts/management/__init__.py | 0 |
-rw-r--r-- | app/posts/management/commands/__init__.py | 0 |
-rw-r--r-- | app/posts/management/commands/rss_updater.py | 42 |
-rw-r--r-- | app/posts/models.py | 22 |
-rw-r--r-- | app/posts/views.py | 8 |
5 files changed, 50 insertions, 22 deletions
diff --git a/app/posts/management/__init__.py b/app/posts/management/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/app/posts/management/__init__.py diff --git a/app/posts/management/commands/__init__.py b/app/posts/management/commands/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/app/posts/management/commands/__init__.py diff --git a/app/posts/management/commands/rss_updater.py b/app/posts/management/commands/rss_updater.py new file mode 100644 index 0000000..7ac26e6 --- /dev/null +++ b/app/posts/management/commands/rss_updater.py @@ -0,0 +1,42 @@ +from django.core.management.base import BaseCommand, CommandError + +import datetime +import feedparser +from urllib.parse import urlparse + +from posts.models import PostStatus, Post + +""" +run from a cronscript that looks line this: + +*/1 * * * * cd /home/lxf/sites/wired.luxagraf.net && source /home/lxf/sites/wired.luxagraf.net/venv/bin/activate && /home/lxf/sites/wired.luxagraf.net/venv/bin/python /home/lxf/sites/wired.luxagraf.net/manage.py rss_updater --settings=config.settings +""" + +class Command(BaseCommand): + help = "Update all published posts" + + def is_deal(tags): + for tag in tags: + if tag['term'] == "Deals": + return True + return False + + def handle(self, *args, **options): + feed = feedparser.parse("https://www.wired.com/feed/tag/commerce/latest/rss") + for item in feed.entries: + url = urlparse(item.link) + story_type = url.path.split('/')[1] + if story_type == "story" or "gallery": + if not self.is_deal(item.tags): + try: + post = Post.objects.get(url=item.link) + post.date_last_pub = datetime.datetime.strptime(item.published, '%a, %d %b %Y %H:%M:%S %z').date() + post.post_status = PostStatus.PUBLISHED + post.save() + self.stdout.write( + self.style.SUCCESS('Successfully updated post "%s"' % post.title) + ) + except: + continue + + diff --git a/app/posts/models.py b/app/posts/models.py index f42956e..b518554 100644 --- 
a/app/posts/models.py +++ b/app/posts/models.py @@ -6,28 +6,6 @@ import settings from products.models import ProductLink -""" -class Feed(models.Model): - name = models.CharField(max_length=255) - feed_url = models.CharField(max_length=512) - slug = models.CharField(max_length=50) - last_polled = models.DateTimeField(blank=True, null=True) - due_poll = models.DateTimeField(default=datetime.datetime(1900, 1, 1)) # default to distant past to put new sources to front of queue - etag = models.CharField(max_length=255, blank=True, null=True) - last_modified = models.CharField(max_length=255, blank=True, null=True) # just pass this back and forward between server and me , no need to parse - last_result = models.CharField(max_length=255,blank=True,null=True) - interval = models.PositiveIntegerField(default=400) - last_success = models.DateTimeField(blank=True, null=True) - last_change = models.DateTimeField(blank=True, null=True) - live = models.BooleanField(default=True) - status_code = models.PositiveIntegerField(default=0) - last_302_url = models.CharField(max_length=512, null=True, blank=True) - last_302_start = models.DateTimeField(null=True, blank=True) - - def __str__(self): - return self.name -""" - class PostType(models.IntegerChoices): REVIEW = 0, ('review') diff --git a/app/posts/views.py b/app/posts/views.py index 25a8074..29796c4 100644 --- a/app/posts/views.py +++ b/app/posts/views.py @@ -8,6 +8,14 @@ from .models import Post from .forms import PostUpdateForm from notes.models import Note +""" +TODO: + crawl the RSS feed and compare url to stored url + if it matches update the pub_date. + Then scrape the HTML. + Then parse the scraped html for aff links and create products from them +""" + class PostListView(LoginRequiredMixin, ListView): model = Post template_name = 'posts/post_table.html' |