import re
from urllib.parse import urlparse

import requests
from bs4 import BeautifulSoup
from django.db import models

# A desktop User-Agent keeps Wired and Amazon from rejecting the requests.
HEADERS = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 '
                         '(KHTML, like Gecko) Chrome/86.0.4240.57 Safari/537.36'}


def get_article_text(url):
    return requests.get(url, headers=HEADERS)


def parse_article(url):
    article = get_article_text(url)
    soup = BeautifulSoup(article.text, "lxml")
    for div in soup.find_all("div", class_="article__body"):
        for a in div.find_all('a'):
            domain = urlparse(a.get('href', ''))
            if domain.netloc in ('www.amazon.com', 'amzn.to'):
                try:
                    # The price quoted in the article, e.g. "Echo Dot ($25)".
                    w_price = a.get_text().split("$")[1]
                except IndexError:
                    w_price = "link has no price"
                    continue
                try:
                    w_price = re.sub(r"[^\d.]", "", w_price.strip())
                    amzn_url = a['href']
                    amzn_r = requests.get(amzn_url, headers=HEADERS)
                    print(amzn_r.status_code)
                    if amzn_r.status_code == 404:
                        print(a.get_text(), "is a 404")
                    else:
                        amzn_soup = BeautifulSoup(amzn_r.text, "lxml")
                        # Amazon's current buy-box price lives in this span.
                        amzn_price = amzn_soup.find("span", {"id": "newBuyBoxPrice"})
                        print(w_price, amzn_price)
                except Exception as exc:
                    print("wrong", exc)


class Article(models.Model):
    title = models.CharField(max_length=200, blank=True)
    url = models.CharField(max_length=200)
    # null=True is needed alongside blank=True, otherwise saving an
    # Article without a date raises an IntegrityError.
    pub_date = models.DateTimeField('Date published', blank=True, null=True)

    class Meta:
        ordering = ('-pub_date',)
        get_latest_by = 'pub_date'

    def __str__(self):
        return self.title

    def save(self, *args, **kwargs):
        # Scrape the article for Amazon deals before persisting it.
        parse_article(self.url)
        super().save(*args, **kwargs)


class Deal(models.Model):
    deal_title = models.CharField(max_length=200)
    deal_price = models.FloatField()
    deal_url = models.CharField(max_length=200)
    store_price = models.FloatField()
    # Assumed field: Meta below orders and selects by pub_date, so the
    # model needs one.
    pub_date = models.DateTimeField('Date published', auto_now_add=True)

    class Meta:
        ordering = ('-pub_date',)
        get_latest_by = 'pub_date'

    def __str__(self):
        return self.deal_title


# Build a CSV of the article's affiliate links for the Impact network.
subid = "weekenddeals06132020"
page_url = "https://www.wired.com/story/weekend-deals-june-12-2020"

soup = BeautifulSoup(get_article_text(page_url).text, "lxml")
# Output filename is an assumption; the script needs the file opened
# before it can write to it.
with open("impact_links.csv", "a") as result:
    for a in soup.find_all('a'):
        href = a.get('href', '')
        if '//' not in href:
            continue  # skip relative links, which have no scheme separator
        # First four characters of the host, e.g. "best..." or "goto...".
        start = href.split('//')[1][:4]
        if start in ('best', 'goto'):
            l = "%s,%s,%s,Impact,%s\n" % (page_url, subid, a.contents[0], href)
            result.write(l)
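
# parse_article() above only prints the Wired price next to Amazon's
# buy-box span, and the Deal model is never populated. A minimal sketch
# of how the two could be wired together, assuming parse_article() were
# refactored to return (title, wired_price, amazon_price_tag, url)
# tuples instead of printing; record_deals() and that return shape are
# illustrative, not part of the original code.
def record_deals(article):
    for title, w_price, amzn_tag, url in parse_article(article.url):
        if amzn_tag is None:
            continue  # no buy-box price found on the Amazon page
        # amzn_tag is a bs4 Tag; strip it down to a number the same way
        # the article price was cleaned.
        store = float(re.sub(r"[^\d.]", "", amzn_tag.get_text()))
        Deal.objects.create(
            deal_title=title,
            deal_price=float(w_price),  # price quoted in the article
            deal_url=url,
            store_price=store,          # current Amazon buy-box price
        )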