summaryrefslogtreecommitdiff
path: root/bin/wired-count-deals-items.py
blob: 3fb5bc9c368b431099809c60d389c29419ccb9a5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
import sys
from bs4 import BeautifulSoup
import markdown

# Page URL written as the first column of every exported row.
page_url = "https://www.wired.com/story/best-black-friday-photography-deals-2019/"

# A link is exported when the text right after the first '//' in its href
# starts with one of these prefixes.
# NOTE(review): presumably these select retailer/redirect hosts -- confirm.
LINK_PREFIXES = ('best', 'goto')


def count_deal_items(lines) -> int:
    """Return how many of *lines* begin with ``- **``.

    *lines* is any iterable of strings, e.g. the result of ``readlines()``.
    """
    return sum(1 for line in lines if line.startswith('- **'))


def matches_link_prefix(href: str) -> bool:
    """Return True if the text after the first ``//`` starts with a wanted prefix.

    Hrefs that contain no ``//`` at all (relative paths, ``mailto:`` links,
    fragments, empty strings) never match; they are reported as False rather
    than raising.
    """
    parts = href.split('//', 2)
    if len(parts) < 2:
        return False
    return parts[1].startswith(LINK_PREFIXES)


def link_rows(markdown_text: str, subid: str):
    """Yield one CSV-style row per matching link in *markdown_text*.

    The Markdown is rendered to HTML and every ``<a>`` that carries an
    ``href`` attribute is checked with :func:`matches_link_prefix`.  Each row
    has the form ``<page_url>,,<subid>,Impact,<href>`` followed by a newline.
    """
    soup = BeautifulSoup(markdown.markdown(markdown_text), "lxml")
    # href=True skips anchors that have no href attribute at all.
    for anchor in soup.find_all('a', href=True):
        href = anchor['href']
        if matches_link_prefix(href):
            yield "%s,,%s,Impact,%s\n" % (page_url, subid, href)


def main(argv) -> None:
    """Print the item count and the link rows for the file named in *argv*.

    *argv* is expected to look like ``sys.argv``: program name, Markdown file
    path, sub-id.  Exits with a usage message when arguments are missing.
    """
    if len(argv) < 3:
        sys.exit("usage: %s MARKDOWN_FILE SUBID" % argv[0])
    filename, subid = argv[1], argv[2]

    # Read the file once; newline='' keeps line endings untranslated, so
    # joining the lines reproduces the exact file content.
    with open(filename, newline='') as f:
        lines = f.readlines()

    print("count is: ", count_deal_items(lines))

    for row in link_rows("".join(lines), subid):
        # Each row already ends in '\n', so print() leaves a blank line
        # between rows; kept so the output format is unchanged.
        print(row)


if __name__ == "__main__":
    main(sys.argv)