summaryrefslogtreecommitdiff
path: root/bin/parse_subids.py
blob: 466c15b7d250106b1797b51f83a02615876b4b4b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
from bs4 import BeautifulSoup
import markdown

# Read a Markdown article, render it to HTML, and append one comma-separated
# row per matching link to a running report file.
#
# A link matches when the text following the first '//' in its href begins
# with 'best' or 'goto' (presumably the affiliate redirect hosts -- confirm).
# Row layout: page_url,subid,link text,Impact,href
with open('/home/lxf/writing/conde/wired/weekend-deals-061320.txt', 'r',
          encoding='utf-8') as f:
    data = f.read()
print(data)
soup = BeautifulSoup(markdown.markdown(data), "lxml")
subid = "weekenddeals06132020"
page_url = "https://www.wired.com/story/weekend-deals-june-12-2020"

# Append mode: rows accumulate across runs. The context manager guarantees the
# handle is flushed and closed even if a link raises part-way through.
with open('/home/lxf/writing/conde/wired/subid-links.txt', 'a',
          encoding='utf-8') as result:
    for a in soup.find_all('a'):
        # Anchors without an href (e.g. named anchors) cannot be links.
        href = a.get('href')
        if not href:
            continue
        # Relative, mailto: and fragment links have no '//' and are skipped
        # instead of raising IndexError.
        _, sep, rest = href.partition('//')
        if not sep or not rest.startswith(('best', 'goto')):
            continue
        # First child of the anchor is used as the link text; an empty anchor
        # yields an empty field rather than an IndexError.
        # NOTE(review): fields are not CSV-quoted, so a comma in the link text
        # shifts the columns -- confirm whether downstream tooling tolerates it.
        label = a.contents[0] if a.contents else ''
        row = "%s,%s,%s,Impact,%s\n" % (page_url, subid, label, href)
        result.write(row)