summaryrefslogtreecommitdiff
path: root/bin/parse_subids.py
blob: 5e8b8f2bf1a19009b3aaadcc1a7a523073b8fa11 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
from bs4 import BeautifulSoup
import markdown


# Read a Markdown draft, render it to HTML, and append one comma-separated
# row per matching link to the sub-id links file.
with open('/home/lxf/writing/wired/bf-photo-deals.txt', 'r') as f:
    data = f.read()

subid = "blackfridayphotodeals2019"
page_url = "https://www.wired.com/story/best-black-friday-photography-deals-2019/"

# The output is opened in append mode, so rows from earlier runs are kept.
# The context manager guarantees the file is closed even if parsing or
# writing raises part-way through.
with open('/home/lxf/writing/wired/subid-links.txt', 'a') as result:
    soup = BeautifulSoup(markdown.markdown(data), "lxml")
    for a in soup.find_all('a'):
        # Anchors without an href attribute (e.g. named anchors) are skipped
        # instead of raising KeyError.
        href = a.get('href')
        if not href:
            continue
        # Text following the first '//' (i.e. after the URL scheme). Links
        # with no '//' -- relative paths, mailto:, '#fragment' -- yield an
        # empty string here and are skipped instead of raising IndexError.
        start = href.partition('//')[2]
        # Only links whose host part begins with 'best' or 'goto' are
        # recorded. NOTE(review): presumably these are the affiliate
        # hosts of interest -- confirm against the link sources.
        if start.startswith(('best', 'goto')):
            # Row layout: page URL, empty field, sub-id, literal "Impact",
            # then the link itself.
            row = "%s,,%s,Impact,%s\n" % (page_url, subid, href)
            result.write(row)

def parse_links(f):