diff options
Diffstat (limited to 'bin/parse_subids.py')
-rw-r--r-- | bin/parse_subids.py | 14 |
1 files changed, 6 insertions, 8 deletions
diff --git a/bin/parse_subids.py b/bin/parse_subids.py
index 5e8b8f2..466c15b 100644
--- a/bin/parse_subids.py
+++ b/bin/parse_subids.py
@@ -1,19 +1,17 @@
 from bs4 import BeautifulSoup
 import markdown
-
-with open('/home/lxf/writing/wired/bf-photo-deals.txt', 'r') as f:
+with open('/home/lxf/writing/conde/wired/weekend-deals-061320.txt', 'r') as f:
     data = f.read()
-result = open('/home/lxf/writing/wired/subid-links.txt', 'a')
+result = open('/home/lxf/writing/conde/wired/subid-links.txt', 'a')
+print(data)
 soup = BeautifulSoup(markdown.markdown(data), "lxml")
-subid = "blackfridayphotodeals2019"
-page_url = "https://www.wired.com/story/best-black-friday-photography-deals-2019/"
+subid = "weekenddeals06132020"
+page_url = "https://www.wired.com/story/weekend-deals-june-12-2020"
 for a in soup.find_all('a'):
     start = a['href'].split('//')[1][:4]
     if str(start) == 'best' or start == 'goto':
-        l = "%s,,%s,Impact,%s\n" % (page_url, subid, a['href'])
+        l = "%s,%s,%s,Impact,%s\n" % (page_url, subid, a.contents[0], a['href'])
         result.write(l)
 result.close()
 
 
-def parse_links(f):
-