blob: 466c15b7d250106b1797b51f83a02615876b4b4b (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
|
import csv

import markdown
from bs4 import BeautifulSoup
# Scan one Markdown article for outbound links of interest and append one
# CSV row per link (page URL, sub-ID, link text, "Impact", link address) to
# a running links file.

# Read the Markdown source. The encoding is pinned so the result does not
# depend on the locale of the machine running the script.
with open('/home/lxf/writing/conde/wired/weekend-deals-061320.txt', 'r',
          encoding='utf-8') as f:
    data = f.read()
print(data)

# Render the Markdown to HTML so links can be located as <a> elements.
soup = BeautifulSoup(markdown.markdown(data), "lxml")
subid = "weekenddeals06132020"
page_url = "https://www.wired.com/story/weekend-deals-june-12-2020"

# A link is recorded only when the part of its address after the first "//"
# starts with one of these prefixes.
# NOTE(review): presumably these identify the affiliate-network hosts
# (the rows are labelled "Impact") -- confirm the list is complete.
link_prefixes = ('best', 'goto')

# Append mode: rows accumulate across runs. The context manager guarantees
# the file is closed even if a malformed link raises part-way through.
# newline='' hands line-ending control to the csv writer, which is told to
# keep the plain "\n" terminator.
with open('/home/lxf/writing/conde/wired/subid-links.txt', 'a',
          encoding='utf-8', newline='') as result:
    # csv.writer quotes fields that contain commas or quotes, so link text
    # such as "Watch, 38mm" no longer shifts the columns of its row.
    writer = csv.writer(result, lineterminator='\n')
    for a in soup.find_all('a'):
        href = a.get('href')
        # Anchors with no href, or with an address that has no "//"
        # (relative paths, mailto:, #fragments), have nothing to match
        # against; skip them instead of raising KeyError/IndexError.
        if not href or '//' not in href:
            continue
        if not href.split('//')[1].startswith(link_prefixes):
            continue
        # get_text() yields the visible link text even when the anchor is
        # empty or wraps nested markup such as <em>/<strong>.
        writer.writerow([page_url, subid, a.get_text(), 'Impact', href])
|