"""Print CSV-style rows for selected links found in a Markdown file.

Usage: python <this-script> <markdown-file> <subid>

The script first prints how many lines of the file start with ``- **``.
It then renders the file to HTML and, for every link whose address, after
the first ``//``, starts with ``best`` or ``goto``, prints one row of the
form ``<page_url>,,<subid>,Impact,<href>`` followed by a blank line.
"""
import sys

from bs4 import BeautifulSoup
import markdown

# Fail with a readable message instead of an IndexError traceback when the
# two required positional arguments are missing.
if len(sys.argv) < 3:
    sys.exit("usage: %s <markdown-file> <subid>" % sys.argv[0])

filename = sys.argv[1]
subid = sys.argv[2]
page_url = "https://www.wired.com/story/best-black-friday-photography-deals-2019/"

# Read the file once; the same lines feed both the bullet count and the
# Markdown renderer.  newline='' keeps the original line endings untouched.
with open(filename, newline='') as f:
    content = f.readlines()
data = "".join(content)

# Count the lines that open with a bold list item ("- **").
count = sum(1 for line in content if line.startswith('- **'))
print("count is: ", count)

soup = BeautifulSoup(markdown.markdown(data), "lxml")

# href=True skips anchors that carry no href attribute (e.g. raw-HTML named
# anchors), which would otherwise raise KeyError.
for a in soup.find_all('a', href=True):
    href = a['href']
    # Text after the first "//"; empty when the href has no "//" (relative
    # links, mailto:, #fragments), so such links are skipped rather than
    # crashing the run with an IndexError.
    after_scheme = href.partition('//')[2]
    if after_scheme.startswith(('best', 'goto')):
        # The trailing "\n" plus print's own newline leaves a blank line
        # after every row, matching the script's established output format.
        print("%s,,%s,Impact,%s\n" % (page_url, subid, href))