# Collect article links from the paginated research listing on urban.org and
# write them, one row per link, to urban_links.csv (columns: page, link).

# Exclusive upper bound of the page loop: pages 1 .. PAGE_NUM - 1 are fetched.
# NOTE(review): confirm that the site's first listing page is ?page=1 (not
# ?page=0) and that stopping at PAGE_NUM - 1 is intended.
PAGE_NUM = 281

# Seconds to wait for the server; without a timeout a stalled connection
# would block the whole run indefinitely.
REQUEST_TIMEOUT = 30

# Exact class attribute of the <li> elements that wrap one listing entry.
ARTICLE_CLASS = 'mb-16 md:col-span-2 mb-8 sm:mb-0'

link_rows = []

# One session for the whole crawl so the connection is reused across pages.
with requests.Session() as session:
    for page in range(1, PAGE_NUM):
        URL = f"https://www.urban.org/research?page={page}"
        try:
            r = session.get(URL, timeout=REQUEST_TIMEOUT)
            # Surface HTTP errors instead of silently parsing an error page.
            r.raise_for_status()
        except requests.RequestException as exc:
            # Report and move on so that one bad page does not discard the
            # links already collected from the other pages.
            print(f'Page {page} failed: {exc}')
            continue

        soup = BeautifulSoup(r.content, 'html5lib')
        articles = soup.find_all('li', attrs={'class': ARTICLE_CLASS})
        for article in articles:
            # Read the attribute from the parsed tree rather than running a
            # regex over the serialised markup: the tree gives the unescaped
            # value ("&" instead of "&amp;") and cannot be confused by
            # look-alike attributes such as data-href.
            tag = article.find(href=True)
            if tag is not None and tag['href']:
                link_rows.append([page, tag['href']])
        print(f'Page {page} done')

df_links = pd.DataFrame(link_rows, columns=['page', 'link'])
df_links.to_csv('urban_links.csv', index=False)
Preview:
Download PNG
Download JPEG
Download SVG
Tip: You can change the style, width & colours of the snippet with the inspect tool before clicking Download!
Click to optimize width for Twitter