import re

import pandas as pd
import requests
from bs4 import BeautifulSoup

df_links = []
PAGE_NUM = 281

# range(1, PAGE_NUM) walks pages 1 through 280 of the research listing.
for page in range(1, PAGE_NUM):
    URL = f"https://www.urban.org/research?page={page}"
    r = requests.get(URL)
    soup = BeautifulSoup(r.content, 'html5lib')

    # Each research item sits in an <li> carrying this class string.
    articles = soup.find_all('li', attrs={'class': 'mb-16 md:col-span-2 mb-8 sm:mb-0'})
    for article in articles:
        # Grab the first href attribute in the listing's raw HTML.
        match = re.search(r'href="(.+?)"', str(article))
        if match:
            df_links.append([page, match.group(1)])
    print(f'Page {page} done')

df_links = pd.DataFrame(df_links, columns=['page', 'link'])
df_links.to_csv('urban_links.csv', index=False)
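
# Optional hardening sketch (assumptions: urban.org tolerates roughly one
# request per second and returns standard HTTP status codes; the timeout,
# delay, and retry count here are illustrative, not tuned values). Pausing
# between attempts and skipping failed pages keeps a 280-page crawl from
# silently collecting links out of error pages.
import time

import requests

def fetch_page(url, retries=3, delay=1.0):
    """Fetch a URL, retrying on failure; return None if every attempt fails."""
    for attempt in range(retries):
        r = requests.get(url, timeout=30)
        if r.ok:
            return r
        time.sleep(delay * (attempt + 1))  # back off a little on each retry
    return None

# Usage inside the loop above:
#     r = fetch_page(URL)
#     if r is None:
#         print(f'Page {page} failed, skipping')
#         continue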