This method should parse through every page within the new release directory
Tue May 21 2024 15:31:35 GMT+0000 (Coordinated Universal Time)
Saved by
@Uncoverit
#python
# This function should parse every page within the new-release directory (there are 50 pages in total)
def scrape_all_pages(
    base_url='https://www.parfumo.com/Recently_added?current_page=',
    total_pages=50,
):
    """Collect the links from every page of a paginated listing.

    Each page URL is built as ``{base_url}{page_number}&`` for page numbers
    1 through ``total_pages`` inclusive and handed to
    ``scrape_perfume_links``, whose results are accumulated in page order.

    Args:
        base_url: URL prefix to which the page number is appended. Defaults
            to the parfumo.com "Recently added" listing.
        total_pages: Number of pages to visit. Defaults to 50. A value below
            1 visits nothing and returns an empty list.

    Returns:
        A single list holding the items returned for every page that was
        scraped successfully. Pages that failed are skipped.
    """
    end_url = '&'
    all_links = []
    for page_number in range(1, total_pages + 1):
        page_url = f"{base_url}{page_number}{end_url}"
        try:
            # NOTE(review): assumes scrape_perfume_links returns an iterable
            # of links and signals failures with requests exceptions --
            # confirm against its definition.
            links_on_page = scrape_perfume_links(page_url)
        except requests.RequestException as e:
            # Best-effort: one bad page (HTTP error, timeout, dropped
            # connection) must not discard the links gathered so far.
            print(f"Error scraping page {page_number}: {e}")
        else:
            all_links.extend(links_on_page)
            print(f"Scraped links from page {page_number}")
        # Pause between pages so the site is not hit with rapid-fire requests.
        time.sleep(1)
    return all_links
content_copyCOPY
Comments