def _clean_text(tag):
    """Return the whitespace-stripped text of *tag*, or None if *tag* is None."""
    if tag is None:
        return None
    return tag.get_text().strip()


def _note_names(soup, block_class):
    """Return the note names inside the pyramid block with class *block_class*.

    Returns None when the block is absent from the page, and a (possibly
    empty) list of strings when it is present.
    """
    block = soup.find('div', class_=block_class)
    if block is None:
        return None
    return [
        span.get_text(strip=True)
        for span in block.find_all('span', class_='clickable_note_img')
    ]


def scrape_perfume_data(url, timeout=30):
    """Download a perfume detail page and extract its key fields.

    Args:
        url: Address of the page to scrape.
        timeout: Seconds to wait for the server, passed straight to
            ``requests.get``. Without it a stalled connection would block
            the caller indefinitely.

    Returns:
        A dict with the keys ``PerfumeName``, ``Brand``, ``ReleaseYear``,
        ``OverallRating``, ``RatingCount``, ``Perfumer``, ``TopNotes``,
        ``HeartNotes`` and ``BaseNotes``. Scalar fields are the stripped
        text found on the page (ratings and counts are left as strings);
        the note fields are lists of strings. Any field whose element is
        missing from the page is None.

    Raises:
        requests.HTTPError: The server answered with an error status
            (raised by ``raise_for_status``).
        requests.RequestException: The request itself failed, e.g. it
            timed out or the connection could not be made.
    """
    headers = {
        "User-Agent": "Insert user agent"
    }
    page = requests.get(url, headers=headers, timeout=timeout)
    page.raise_for_status()
    soup = BeautifulSoup(page.content, 'html.parser')

    # The heading may carry extra lines after the name; keep the first only.
    heading = _clean_text(soup.find('h1', class_='p_name_h1', itemprop='name'))
    perfume_name = None if heading is None else heading.split('\n')[0].strip()

    # The brand name is nested one level below the itemprop="brand" span.
    brand_span = soup.find('span', itemprop='brand')
    brand_name = None
    if brand_span is not None:
        brand_name = _clean_text(brand_span.find('span', itemprop='name'))

    # The release year is the text of the first link whose href mentions
    # "Release_Years"; anchors without an href are skipped by the guard.
    release_year = _clean_text(
        soup.find('a', href=lambda href: href and 'Release_Years' in href)
    )

    # NOTE: the multi-class strings below are matched against the element's
    # exact class attribute value, so they depend on the page's markup
    # keeping the classes in this order.
    return {
        'PerfumeName': perfume_name,
        'Brand': brand_name,
        'ReleaseYear': release_year,
        'OverallRating': _clean_text(soup.find('span', itemprop='ratingValue')),
        'RatingCount': _clean_text(soup.find('span', itemprop='ratingCount')),
        'Perfumer': _clean_text(soup.find('div', {'class': 'w-100 mt-0-5 mb-3'})),
        'TopNotes': _note_names(soup, 'pyramid_block nb_t w-100 mt-2'),
        'HeartNotes': _note_names(soup, 'pyramid_block nb_m w-100 mt-2'),
        'BaseNotes': _note_names(soup, 'pyramid_block nb_b w-100 mt-2'),
    }