# YOUR CODE HERE
import requests
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib
# Fetch the catalogue home page. The timeout keeps the request from hanging
# indefinitely, and raise_for_status() prevents parsing an HTTP error page
# as if it were the book listing.
url = "http://books.toscrape.com/"
response = requests.get(url, timeout=10)
response.raise_for_status()

# Build the parse tree from the raw response bytes.
soup = BeautifulSoup(response.content, "html.parser")

# Every book on the page is rendered as an <article class="product_pod">.
books_html = soup.find_all("article", class_="product_pod")
len(books_html)
books_html[0]

# Explore the first book: the title is stored in the `title` attribute of
# the link inside the <h3> heading.
book_title = books_html[0].find("h3").find("a").attrs['title']
print(book_title)

# The price text is printed without its first character (presumably the
# currency symbol -- confirm against the page markup).
book_price = books_html[0].find("p", class_="price_color").string
print(book_price[1:])

# The star rating is encoded in the CSS classes of this <p> tag.
book_stars_html = books_html[0].find("p", class_="star-rating")
# Rating word found in the tag's CSS classes -> number of stars. The words
# are checked in this order, so the first one present wins.
_STAR_RATINGS = {"One": 1, "Two": 2, "Three": 3, "Four": 4, "Five": 5}


def parse_rating(rating_classes):
    """Convert a book's star-rating CSS classes into a number of stars.

    Args:
        rating_classes: The ``class`` attribute values of the star-rating
            tag, e.g. ``['star-rating', 'Three']``.

    Returns:
        The rating as an int from 1 to 5, or 0 when none of the words
        ``One`` .. ``Five`` is present (including empty or ``None`` input).
    """
    if not rating_classes:
        return 0
    for word, stars in _STAR_RATINGS.items():
        if word in rating_classes:
            return stars
    return 0
# Rating of the first book, first from the tag found earlier and then
# looked up again directly from the first <article>.
parse_rating(book_stars_html.attrs['class'])
book_rating = parse_rating(books_html[0].find("p", class_="star-rating").attrs['class'])

# Collect the data column by column: one list per field, one entry per book.
books_dict = {'Title': [], 'Price': [], 'Rating': []}
for book in books_html:
    title = book.find("h3").find("a").attrs["title"]
    # Skip the first character of the price text before converting to float.
    price = float(book.find("p", class_="price_color").text[1:])
    rating = parse_rating(book.find("p", class_="star-rating").attrs['class'])
    books_dict["Title"].append(title)
    books_dict["Price"].append(price)
    books_dict["Rating"].append(rating)

books_dict
len(books_dict)  # You should have 3 key:value pairs
len(books_dict["Title"])  # Each value should contain 20 elements from the 20 books, as many as on the web page!

# One row per book, one column per dictionary key.
books_df = pd.DataFrame.from_dict(books_dict)
books_df

# Writing .xlsx files needs an Excel writer engine to be installed first.
# Install it outside the script (`pip install XlsxWriter` in a shell, or
# `%pip install XlsxWriter` in a notebook cell); a bare `pip install` line
# is not valid Python.
books_df.to_excel('books.xlsx', sheet_name='Books')
# Comments