# YOUR CODE HERE
import requests
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib
# Download the catalogue front page. The timeout keeps the request from
# hanging forever on an unresponsive server, and raise_for_status() stops on
# an HTTP error instead of letting an error page be parsed as if it were data.
url = "http://books.toscrape.com/"
response = requests.get(url, timeout=10)
response.raise_for_status()
# Parse the downloaded HTML with the built-in "html.parser" backend.
soup = BeautifulSoup(response.content, "html.parser")
# Every <article class="product_pod"> element is treated as one book.
books_html = soup.find_all("article", class_="product_pod")
len(books_html)
books_html[0]
# Explore the first book: its title is the `title` attribute of the link in <h3>.
book_title = books_html[0].find("h3").find("a").attrs['title']
print(book_title)
# Print the price text without its first character (presumably the currency
# symbol -- the loop further down converts the same slice to float).
book_price = books_html[0].find("p", class_="price_color").string
print(book_price[1:])
# Keep the star-rating element; its class list is what parse_rating() reads.
book_stars_html = books_html[0].find("p", class_="star-rating")
def parse_rating(rating_classes):
    """Translate star-rating class names into a numeric rating.

    Args:
        rating_classes: Any object supporting the ``in`` operator. In this
            file it is the ``class`` attribute of a star-rating element.

    Returns:
        The integer 1-5 matching the first of 'One', 'Two', 'Three', 'Four',
        'Five' (checked in that order) found in ``rating_classes``, or 0 when
        none of them is present.
    """
    rating_words = ('One', 'Two', 'Three', 'Four', 'Five')
    # Scan in ascending order so the lowest matching word wins.
    for stars, word in enumerate(rating_words, start=1):
        if word in rating_classes:
            return stars
    return 0
# Try the parser on the first book's star-rating classes, then keep the result.
parse_rating(book_stars_html["class"])
book_rating = parse_rating(books_html[0].find("p", class_="star-rating")["class"])

# Collect every book on the page into a column-oriented dict of lists.
books_dict = {column: [] for column in ("Title", "Price", "Rating")}
for book in books_html:
    title = book.find("h3").find("a")["title"]
    # Skip the first character of the price text before converting to float.
    price = float(book.find("p", class_="price_color").get_text()[1:])
    rating = parse_rating(book.find("p", class_="star-rating")["class"])
    # Append only after all three fields parsed, so the columns stay aligned.
    for column, value in (("Title", title), ("Price", price), ("Rating", rating)):
        books_dict[column].append(value)
books_dict
len(books_dict)  # Expect 3 key:value pairs (Title, Price, Rating)
len(books_dict["Title"])  # Expect 20 elements, one per book shown on the web page
# Turn the dict of equal-length lists into a DataFrame, one column per key.
books_df = pd.DataFrame.from_dict(books_dict)
books_df
# NOTE: the next line is a notebook shell-style command, not plain Python.
# It installs XlsxWriter, presumably to serve as the Excel writer engine for
# the export below -- confirm which engine pandas actually picks.
pip install XlsxWriter
# Write the table to books.xlsx in a worksheet named "Books".
books_df.to_excel('books.xlsx', sheet_name='Books')
                                 
                             
                        
# Comments