# Scrape the first catalogue page of books.toscrape.com into a pandas
# DataFrame (title, price, star rating) and export it to an Excel workbook.
import requests
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd
import matplotlib


def _run_ipython_magic(name, line):
    """Run the IPython line magic ``%<name> <line>`` when inside IPython.

    ``get_ipython`` is only defined inside an IPython/Jupyter session; in a
    plain Python interpreter the lookup raises ``NameError`` and the magic is
    skipped, so this file stays runnable as an ordinary script.
    """
    try:
        shell = get_ipython()
    except NameError:
        return
    shell.run_line_magic(name, line)


# Originally the notebook magic `%matplotlib inline`.
_run_ipython_magic("matplotlib", "inline")

# Fetch the page. The timeout keeps a stalled server from hanging the script,
# and raise_for_status() stops us from parsing an HTTP error page as if it
# were the catalogue.
url = "http://books.toscrape.com/"
response = requests.get(url, timeout=10)
response.raise_for_status()

# Parse the raw bytes so BeautifulSoup can work out the document encoding.
soup = BeautifulSoup(response.content, "html.parser")

# One <article class="product_pod"> element per book on the page.
books_html = soup.find_all("article", class_="product_pod")
# Notebook inspection expressions: they display a value in a notebook cell
# and have no effect when the file runs as a script.
len(books_html)
books_html[0]

# Explore the first book: title, price and rating.
book_title = books_html[0].find("h3").find("a").attrs["title"]
print(book_title)

book_price = books_html[0].find("p", class_="price_color").string
# Skip the first character (presumably the currency symbol -- verify against
# the page markup).
print(book_price[1:])

book_stars_html = books_html[0].find("p", class_="star-rating")

# Rating word -> number of stars. Insertion order is the lookup order, which
# keeps the same precedence as testing One, Two, ... Five in turn.
_RATING_BY_WORD = {"One": 1, "Two": 2, "Three": 3, "Four": 4, "Five": 5}


def parse_rating(rating_classes):
    """Return the star rating (1-5) named in *rating_classes*, or 0.

    Args:
        rating_classes: Anything supporting ``in``; the script passes the
            ``class`` attribute of a ``<p class="star-rating ...">`` element.

    Returns:
        The number for the first of "One" .. "Five" found in
        *rating_classes*, or 0 when none of them is present.
    """
    for word, stars in _RATING_BY_WORD.items():
        if word in rating_classes:
            return stars
    return 0


parse_rating(book_stars_html.attrs["class"])
book_rating = parse_rating(
    books_html[0].find("p", class_="star-rating").attrs["class"]
)

# Collect every book on the page into column-oriented lists.
books_dict = {"Title": [], "Price": [], "Rating": []}
for book in books_html:
    title = book.find("h3").find("a").attrs["title"]
    # Drop the first character of the price text before converting to float.
    price = float(book.find("p", class_="price_color").text[1:])
    rating = parse_rating(book.find("p", class_="star-rating").attrs["class"])
    books_dict["Title"].append(title)
    books_dict["Price"].append(price)
    books_dict["Rating"].append(rating)

books_dict
len(books_dict)  # You should have 3 key:value pairs
len(books_dict["Title"])  # Each value should contain 20 elements from the 20 books, as many as on the web page!

books_df = pd.DataFrame.from_dict(books_dict)
books_df

# Originally the notebook command `pip install XlsxWriter`. Outside IPython
# nothing is installed, so an Excel writer engine must already be available
# for to_excel() to succeed.
_run_ipython_magic("pip", "install XlsxWriter")
books_df.to_excel("books.xlsx", sheet_name="Books")
Preview:
Download PNG
Download JPEG
Download SVG
Tip: You can change the style, width & colours of the snippet with the inspect tool before clicking Download!
Click to optimize width for Twitter