"""Fetch a Google Books search page and print the parsed results as JSON."""

import json
import re
from urllib.parse import urljoin

import requests
from parsel import Selector

GOOGLE_URL = "https://www.google.com"
SEARCH_URL = f"{GOOGLE_URL}/search"


def _absolute_url(base, href):
    """Resolve *href* against *base*; return None when *href* is None.

    ``urljoin`` handles root-relative (``/search?...``), query-only
    (``?q=...``) and already-absolute hrefs, so the result never ends up
    with a duplicated path segment or a literal ``None`` suffix.
    """
    if href is None:
        return None
    return urljoin(base, href)


def _decode_thumbnail(raw):
    """Apply two ``unicode-escape`` decoding passes to *raw*.

    Returns None when *raw* is None (no thumbnail matched for the result).
    NOTE(review): two passes are presumably needed because the page's own
    escapes are escaped once more by ``str()`` of the script list below;
    confirm against a live response.
    """
    if raw is None:
        return None
    once = bytes(raw, "ascii").decode("unicode-escape")
    return bytes(once, "ascii").decode("unicode-escape")


params = {
    "q": "richard branson",
    "tbm": "bks",
    "gl": "us",
    "hl": "en",
}

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.87 Safari/537.36",
}

response = requests.get(SEARCH_URL, params=params, headers=headers, timeout=30)
# Fail loudly on an HTTP error instead of parsing an error page into [].
response.raise_for_status()

selector = Selector(text=response.text)

books_results = []

# The pattern is matched against the repr of the list of <script> elements,
# which is why the quotes it looks for are backslash-escaped.
# https://regex101.com/r/mapBs4/1
book_thumbnails = re.findall(
    r"s=\\'data:image/jpg;base64,(.*?)\\'",
    str(selector.css("script").getall()),
    re.DOTALL,
)

# Thumbnails are paired with results by position. Iterate over the results
# (not a zip of both) so a result without a matched thumbnail is still kept.
remaining_thumbnails = iter(book_thumbnails)

for book_result in selector.css(".Yr5TG"):
    book_thumbnail = next(remaining_thumbnails, None)

    author_href = book_result.css(".N96wpd .fl::attr(href)").get()
    more_editions_href = book_result.css(
        ".R1n8Q a.yKioRe:nth-child(2)::attr(href)"
    ).get()

    books_results.append({
        "title": book_result.css(".DKV0Md::text").get(),
        "link": book_result.css(".bHexk a::attr(href)").get(),
        "displayed_link": book_result.css(".tjvcx::text").get(),
        "snippet": book_result.css(".cmlJmd span::text").get(),
        "author": book_result.css(".fl span::text").get(),
        "author_link": _absolute_url(SEARCH_URL, author_href),
        "date_published": book_result.css(".fl+ span::text").get(),
        "preview_link": book_result.css(
            ".R1n8Q a.yKioRe:nth-child(1)::attr(href)"
        ).get(),
        "more_editions_link": _absolute_url(GOOGLE_URL, more_editions_href),
        "thumbnail": _decode_thumbnail(book_thumbnail),
    })

print(json.dumps(books_results, indent=2))