from bs4 import BeautifulSoup
import requests, lxml, urllib.parse
def _node_text(node):
    """Return the text of a BeautifulSoup node, or '' when the node is None."""
    return node.text if node is not None else ''


def print_extracted_data_from_url(url):
    """Fetch a search results page, print its results, and return the "next" link.

    Prints the current page number (when the page exposes one), the URL,
    and then for every result container its title, summary and link,
    each result followed by a blank line.

    Args:
        url: Absolute URL of the results page to download.

    Returns:
        The ``a#pnnext`` element of the page, or ``None`` when the page has
        no such element (i.e. there is no further page to follow).

    Raises:
        requests.RequestException: If the request times out, fails, or the
            server answers with an HTTP error status.
    """
    headers = {
        "User-Agent":
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.19582"
    }

    # Without a timeout requests may block forever on a stalled connection.
    response = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx instead of parsing an error page as results.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')

    # The page-number element is not guaranteed to be present, so guard the
    # lookup instead of dereferencing a possible None.
    page_marker = soup.select_one(".YyVfkd")
    if page_marker is not None:
        print(f'Current page: {int(page_marker.text)}')
    else:
        print('Current page: unknown')
    print(f'Current URL: {url}')
    print()

    for container in soup.find_all('div', class_='tF2Cxc'):
        # CSS selectors match the classes regardless of their order in the
        # attribute; a multi-class string passed to find() only matches the
        # exact attribute value.
        head_text = _node_text(container.select_one('h3.LC20lb.DKV0Md'))
        head_sum = _node_text(container.select_one('div.IsZvec'))
        anchor = container.a
        head_link = anchor.get('href', '') if anchor is not None else ''

        print(head_text)
        print(head_sum)
        print(head_link)
        print()

    return soup.select_one('a#pnnext')
def scrape(start_url='https://www.google.com/search?hl=en-US&q=cute+animals',
           max_pages=None):
    """Print search results page by page, following the "next" link.

    Args:
        start_url: URL of the first results page. Defaults to the
            "cute animals" Google query this script was written for.
        max_pages: Optional upper bound on the number of pages to fetch.
            ``None`` (the default) keeps following "next" links until a
            page has none.
    """
    current_url = start_url
    pages_fetched = 0

    while current_url is not None:
        if max_pages is not None and pages_fetched >= max_pages:
            break

        next_page_node = print_extracted_data_from_url(current_url)
        pages_fetched += 1

        if next_page_node is None:
            break

        # A "next" anchor without an href gives nothing to follow.
        next_href = next_page_node.get('href')
        if not next_href:
            break

        # Resolve the (usually relative) href against the page it came from.
        current_url = urllib.parse.urljoin(current_url, next_href)
# Only start scraping when executed as a script, so that importing this
# module does not trigger network requests as a side effect.
if __name__ == "__main__":
    scrape()