Snippets Collections
# pip install requests beautifulsoup4 lxml

import os
import requests
from bs4 import BeautifulSoup

url = "https://www.google.com/"
response = requests.get(url)

if response.ok:
    soup = BeautifulSoup(response.text, "lxml")
    # soup.title is the <title> element; .get_text() strips the tags,
    # so there is no need to str() the node and remove "<title>" by hand.
    title = soup.title.get_text()
    print("The title is: " + title)

os.system("pause")  # Windows-only; keeps the console window open
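If lxml isn't installed, BeautifulSoup's bundled parser handles a page title just as well. A minimal variant, assuming only requests and beautifulsoup4 are available:

import requests
from bs4 import BeautifulSoup

response = requests.get("https://www.google.com/")
response.raise_for_status()  # fail loudly instead of silently skipping the parse

# "html.parser" ships with Python, so no extra parser install is needed
soup = BeautifulSoup(response.text, "html.parser")
print("The title is: " + soup.title.string)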

# Run with: python <script name>.py
import os

# https://github.com/serpapi/google-search-results-python
from serpapi import GoogleSearch

params = {
    "engine": "google",
    "q": "cute animals",
    "tbm": "nws",  # tbm=nws selects Google News results
    "api_key": os.getenv("API_KEY"),
}

search = GoogleSearch(params)

pages = search.pagination()

for result in pages:
    print(f"Current page: {result['serpapi_pagination']['current']}")

    for news_result in result["news_results"]:
        print(f"Title: {news_result['title']}\nLink: {news_result['link']}\n")
from bs4 import BeautifulSoup
import requests, urllib.parse  # lxml must also be installed for the "lxml" parser below


def print_extracted_data_from_url(url):
    headers = {
        "User-Agent":
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.19582"
    }
    response = requests.get(url, headers=headers).text

    soup = BeautifulSoup(response, 'lxml')

    print(f'Current page: {int(soup.select_one(".YyVfkd").text)}')
    print(f'Current URL: {url}')
    print()

    # NB: Google's generated class names (tF2Cxc, LC20lb, YyVfkd, ...) change often
    for container in soup.find_all('div', class_='tF2Cxc'):
        head_text = container.find('h3', class_='LC20lb DKV0Md').text
        head_sum = container.find('div', class_='IsZvec').text
        head_link = container.a['href']
        print(head_text)
        print(head_sum)
        print(head_link)
        print()

    return soup.select_one('a#pnnext')


def scrape():
    next_page_node = print_extracted_data_from_url(
        'https://www.google.com/search?hl=en-US&q=cute+animals')

    while next_page_node is not None:
        next_page_url = urllib.parse.urljoin('https://www.google.com',
                                             next_page_node['href'])

        next_page_node = print_extracted_data_from_url(next_page_url)

scrape()
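Requesting Google result pages back to back like this is a quick way to get blocked or served a CAPTCHA. A hedged variation on the loop above that adds a random delay and a page cap; both numbers are arbitrary choices, not documented limits:

import random, time

def scrape(max_pages=5):
    next_page_node = print_extracted_data_from_url(
        'https://www.google.com/search?hl=en-US&q=cute+animals')

    pages_fetched = 1
    while next_page_node is not None and pages_fetched < max_pages:
        time.sleep(random.uniform(2, 6))  # polite, randomized pause between pages
        next_page_url = urllib.parse.urljoin('https://www.google.com',
                                             next_page_node['href'])
        next_page_node = print_extracted_data_from_url(next_page_url)
        pages_fetched += 1

scrape()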
# https://regex101.com/r/nOoiJ6/2
PAGINATION_PARAMETERS_REGEX = %r{
  \A                                      # Start of string
  (?:\s*)                                 # initial possible whitespace
  @(?<latitude>[-+]?\d{1,2}(?:[.,]\d+)?)  # latitude: @10.78472
  (?:\s*,\s*)                             # separator between latitude and longitude
  (?<longitude>[-+]?\d{1,3}(?:[.,]\d+)?)  # longitude: -110
  (?:\s*,\s*)                             # separator between longitude and zoom
  (?<zoom>\d{1,2}(?:[.,]\d+)?)z           # zoom: 9.22
  \z                                      # End of string
}x

EARTH_RADIUS_IN_METERS = 6371010
TILE_SIZE = 256
SCREEN_PIXEL_HEIGHT = 768
RADIUS_X_PIXEL_HEIGHT = 27.3611 * EARTH_RADIUS_IN_METERS * SCREEN_PIXEL_HEIGHT

# ll is the "@latitude,longitude,zoomz" fragment from a Google Maps URL
def pagination(ll, start)
  extracted_parameters = ll.match(PAGINATION_PARAMETERS_REGEX)

  return "" unless extracted_parameters

  "!4m8!1m3!1d" +
    altitude(extracted_parameters[:zoom].to_f, extracted_parameters[:latitude].to_f) +
    "!2d" +
    extracted_parameters[:longitude] +
    "!3d" +
    extracted_parameters[:latitude] +
    "!3m2!1i1024!2i768!4f13.1!7i20!8i" +
    (start || "0") +
    "!10b1!12m25!1m1!18b1!2m3!5m1!6e2!20e3!6m16!4b1!23b1!26i1!27i1!41i2!45b1!49b1!63m0!67b1!73m0!74i150000!75b1!89b1!105b1!109b1!110m0!10b1!16b1!19m4!2m3!1i360!2i120!4i8!20m65!2m2!1i203!2i100!3m2!2i4!5b1!6m6!1m2!1i86!2i86!1m2!1i408!2i240!7m50!1m3!1e1!2b0!3e3!1m3!1e2!2b1!3e2!1m3!1e2!2b0!3e3!1m3!1e3!2b0!3e3!1m3!1e8!2b0!3e3!1m3!1e3!2b1!3e2!1m3!1e10!2b0!3e3!1m3!1e10!2b1!3e2!1m3!1e9!2b1!3e2!1m3!1e10!2b0!3e3!1m3!1e10!2b1!3e2!1m3!1e10!2b0!3e4!2b1!4b1!9b0!22m3!1s!2z!7e81!24m55!1m15!13m7!2b1!3b1!4b1!6i1!8b1!9b1!20b0!18m6!3b1!4b1!5b1!6b1!13b0!14b0!2b1!5m5!2b1!3b1!5b1!6b1!7b1!10m1!8e3!14m1!3b1!17b1!20m4!1e3!1e6!1e14!1e15!24b1!25b1!26b1!29b1!30m1!2b1!36b1!43b1!52b1!54m1!1b1!55b1!56m2!1b1!3b1!65m5!3m4!1m3!1m2!1i224!2i298!89b1!26m4!2m3!1i80!2i92!4i8!30m28!1m6!1m2!1i0!2i0!2m2!1i458!2i768!1m6!1m2!1i974!2i0!2m2!1i1024!2i768!1m6!1m2!1i0!2i0!2m2!1i1024!2i20!1m6!1m2!1i0!2i748!2m2!1i1024!2i768!34m16!2b1!3b1!4b1!6b1!8m4!1b1!3b1!4b1!6b1!9b1!12b1!14b1!20b1!23b1!25b1!26b1!37m1!1e81!42b1!46m1!1e9!47m0!49m1!3b1!50m53!1m49!2m7!1u3!4s!5e1!9s!10m2!3m1!1e1!2m7!1u2!4s!5e1!9s!10m2!2m1!1e1!2m7!1u16!4s!5e1!9s!10m2!16m1!1e1!2m7!1u16!4s!5e1!9s!10m2!16m1!1e2!3m11!1u16!2m4!1m2!16m1!1e1!2s!2m4!1m2!16m1!1e2!2s!3m1!1u2!3m1!1u3!4BIAE!2e2!3m1!3b1!59B!65m0!69i540"
end

def altitude(zoom, latitude)
  ((RADIUS_X_PIXEL_HEIGHT * Math.cos((latitude * Math::PI) / 180)) / ((2 ** zoom) * TILE_SIZE)).to_s
end
const EARTH_RADIUS_IN_METERS = 6371010;
const TILE_SIZE = 256;
const SCREEN_PIXEL_HEIGHT = 768;

// Inverse of the Ruby altitude() above: recover the zoom level from an
// altitude and latitude. The 13.1 constant matches the !4f13.1 pb parameter.
const zoom = (altitude, latitude) =>
  Math.log(
    (1 / Math.tan(((Math.PI / 180) * 13.1) / 2)) *
      (SCREEN_PIXEL_HEIGHT / 2) * 2 * Math.PI /
      ((TILE_SIZE * altitude) / (EARTH_RADIUS_IN_METERS * Math.cos((Math.PI / 180) * latitude)))
  ) / Math.LN2;
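A quick sanity check that altitude() and zoom() really are inverses is to round-trip the example values from the regex comments. A Python sketch of that check, purely illustrative; the tiny drift comes from the rounded 27.3611 constant:

import math

EARTH_RADIUS_IN_METERS = 6371010
TILE_SIZE = 256
SCREEN_PIXEL_HEIGHT = 768
RADIUS_X_PIXEL_HEIGHT = 27.3611 * EARTH_RADIUS_IN_METERS * SCREEN_PIXEL_HEIGHT

def altitude(zoom, latitude):
    # Port of the Ruby altitude()
    return (RADIUS_X_PIXEL_HEIGHT * math.cos(math.radians(latitude))) / (2 ** zoom * TILE_SIZE)

def zoom_level(alt, latitude):
    # Port of the JS zoom()
    return math.log2(
        (1 / math.tan(math.radians(13.1 / 2))) * (SCREEN_PIXEL_HEIGHT / 2) * 2 * math.pi
        / (TILE_SIZE * alt / (EARTH_RADIUS_IN_METERS * math.cos(math.radians(latitude))))
    )

z, lat = 9.22, 10.78472                   # example values from the regex comments
print(zoom_level(altitude(z, lat), lat))  # ~9.22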
$ bundle exec rails runner 'puts Search.new(engine: :google_maps, q: "coffee", lat: 36.3996184, long: -113.9511419, alt: 2124931.1267513777, offset: 20, psi: "rZkJYJuoINHmrgTP1IVI").query_randomized' | xargs curl -s -k -x $HTTP_PROXY -A 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.46' - > f.txt

# "https://www.google.com/search?tbm=map&authuser=0&hl=en&gl=ua&pb=!4m8!1m3!1d2124931.1267513777!2d36.3996184!3d-113.9511419!3m2!1i1024!2i768!4f13.1!7i20!8i20!10b1!12m25!1m1!18b1!2m3!5m1!6e2!20e3!6m16!4b1!23b1!26i1!27i1!41i2!45b1!49b1!63m0!67b1!73m0!74i150000!75b1!89b1!105b1!109b1!110m0!10b1!16b1!19m4!2m3!1i360!2i120!4i8!20m65!2m2!1i203!2i100!3m2!2i4!5b1!6m6!1m2!1i86!2i86!1m2!1i408!2i240!7m50!1m3!1e1!2b0!3e3!1m3!1e2!2b1!3e2!1m3!1e2!2b0!3e3!1m3!1e3!2b0!3e3!1m3!1e8!2b0!3e3!1m3!1e3!2b1!3e2!1m3!1e10!2b0!3e3!1m3!1e10!2b1!3e2!1m3!1e9!2b1!3e2!1m3!1e10!2b0!3e3!1m3!1e10!2b1!3e2!1m3!1e10!2b0!3e4!2b1!4b1!9b0!22m3!1sgpYJYKSwMsH9rgT0_7nAAg!2zMWk6Mix0OjEyNjk2LGU6MSxwOmdwWUpZS1N3TXNIOXJnVDBfN25BQWc6MjM!7e81!24m55!1m15!13m7!2b1!3b1!4b1!6i1!8b1!9b1!20b0!18m6!3b1!4b1!5b1!6b1!13b0!14b0!2b1!5m5!2b1!3b1!5b1!6b1!7b1!10m1!8e3!14m1!3b1!17b1!20m4!1e3!1e6!1e14!1e15!24b1!25b1!26b1!29b1!30m1!2b1!36b1!43b1!52b1!54m1!1b1!55b1!56m2!1b1!3b1!65m5!3m4!1m3!1m2!1i224!2i298!89b1!26m4!2m3!1i80!2i92!4i8!30m28!1m6!1m2!1i0!2i0!2m2!1i458!2i768!1m6!1m2!1i974!2i0!2m2!1i1024!2i768!1m6!1m2!1i0!2i0!2m2!1i1024!2i20!1m6!1m2!1i0!2i748!2m2!1i1024!2i768!34m17!2b1!3b1!4b1!6b1!7b1!8m4!1b1!3b1!4b1!6b1!9b1!12b1!14b1!20b1!23b1!25b1!26b1!37m1!1e81!42b1!46m1!1e9!47m0!49m1!3b1!50m72!1m68!2m7!1u3!4sOpen+now!5e1!9s0ahUKEwizhPrmpK3uAhXBvosKHfR_DigQ_KkBCMgJKBY!10m2!3m1!1e1!2m7!1u2!4sTop+rated!5e1!9s0ahUKEwizhPrmpK3uAhXBvosKHfR_DigQ_KkBCMkJKBc!10m2!2m1!1e1!2m7!1u1!4sCheap!5e1!9s0ahUKEwizhPrmpK3uAhXBvosKHfR_DigQ_KkBCMoJKBg!10m2!1m1!1e1!2m7!1u1!4sUpscale!5e1!9s0ahUKEwizhPrmpK3uAhXBvosKHfR_DigQ_KkBCMsJKBk!10m2!1m1!1e2!2m7!1u16!4sVisited!5e1!9s0ahUKEwizhPrmpK3uAhXBvosKHfR_DigQ_KkBCMwJKBo!10m2!16m1!1e1!2m7!1u16!4sHaven%27t+visited!5e1!9s0ahUKEwizhPrmpK3uAhXBvosKHfR_DigQ_KkBCM0JKBs!10m2!16m1!1e2!3m11!1u16!2m4!1m2!16m1!1e1!2sVisited!2m4!1m2!16m1!1e2!2sHaven%27t+visited!3m1!1u2!3m2!1u1!3e0!3m1!1u3!4BIAE!2e2!3m1!3b1!59BQ2dBd0Fn!65m0!69i540&q=coffee&tch=1&ech=1&psi=rZkJYJuoINHmrgTP1IVI.1611241093531.1"

# Hit next page in browser, copy URL and curl it.

$ curl -s -k -x $HTTP_PROXY -A 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.46' 'https://www.google.com/search?tbm=map&authuser=0&hl=en&gl=ua&pb=!4m8!1m3!1d2124931.1267513777!2d-113.9511419!3d36.3996184!3m2!1i1024!2i768!4f13.1!7i20!8i20!10b1!12m25!1m1!18b1!2m3!5m1!6e2!20e3!6m16!4b1!23b1!26i1!27i1!41i2!45b1!49b1!63m0!67b1!73m0!74i150000!75b1!89b1!105b1!109b1!110m0!10b1!16b1!19m4!2m3!1i360!2i120!4i8!20m65!2m2!1i203!2i100!3m2!2i4!5b1!6m6!1m2!1i86!2i86!1m2!1i408!2i240!7m50!1m3!1e1!2b0!3e3!1m3!1e2!2b1!3e2!1m3!1e2!2b0!3e3!1m3!1e3!2b0!3e3!1m3!1e8!2b0!3e3!1m3!1e3!2b1!3e2!1m3!1e10!2b0!3e3!1m3!1e10!2b1!3e2!1m3!1e9!2b1!3e2!1m3!1e10!2b0!3e3!1m3!1e10!2b1!3e2!1m3!1e10!2b0!3e4!2b1!4b1!9b0!22m3!1srZkJYJuoINHmrgTP1IVI!2zMWk6Mix0OjEyNjk2LGU6MSxwOnJaa0pZSnVvSU5IbXJnVFAxSVZJOjIz!7e81!24m55!1m15!13m7!2b1!3b1!4b1!6i1!8b1!9b1!20b0!18m6!3b1!4b1!5b1!6b1!13b0!14b0!2b1!5m5!2b1!3b1!5b1!6b1!7b1!10m1!8e3!14m1!3b1!17b1!20m4!1e3!1e6!1e14!1e15!24b1!25b1!26b1!29b1!30m1!2b1!36b1!43b1!52b1!54m1!1b1!55b1!56m2!1b1!3b1!65m5!3m4!1m3!1m2!1i224!2i298!89b1!26m4!2m3!1i80!2i92!4i8!30m28!1m6!1m2!1i0!2i0!2m2!1i458!2i768!1m6!1m2!1i974!2i0!2m2!1i1024!2i768!1m6!1m2!1i0!2i0!2m2!1i1024!2i20!1m6!1m2!1i0!2i748!2m2!1i1024!2i768!34m17!2b1!3b1!4b1!6b1!7b1!8m4!1b1!3b1!4b1!6b1!9b1!12b1!14b1!20b1!23b1!25b1!26b1!37m1!1e81!42b1!46m1!1e9!47m0!49m1!3b1!50m72!1m68!2m7!1u3!4sOpen+now!5e1!9s0ahUKEwig-8Lpp63uAhVRs4sKHU9qAQkQ_KkBCN8KKBY!10m2!3m1!1e1!2m7!1u2!4sTop+rated!5e1!9s0ahUKEwig-8Lpp63uAhVRs4sKHU9qAQkQ_KkBCOAKKBc!10m2!2m1!1e1!2m7!1u1!4sCheap!5e1!9s0ahUKEwig-8Lpp63uAhVRs4sKHU9qAQkQ_KkBCOEKKBg!10m2!1m1!1e1!2m7!1u1!4sUpscale!5e1!9s0ahUKEwig-8Lpp63uAhVRs4sKHU9qAQkQ_KkBCOIKKBk!10m2!1m1!1e2!2m7!1u16!4sVisited!5e1!9s0ahUKEwig-8Lpp63uAhVRs4sKHU9qAQkQ_KkBCOMKKBo!10m2!16m1!1e1!2m7!1u16!4sHaven%27t+visited!5e1!9s0ahUKEwig-8Lpp63uAhVRs4sKHU9qAQkQ_KkBCOQKKBs!10m2!16m1!1e2!3m11!1u16!2m4!1m2!16m1!1e1!2sVisited!2m4!1m2!16m1!1e2!2sHaven%27t+visited!3m1!1u2!3m2!1u1!3e0!3m1!1u3!4BIAE!2e2!3m1!3b1!59BQ2dBd0Fn!65m0!69i540&q=coffee&tch=1&ech=1&psi=rZkJYJuoINHmrgTP1IVI.1611241905488.1' > correct.txt

# Compare f.txt and correct.txt in a text editor - they're almost the same.
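Rather than eyeballing the two dumps in an editor, a unified diff surfaces the few differing pb segments directly. A sketch using Python's stdlib difflib against the file names above:

import difflib

with open("f.txt") as f, open("correct.txt") as c:
    diff = difflib.unified_diff(f.readlines(), c.readlines(),
                                fromfile="f.txt", tofile="correct.txt")

print("".join(diff))  # only the differing hunks are emitted, with context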
if offset
  long ||= -73.91476977539236
  lat ||= 40.68525694561602
  alt ||= 120027.44487325678

  offset ||= 40 # NB: never assigns here, since this branch only runs when offset is truthy

  psi ||= "b24JYPPGOoaJrwTXlbHACw"

  google_query = "#{query_scheme_and_domain}/search?tbm=map&authuser=0&hl=en&gl=ua&pb=!4m12!1m3!1d#{alt}!2d#{lat}!3d#{long}!2m3!1f0!2f0!3f0!3m2!1i1920!2i549!4f13.1!7i20!8i#{offset}!10b1!12m8!1m1!18b1!2m3!5m1!6e2!20e3!10b1!16b1!19m4!2m3!1i360!2i120!4i8!20m65!2m2!1i203!2i100!3m2!2i4!5b1!6m6!1m2!1i86!2i86!1m2!1i408!2i240!7m50!1m3!1e1!2b0!3e3!1m3!1e2!2b1!3e2!1m3!1e2!2b0!3e3!1m3!1e3!2b0!3e3!1m3!1e8!2b0!3e3!1m3!1e3!2b1!3e2!1m3!1e10!2b0!3e3!1m3!1e10!2b1!3e2!1m3!1e9!2b1!3e2!1m3!1e10!2b0!3e3!1m3!1e10!2b1!3e2!1m3!1e10!2b0!3e4!2b1!4b1!9b0!22m3!1s#{psi}!2s1i%3A2%2Ct%3A12696%2Ce%3A1%2Cp%3A#{psi}%3A1273!7e81!24m56!1m16!13m7!2b1!3b1!4b1!6i1!8b1!9b1!20b0!18m7!3b1!4b1!5b1!6b1!9b1!13b0!14b0!2b1!5m5!2b1!3b1!5b1!6b1!7b1!10m1!8e3!14m1!3b1!17b1!20m4!1e3!1e6!1e14!1e15!24b1!25b1!26b1!29b1!30m1!2b1!36b1!43b1!52b1!54m1!1b1!55b1!56m2!1b1!3b1!65m5!3m4!1m3!1m2!1i224!2i298!89b1!26m4!2m3!1i80!2i92!4i8!30m28!1m6!1m2!1i0!2i0!2m2!1i458!2i549!1m6!1m2!1i1870!2i0!2m2!1i1920!2i549!1m6!1m2!1i0!2i0!2m2!1i1920!2i20!1m6!1m2!1i0!2i529!2m2!1i1920!2i549!31b1!34m16!2b1!3b1!4b1!6b1!8m4!1b1!3b1!4b1!6b1!9b1!12b1!14b1!20b1!23b1!25b1!26b1!37m1!1e81!42b1!46m1!1e9!47m0!49m1!3b1!50m73!1m68!2m7!1u3!4sOpen+now!5e1!9s0ahUKEwiPrMCgia3uAhUjlYsKHanUBmYQ_KkBCNUJKBg!10m2!3m1!1e1!2m7!1u2!4sTop+rated!5e1!9s0ahUKEwiPrMCgia3uAhUjlYsKHanUBmYQ_KkBCNYJKBk!10m2!2m1!1e1!2m7!1u1!4sCheap!5e1!9s0ahUKEwiPrMCgia3uAhUjlYsKHanUBmYQ_KkBCNcJKBo!10m2!1m1!1e1!2m7!1u1!4sUpscale!5e1!9s0ahUKEwiPrMCgia3uAhUjlYsKHanUBmYQ_KkBCNgJKBs!10m2!1m1!1e2!2m7!1u16!4sVisited!5e1!9s0ahUKEwiPrMCgia3uAhUjlYsKHanUBmYQ_KkBCNkJKBw!10m2!16m1!1e1!2m7!1u16!4sHaven%27t+visited!5e1!9s0ahUKEwiPrMCgia3uAhUjlYsKHanUBmYQ_KkBCNoJKB0!10m2!16m1!1e2!3m1!1u3!3m1!1u2!3m2!1u1!3e1!3m11!1u16!2m4!1m2!16m1!1e1!2sVisited!2m4!1m2!16m1!1e2!2sHaven%27t+visited!4BIAE!2e2!3m2!1b1!3b1!59BQ2dBd0Fn!65m0!69i540&q=#{safe_query}&tch=1&ech=1&psi=#{psi}.1611230833287.1"
end
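The opaque pb value is a packed sequence of !<index><type><value> segments. From the requests above: !1d carries the altitude, !2d the longitude, !3d the latitude, and !8i the result offset used for pagination (20 results per page, per !7i20). A minimal Python sketch that assembles just that prefix; the long static tail of the pb string is left out:

def pb_prefix(altitude, longitude, latitude, offset=0):
    # !1d float altitude, !2d/!3d coordinates, !1i1024!2i768 viewport,
    # !8i result offset (steps of 20 per page)
    return (f"!4m8!1m3!1d{altitude}!2d{longitude}!3d{latitude}"
            f"!3m2!1i1024!2i768!4f13.1!7i20!8i{offset}")

print(pb_prefix(2124931.1267513777, -113.9511419, 36.3996184, offset=20))
# -> !4m8!1m3!1d2124931.1267513777!2d-113.9511419!3d36.3996184!3m2!1i1024!2i768!4f13.1!7i20!8i20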
# Resolve an image node to its inline-JS thumbnail via the node's DOM id
def extract_js_image(image_node)
  return unless image_node
  
  thumbnail_id = image_node["id"]
  return unless thumbnail_id
  
  if (found_thumbnail = extracted_thumbnails[thumbnail_id])
    JSUtils.unescape(found_thumbnail)
  end
end
def extracted_thumbnails
  return @extracted_thumbnails if @extracted_thumbnails.present?

  # Pick the regex set for this engine, falling back to the generic one
  js_image_regexes = JS_IMAGE_REGEXES.detect { |key, _| engine.starts_with?(key.to_s) }&.last || JS_IMAGE_REGEXES[:all]

  # Scan the raw HTML once per regex and build an id => thumbnail lookup hash,
  # so later extract_js_image calls are O(1) instead of re-scanning the page
  @extracted_thumbnails = js_image_regexes.collect { |regex|
    regex_capture_names = regex.names
    thumbnail_index = regex_capture_names.index(THUMBNAIL_CAPTURE_NAME)
    thumbnail_id_index = regex_capture_names.index(THUMBNAIL_ID_CAPTURE_NAME)

    html.scan(regex).collect do |match|
      found_thumbnail = match[thumbnail_index]
      found_thumbnail_id = match[thumbnail_id_index]

      found_thumbnail_id.split(",").map { |thumb| Hash[thumb.tr("'", "").squish, found_thumbnail] }
    end
  }.flatten.inject(:merge) || {}
end
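The scan-once, look-up-many pattern above is the optimization described in the linked SerpApi article: build an id => thumbnail hash in a single pass over the HTML, then answer every per-image lookup from the hash. A hedged Python sketch of the same idea; the regex and HTML shape are invented for illustration:

import re
from functools import cached_property

class ThumbnailExtractor:
    # Illustrative pattern only; real Google markup uses different, shifting formats
    JS_IMAGE_REGEX = re.compile(
        r"var s='(?P<thumbnail>data:image[^']+)';var ii=\['(?P<ids>[^\]]+)'\]")

    def __init__(self, html):
        self.html = html

    @cached_property
    def thumbnails(self):
        # One pass over the document builds the id -> data-URI map
        found = {}
        for match in self.JS_IMAGE_REGEX.finditer(self.html):
            for image_id in match["ids"].replace("'", "").split(","):
                found[image_id.strip()] = match["thumbnail"]
        return found

    def extract(self, image_id):
        # O(1) lookup instead of re-scanning the page for every image
        return self.thumbnails.get(image_id)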
# tmp/profiler.rb

require "ruby-prof"

profile = RubyProf.profile do
  search_params = {
    engine: "google", q: "roller blades", location: "Austin, United States",
    google_domain: "google.com", hl: "en", gl: "us", num: "500",
    device: "desktop", tbm: "shop", tbs: "p_ord:rv",
    file_path: "tmp/roller-blades.html"
  }

  Search.new(search_params).parse!
end

printer = RubyProf::FlatPrinter.new(profile)
printer.print($stdout, min_percent: 2)
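For anyone mirroring this profiling workflow in Python, the stdlib cProfile gives a comparable flat view. Search here is the same hypothetical stand-in as in the Ruby snippet, not a real library:

import cProfile
import pstats

profiler = cProfile.Profile()
profiler.enable()
Search(search_params).parse()  # hypothetical workload, mirroring the Ruby example
profiler.disable()

stats = pstats.Stats(profiler).sort_stats("cumulative")
stats.print_stats(20)  # top 20 entries, loosely analogous to min_percent: 2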
Snippet sources and tags

Wed Sep 07 2022 13:53:49 UTC | https://www.google.com/search?q | #python #webscraping #beautifulsoup4
Mon May 03 2021 14:16:22 UTC | https://replit.com/@DimitryZub1/Scrape-Google-News-with-Pagination-python-serpapi | #webscraping #googlenews #serpapi
Mon May 03 2021 13:43:50 UTC | https://stackoverflow.com/a/67071648/1291371 | #webscraping #googlenews
Tue Feb 09 2021 10:06:00 UTC | #json #ruby #webscraping #serpapi
Tue Feb 09 2021 10:03:03 UTC | #json #ruby #webscraping #serpapi
Tue Feb 09 2021 09:46:46 UTC | https://medium.com/serpapi/how-we-sped-up-data-extraction-from-html-from-3s-to-800ms-or-how-to-profile-and-optimize-ruby-3390d69bce91 | #ruby #webscraping #serpapi
