Scrape Google News with Pagination


Mon May 03 2021 13:43:50 GMT+0000 (Coordinated Universal Time)

Saved by @ilyazub #webscraping #googlenews

from bs4 import BeautifulSoup
import requests
import urllib.parse


def print_extracted_data_from_url(url):
    # Send a browser-like User-Agent; without one Google often returns
    # different markup or a block page.
    headers = {
        "User-Agent":
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.19582"
    }
    response = requests.get(url, headers=headers).text

    # lxml must be installed for BeautifulSoup to use it as the parser.
    soup = BeautifulSoup(response, 'lxml')

    # .YyVfkd is the highlighted (current) page number in the pagination bar.
    print(f'Current page: {int(soup.select_one(".YyVfkd").text)}')
    print(f'Current URL: {url}')
    print()

    # Each .tF2Cxc container holds one result: title, snippet, and link.
    for container in soup.find_all('div', class_='tF2Cxc'):
        head_text = container.find('h3', class_='LC20lb DKV0Md').text
        head_sum = container.find('div', class_='IsZvec').text
        head_link = container.a['href']
        print(head_text)
        print(head_sum)
        print(head_link)
        print()

    # The "Next" pagination link; None on the last results page.
    return soup.select_one('a#pnnext')


def scrape():
    next_page_node = print_extracted_data_from_url(
        'https://www.google.com/search?hl=en-US&q=cute+animals')

    # Follow the "Next" link until Google stops providing one.
    while next_page_node is not None:
        next_page_url = urllib.parse.urljoin('https://www.google.com',
                                             next_page_node['href'])

        next_page_node = print_extracted_data_from_url(next_page_url)


scrape()
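
The query above targets Google's regular results page. A minimal sketch (an assumption, not part of the original snippet) of pointing the same query at the News tab is to add the tbm=nws parameter; the News tab uses different result markup, so the CSS selectors in print_extracted_data_from_url would need adjusting before reusing it there.

# A minimal sketch, assuming the goal is the Google News tab: build the same
# search URL with tbm=nws appended. The News tab markup differs from organic
# results, so the selectors above would need to be adapted before reuse.
import urllib.parse

params = {"hl": "en-US", "q": "cute animals", "tbm": "nws"}
news_url = "https://www.google.com/search?" + urllib.parse.urlencode(params)
print(news_url)  # https://www.google.com/search?hl=en-US&q=cute+animals&tbm=nws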

By https://stackoverflow.com/users/15164646

https://stackoverflow.com/a/67071648/1291371