Image extraction from Google HTML

PHOTO EMBED

Tue Feb 09 2021 10:03:03 GMT+0000 (Coordinated Universal Time)

Saved by @ilyazub #json #ruby #webscraping #serpapi

# Builds, and memoizes in @extracted_thumbnails, a Hash that maps each
# thumbnail id found in `html` to the text captured alongside it.
#
# The regex list is taken from JS_IMAGE_REGEXES: the value of the first entry
# whose key is a prefix of `engine`, falling back to JS_IMAGE_REGEXES[:all].
# Every regex is expected to define the named captures THUMBNAIL_CAPTURE_NAME
# and THUMBNAIL_ID_CAPTURE_NAME. The id capture may hold several
# comma-separated ids; each one is stripped of single quotes, whitespace-
# squished and mapped to the same thumbnail capture. When an id occurs more
# than once, the last occurrence wins.
#
# NOTE(review): `engine` is assumed to be a String and `html` the raw page
# source; both are defined outside this method, so confirm against the caller.
#
# @return [Hash{String => String}] thumbnail id => captured thumbnail text;
#   empty when nothing matched
def extracted_thumbnails
  # Check for nil instead of `present?` so that an empty result is cached too;
  # otherwise a page without thumbnails would be rescanned on every call.
  return @extracted_thumbnails unless @extracted_thumbnails.nil?

  engine_entry = JS_IMAGE_REGEXES.find { |key, _| engine.start_with?(key.to_s) }
  js_image_regexes = engine_entry&.last || JS_IMAGE_REGEXES[:all]

  # Fill a single hash in place rather than merging many one-entry hashes.
  @extracted_thumbnails = js_image_regexes.each_with_object({}) do |regex, thumbnails|
    capture_names = regex.names
    thumbnail_index = capture_names.index(THUMBNAIL_CAPTURE_NAME)
    thumbnail_id_index = capture_names.index(THUMBNAIL_ID_CAPTURE_NAME)

    # With capture groups present, String#scan yields the array of group
    # values, so the positions looked up in `regex.names` index into it.
    html.scan(regex) do |match|
      found_thumbnail = match[thumbnail_index]
      found_thumbnail_id = match[thumbnail_id_index]

      # An optional id group that did not take part in the match is nil.
      next if found_thumbnail_id.nil?

      found_thumbnail_id.split(",").each do |thumb|
        thumbnails[thumb.delete("'").squish] = found_thumbnail
      end
    end
  end
end
content_copyCOPY