"""Scrape The Infatuation's New York guides into ``output.csv``.

Loads the guides index page, dismisses the newsletter modal and bottom
overlay if present, clicks "load more" up to ten times, then visits each
guide page and records its title, URL, restaurant names, and publication
date as one CSV row per article.
"""

import csv
import time

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

URL = "https://www.theinfatuation.com/new-york/guides"
LOAD_MORE_CLICKS = 10       # how many times to expand the guide list
RENDER_WAIT_SECONDS = 5     # pause after each click so new cards render
WAIT_TIMEOUT_SECONDS = 10   # explicit-wait timeout for clickable elements


def _dismiss_popups(driver):
    """Close the newsletter modal and the bottom overlay if they appear.

    Both dismissals are best-effort: a TimeoutException simply means the
    pop-up never showed, so we log and continue.
    """
    try:
        no_thanks_button = WebDriverWait(driver, WAIT_TIMEOUT_SECONDS).until(
            EC.element_to_be_clickable(
                (By.CSS_SELECTOR,
                 "button[data-click='close'][aria-label='No thanks; close the dialog']"))
        )
        no_thanks_button.click()
        print("Closed the modal!")
    except TimeoutException:
        print("No modal to close!")

    try:
        overlay_close_button = WebDriverWait(driver, WAIT_TIMEOUT_SECONDS).until(
            EC.element_to_be_clickable((By.ID, "bx-close-inside-2272826"))
        )
        # JS click: the overlay's close button may not be directly clickable.
        driver.execute_script("arguments[0].click();", overlay_close_button)
        print("Closed the overlay!")
    except TimeoutException:
        print("Overlay not found or already closed.")


def _expand_guide_list(driver):
    """Click the "load more" button up to LOAD_MORE_CLICKS times.

    Stops early (without raising) once the button can no longer be found,
    which usually means the full list has been loaded.
    """
    for attempt in range(LOAD_MORE_CLICKS):
        try:
            load_more_button = WebDriverWait(driver, WAIT_TIMEOUT_SECONDS).until(
                EC.element_to_be_clickable((By.CSS_SELECTOR, "div.css-xi606m"))
            )
            load_more_button.click()
            print(f"Clicked load more {attempt + 1} times.")
            # Allow the newly requested cards to be rendered before the
            # next click (the button reappears before the data does).
            time.sleep(RENDER_WAIT_SECONDS)
        except TimeoutException:
            print(f"Couldn't click 'load more' the {attempt + 1}-th time. "
                  "Continuing with the available data.")
            break


def _collect_guide_links(driver):
    """Return the href of every guide card currently on the index page."""
    guide_elements = driver.find_elements(
        By.CSS_SELECTOR, "a[data-testid='detailedStory-link']")
    return [guide.get_attribute("href") for guide in guide_elements]


def _scrape_guides(driver, links, writer):
    """Visit each guide URL and write one CSV row per article.

    Articles missing any expected element are logged and skipped rather
    than aborting the whole run.
    """
    for link in links:
        driver.get(link)
        try:
            title = driver.find_element(
                By.CSS_SELECTOR, "h1 > .styles_title__QfDF5").text
            date_element = driver.find_element(
                By.CSS_SELECTOR, "div.styles_contributorsList__EKq26 time")
            date = date_element.get_attribute("datetime")
            restaurants = [
                restaurant.text
                for restaurant in driver.find_elements(
                    By.CSS_SELECTOR,
                    "h2.chakra-heading.styles_mainHeading__e4VAy.flatplan_venue-heading.css-67umdg")
            ]
            print("Article Title:", title)
            print("URL:", link)
            print("Restaurants:", ", ".join(restaurants))
            print("Date:", date)
            print("------")
            writer.writerow({"Article Title": title,
                             "URL": link,
                             "Restaurants": ", ".join(restaurants),
                             "Date": date})
        except NoSuchElementException:
            print(f"Couldn't retrieve complete data for article at URL: {link}")


def main():
    """Drive the full scrape: open browser, expand list, export CSV."""
    chrome_options = webdriver.ChromeOptions()
    # "detach" keeps the browser window open after the driver script exits.
    chrome_options.add_experimental_option("detach", True)
    print("Starting the browser...")
    driver = webdriver.Chrome(options=chrome_options)
    try:
        driver.get(URL)
        _dismiss_popups(driver)
        _expand_guide_list(driver)
        links = _collect_guide_links(driver)
        with open("output.csv", "w", newline="", encoding="utf-8") as csvfile:
            fieldnames = ["Article Title", "URL", "Restaurants", "Date"]
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            _scrape_guides(driver, links, writer)
    finally:
        # Release the WebDriver session even if scraping raised.
        driver.quit()


if __name__ == "__main__":
    main()
# NOTE(review): the lines below are non-code residue from the code-screenshot
# tool this snippet was copied out of (preview/download links and usage tips).
# They are commented out so the file remains valid Python; they carry no
# program content and can be deleted outright.
# Preview:
# downloadDownload PNG
# downloadDownload JPEG
# downloadDownload SVG
# Tip: You can change the style, width & colours of the snippet with the inspect tool before clicking Download!
# Click to optimize width for Twitter