5.7 selecting_elements_with_a_dropdown.py

PHOTO EMBED

Wed Jan 31 2024 19:50:47 GMT+0000 (Coordinated Universal Time)

Saved by @mnis00014 #python #selenium #scraping

# Import necessary libraries
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
from selenium.common.exceptions import NoSuchElementException, TimeoutException

# Set the path to chromedriver.exe
path = r"C:\Drivers\chromedriver-win64\chromedriver.exe"
website = "https://www.adamchoi.co.uk/overs/detailed"

# Use the Service class to specify the path to chromedriver.exe
service = Service(executable_path=path)

# Use ChromeOptions for additional configurations
options = webdriver.ChromeOptions()

# Add the --headless option to run Chrome in headless mode (optional)
# options.add_argument("--headless")

# Add the --detach option to keep the browser open after the script finishes
options.add_experimental_option("detach", True)

# Initialize the WebDriver with the specified service and options
driver = webdriver.Chrome(service=service, options=options)

# Navigate to the specified website
driver.get(website)

try:
    # Wait for the "All matches" button to be clickable
    all_matches_button = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.XPATH, '//label[@analytics-event="All matches"]'))
    )

    # Click on the "All matches" button
    all_matches_button.click()

    # Wait for the matches to load (adjust the timeout as needed)
    WebDriverWait(driver, 10).until(
        EC.presence_of_all_elements_located((By.TAG_NAME, "tr"))
    )

    # Get all match elements
    matches = driver.find_elements(By.TAG_NAME, "tr")

    date = []
    home_team = []
    score = []
    away_team = []

    # Extract data from each match
    for match in matches:
        date.append(match.find_element("xpath", "./td[1]").text)
        home_team.append(match.find_element("xpath", "./td[2]").text)
        score.append(match.find_element("xpath", "./td[3]").text)
        away_team.append(match.find_element("xpath", "./td[4]").text)

except (NoSuchElementException, TimeoutException) as e:
    print(f"Error: {e}")

finally:
    # Close the WebDriver when you're done
    driver.quit()

# Create a DataFrame from the scraped data
df = pd.DataFrame({'date': date,
                   'home_team': home_team,
                   'score': score,
                   'away_team': away_team})

# Save the DataFrame to a CSV file
df.to_csv('football_data.csv', index=False)

# Print the DataFrame
print(df)
content_copyCOPY