# 5.7 selecting_elements_with_a_dropdown.py
# Saved Wed Jan 31 2024 19:50:47 GMT+0000 (Coordinated Universal Time)
# Saved by
# @mnis00014
#python
#selenium
#scraping
# Import necessary libraries
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
from selenium.common.exceptions import NoSuchElementException, TimeoutException
# Set the path to chromedriver.exe
path = r"C:\Drivers\chromedriver-win64\chromedriver.exe"
website = "https://www.adamchoi.co.uk/overs/detailed"

# Use the Service class to specify the path to chromedriver.exe
service = Service(executable_path=path)

# Use ChromeOptions for additional configurations
options = webdriver.ChromeOptions()
# options.add_argument("--headless")  # run Chrome without a visible window (optional)
# Keep the browser open after the script finishes (handy while debugging)
options.add_experimental_option("detach", True)

# Initialize the WebDriver and navigate to the target page
driver = webdriver.Chrome(service=service, options=options)
driver.get(website)

# Initialize the result lists BEFORE the try block so they always exist.
# Previously they were created inside `try`, so a TimeoutException on the
# first wait led to a NameError when building the DataFrame below.
date = []
home_team = []
score = []
away_team = []

try:
    # Wait for the "All matches" toggle to be clickable, then click it
    # so the page shows every match instead of the default subset.
    all_matches_button = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.XPATH, '//label[@analytics-event="All matches"]'))
    )
    all_matches_button.click()

    # Wait for the match rows to load (adjust the timeout as needed)
    WebDriverWait(driver, 10).until(
        EC.presence_of_all_elements_located((By.TAG_NAME, "tr"))
    )

    # Extract one record per table row. Rows without four <td> cells
    # (e.g. header rows using <th>) are skipped instead of raising
    # NoSuchElementException and aborting the whole extraction.
    for match in driver.find_elements(By.TAG_NAME, "tr"):
        cells = match.find_elements(By.TAG_NAME, "td")
        if len(cells) < 4:
            continue
        date.append(cells[0].text)
        home_team.append(cells[1].text)
        score.append(cells[2].text)
        away_team.append(cells[3].text)
except (NoSuchElementException, TimeoutException) as e:
    print(f"Error: {e}")
finally:
    # Always release the browser session, even on failure
    driver.quit()

# Build the DataFrame from whatever was scraped (empty if scraping failed)
df = pd.DataFrame({'date': date,
                   'home_team': home_team,
                   'score': score,
                   'away_team': away_team})
# Save the DataFrame to a CSV file
df.to_csv('football_data.csv', index=False)
# Print the DataFrame
print(df)
# content_copyCOPY
# Comments