Zapier - Python - Xing Scraper for Follower #

PHOTO

Mon Jun 02 2025 08:28:15 GMT+0000 (Coordinated Universal Time)

Saved by @abm #zapier #taconova #python #scraping

--Run Code 1--

import requests
import json

# Fetch the raw HTML of a XING company page through the ScrapingBot API and
# expose it to the next Zap step as `output`.
#
# Expected input data (mapped on the Code step):
#   username - ScrapingBot account name
#   apiKey   - ScrapingBot API key
# The credentials used to be hard-coded in this script; they are now read from
# the step's input data so that no secret lives in the source.
# NOTE(review): the key that was previously embedded here should be rotated.

# Page to scrape
url = 'https://www.xing.com/pages/taconovagmbh'

# API credentials (sent as HTTP basic auth below)
username = input_data.get('username')
apiKey = input_data.get('apiKey')
if not username or not apiKey:
    raise ValueError(
        "Missing ScrapingBot credentials: map 'username' and 'apiKey' "
        "in the step's input data"
    )

# ScrapingBot endpoint; presumably returns the unparsed HTML of the target page.
# NOTE(review): this is plain HTTP, so the basic-auth credentials travel
# unencrypted - confirm the API is reachable over HTTPS and switch if so.
apiUrl = "http://api.scraping-bot.io/scrape/raw-html"

# JSON body naming the page the API should fetch
payload = json.dumps({"url": url})
headers = {
    'Content-Type': "application/json"
}

# requests waits indefinitely unless a timeout is given; bound the call so a
# stalled API cannot hang the step (30 s is a starting point - tune as needed).
response = requests.post(
    apiUrl,
    data=payload,
    auth=(username, apiKey),
    headers=headers,
    timeout=30,
)

# Raise on 4xx/5xx instead of passing an error page downstream
response.raise_for_status()

# The output shape requires an ID field, so the whole response body is
# returned under a fixed placeholder id; real extraction happens in Run Code 2.
output = [{'id': '1', 'content': response.text}]

--Run Code 2 (input: html)--

import re
from datetime import datetime

# Parse the follower count and the page title out of the XING page HTML.
#
# Input : input_data['html'] - HTML text (treated as '' when absent)
# Output: output = {'followers': str or None,
#                   'pageTitle': str,
#                   'executionDate': 'YYYY-MM-DD HH:MM:SS'}
html = input_data.get('html', '')

# The follower figure sits in a span carrying the EntityInfoBlockValue class.
# Counts may be rendered with thousands separators ("1.234" / "1,234"); a
# digits-only pattern would skip such a value and could latch onto a later,
# unrelated numeric span, so separators are accepted here and stripped below.
followers_match = re.search(
    r'<span class="entity-infostyles__EntityInfoBlockValue-dyptuz-3.*?>(\d+(?:[.,]\d{3})*)</span>',
    html,
)

if followers_match:
    # Keep the value as a digit string, as before, minus any separators
    followers_value = re.sub(r'[.,]', '', followers_match.group(1))
else:
    followers_value = None  # Not found

output = {'followers': followers_value}

# Page title: DOTALL lets a <title> that spans several lines match,
# IGNORECASE tolerates <TITLE> markup.
title_match = re.search(r'<title[^>]*>(.*?)</title>', html, re.DOTALL | re.IGNORECASE)
if title_match:
    # Keep only the part before the first colon, trimmed
    output['pageTitle'] = title_match.group(1).split(':')[0].strip()
else:
    output['pageTitle'] = ''

# Timestamp of this run (naive local time of the executing machine)
output['executionDate'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

print(output)

COPY

Save snippets that work from anywhere online with our extensions

Comments

More like this

#zapier #taconova #python #scraping

Zapier - Python - Xing Scraper for Follower #

--Run Code 1--

import requests
import json

# Fetch the raw HTML of a XING company page through the ScrapingBot API and
# expose it to the next Zap step as `output`.
#
# Expected input data (mapped on the Code step):
#   username - ScrapingBot account name
#   apiKey   - ScrapingBot API key
# The credentials used to be hard-coded in this script; they are now read from
# the step's input data so that no secret lives in the source.
# NOTE(review): the key that was previously embedded here should be rotated.

# Page to scrape
url = 'https://www.xing.com/pages/taconovagmbh'

# API credentials (sent as HTTP basic auth below)
username = input_data.get('username')
apiKey = input_data.get('apiKey')
if not username or not apiKey:
    raise ValueError(
        "Missing ScrapingBot credentials: map 'username' and 'apiKey' "
        "in the step's input data"
    )

# ScrapingBot endpoint; presumably returns the unparsed HTML of the target page.
# NOTE(review): this is plain HTTP, so the basic-auth credentials travel
# unencrypted - confirm the API is reachable over HTTPS and switch if so.
apiUrl = "http://api.scraping-bot.io/scrape/raw-html"

# JSON body naming the page the API should fetch
payload = json.dumps({"url": url})
headers = {
    'Content-Type': "application/json"
}

# requests waits indefinitely unless a timeout is given; bound the call so a
# stalled API cannot hang the step (30 s is a starting point - tune as needed).
response = requests.post(
    apiUrl,
    data=payload,
    auth=(username, apiKey),
    headers=headers,
    timeout=30,
)

# Raise on 4xx/5xx instead of passing an error page downstream
response.raise_for_status()

# The output shape requires an ID field, so the whole response body is
# returned under a fixed placeholder id; real extraction happens in Run Code 2.
output = [{'id': '1', 'content': response.text}]

--Run Code 2 (input: html)--

import re
from datetime import datetime

# Parse the follower count and the page title out of the XING page HTML.
#
# Input : input_data['html'] - HTML text (treated as '' when absent)
# Output: output = {'followers': str or None,
#                   'pageTitle': str,
#                   'executionDate': 'YYYY-MM-DD HH:MM:SS'}
html = input_data.get('html', '')

# The follower figure sits in a span carrying the EntityInfoBlockValue class.
# Counts may be rendered with thousands separators ("1.234" / "1,234"); a
# digits-only pattern would skip such a value and could latch onto a later,
# unrelated numeric span, so separators are accepted here and stripped below.
followers_match = re.search(
    r'<span class="entity-infostyles__EntityInfoBlockValue-dyptuz-3.*?>(\d+(?:[.,]\d{3})*)</span>',
    html,
)

if followers_match:
    # Keep the value as a digit string, as before, minus any separators
    followers_value = re.sub(r'[.,]', '', followers_match.group(1))
else:
    followers_value = None  # Not found

output = {'followers': followers_value}

# Page title: DOTALL lets a <title> that spans several lines match,
# IGNORECASE tolerates <TITLE> markup.
title_match = re.search(r'<title[^>]*>(.*?)</title>', html, re.DOTALL | re.IGNORECASE)
if title_match:
    # Keep only the part before the first colon, trimmed
    output['pageTitle'] = title_match.group(1).split(':')[0].strip()
else:
    output['pageTitle'] = ''

# Timestamp of this run (naive local time of the executing machine)
output['executionDate'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

print(output)

#python #selenium #scraping

5.6 extracting_data_to_a_csv_file_with_pandas.py

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
import pandas as pd
import time

# Scrape the "All matches" table for Spain from adamchoi.co.uk and write the
# date / home team / score / away team columns to football_data.csv.
path = r"C:\Drivers\chromedriver-win64\chromedriver.exe"
website = "https://www.adamchoi.co.uk/overs/detailed"

# Use the Service class to specify the path to chromedriver.exe
service = Service(executable_path=path)

# Use ChromeOptions for additional configurations
options = webdriver.ChromeOptions()
options.add_experimental_option("detach", True)

# Initialize the WebDriver with the specified service and options
driver = webdriver.Chrome(service=service, options=options)

date = []
home_team = []
score = []
away_team = []

# Everything that talks to the browser runs under try/finally so the driver is
# shut down even when a lookup or click raises.
try:
    # Navigate to the specified website
    driver.get(website)

    all_matches_button = driver.find_element(By.XPATH, '//label[@analytics-event="All matches"]')
    all_matches_button.click()

    dropdown = Select(driver.find_element(By.ID, "country"))
    dropdown.select_by_visible_text('Spain')

    # Fixed pause to give the table time to reload after the country change
    time.sleep(3)

    matches = driver.find_elements(By.TAG_NAME, "tr")

    # One table row per match; the first four cells hold the fields we keep
    for match in matches:
        date.append(match.find_element(By.XPATH, "./td[1]").text)
        home_team.append(match.find_element(By.XPATH, "./td[2]").text)
        score.append(match.find_element(By.XPATH, "./td[3]").text)
        away_team.append(match.find_element(By.XPATH, "./td[4]").text)
finally:
    # Close the WebDriver when you're done
    driver.quit()

df = pd.DataFrame({'date': date,
                   'home_team': home_team,
                   'score': score,
                   'away_team': away_team})
df.to_csv('football_data.csv', index=False)
print(df)

#python #selenium #scraping

5.5 extracting_data_from_a_table.py

# ---------------------------    Chrome   ---------------------------------

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By

# Open the "All matches" view on adamchoi.co.uk, collect the first four cells
# of every table row and print each home team as it is read.
path = r"C:\Drivers\chromedriver-win64\chromedriver.exe"
website = "https://www.adamchoi.co.uk/overs/detailed"

# Use the Service class to specify the path to chromedriver.exe
service = Service(executable_path=path)

# Use ChromeOptions for additional configurations
options = webdriver.ChromeOptions()
# "detach" keeps the browser window open after the script ends
options.add_experimental_option("detach", True)

# Initialize the WebDriver with the specified service and options
driver = webdriver.Chrome(service=service, options=options)

# Navigate to the specified website
driver.get(website)

all_matches_button = driver.find_element(By.XPATH, '//label[@analytics-event="All matches"]')
all_matches_button.click()

matches = driver.find_elements(By.TAG_NAME, "tr")

date = []
home_team = []
score = []
away_team = []

for match in matches:
    date.append(match.find_element(By.XPATH, "./td[1]").text)
    # Read the home-team cell once and reuse it for both the list and the
    # console output (each find_element is a round trip to the browser).
    home = match.find_element(By.XPATH, "./td[2]").text
    home_team.append(home)
    print(home)
    score.append(match.find_element(By.XPATH, "./td[3]").text)
    away_team.append(match.find_element(By.XPATH, "./td[4]").text)

# The browser is intentionally left open for inspection; re-enable to close it.
# driver.quit()

#python #selenium #scraping

5.6 extracting_data_to_a_csv_file_with_pandas.py

# ---------------------------    Chrome   ---------------------------------

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
import pandas as pd
import time

# Scrape the "All matches" table for Spain from adamchoi.co.uk and write the
# date / home team / score / away team columns to football_data.csv.
path = r"C:\Drivers\chromedriver-win64\chromedriver.exe"
website = "https://www.adamchoi.co.uk/overs/detailed"

# Use the Service class to specify the path to chromedriver.exe
service = Service(executable_path=path)

# Use ChromeOptions for additional configurations
options = webdriver.ChromeOptions()
options.add_experimental_option("detach", True)

# Initialize the WebDriver with the specified service and options
driver = webdriver.Chrome(service=service, options=options)

date = []
home_team = []
score = []
away_team = []

# Everything that talks to the browser runs under try/finally so the driver is
# shut down even when a lookup or click raises.
try:
    # Navigate to the specified website
    driver.get(website)

    all_matches_button = driver.find_element(By.XPATH, '//label[@analytics-event="All matches"]')
    all_matches_button.click()

    dropdown = Select(driver.find_element(By.ID, "country"))
    dropdown.select_by_visible_text('Spain')

    # Fixed pause to give the table time to reload after the country change
    time.sleep(3)

    matches = driver.find_elements(By.TAG_NAME, "tr")

    # One table row per match; the first four cells hold the fields we keep
    for match in matches:
        date.append(match.find_element(By.XPATH, "./td[1]").text)
        home_team.append(match.find_element(By.XPATH, "./td[2]").text)
        score.append(match.find_element(By.XPATH, "./td[3]").text)
        away_team.append(match.find_element(By.XPATH, "./td[4]").text)
finally:
    # Close the WebDriver when you're done
    driver.quit()

df = pd.DataFrame({'date': date,
                   'home_team': home_team,
                   'score': score,
                   'away_team': away_team})
df.to_csv('football_data.csv', index=False)
print(df)

#python #selenium #scraping

5.7 selecting_elements_with_a_dropdown.py

# Import necessary libraries
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
from selenium.common.exceptions import NoSuchElementException, TimeoutException

# Scrape the "All matches" table from adamchoi.co.uk using explicit waits and
# save whatever rows were collected to football_data.csv.

# Set the path to chromedriver.exe
path = r"C:\Drivers\chromedriver-win64\chromedriver.exe"
website = "https://www.adamchoi.co.uk/overs/detailed"

# Use the Service class to specify the path to chromedriver.exe
service = Service(executable_path=path)

# Use ChromeOptions for additional configurations
options = webdriver.ChromeOptions()

# Add the --headless option to run Chrome in headless mode (optional)
# options.add_argument("--headless")

# Add the --detach option to keep the browser open after the script finishes
options.add_experimental_option("detach", True)

# Initialize the WebDriver with the specified service and options
driver = webdriver.Chrome(service=service, options=options)

# Created before the try block so the DataFrame below can always be built,
# even when a wait times out before any row has been read.
date = []
home_team = []
score = []
away_team = []

try:
    # Navigate to the specified website
    driver.get(website)

    # Wait for the "All matches" button to be clickable
    all_matches_button = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.XPATH, '//label[@analytics-event="All matches"]'))
    )

    # Click on the "All matches" button
    all_matches_button.click()

    # Wait for the matches to load (adjust the timeout as needed)
    WebDriverWait(driver, 10).until(
        EC.presence_of_all_elements_located((By.TAG_NAME, "tr"))
    )

    # Get all match elements
    matches = driver.find_elements(By.TAG_NAME, "tr")

    # Extract data from each match. All four cells are read first and only
    # then appended, so a failure part-way through a row cannot leave the
    # lists with different lengths (which pandas would reject).
    for match in matches:
        row_date = match.find_element(By.XPATH, "./td[1]").text
        row_home = match.find_element(By.XPATH, "./td[2]").text
        row_score = match.find_element(By.XPATH, "./td[3]").text
        row_away = match.find_element(By.XPATH, "./td[4]").text

        date.append(row_date)
        home_team.append(row_home)
        score.append(row_score)
        away_team.append(row_away)

except (NoSuchElementException, TimeoutException) as e:
    # Report the problem and fall through with the rows gathered so far
    print(f"Error: {e}")

finally:
    # Close the WebDriver when you're done
    driver.quit()

# Create a DataFrame from the scraped data
df = pd.DataFrame({'date': date,
                   'home_team': home_team,
                   'score': score,
                   'away_team': away_team})

# Save the DataFrame to a CSV file
df.to_csv('football_data.csv', index=False)

# Print the DataFrame
print(df)

#python #selenium #scraping

6.1 building_the_bot.py

# Handle pagination with Selenium
# Scrape Website (www.audible.com)

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException
import pandas as pd
import time

# Scrape title, author, run time and release date for every product listed on
# the Audible search page and save the result to amazon_audible.csv.
path = r"C:\Drivers\chromedriver-win64\chromedriver.exe"
website = "https://www.audible.com/search"

# Use the Service class to specify the path to chromedriver.exe
service = Service(executable_path=path)

# Use ChromeOptions for additional configurations
options = webdriver.ChromeOptions()
options.add_experimental_option("detach", True)

# Initialize the WebDriver with the specified service and options
driver = webdriver.Chrome(service=service, options=options)

# Navigate to the specific website
driver.get(website)

# Wait for some time to ensure the page is loaded
time.sleep(5)

try:
    # Wait for the container to be present
    container = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CLASS_NAME, 'adbl-impression-container'))
    )

    # Wait for the products to be present within the container
    products = WebDriverWait(container, 10).until(
        EC.presence_of_all_elements_located((By.XPATH, './/li[contains(@class, "productListItem")]'))
    )

    book_title = []
    author_name = []
    run_time = []
    release_date = []

    for product in products:
        # Read all four fields before storing any of them: a product with a
        # missing field is skipped as a whole, so the lists stay aligned.
        try:
            # Wait for the book title element to be present within each product
            book_title_elem = WebDriverWait(product, 5).until(
                EC.presence_of_element_located((By.XPATH, './/h3[contains(@class, "bc-heading")]'))
            )
            title_text = book_title_elem.text

            author_text = product.find_element(
                By.XPATH, './/li[contains(@class, "authorLabel")]'
            ).text
            run_time_text = product.find_element(
                By.XPATH, './/li[contains(@class, "runtimeLabel")]'
            ).text
            release_date_text = product.find_element(
                By.XPATH, './/li[contains(@class, "releaseDateLabel")]'
            ).text

        except TimeoutException:
            print("Timeout occurred while waiting for element within product.")
            continue

        except NoSuchElementException as exc:
            # find_element raises this (not TimeoutException) when a label is
            # absent; without this handler one incomplete product would abort
            # the whole run.
            print(f"Skipping product with a missing field: {exc}")
            continue

        book_title.append(title_text)
        author_name.append(author_text)
        run_time.append(run_time_text)
        release_date.append(release_date_text)

    # Create DataFrame and save to CSV
    df = pd.DataFrame({'book_title': book_title,
                       'author_name': author_name,
                       'run_time': run_time,
                       'release_date': release_date})

    df.to_csv('amazon_audible.csv', index=False)
    print(df)

except TimeoutException:
    print("Timeout occurred while waiting for container element.")
    # Handle the timeout situation here (e.g., retry navigating to the page or log the issue)

finally:
    # Quit the driver
    driver.quit()

#python #selenium #scraping

7.1 login_to_website

import os
import time

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
import pandas as pd

# Log in to Twitter through the web UI with Selenium.
#
# The account name and password are read from the environment variables
# TWITTER_USERNAME and TWITTER_PASSWORD. They used to be hard-coded in this
# script; secrets do not belong in source code.
# NOTE(review): the password that was previously embedded here should be changed.
login_name = os.environ.get("TWITTER_USERNAME")
login_password = os.environ.get("TWITTER_PASSWORD")
if not login_name or not login_password:
    # Fail before a browser is launched so nothing is left running
    raise RuntimeError(
        "Set the TWITTER_USERNAME and TWITTER_PASSWORD environment variables before running this script"
    )

web = 'https://twitter.com/'
path = r"C:\Drivers\chromedriver-win64\chromedriver.exe"
options = webdriver.ChromeOptions()
service = Service(executable_path=path)
# "detach" keeps the logged-in browser window open after the script ends
options.add_experimental_option("detach", True)
options.add_argument('window-size=1920x1080')

driver = webdriver.Chrome(service=service, options=options)
driver.get(web)  # Open the webpage

# Wait for the login button to be clickable
login_button = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.XPATH, '//a[contains(@href, "/login")]'))
)
login_button.click()
time.sleep(2)

# Wait for the username input field to be visible and then enter username
user_name = WebDriverWait(driver, 10).until(
    EC.visibility_of_element_located((By.XPATH, '//input[contains(@autocomplete, "username")]'))
)
user_name.send_keys(login_name)

# Wait for the next button to be clickable and then click
next_button = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.XPATH, '//div[contains(@role, "button")]//span[text()="Next"]'))
)
next_button.click()

time.sleep(2)

# Wait for the password input field to be visible and then enter password
password = WebDriverWait(driver, 10).until(
    EC.visibility_of_element_located((By.XPATH, '//input[contains(@autocomplete, "current-password")]'))
)
password.send_keys(login_password)

# Wait for the login button to be clickable and then click
login_button = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.XPATH, '//div[contains(@role, "button")]//span[text()="Log in"]'))
)
login_button.click()

# The browser is intentionally left open after login; re-enable to close it.
# driver.quit()

#zapier

Insert the current time your Zap runs into a field

{{zap_meta_human_now}}

#zapier

accessing nested JSON array data in Zapier Webhook | Zapier Community

I have gotten full access to the JSON array through the following steps:

1) Catch a raw webhook

2) Send the webhook data to a Code by Zapier step

3) Start with the following code and output the values you want to use.

var obj = {},

data = JSON.parse(inputData.data); //Data will now function like a json object



//Pull out your values

obj.value = data.user_column_data[1].string_value

return obj;

Zapier - Python - Xing Scraper for Follower #

Save snippets that work from anywhere online with our extensions

Comments

More like this

Browse more snippets >>

Embed code snippet