Zapier - Python - Xing Scraper for Follower #

PHOTO EMBED

Mon Jun 02 2025 08:28:15 GMT+0000 (Coordinated Universal Time)

Saved by @abm #zapier #taconova #python #scraping

--Run Code 1--

import requests
import json

# Zapier code step 1: fetch the raw HTML of a Xing company page through the
# ScrapingBot "raw-html" endpoint and expose it to later steps via `output`.

# Page to scrape and the ScrapingBot API credentials.
# NOTE(review): the credentials are hard-coded in a publicly saved snippet.
# Consider rotating the key and supplying it through the step's input data.
url = 'https://www.xing.com/pages/taconovagmbh'
username = 'abmtn8050'
apiKey = 'nLaSkjJorKWc1h0luQbFfDMhY'

# ScrapingBot endpoint that returns the unparsed HTML of the target page.
# NOTE(review): this is plain HTTP, so the Basic-auth credentials travel
# unencrypted -- confirm whether the API also accepts https:// and switch.
apiUrl = "http://api.scraping-bot.io/scrape/raw-html"

# The API expects a JSON body naming the page to fetch.
payload = json.dumps({"url": url})
headers = {
    'Content-Type': "application/json"
}

# Without a timeout, requests waits indefinitely on an unresponsive server;
# bound the wait so the step fails with a clear exception instead of hanging.
response = requests.post(
    apiUrl,
    data=payload,
    auth=(username, apiKey),
    headers=headers,
    timeout=30,
)

# Raise requests.HTTPError on any 4xx/5xx answer instead of passing an error
# page on as if it were the scraped HTML.
response.raise_for_status()

# The HTML is not parsed in this step. It is handed on as-is under `content`,
# together with a fixed placeholder `id`.
output = [{'id': '1', 'content': response.text}]

--Run Code 2 (input: html)--

import re
from datetime import datetime

# Zapier code step 2: extract the follower count and the page title from the
# HTML passed in as the `html` input field, and stamp the result with the
# time of execution. `input_data` is provided by the Zapier runtime; the
# result is left in `output`.
html = input_data.get('html', '')

# Follower count: the number inside the span carrying the
# "EntityInfoBlockValue" class.
# NOTE(review): the class name looks build-generated ("dyptuz-3") and may
# change when the page is redeployed -- check the live markup if this starts
# returning None.
# Besides plain digits, counts written with thousands separators ("1.234",
# "1,234", "1 234") are accepted; a bare \d+ silently missed those.
match = re.search(
    r'<span class="entity-infostyles__EntityInfoBlockValue-dyptuz-3.*?>'
    r'(\d{1,3}(?:[.,\s]\d{3})+|\d+)</span>',
    html,
)
if match:
    # Drop any grouping separators so the value is always digits only.
    followers_value = re.sub(r'\D', '', match.group(1))
else:
    followers_value = None  # Span not found

# Page title: the text of the <title> element up to the first colon.
# IGNORECASE tolerates <TITLE>, and DOTALL lets the title text span several
# lines; without them such titles were reported as empty.
title_match = re.search(
    r'<title[^>]*>(.*?)</title>', html, re.IGNORECASE | re.DOTALL
)
if title_match:
    # Keep only the part before the first colon, without surrounding space.
    title = title_match.group(1).split(':')[0].strip()
else:
    title = ''

output = {
    'followers': followers_value,
    'pageTitle': title,
    # Naive timestamp (no timezone information) taken from the runtime clock.
    'executionDate': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
}

print(output)
content_copyCOPY