Automating HTML Parsing and JSON Extraction from Multiple URLs Using PowerShell | by Rihab Beji | Jan, 2025 | Medium
Sat Mar 08 2025 14:31:13 GMT+0000 (Coordinated Universal Time)
Saved by
@baamn
# Fetches every page listed in $urls, extracts the JavaScript object literal
# assigned to `var data` in the returned HTML, parses it as JSON and prints the
# resulting object. A failure on one URL is reported and the loop continues
# with the next URL.

# Step 1: Define the list of URLs
$urls = @(
    "https://example.com/page1",
    "https://example.com/page2",
    "https://example.com/page3"
    # Add more URLs here
)

# Step 2: Loop through URLs and process them
foreach ($url in $urls) {
    try {
        # Fetch the HTML content.
        # -UseBasicParsing avoids the Internet Explorer DOM dependency on
        # Windows PowerShell 5.1 (the switch is accepted and ignored on 6+).
        # -ErrorAction Stop makes sure any request failure lands in `catch`.
        $response = Invoke-WebRequest -Uri $url -UseBasicParsing -ErrorAction Stop
        $htmlContent = $response.Content

        # Use regex to extract the JSON string.
        # (?s) lets '.' match line breaks so a pretty-printed, multi-line
        # object literal is captured as well as a single-line one.
        if ($htmlContent -match '(?s)var data\s*=\s*({.*?})\s*;') {
            $jsonString = $Matches[1]
        } else {
            Write-Output "No JSON data found in $url"
            continue
        }

        # Convert the JSON string to a PowerShell object.
        # No manual un-escaping of "\/" is done beforehand: ConvertFrom-Json
        # already decodes that escape, and a blind text replace would corrupt
        # values containing an escaped backslash followed by a slash ("\\/").
        $jsonObject = $jsonString | ConvertFrom-Json -ErrorAction Stop

        # Display the JSON object.
        # ${url} is delimited explicitly: a bare "$url:" inside a double-quoted
        # string is parsed as a scope-qualified variable and is a syntax error.
        Write-Output "JSON from ${url}:"
        $jsonObject | Format-List
    } catch {
        # $_ holds the error record that caused the failure for this URL.
        Write-Output "Failed to process ${url}: $_"
    }
}
content_copyCOPY
https://medium.com/@rihab.beji099/automating-html-parsing-and-json-extraction-from-multiple-urls-using-powershell-3c0ce3a93292#id_token
Comments