Get news from Yahoo in Python
Wed Sep 11 2024 20:06:12 GMT+0000 (Coordinated Universal Time)
Saved by @cx_21 #javascript
"""Scrape the Yahoo News front page and save the articles to news_data.json.

For every stream item on https://www.yahoo.com/news/ the script follows the
article link, collects the article body text together with the teaser
metadata shown on the front page (image, category, source, title,
description), and writes the collected records to ``news_data.json``.
"""
import json
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Seconds to wait for a response; without a timeout requests.get() can block
# forever on a stalled connection.
REQUEST_TIMEOUT = 10


def _text_of(parent, name, class_):
    """Return the text of the first matching descendant of *parent*, or ''."""
    tag = parent.find(name, class_=class_)
    return tag.text if tag else ''


# Initialize the list to store API data
api = []

# Base URL for Yahoo News
base_url = 'https://www.yahoo.com'

# Fetch the main page
url = f'{base_url}/news/'
response = requests.get(url, timeout=REQUEST_TIMEOUT)

# Check if the request was successful
if response.status_code == 200:
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find all news items
    for news_item in soup.find_all('ul', class_='stream-items'):
        for item in news_item.find_all('li', class_='stream-item'):
            # Extract the article link; skip items without a usable href
            item_id = item.find('a', class_='js-content-viewer')
            link = item_id.get('href') if item_id else None
            if not link:
                continue

            # urljoin handles relative, absolute and protocol-relative hrefs;
            # plain concatenation would corrupt already-absolute links.
            full_link = urljoin(base_url, link)

            # Fetch the article page; one unreachable article must not
            # discard everything collected so far.
            try:
                response2 = requests.get(full_link, timeout=REQUEST_TIMEOUT)
            except requests.RequestException as exc:
                print(f'Skipping {full_link}: {exc}')
                continue
            if response2.status_code != 200:
                continue

            # Extract article details
            soup2 = BeautifulSoup(response2.text, 'html.parser')
            item_info = soup2.find('div', class_='caas-inner-body')
            if not item_info:
                continue

            text1 = ''.join(
                body.text
                for body in item_info.find_all('div', class_='caas-body')
            )
            # Remove "View comments" from the text
            text1 = text1.replace("View comments", "").strip()

            # Extract additional details if available. An <img> may lack a
            # src attribute (e.g. lazy-loaded images), so use .get().
            img = item.find('img')
            image = img.get('src', '') if img else ''

            # Append data to the api list
            api.append({
                'link': full_link,
                'image': image,
                'category': _text_of(item, 'strong', 'Tt(c)'),
                'ell': _text_of(item, 'span', 'Ell'),
                'title': _text_of(item, 'h3', 'stream-item-title'),
                'description': _text_of(
                    item, 'p', 'finance-ticker-fetch-success_D(n)'
                ),
                'text': text1,
            })

# Convert the api list to a JSON-formatted string
api_json = json.dumps(api, indent=4)

# Write the JSON data to a file
with open('news_data.json', 'w', encoding='utf-8') as file:
    file.write(api_json)

print("Data has been written to news_data.json")
Comments