Get Yahoo news in Python

PHOTO EMBED

Wed Sep 11 2024 20:06:12 GMT+0000 (Coordinated Universal Time)

Saved by @cx_21 #javascript

import json
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Seconds to wait for the server before giving up on a request. Without a
# timeout, requests.get() can block forever on a stalled connection.
REQUEST_TIMEOUT = 10

# Initialize the list to store API data
api = []

# Base URL for Yahoo News
base_url = 'https://www.yahoo.com'


def _fetch_soup(page_url):
    """Download *page_url* and return it parsed, or None on a non-200 reply.

    Network-level failures (requests.RequestException, including timeouts)
    propagate to the caller.
    """
    page = requests.get(page_url, timeout=REQUEST_TIMEOUT)
    if page.status_code != 200:
        return None
    return BeautifulSoup(page.text, 'html.parser')


def _text_of(parent, tag_name, css_class):
    """Return the text of the first matching descendant of *parent*, or ''."""
    node = parent.find(tag_name, class_=css_class)
    return node.text if node is not None else ''


def _scrape_item(item):
    """Build the record for one stream item, or return None to skip it.

    An item is skipped when it has no usable article link, when the article
    page cannot be fetched, or when the article has no 'caas-inner-body' div.
    """
    anchor = item.find('a', class_='js-content-viewer')
    href = anchor.get('href') if anchor is not None else None
    if not href:
        # No anchor, or an anchor without an href: nothing to follow.
        return None

    # urljoin resolves relative hrefs against the site root and leaves
    # absolute hrefs untouched; plain concatenation would mangle the latter.
    full_link = urljoin(base_url, href)

    try:
        article = _fetch_soup(full_link)
    except requests.RequestException:
        # One unreachable article should not abort the whole scrape.
        return None
    if article is None:
        return None

    body = article.find('div', class_='caas-inner-body')
    if body is None:
        return None

    text = ''.join(
        part.text for part in body.find_all('div', class_='caas-body')
    )
    # Remove "View comments" from the text
    text = text.replace("View comments", "").strip()

    # The remaining fields come from the listing entry, not the article page.
    img = item.find('img')
    return {
        'link': full_link,
        # .get() tolerates <img> tags that carry no src attribute.
        'image': img.get('src', '') if img is not None else '',
        'category': _text_of(item, 'strong', 'Tt(c)'),
        'ell': _text_of(item, 'span', 'Ell'),
        'title': _text_of(item, 'h3', 'stream-item-title'),
        'description': _text_of(item, 'p', 'finance-ticker-fetch-success_D(n)'),
        'text': text,
    }


# Fetch the main page
url = f'{base_url}/news/'
soup = _fetch_soup(url)

# Only walk the listing when the main page was retrieved successfully
if soup is not None:
    for news_item in soup.find_all('ul', class_='stream-items'):
        for item in news_item.find_all('li', class_='stream-item'):
            record = _scrape_item(item)
            if record is not None:
                api.append(record)

# Serialize the collected records as pretty-printed JSON (4-space indent)
api_json = json.dumps(api, indent=4)

# Persist the serialized text; the context manager closes the handle on exit
with open('news_data.json', 'w') as out_file:
    out_file.write(api_json)

# Tell the user where the output went
print("Data has been written to news_data.json")
content_copyCOPY