Web scraper with CrewAI

PHOTO EMBED

Mon Feb 24 2025 08:55:17 GMT+0000 (Coordinated Universal Time)

Saved by @piyushkumar121

from crewai import Agent, Task, Crew ,LLM
from langchain_openai import ChatOpenAI
from crewai_tools import ScrapeWebsiteTool
import os
from dotenv import load_dotenv
from langchain_google_genai import ChatGoogleGenerativeAI

# Load variables from a local .env file (if one exists) into the process
# environment, so the API key never has to live in the source file.
load_dotenv()

# SECURITY: a Gemini API key used to be hard-coded on this line. Any key that
# has been committed or published must be treated as leaked and rotated.
# The key is now read from the environment (GEMINI_API_KEY, with
# GOOGLE_API_KEY accepted as a fallback name).
api_key = os.getenv('GEMINI_API_KEY') or os.getenv('GOOGLE_API_KEY')
if not api_key:
    # Fail early with an actionable message instead of letting the first
    # model call fail later with an opaque authentication error.
    raise RuntimeError(
        'Gemini API key not found: set GEMINI_API_KEY (or GOOGLE_API_KEY) '
        'in the environment or in a .env file.'
    )

# Gemini model used by the agent, configured through crewai's LLM wrapper.
# NOTE: alternative backends (ChatGoogleGenerativeAI, ChatOpenAI) were
# previously sketched here as commented-out code; only this wrapper is in use.
llm = LLM(
    api_key=api_key,
    model="gemini/gemini-1.5-flash",
)

# Scraping tool, bound to the single website this script targets
site = "https://www.simplifymoney.in/"
web_scrape_tool = ScrapeWebsiteTool(website_url=site)

# The only agent in this script: a scraper that is handed the website tool
# and driven by the LLM configured above in this file.
web_scraper_agent = Agent(
    llm=llm,
    tools=[web_scrape_tool],
    verbose=True,
    role="Web Scraper",
    goal="Effectively Scrape data on the websites for your company",
    backstory='''You are expert web scraper, your job is to scrape all the data for
                your company from a given website.
                ''',
)


# The single task: ask the scraper agent for the site's content.
# The task is configured with output_file set to data.txt.
web_scraper_task = Task(
    agent=web_scraper_agent,
    description="Scrape all the  data on the site so your company can use for decision making.",
    expected_output="All the content of the website.",
    output_file="data.txt",
)


# Crew made of the one agent and its one task, with verbose output enabled
crew = Crew(tasks=[web_scraper_task], agents=[web_scraper_agent], verbose=True)

# Run the crew, then echo the raw text of the result to stdout.
result = crew.kickoff()
print(result.raw)

# Save the same raw text to results.txt. Scraped pages routinely contain
# non-ASCII characters, so write UTF-8 explicitly instead of depending on the
# platform's default encoding (which can raise UnicodeEncodeError on Windows).
with open('results.txt', 'w', encoding='utf-8') as f:
    f.write(result.raw)
content_copyCOPY