import os
import asyncio
import requests
from bs4 import BeautifulSoup
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from agents import Agent, Runner, function_tool, set_trace_processors
from datetime import date
import scale_gp_beta.lib.tracing as tracing
from scale_gp_beta import SGPClient
from scale_gp_beta.lib.tracing.integrations import OpenAITracingSGPProcessor
@function_tool
def google_search(query: str) -> list[str]:
    """Search the web via the Google Custom Search API.

    Args:
        query: The search query string.

    Returns:
        Up to 5 result URLs, or a single-element list containing an error
        message if the search failed.
    """
    # Capture the currently-active tracing span so failures below can be
    # attached to it from the except clauses.
    scoped_span = tracing.current_span()
    try:
        # Raises KeyError if either variable is unset (caught by the broad
        # except below and surfaced as an error string to the agent).
        api_key, cse_id = os.environ["GOOGLE_API_KEY"], os.environ["GOOGLE_CSE_ID"]
        with tracing.create_span("build_custom_search"):
            service = build("customsearch", "v1", developerKey=api_key)
        with tracing.create_span("execute_custom_search", input={"query": query}) as span:
            res = service.cse().list(q=query, cx=cse_id, num=5).execute()
            span.output = {"response": res}
        urls = [item['link'] for item in res.get('items', [])]
        return urls
    except HttpError as e:
        error_message = f"An error occurred with Google Search: {e}"
        # Record the failure on the enclosing span, if one exists.
        if scoped_span:
            scoped_span.set_error(error_message)
        return [error_message]
    except Exception as e:
        error_message = f"An unexpected error occurred: {e}"
        if scoped_span:
            scoped_span.set_error(error_message)
        return [error_message]
@function_tool
def get_article_content(url: str) -> str:
    """Fetch a web page and extract its paragraph text.

    Args:
        url: The URL of the article to fetch.

    Returns:
        The concatenated ``<p>`` text of the page, truncated to 4000
        characters (with a trailing ``...`` when truncated), or an error
        message string if fetching/parsing failed.
    """
    # Capture the currently-active tracing span so failures below can be
    # attached to it from the except clauses.
    scoped_span = tracing.current_span()
    try:
        # Browser-like User-Agent: some sites reject default library agents.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'
        }
        with tracing.create_span("GET_article_content", input={"url": url}, span_type="FILE_RETRIEVAL") as span:
            response = requests.get(url, headers=headers, timeout=10)
            # Turn 4xx/5xx responses into RequestException (handled below).
            response.raise_for_status()
            span.output = {"text": response.text, "code": response.status_code}
        with tracing.create_span("parse_article_content", span_type="DATA_MANIPULATION", input={"text": response.text}) as span:
            soup = BeautifulSoup(response.text, 'html.parser')
            paragraphs = soup.find_all('p')
            article_text = "\n".join(p.get_text() for p in paragraphs)
            # Cap the payload returned to the model at 4000 characters.
            truncated_text = (article_text[:4000] + '...') if len(article_text) > 4000 else article_text
            span.output = {"truncated_text": truncated_text}
        return truncated_text
    except requests.RequestException as e:
        error_message = f"An error occurred while fetching the URL: {e}"
        # Record the failure on the enclosing span, if one exists.
        if scoped_span:
            scoped_span.set_error(error_message)
        return error_message
    except Exception as e:
        error_message = f"An unexpected error occurred during content extraction: {e}"
        if scoped_span:
            scoped_span.set_error(error_message)
        return error_message
async def main() -> None:
    """Build the news-digest agent, read a request from stdin, run the agent,
    and print its final summary."""
    # Single quotes inside the f-string: nesting double quotes inside a
    # double-quoted f-string is a SyntaxError on Python < 3.12.
    today_str = f"Today's date is {date.today().strftime('%A, %B %d, %Y')}."
    news_agent = Agent(
        name="Personalized News Digest Agent",
        instructions=(
            # today_str already ends with a period, so none is appended here.
            f"{today_str} You are a research assistant. Your goal is to provide a concise summary of recent news based on the user's request. "
            # Tool names must match the decorated function names below.
            "First, use the `google_search` tool to find relevant articles. "
            "Next, use the `get_article_content` tool on the most promising URLs to read them. "
            "Finally, synthesize the information from the articles into a coherent summary for the user."
        ),
        model="gpt-4-turbo",
        tools=[google_search, get_article_content],
    )
    user_prompt = input("Please enter your news request: ")
    result = await Runner.run(news_agent, user_prompt)
    print(result.final_output)
if __name__ == "__main__":
    # NOTE(review): hardcoded "XXX" placeholders — replace with real
    # credentials, ideally loaded from the environment or a secrets manager
    # rather than committed to source.
    SGP_API_KEY, SGP_ACCOUNT_ID = "XXX", "XXX"
    # Populate the environment variables read by the agent runtime and the
    # google_search tool.
    os.environ["OPENAI_API_KEY"], os.environ["GOOGLE_API_KEY"], os.environ["GOOGLE_CSE_ID"] = "XXX", "XXX", "XXX"
    # SGP client backs the tracing spans created inside the tools.
    client = SGPClient(api_key=SGP_API_KEY, account_id=SGP_ACCOUNT_ID)
    tracing.init(client)
    # Route OpenAI Agents SDK trace events to SGP as well.
    set_trace_processors([OpenAITracingSGPProcessor()])
    asyncio.run(main())