import aiohttp
from markdownify import markdownify

# MIME types whose response bodies we are willing to return as text.
SUPPORTED_TEXT_MIMETYPES = [
    "text/plain",
    "text/html",
    "text/css",
    "text/csv",
    "text/javascript",
    "text/markdown",
    "text/xml",
    "text/yaml",
    "text/rtf",
    "text/x-python",
    "text/x-c",
    "text/x-java-source",
    "text/x-lua",
    "text/x-sh",
    "text/x-sass",
    "text/x-scss",
    "application/javascript",
    "application/json",
    "application/xml",
    "application/rtf",
    "application/xhtml+xml",
    "application/atom+xml",
    "application/rss+xml",
    "application/sql",
    "application/ld+json",
    "application/x-yaml",
]

# Hard cap (256 KiB of characters) on content returned by open_url,
# so a huge page cannot blow up the caller's context/payload.
MAX_CONTENT_CHARS = 262144

# SearXNG instance queried by searxng().
SEARXNG_SEARCH_URL = "https://searx.xorydev.xyz/search"

# Maximum number of search results returned by searxng().
MAX_SEARCH_RESULTS = 10


async def searxng(query: str) -> list:
    """
    Search the web with SearXNG.

    Arguments:
        query (str): The search query

    Returns:
        a list of the first 10 search results, each a dict with
        "title" and "url" keys. Returns an empty list on any error.
    """
    params = {
        "q": query,
        "format": "json",
        "engines": "google,duckduckgo,brave",
    }

    # 'async with' ensures both the session and the response are
    # properly closed even if an exception is raised.
    async with aiohttp.ClientSession() as session:
        try:
            async with session.get(SEARXNG_SEARCH_URL, params=params) as response:
                # Raise for 4xx/5xx so they are handled in the except below.
                response.raise_for_status()
                data = await response.json()
        except aiohttp.ClientError as e:
            # Network issues, invalid URL, bad status, etc. — this tool is
            # best-effort, so report and return an empty result set.
            print(f"Error making request to SearXNG: {e}")
            return []
        except Exception as e:
            # Anything unexpected (e.g. non-JSON body) is also non-fatal.
            print(f"An unexpected error occurred: {e}")
            return []

    results = []
    # "results" may be absent from the payload; default to an empty list.
    for r in data.get("results", []):
        title = r.get("title")
        url = r.get("url")
        # Skip entries missing either field.
        if title and url:
            results.append({"title": title, "url": url})
            if len(results) >= MAX_SEARCH_RESULTS:
                # Stop early once we have enough — no need to scan the rest.
                break
    return results


async def open_url(url: str) -> dict:
    """
    Opens a URL and returns its full content (if it's HTML, it will be
    converted to clean Markdown). Use this when a `search` result's content
    is insufficient or when a user provides a direct URL to analyze.

    Returns:
        a dict with "content_type", "content_length", and "content" keys.
        "content" is None for unsupported (non-text) MIME types, and is
        truncated to MAX_CONTENT_CHARS characters otherwise.

    Raises:
        aiohttp.ClientError: on network failures or 4xx/5xx responses
        (raise_for_status is not caught here).
    """
    # Browser-like headers: some sites block requests with a default
    # client User-Agent, so present as a current desktop Chrome.
    async with aiohttp.ClientSession(
        headers={
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
            "Accept-Encoding": "gzip, deflate",
            "Accept-Language": "en-US,en;q=0.9",
            "Sec-Ch-Ua": '"Not)A;Brand";v="8", "Chromium";v="138", "Google Chrome";v="138"',
            "Sec-Ch-Ua-Mobile": "?0",
            "Sec-Ch-Ua-Platform": '"Windows"',
            "Sec-Fetch-Dest": "document",
            "Sec-Fetch-Mode": "navigate",
            "Sec-Fetch-Site": "none",
            "Sec-Fetch-User": "?1",
            "Upgrade-Insecure-Requests": "1",
            "Priority": "u=0, i",
        },
    ) as session:
        async with session.get(url) as response:
            response.raise_for_status()
            # Strip any "; charset=..." parameter, keeping only the MIME type.
            content_type = response.content_type.split(";")[0].strip()
            # Content-Length header may be absent; report 0 in that case.
            content_length = response.content_length or 0

            if content_type not in SUPPORTED_TEXT_MIMETYPES:
                # Binary or otherwise unsupported payload: report metadata only.
                return {
                    "content_type": content_type,
                    "content_length": content_length,
                    "content": None,
                }

            content = await response.text()
            if "text/html" in content_type:
                # Convert HTML to clean Markdown for easier downstream use.
                content = markdownify(content)
            # Single truncation point for both branches (s[:n] is a no-op
            # when the string is already within the cap).
            return {
                "content_type": content_type,
                "content_length": content_length,
                "content": content[:MAX_CONTENT_CHARS],
            }