huginn/tools.py

172 lines
5.7 KiB
Python

import aiohttp
import subprocess
import asyncio
from markdownify import markdownify
from google.genai import types
# MIME types whose response body is treated as text.
# Kept as a list (not a set) because it is concatenated with other lists via `+`.
SUPPORTED_TEXT_MIMETYPES = [
    # text/*
    "text/plain",
    "text/html",
    "text/css",
    "text/csv",
    "text/javascript",
    "text/markdown",
    "text/xml",
    "text/yaml",
    "text/rtf",
    "text/x-python",
    "text/x-c",
    "text/x-java-source",
    "text/x-lua",
    "text/x-sh",
    "text/x-sass",
    "text/x-scss",
    # application/* types that are textual
    "application/javascript",
    "application/json",
    "application/xml",
    "application/rtf",
    "application/xhtml+xml",
    "application/atom+xml",
    "application/rss+xml",
    "application/sql",
    "application/ld+json",
    "application/x-yaml",
]
# MIME types for binary documents and images (as opposed to text bodies).
# Kept as a list (not a set) because it is concatenated with other lists via `+`.
SUPPORTED_IMAGE_DOCUMENT_MIMETYPES = [
    "application/pdf",
    "image/png",
    "image/apng",
    "image/jpeg",
]
async def searxng(query: str) -> list:
    """
    Search the web with SearXNG.

    The query is sent to the SearXNG instance at searx.xorydev.xyz, requesting
    JSON output from the google, duckduckgo and brave engines. The call is
    best-effort: request and parsing failures are printed and produce an empty
    list rather than an exception.

    Arguments:
        query (str): The search query

    Returns: a list of the first 10 search results, each a dict with the keys
    "title" and "url". Results missing either field are skipped.
    """
    params = {
        "q": query,
        "format": "json",
        "engines": "google,duckduckgo,brave",
    }
    max_results = 10

    # Both 'async with' blocks guarantee the session and the response are
    # released, including when an exception is raised.
    async with aiohttp.ClientSession() as session:
        try:
            async with session.get("https://searx.xorydev.xyz/search", params=params) as response:
                # Raise an exception for bad status codes (4xx or 5xx)
                response.raise_for_status()
                data = await response.json()
        except aiohttp.ClientError as e:
            # aiohttp-related errors (network issues, bad status, etc.)
            print(f"Error making request to SearXNG: {e}")
            return []
        except Exception as e:
            # Deliberate catch-all: a failing search must not crash the caller.
            print(f"An unexpected error occurred: {e}")
            return []

    # A JSON body that is not an object cannot carry a "results" array; treat
    # it like any other failed lookup instead of raising AttributeError.
    if not isinstance(data, dict):
        return []

    results = []
    # `or []` also covers an explicit `"results": null` in the payload.
    for r in data.get("results") or []:
        if not isinstance(r, dict):
            continue
        title = r.get("title")
        url = r.get("url")
        if not (title and url):
            continue
        results.append({"title": title, "url": url})
        # Stop scanning as soon as the cap is reached.
        if len(results) >= max_results:
            break
    return results
async def open_url(url: str) -> dict | types.Part:
    """
    Opens a URL and returns its full content (if it's HTML, it will be converted to clean Markdown).
    Use this when a `search` result's content is insufficient or when a user provides a direct URL to analyze.

    Arguments:
        url (str): The URL to fetch with an HTTP GET request.

    Returns:
        For content types in SUPPORTED_IMAGE_DOCUMENT_MIMETYPES, a `types.Part`
        built from the raw response bytes. Otherwise a dict with the keys
        "content_type", "content_length" (the declared length, 0 if unknown)
        and "content". "content" is the text truncated to 262144 characters
        for types in SUPPORTED_TEXT_MIMETYPES, and None for any other type.

    Raises:
        Errors from aiohttp (including the one raised by `raise_for_status()`
        on a bad status code) are not caught here and propagate to the caller.
    """
    # Maximum number of characters of text returned to the caller.
    max_chars = 262144

    # Browser-like request headers sent with the GET request.
    browser_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "en-US,en;q=0.9",
        "Sec-Ch-Ua": '"Not)A;Brand";v="8", "Chromium";v="138", "Google Chrome";v="138"',
        "Sec-Ch-Ua-Mobile": "?0",
        "Sec-Ch-Ua-Platform": '"Windows"',
        "Sec-Fetch-Dest": "document",
        "Sec-Fetch-Mode": "navigate",
        "Sec-Fetch-Site": "none",
        "Sec-Fetch-User": "?1",
        "Upgrade-Insecure-Requests": "1",
        "Priority": "u=0, i",
    }

    async with aiohttp.ClientSession(headers=browser_headers) as session:
        async with session.get(url) as response:
            response.raise_for_status()
            # Defensively drop any ";charset=..." style parameters.
            content_type = response.content_type.split(";")[0].strip()
            content_length = response.content_length or 0

            # PDFs and images are handed back as raw bytes.
            if content_type in SUPPORTED_IMAGE_DOCUMENT_MIMETYPES:
                return types.Part.from_bytes(
                    data=await response.read(),
                    mime_type=content_type,
                )

            content = None
            if content_type in SUPPORTED_TEXT_MIMETYPES:
                # errors="replace": a page with a wrong or missing charset
                # should yield replacement characters, not UnicodeDecodeError.
                text = await response.text(errors="replace")
                if content_type == "text/html":
                    text = markdownify(text)
                # Slicing is a no-op when the text is already short enough.
                content = text[:max_chars]

            # Unsupported types fall through with content=None so the caller
            # still learns the type and declared size.
            return {
                "content_type": content_type,
                "content_length": content_length,
                "content": content,
            }
async def run_command(command: str) -> tuple[str, str, int]:
    """
    Runs a shell command on the host machine and captures its stdout, stderr and error code.

    NOTE: `command` is handed to the shell unmodified, so it can do anything
    the current user can do.

    Args:
        command: str
    Returns:
        tuple containing: stdout, stderr and error code (in that order).
        stdout and stderr are decoded as UTF-8 with undecodable bytes replaced
        by U+FFFD, and have leading/trailing whitespace stripped.
    Raises:
        TypeError: if the process reports no return code after it has finished.
    """
    process = await asyncio.create_subprocess_shell(
        command,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    # communicate() reads both pipes to EOF and waits for the process to exit.
    stdout_data, stderr_data = await process.communicate()

    # Commands may emit arbitrary bytes (binary output, other encodings);
    # replace what cannot be decoded instead of raising UnicodeDecodeError.
    stdout = stdout_data.decode(errors="replace").strip()
    stderr = stderr_data.decode(errors="replace").strip()

    return_code = process.returncode
    if return_code is None:
        # Narrows Optional[int] to int for the declared return type.
        raise TypeError("process finished without a return code")
    return stdout, stderr, return_code