i haven't bothered to test the image & pdf funcs but we ball

This commit is contained in:
xory 2025-08-15 11:23:10 +00:00
parent 4721b685b4
commit 9b78aa2ce5
3 changed files with 54 additions and 5 deletions

View file

@ -1,5 +1,8 @@
import aiohttp
import subprocess
import asyncio
from markdownify import markdownify
from google.genai import types
SUPPORTED_TEXT_MIMETYPES = [
"text/plain",
@ -30,6 +33,13 @@ SUPPORTED_TEXT_MIMETYPES = [
"application/x-yaml",
]
# MIME types whose response body is passed along as raw bytes (PDF documents
# and images) instead of being decoded as text.
# Kept as a list on purpose: it is concatenated with SUPPORTED_TEXT_MIMETYPES
# using `+`, which requires both operands to be lists.
SUPPORTED_IMAGE_DOCUMENT_MIMETYPES = [
    "application/pdf",
    "image/png",
    "image/apng",
    "image/jpeg",
]
async def searxng(query: str) -> list:
"""
@ -79,12 +89,11 @@ async def searxng(query: str) -> list:
return results
async def open_url(url: str) -> dict:
async def open_url(url: str) -> dict | types.Part:
"""
Opens a URL and returns its full content (if it's HTML, it will be converted to clean Markdown).
Use this when a `search` result's content is insufficient or when a user provides a direct URL to analyze.
"""
async with aiohttp.ClientSession(
headers={
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36",
@ -107,13 +116,19 @@ async def open_url(url: str) -> dict:
content_type = response.content_type.split(";")[0].strip()
content_length = response.content_length or 0
if content_type not in SUPPORTED_TEXT_MIMETYPES:
if content_type not in SUPPORTED_TEXT_MIMETYPES + SUPPORTED_IMAGE_DOCUMENT_MIMETYPES:
return {
"content_type": content_type,
"content_length": content_length,
"content": None,
}
if content_type in SUPPORTED_IMAGE_DOCUMENT_MIMETYPES:
return types.Part.from_bytes(
data=await response.read(),
mime_type=content_type
)
if "text/html" in content_type:
content = markdownify(await response.text())
if len(content) > 262144:
@ -132,3 +147,26 @@ async def open_url(url: str) -> dict:
"content_length": content_length,
"content": content,
}
async def run_command(command: str) -> tuple[str, str, int]:
    """
    Runs a shell command on the host machine and captures its stdout, stderr and exit code.

    The command string is handed to the system shell unmodified, so it must come from a trusted source.

    Args:
        command: the shell command line to execute

    Returns:
        tuple containing: stdout, stderr and exit code (in that order).
        Both streams are decoded as UTF-8 text with leading/trailing whitespace stripped;
        bytes that are not valid UTF-8 are replaced with U+FFFD instead of raising.

    Raises:
        TypeError: if the process reports no exit code after it has finished
    """
    # SECURITY: `command` is interpreted by the shell as-is (pipes, redirects,
    # `;`, substitutions all work). Nothing here escapes or sandboxes it, so
    # never forward untrusted input to this function without review.
    process = await asyncio.create_subprocess_shell(
        command,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )

    # communicate() drains both pipes concurrently and waits for the process
    # to exit, which avoids deadlocks when a pipe buffer fills up.
    stdout_data, stderr_data = await process.communicate()

    # Commands can emit arbitrary bytes (binary files, other encodings).
    # Strict decoding would raise UnicodeDecodeError and lose the whole
    # result, so substitute undecodable bytes instead.
    stdout = stdout_data.decode(errors="replace").strip()
    stderr = stderr_data.decode(errors="replace").strip()

    return_code = process.returncode
    if return_code is None:
        # Not expected once communicate() has returned; this guard mainly
        # narrows `int | None` to `int` for the declared return type.
        raise TypeError("process finished without reporting a return code")

    return stdout, stderr, return_code