Síntoma: "el agente se para cuando hace acciones". MCPClient._read_loop lee las
respuestas JSON-RPC con stdout.readline(), cuyo StreamReader tenía un buffer de 1MB.
Cada respuesta llega en UNA línea; playwright__browser_take_screenshot({fullPage:
true}) devuelve la imagen en base64 en esa línea y supera el límite →
asyncio.LimitOverrunError → el except Exception mataba el read loop y dejaba la
sesión MCP inservible (los turnos siguientes ejecutaban 0 tools).
Fix en dos capas:
- MCP_STREAM_LIMIT=64MB en create_subprocess_exec(limit=...) — cubre cualquier
screenshot real.
- Read loop tolerante: captura (ValueError, LimitOverrunError), descarta solo esa
respuesta re-sincronizando el stream hasta el \n (_drain_until_newline) y sigue
vivo, en vez de matar toda la sesión MCP.
Validado: navegar + screenshot fullPage + glob ejecuta las 4 tools sin "read loop
error" y sin colapsar el contexto.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
335 lines
12 KiB
Python
335 lines
12 KiB
Python
"""MCP (Model Context Protocol) client — stdio transport.
|
|
|
|
Manages subprocess lifecycle, JSON-RPC request/response, timeouts,
|
|
and a tool registry populated from the server's capabilities.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import json
|
|
import logging
|
|
import os
|
|
import uuid
|
|
from typing import Any
|
|
|
|
from ..config import settings
|
|
from ..models.tools import ToolDefinition
|
|
|
|
logger = logging.getLogger(__name__)

# Maximum StreamReader buffer size (bytes) used when reading the MCP server's
# JSON-RPC responses from stdout. Every response arrives on ONE single line;
# tools such as Playwright's fullPage screenshot return the image as base64 on
# that line and blow well past asyncio's 64KB default (and the 1MB used
# previously), raising LimitOverrunError, which used to kill the read loop and
# leave the MCP session unusable (the agent "stopped" when performing
# actions). 64MB covers any real screenshot; above that, the read loop drops
# the offending response and stays alive.
MCP_STREAM_LIMIT = 64 * 1024**2
|
|
|
|
|
|
class MCPClientError(Exception):
    """Error raised by the MCP client (startup, transport or tool failures)."""
|
|
|
|
|
|
class MCPClient:
    """Stdio-based MCP client with full lifecycle management.

    The client spawns the MCP server as a subprocess, exchanges
    newline-delimited JSON-RPC 2.0 messages over its stdin/stdout, and keeps
    a registry of the tools the server advertises via ``tools/list``.

    A background task (``_read_loop``) routes responses to the futures stored
    in ``_pending`` (keyed by request id). A response line larger than the
    stream limit is dropped without killing the session; the request it
    belonged to is left to expire through the caller's timeout, because the
    id of a discarded line is unknown.
    """

    def __init__(
        self,
        command: str | None = None,
        args: list[str] | None = None,
        timeout: float | None = None,
        startup_timeout: float | None = None,
        env: dict[str, str] | None = None,
        name: str = "mcp",
    ) -> None:
        """Configure the client; nothing is spawned until ``start()``.

        Args:
            command: Server executable; falls back to
                ``settings.mcp_server_command`` when falsy.
            args: Server arguments; falls back to ``settings.mcp_server_args``
                only when ``None`` (an empty list is honoured).
            timeout: Per-tool-call timeout in seconds; falls back to
                ``settings.mcp_timeout_seconds`` when falsy.
            startup_timeout: Timeout for each startup request; falls back to
                ``settings.mcp_startup_timeout_seconds`` when falsy.
            env: Extra environment variables layered over ``os.environ``.
            name: Label used in logs and stored on registered tools.
        """
        self.name = name
        self._command = command or settings.mcp_server_command
        self._args = args if args is not None else list(settings.mcp_server_args)
        self._timeout = timeout or settings.mcp_timeout_seconds
        self._startup_timeout = startup_timeout or settings.mcp_startup_timeout_seconds
        # Inherit current env + any overrides (passes ACAI_* vars to MCP server)
        self._env = {**os.environ, **(env or {})}
        self._process: asyncio.subprocess.Process | None = None
        self._tools: dict[str, ToolDefinition] = {}
        self._pending: dict[str, asyncio.Future[dict[str, Any]]] = {}
        self._reader_task: asyncio.Task[None] | None = None
        self._stderr_task: asyncio.Task[None] | None = None
        self._running = False

    @property
    def tools(self) -> dict[str, ToolDefinition]:
        """Return a shallow copy of the tool registry, keyed by tool name."""
        return dict(self._tools)

    @property
    def is_running(self) -> bool:
        """Whether the read loop is active and a subprocess handle exists."""
        return self._running and self._process is not None

    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------

    async def start(self) -> None:
        """Start the MCP server subprocess and discover tools.

        Does nothing (beyond a warning) when no command is configured.

        Raises:
            MCPClientError: If a startup request times out, the server
                answers with an error, or the server closes its stdout.
        """
        if not self._command:
            logger.warning("No MCP server command configured — skipping start")
            return

        logger.info("Starting MCP server [%s]: %s %s", self.name, self._command, self._args)
        self._process = await asyncio.create_subprocess_exec(
            self._command,
            *self._args,
            stdin=asyncio.subprocess.PIPE,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            env=self._env,
            limit=MCP_STREAM_LIMIT,  # large buffer for MCP responses (base64 screenshots)
        )
        self._running = True
        self._reader_task = asyncio.create_task(self._read_loop())
        # stderr is piped, so it must be consumed or the server eventually
        # blocks on a full pipe (after buffering megabytes in this process).
        self._stderr_task = asyncio.create_task(self._stderr_loop())

        try:
            # Initialize
            init_result = await asyncio.wait_for(
                self._send_request("initialize", {
                    "protocolVersion": "2024-11-05",
                    "capabilities": {},
                    "clientInfo": {"name": "agentic-microservice", "version": "1.0.0"},
                }),
                timeout=self._startup_timeout,
            )
            logger.info("MCP initialized: %s", init_result)

            # Send initialized notification
            await self._send_notification("notifications/initialized", {})

            # Discover tools
            tools_result = await asyncio.wait_for(
                self._send_request("tools/list", {}),
                timeout=self._startup_timeout,
            )
            self._register_tools(tools_result)
            logger.info("Discovered %d MCP tools", len(self._tools))

        except asyncio.TimeoutError as exc:
            logger.error("MCP server startup timed out")
            await self.stop()
            raise MCPClientError("MCP server startup timed out") from exc
        except Exception:
            # Any other startup failure must not leak the subprocess or the
            # background tasks; clean up and let the original error surface.
            await self.stop()
            raise

    async def stop(self) -> None:
        """Gracefully stop the MCP server.

        Cancels the background tasks, terminates the subprocess (escalating
        to ``kill`` after 5 seconds), cancels every pending request and
        clears the tool registry. Safe to call when nothing is running or
        when the process has already exited.
        """
        self._running = False

        tasks = (self._reader_task, self._stderr_task)
        self._reader_task = None
        self._stderr_task = None
        for task in tasks:
            if task is None:
                continue
            task.cancel()
            try:
                await task
            except asyncio.CancelledError:
                pass

        process, self._process = self._process, None
        if process is not None:
            try:
                if process.stdin:
                    process.stdin.close()
                process.terminate()
                await asyncio.wait_for(process.wait(), timeout=5.0)
            except ProcessLookupError:
                # The process already exited; there is nothing left to signal.
                pass
            except asyncio.TimeoutError:
                try:
                    process.kill()
                except ProcessLookupError:
                    pass
                else:
                    # Reap the killed process so it does not linger as a zombie.
                    await process.wait()

        # Cancel any pending requests
        for fut in self._pending.values():
            if not fut.done():
                fut.cancel()
        self._pending.clear()
        self._tools.clear()

    # ------------------------------------------------------------------
    # Tool execution
    # ------------------------------------------------------------------

    async def call_tool(
        self, tool_name: str, arguments: dict[str, Any]
    ) -> dict[str, Any]:
        """Call a tool on the MCP server with timeout.

        Args:
            tool_name: Name of a tool present in the registry.
            arguments: Arguments forwarded verbatim in ``tools/call``.

        Returns:
            The ``result`` member of the server's JSON-RPC response.

        Raises:
            MCPClientError: If the client is not running, the tool is
                unknown, the server reports an error, or the call exceeds
                the configured timeout.
        """
        if not self.is_running:
            raise MCPClientError("MCP client is not running")

        if tool_name not in self._tools:
            raise MCPClientError(f"Unknown tool: {tool_name}")

        try:
            return await asyncio.wait_for(
                self._send_request("tools/call", {
                    "name": tool_name,
                    "arguments": arguments,
                }),
                timeout=self._timeout,
            )
        except asyncio.TimeoutError as exc:
            raise MCPClientError(
                f"Tool '{tool_name}' timed out after {self._timeout}s"
            ) from exc

    def get_tool_definitions(self) -> list[dict[str, Any]]:
        """Return tool definitions in a format suitable for model adapters."""
        return [
            {
                "name": tool.name,
                "description": tool.description,
                "input_schema": tool.input_schema,
            }
            for tool in self._tools.values()
        ]

    # ------------------------------------------------------------------
    # JSON-RPC transport
    # ------------------------------------------------------------------

    async def _send_request(
        self, method: str, params: dict[str, Any]
    ) -> dict[str, Any]:
        """Send a JSON-RPC request and await the response.

        No timeout is applied here; callers wrap the call in
        ``asyncio.wait_for``.

        Raises:
            MCPClientError: If stdin is unavailable, the server answers with
                an error, or the read loop ends while the request is pending.
        """
        request_id = uuid.uuid4().hex[:12]
        message = {
            "jsonrpc": "2.0",
            "id": request_id,
            "method": method,
            "params": params,
        }

        future: asyncio.Future[dict[str, Any]] = (
            asyncio.get_running_loop().create_future()
        )
        self._pending[request_id] = future

        try:
            # The write sits inside the try so a failed write cannot leave a
            # stale entry in the pending table.
            await self._write_message(message)
            return await future
        finally:
            self._pending.pop(request_id, None)

    async def _send_notification(
        self, method: str, params: dict[str, Any]
    ) -> None:
        """Send a JSON-RPC notification (no response expected)."""
        message = {
            "jsonrpc": "2.0",
            "method": method,
            "params": params,
        }
        await self._write_message(message)

    async def _write_message(self, message: dict[str, Any]) -> None:
        """Write a JSON-RPC message to the server's stdin as one line.

        Raises:
            MCPClientError: If there is no process or its stdin is missing.
        """
        if not self._process or not self._process.stdin:
            raise MCPClientError("MCP process stdin not available")

        data = json.dumps(message) + "\n"
        self._process.stdin.write(data.encode())
        await self._process.stdin.drain()

    async def _read_loop(self) -> None:
        """Continuously read JSON-RPC responses from stdout.

        Lines are read with ``readuntil`` rather than ``readline`` so that,
        when a line exceeds the stream limit, the buffer is left untouched
        and exactly that line can be discarded without eating the messages
        that follow it. Non-JSON lines are ignored. When the loop ends for
        any reason other than cancellation, pending requests are failed so
        callers do not wait for their timeout.
        """
        if not self._process or not self._process.stdout:
            return

        stdout = self._process.stdout
        cancelled = False
        try:
            while self._running:
                try:
                    line = await stdout.readuntil(b"\n")
                except asyncio.IncompleteReadError as exc:
                    # EOF: whatever arrived before it (possibly nothing).
                    line = exc.partial
                except asyncio.LimitOverrunError as exc:
                    # A JSON-RPC response exceeded the buffer (e.g. a
                    # Playwright fullPage screenshot in base64 above 64MB).
                    # Drop only that response, resync the stream and keep
                    # serving the remaining tools.
                    logger.warning(
                        "MCP [%s]: respuesta supera el buffer (%d MB), se descarta y se continua",
                        self.name, MCP_STREAM_LIMIT // (1024 * 1024),
                    )
                    await self._drain_until_newline(stdout, exc.consumed)
                    continue

                if not line:
                    logger.warning("MCP server stdout closed")
                    break

                line_str = line.decode(errors="replace").strip()
                if not line_str:
                    continue

                try:
                    message = json.loads(line_str)
                except json.JSONDecodeError:
                    logger.debug("Non-JSON MCP output: %s", line_str[:200])
                    continue

                self._handle_message(message)

        except asyncio.CancelledError:
            # stop() cancels this task and then cancels the pending futures
            # itself, so they are left alone here.
            cancelled = True
        except Exception:
            logger.exception("MCP read loop error")
        finally:
            self._running = False
            if not cancelled:
                self._fail_pending("MCP server connection closed")

    async def _drain_until_newline(
        self, stdout: asyncio.StreamReader, consumed: int = 0
    ) -> None:
        """Discard the rest of an oversized line, up to and including ``\\n``.

        Args:
            stdout: Stream positioned at the start of the oversized line.
            consumed: Number of buffered bytes known not to contain the
                newline (``LimitOverrunError.consumed``).

        Only the oversized line is consumed: bytes after its newline stay in
        the buffer for the next read. Returns early on EOF or once the
        client is no longer running.
        """
        remaining = consumed
        while self._running:
            # Drop the newline-free prefix in bounded chunks so the discarded
            # data is never duplicated in memory.
            while remaining > 0:
                chunk = await stdout.read(min(remaining, 65536))
                if not chunk:
                    return
                remaining -= len(chunk)
            try:
                await stdout.readuntil(b"\n")
            except asyncio.LimitOverrunError as exc:
                # Still no newline within the limit: discard and keep going.
                remaining = exc.consumed
            except asyncio.IncompleteReadError:
                return
            else:
                return

    async def _stderr_loop(self) -> None:
        """Consume the server's stderr and log it at debug level."""
        if not self._process or not self._process.stderr:
            return

        stderr = self._process.stderr
        try:
            while True:
                chunk = await stderr.read(65536)
                if not chunk:
                    break
                logger.debug(
                    "MCP [%s] stderr: %s",
                    self.name, chunk.decode(errors="replace").rstrip()[:2000],
                )
        except asyncio.CancelledError:
            raise
        except Exception:
            logger.exception("MCP stderr loop error")

    def _fail_pending(self, reason: str) -> None:
        """Fail every unresolved pending request with ``MCPClientError``."""
        for future in list(self._pending.values()):
            if not future.done():
                future.set_exception(MCPClientError(reason))

    def _handle_message(self, message: Any) -> None:
        """Route an incoming JSON-RPC message.

        Responses whose ``id`` matches a pending request resolve (or fail)
        that request's future; messages carrying a ``method`` are logged as
        server notifications; anything else is ignored. Malformed input
        never raises, so a stray line cannot take down the read loop.
        """
        if not isinstance(message, dict):
            logger.debug("Ignoring non-object MCP message: %.200r", message)
            return

        msg_id = message.get("id")
        # Request ids are always strings here; the type check also guards
        # against unhashable ids.
        future = self._pending.get(msg_id) if isinstance(msg_id, str) else None

        if future is not None:
            if future.done():
                return

            if "error" in message:
                error = message["error"]
                if isinstance(error, dict):
                    code, text = error.get("code"), error.get("message")
                else:
                    code, text = None, error
                future.set_exception(
                    MCPClientError(f"MCP error {code}: {text}")
                )
            else:
                future.set_result(message.get("result", {}))
        elif "method" in message:
            # Server-initiated notification — log it
            logger.debug("MCP notification: %s", message.get("method"))

    # ------------------------------------------------------------------
    # Tool registry
    # ------------------------------------------------------------------

    def _register_tools(self, tools_result: dict[str, Any]) -> None:
        """Parse tools/list response and populate the registry.

        Entries that are not objects or that lack a name are skipped.
        """
        raw_tools = tools_result.get("tools") if isinstance(tools_result, dict) else None
        for entry in raw_tools or []:
            if not isinstance(entry, dict):
                continue
            name = entry.get("name", "")
            if not name:
                continue
            self._tools[name] = ToolDefinition(
                name=name,
                description=entry.get("description", ""),
                input_schema=entry.get("inputSchema", {}),
                server_name=self.name,
            )
|