Mas cosas

This commit is contained in:
Jordan Diaz
2026-05-06 07:07:57 +00:00
parent 8875cb29cb
commit 06ce51a9c1
9 changed files with 643 additions and 80 deletions

View File

@@ -1,5 +1,9 @@
Eres el asistente de desarrollo de Acai CMS. Ayudas al usuario sobre su web Acai: crear y editar módulos, gestionar páginas y registros, configurar tablas, escribir hooks, ajustar header/footer/librerías y subir contenido. Hablas y respondes **siempre en español**. Eres el asistente de desarrollo de Acai CMS. Ayudas al usuario sobre su web Acai: crear y editar módulos, gestionar páginas y registros, configurar tablas, escribir hooks, ajustar header/footer/librerías y subir contenido. Hablas y respondes **siempre en español**.
# Mecanismo de tools (CRÍTICO)
Para invocar herramientas usa **EXCLUSIVAMENTE el mecanismo nativo de tool_use** del API. NUNCA escribas tool calls como texto en tu respuesta. En particular NO escribas marcadores como `<tool_call>`, `[TOOL_CALL]`, `<minimax:tool_call>`, `<invoke>`, `{tool => ...}`, `{name: ..., parameters: ...}` ni cualquier pseudocódigo similar dentro del campo `content` de texto. El sistema tiene soporte de tools incorporado — invócalas directamente. Si escribes una tool call como texto, **no se ejecutará** y el usuario solo verá el markup crudo.
# Identidad y rol # Identidad y rol
Actúas como un desarrollador senior experto en Acai CMS. Antes de cualquier acción no trivial: Actúas como un desarrollador senior experto en Acai CMS. Antes de cualquier acción no trivial:

View File

@@ -77,14 +77,17 @@ export async function validateMcpToken(secret) {
} catch { } catch {
return null; return null;
} }
if (!meta || !meta.user || !meta.project) return null; // Solo exigimos `user`. `project` puede ser "" (token user-wide que
// autoriza todos los proyectos del usuario, ver handlers/mcp_tokens.py
// del backend Python para los detalles del modelo).
if (!meta || !meta.user) return null;
// Actualizacion asincrona de lastUsedAt — no bloqueamos la request. // Actualizacion asincrona de lastUsedAt — no bloqueamos la request.
updateLastUsedAt(key, meta).catch((e) => { updateLastUsedAt(key, meta).catch((e) => {
console.error("[mcp-tokens] lastUsedAt update failed:", e.message); console.error("[mcp-tokens] lastUsedAt update failed:", e.message);
}); });
return { user: meta.user, project: meta.project, id: meta.id || "" }; return { user: meta.user, project: meta.project || "", id: meta.id || "" };
} }
async function updateLastUsedAt(key, meta) { async function updateLastUsedAt(key, meta) {

View File

@@ -195,6 +195,13 @@ export function startHttpServer() {
// identifica manualmente con X-Acai-User + X-Project-Name). // identifica manualmente con X-Acai-User + X-Project-Name).
//============================================================================= //=============================================================================
app.use(async (req, res, next) => { app.use(async (req, res, next) => {
// DEBUG temporal: loguear TODA request que llegue. Quitar cuando este
// claro el flujo del cliente.
const secretPresent = !!req.headers["x-mcp-secret"];
const authPresent = !!req.headers["authorization"];
console.error(
`[MCP req] ${req.method} ${req.url} - X-MCP-Secret=${secretPresent ? "yes" : "MISSING"}, Authorization=${authPresent ? "yes" : "MISSING"}, UA=${(req.headers["user-agent"] || "").substring(0, 60)}`,
);
const secret = req.headers["x-mcp-secret"]; const secret = req.headers["x-mcp-secret"];
if (!secret) { if (!secret) {
return next(); return next();
@@ -202,6 +209,7 @@ export function startHttpServer() {
try { try {
const auth = await validateMcpToken(secret); const auth = await validateMcpToken(secret);
if (!auth) { if (!auth) {
console.error("[MCP middleware] Invalid X-MCP-Secret rejected");
res.status(401) res.status(401)
.setHeader("Content-Type", "application/json") .setHeader("Content-Type", "application/json")
.end(JSON.stringify({ error: "Invalid MCP token" })); .end(JSON.stringify({ error: "Invalid MCP token" }));
@@ -209,7 +217,18 @@ export function startHttpServer() {
} }
// Sobrescribe los headers de identidad con los del token validado. // Sobrescribe los headers de identidad con los del token validado.
req.headers["x-acai-user"] = auth.user; req.headers["x-acai-user"] = auth.user;
req.headers["x-project-name"] = auth.project; // `auth.project` solo se sobrescribe si el token es project-scoped.
// Si es user-wide (auth.project === ""), preservamos el
// `X-Project-Name` que el cliente envio (la extension VS Code
// Acai Forge lo manda con el slug del proyecto descargado).
if (auth.project) {
req.headers["x-project-name"] = auth.project;
}
console.error(
`[MCP middleware] Auth OK user=${auth.user} ` +
`tokenScope=${auth.project || "user-wide"} ` +
`clientProject=${req.headers["x-project-name"] || "(none)"}`,
);
return next(); return next();
} catch (err) { } catch (err) {
console.error("[MCP] mcpSecretMiddleware error:", err.message); console.error("[MCP] mcpSecretMiddleware error:", err.message);
@@ -580,11 +599,42 @@ export function startHttpServer() {
}); });
//============================================================================= //=============================================================================
// OAUTH2 ENDPOINTS // OAUTH2 ENDPOINTS — DESHABILITADOS
//=============================================================================
// El flujo OAuth se diseno a medida (client_secret = nombre de proyecto)
// y no funciona con clientes MCP estandar (Claude Code, etc.) que usan
// PKCE puro. El unico cliente "oficial" es la extension VS Code Acai
// Forge, que NO usa OAuth — autentica con header X-MCP-Secret directo.
//
// Devolver 404 en `.well-known/oauth-authorization-server` hace que los
// clientes que hacen OAuth discovery hagan fallback a header auth, lo
// cual usa X-MCP-Secret (validado en el middleware de las lineas ~197).
// Los handlers `/register`, `/authorize`, `/token` y los helpers `signJwt`
// / `verifyJwt` / `resolveProjectCredentials` se mantienen porque son
// usados internamente por el transport SSE legacy (lineas ~113, ~265).
//============================================================================= //=============================================================================
// OAuth2 Authorization Server Metadata endpoint (per RFC8414) // Rutas OAuth/OIDC discovery deshabilitadas — devuelven 404 JSON limpio
app.get('/.well-known/oauth-authorization-server', (req, res) => { // para que el cliente fallback a header auth (X-MCP-Secret) en vez de
// intentar OAuth flow. Cubrimos ambas paths comunes y sus variantes
// anidadas bajo /mcp/ porque algunos clientes (Claude Code) prueban
// ambas: en la raiz Y bajo el endpoint MCP.
const _disabledOauthPaths = [
'/.well-known/oauth-authorization-server',
'/.well-known/openid-configuration',
'/.well-known/oauth-protected-resource',
'/mcp/.well-known/oauth-authorization-server',
'/mcp/.well-known/openid-configuration',
'/mcp/.well-known/oauth-protected-resource',
];
for (const _p of _disabledOauthPaths) {
app.get(_p, (req, res) => {
res.status(404).json({ error: "OAuth not available; use X-MCP-Secret header" });
});
}
// OAuth2 Authorization Server Metadata endpoint (per RFC8414) — REMOVED
app.get('/.well-known/oauth-authorization-server-DISABLED', (req, res) => {
const baseUrl = `https://${req.headers.host}`; const baseUrl = `https://${req.headers.host}`;
res.json({ res.json({
issuer: baseUrl, issuer: baseUrl,

View File

@@ -9,7 +9,16 @@ from typing import Any, AsyncIterator
@dataclass @dataclass
class StreamChunk: class StreamChunk:
"""A single chunk from a streaming model response.""" """A single chunk from a streaming model response.
Campos legacy (`delta`, `tool_*`, `finish_reason`, `usage`) cubren todo el
flujo OpenAI/Anthropic original. Los `thinking_*` + `block_type`/`block_index`
se anaden para el interleaved thinking de MiniMax M2: el adapter Claude
los emite cuando ve bloques `type=thinking` y los `signature_delta` que el
SDK Anthropic devuelve al cerrar el bloque. El orquestador acumula esos
bloques con su `signature` para reenviarlos en el siguiente turno (sin sig,
MiniMax rechaza el assistant message).
"""
delta: str = "" delta: str = ""
tool_call_id: str = "" tool_call_id: str = ""
@@ -18,6 +27,16 @@ class StreamChunk:
finish_reason: str = "" finish_reason: str = ""
usage: dict[str, int] = field(default_factory=dict) usage: dict[str, int] = field(default_factory=dict)
# Interleaved thinking (MiniMax M2). Default vacios → no-op para callers
# que no los miran (OpenAI adapter, codigo legacy del orquestador).
thinking_delta: str = ""
thinking_signature: str = ""
# block_type ∈ {"text", "thinking", "tool_use", ""} — "" = chunk sin bloque
# asociado (p.ej. solo lleva `usage` o `finish_reason`).
block_type: str = ""
# 0-based, posicion del bloque en el turno. -1 = no aplica.
block_index: int = -1
@dataclass @dataclass
class ModelResponse: class ModelResponse:

View File

@@ -18,18 +18,21 @@ logger = logging.getLogger(__name__)
# Algunos fine-tunes (sobre todo MiniMax) ocasionalmente emiten las tool calls # Algunos fine-tunes (sobre todo MiniMax) ocasionalmente emiten las tool calls
# como texto literal en lugar de usar los `tool_use` blocks nativos: # como texto literal en lugar de usar los `tool_use` blocks nativos. Vistos
# <minimax:tool_call> # tres formatos:
# <invoke name="acai_code__acai_view"> # 1) <minimax:tool_call><invoke name="X"><parameter name="P">V</parameter></invoke></minimax:tool_call>
# <parameter name="file_path">...</parameter> # 2) <invoke name="X"><parameter name="P">V</parameter></invoke> (sin minimax wrapper)
# </invoke> # 3) <tool_call>{"name":"X","parameters":{...}}{"name":"Y","parameters":{...}}</tool_call>
# </minimax:tool_call> # (multiples tool calls JSON-encoded dentro de un solo wrapper)
# #
# Cuando eso pasa el orquestador ve "texto" y la tool nunca se ejecuta — el # Cuando eso pasa el orquestador ve "texto" y la tool nunca se ejecuta — el
# usuario ve el XML crudo en el chat. Detectamos y convertimos a tool_use # usuario ve el markup crudo en el chat. Detectamos y convertimos a tool_use
# sintetico mientras streameamos. Es un parche defensivo: el caso normal # sintetico mientras streameamos. Es un parche defensivo: el caso normal
# (tool_use blocks) sigue por el camino estandar. # (tool_use blocks) sigue por el camino estandar.
_TOOL_CALL_OPEN_RE = re.compile(r"<(?:minimax:tool_call|invoke\s+name)", re.IGNORECASE) _TOOL_CALL_OPEN_RE = re.compile(
r"<(?:minimax:tool_call|invoke\s+name|tool_call\s*>)|\[TOOL_CALL\]",
re.IGNORECASE,
)
_INVOKE_RE = re.compile( _INVOKE_RE = re.compile(
r"<invoke\s+name=\"([^\"]+)\"\s*>(.*?)</invoke>", r"<invoke\s+name=\"([^\"]+)\"\s*>(.*?)</invoke>",
re.IGNORECASE | re.DOTALL, re.IGNORECASE | re.DOTALL,
@@ -38,18 +41,48 @@ _PARAM_RE = re.compile(
r"<parameter\s+name=\"([^\"]+)\"\s*>(.*?)</parameter>", r"<parameter\s+name=\"([^\"]+)\"\s*>(.*?)</parameter>",
re.IGNORECASE | re.DOTALL, re.IGNORECASE | re.DOTALL,
) )
# Formato 3: <tool_call>...JSON...</tool_call>. El cuerpo puede contener uno
# o varios objetos JSON consecutivos (con o sin commas/newlines entre ellos).
_TOOL_CALL_JSON_BLOCK_RE = re.compile(
r"<tool_call\s*>(.*?)</tool_call\s*>",
re.IGNORECASE | re.DOTALL,
)
# Formato 4: [TOOL_CALL]\n{tool => "X", args => {--key "v" --k2 12}}\n[/TOOL_CALL]
# Sintaxis Perl-ish que MiniMax tambien improvisa. Cada bloque puede contener
# uno o varios "{tool => ..., args => {...}}" consecutivos.
_TOOL_CALL_BRACKET_BLOCK_RE = re.compile(
r"\[TOOL_CALL\](.*?)\[/TOOL_CALL\]",
re.DOTALL,
)
_PERL_TOOL_NAME_RE = re.compile(
r"tool\s*=>\s*[\"']([^\"']+)[\"']",
)
_PERL_ARGS_BLOCK_RE = re.compile(
r"args\s*=>\s*\{(.*?)\}\s*\}\s*(?=\{|\[|$)",
re.DOTALL,
)
# Args estilo `--key "value"` o `--key 12` o `--key true`.
_PERL_KV_RE = re.compile(
r"--([a-zA-Z_][a-zA-Z0-9_]*)\s+(\"[^\"]*\"|\'[^\']*\'|-?\d+(?:\.\d+)?|true|false|null)",
)
def _safe_emit_split(buf: str) -> str: def _safe_emit_split(buf: str) -> str:
"""Devuelve el prefijo del buffer que es seguro emitir como texto sin """Devuelve el prefijo del buffer que es seguro emitir como texto sin
perder un posible inicio de tag XML que esta llegando fragmentado. perder un posible inicio de tag de tool_call que esta llegando fragmentado.
Mantenemos en hold los ultimos 30 chars si terminan con `<` o con un Mantenemos en hold los ultimos chars si terminan con `<` o con un prefijo
prefijo parcial de `<minimax:tool_call` / `<invoke`. Si el buffer es parcial de `<minimax:tool_call` / `<invoke` / `<tool_call`. Si el buffer
largo y no termina con `<`, todo es seguro. es largo y no termina con `<`, todo es seguro.
""" """
if not buf: if not buf:
return "" return ""
# Comprobar holdback de `[TOOL_CALL]` (formato Perl-ish).
for marker in ("[TOOL_CALL]", "[TOOL_CALL"):
for k in range(1, len(marker) + 1):
if buf.endswith(marker[:k]):
# Tail puede ser inicio de [TOOL_CALL] — retener desde ahi.
return buf[:-k]
# Buscar el ultimo `<` y comprobar si lo que sigue puede ser apertura. # Buscar el ultimo `<` y comprobar si lo que sigue puede ser apertura.
idx = buf.rfind("<") idx = buf.rfind("<")
if idx == -1: if idx == -1:
@@ -58,8 +91,8 @@ def _safe_emit_split(buf: str) -> str:
# Si el tail ya tiene `>` cerrado, es un tag normal — emitir todo. # Si el tail ya tiene `>` cerrado, es un tag normal — emitir todo.
if ">" in tail: if ">" in tail:
return buf return buf
# Si el tail puede ser inicio de tool_call/invoke, retenerlo. # Si el tail puede ser inicio de tool_call/invoke/tool_call_json, retenerlo.
candidates = ("<minimax:tool_call", "<invoke") candidates = ("<minimax:tool_call", "<invoke", "<tool_call")
for cand in candidates: for cand in candidates:
if cand.startswith(tail.lower()) or tail.lower().startswith(cand[:len(tail)].lower()): if cand.startswith(tail.lower()) or tail.lower().startswith(cand[:len(tail)].lower()):
return buf[:idx] return buf[:idx]
@@ -67,14 +100,47 @@ def _safe_emit_split(buf: str) -> str:
return buf return buf
def _parse_json_objects(text: str) -> list[dict[str, Any]]:
"""Parsea uno o varios objetos JSON consecutivos en `text`. Tolerante a
espacios, newlines y commas entre objetos. Devuelve los que se pudieron
decodificar; salta los malformados."""
objs: list[dict[str, Any]] = []
decoder = json.JSONDecoder()
i = 0
n = len(text)
while i < n:
# Saltar separadores no-JSON
while i < n and text[i] in " \t\r\n,":
i += 1
if i >= n:
break
try:
obj, end = decoder.raw_decode(text, i)
except json.JSONDecodeError:
# Avanzar 1 char y reintentar; defensivo ante markup raro.
i += 1
continue
if isinstance(obj, dict):
objs.append(obj)
i = end
return objs
def _parse_xml_tool_calls(text: str) -> list[dict[str, Any]]: def _parse_xml_tool_calls(text: str) -> list[dict[str, Any]]:
"""Extrae tool calls del texto. Devuelve lista de {id, name, arguments}. """Extrae tool calls del texto. Cubre tres formatos de fine-tunes:
Si no encuentra patrones validos devuelve [].""" - <invoke name="X"><parameter name="P">V</parameter></invoke>
calls = [] - <minimax:tool_call><invoke ...>...</invoke></minimax:tool_call>
- <tool_call>{"name":"X","parameters":{...}}...</tool_call>
Devuelve lista de {id, name, arguments}. Si no encuentra patrones validos
devuelve []."""
calls: list[dict[str, Any]] = []
# Formato 1+2: <invoke name="..."><parameter ...>...</parameter></invoke>
for m in _INVOKE_RE.finditer(text): for m in _INVOKE_RE.finditer(text):
name = m.group(1).strip() name = m.group(1).strip()
body = m.group(2) body = m.group(2)
args = {} args: dict[str, Any] = {}
for p in _PARAM_RE.finditer(body): for p in _PARAM_RE.finditer(body):
args[p.group(1).strip()] = p.group(2).strip() args[p.group(1).strip()] = p.group(2).strip()
if name: if name:
@@ -83,6 +149,69 @@ def _parse_xml_tool_calls(text: str) -> list[dict[str, Any]]:
"name": name, "name": name,
"arguments": args, "arguments": args,
}) })
# Formato 3: <tool_call>{json}...{json}</tool_call>
for m in _TOOL_CALL_JSON_BLOCK_RE.finditer(text):
body = m.group(1)
for obj in _parse_json_objects(body):
name = obj.get("name", "") or ""
# Algunos fine-tunes usan "parameters", otros "arguments", otros "input"
args_val = (
obj.get("parameters")
or obj.get("arguments")
or obj.get("input")
or {}
)
if isinstance(args_val, str):
# Si llega stringificado, intentar parsearlo
try:
args_val = json.loads(args_val)
except (json.JSONDecodeError, TypeError):
args_val = {"_raw": args_val}
if not isinstance(args_val, dict):
args_val = {"_raw": str(args_val)}
if name:
calls.append({
"id": "xml_{}".format(uuid.uuid4().hex[:12]),
"name": str(name),
"arguments": args_val,
})
# Formato 4: [TOOL_CALL]{tool => "X", args => {--k "v" --k2 12}}{...}[/TOOL_CALL]
for m in _TOOL_CALL_BRACKET_BLOCK_RE.finditer(text):
body = m.group(1)
# Extraer pares (name, args_block). Recorremos por nombre y el bloque
# de args lo extraemos por proximidad textual.
names = list(_PERL_TOOL_NAME_RE.finditer(body))
for i, nm in enumerate(names):
name = nm.group(1).strip()
# Cuerpo de args entre la posicion de este nombre y el siguiente
# (o final del bloque).
start = nm.end()
end = names[i + 1].start() if i + 1 < len(names) else len(body)
segment = body[start:end]
args: dict[str, Any] = {}
for kv in _PERL_KV_RE.finditer(segment):
k = kv.group(1)
v = kv.group(2)
if v.startswith('"') or v.startswith("'"):
args[k] = v[1:-1]
elif v in ("true", "false"):
args[k] = (v == "true")
elif v == "null":
args[k] = None
else:
try:
args[k] = int(v) if "." not in v else float(v)
except ValueError:
args[k] = v
if name:
calls.append({
"id": "xml_{}".format(uuid.uuid4().hex[:12]),
"name": name,
"arguments": args,
})
return calls return calls
@@ -113,6 +242,20 @@ class ClaudeAdapter(ModelAdapter):
def __init__(self, api_key: str | None = None, base_url: str | None = None) -> None: def __init__(self, api_key: str | None = None, base_url: str | None = None) -> None:
kwargs: dict[str, Any] = { kwargs: dict[str, Any] = {
"api_key": api_key or settings.anthropic_api_key, "api_key": api_key or settings.anthropic_api_key,
# Timeout granular: el endpoint MiniMax a veces se queda colgado sin
# devolver respuesta ni cerrar la conexion. Sin timeout explicito el
# stream queda pendiente para siempre. 120s total por request es
# generoso (M2 con thinking puede tardar 30-60s en respuestas largas)
# pero acota el peor caso.
"timeout": anthropic.Timeout(
connect=10.0,
read=120.0,
write=30.0,
pool=10.0,
),
# Cero retries internos del SDK — manejamos retries en stream() con
# backoff propio (_RETRY_DELAYS).
"max_retries": 0,
} }
url = base_url or settings.anthropic_base_url url = base_url or settings.anthropic_base_url
if url: if url:
@@ -178,6 +321,16 @@ class ClaudeAdapter(ModelAdapter):
in_xml_capture = False in_xml_capture = False
xml_buffer = "" xml_buffer = ""
# Interleaved thinking (MiniMax M2): el SDK emite un block
# con type=thinking, le siguen thinking_delta y al cerrar
# devuelve un signature criptografico. Trackeamos el indice
# de bloque actual para que el orquestador pueda reconstruir
# el assistant turn en orden.
current_block_index = -1
current_block_type = ""
current_thinking_chars = 0 # solo para log al cerrar
current_thinking_sig_emitted = False
async for event in stream: async for event in stream:
yielded_any = True yielded_any = True
if event.type == "message_start" and hasattr(event, "message"): if event.type == "message_start" and hasattr(event, "message"):
@@ -187,14 +340,30 @@ class ClaudeAdapter(ModelAdapter):
if event.type == "content_block_start": if event.type == "content_block_start":
block = event.content_block block = event.content_block
if block.type == "tool_use": current_block_index += 1
current_block_type = getattr(block, "type", "")
if current_block_type == "tool_use":
current_tool_id = block.id current_tool_id = block.id
current_tool_name = block.name current_tool_name = block.name
accumulated_args = "" accumulated_args = ""
yield StreamChunk( yield StreamChunk(
tool_call_id=current_tool_id, tool_call_id=current_tool_id,
tool_name=current_tool_name, tool_name=current_tool_name,
block_type="tool_use",
block_index=current_block_index,
) )
elif current_block_type == "thinking":
# Reset contadores y emitimos un "header" para
# que el orquestador registre que arranca un
# bloque thinking en este indice.
current_thinking_chars = 0
current_thinking_sig_emitted = False
yield StreamChunk(
block_type="thinking",
block_index=current_block_index,
)
# block_type == "text" no necesita header — los
# text_delta ya llevaran el indice.
continue continue
if event.type == "content_block_delta": if event.type == "content_block_delta":
@@ -211,7 +380,11 @@ class ClaudeAdapter(ModelAdapter):
# legitimo que el modelo escribio antes del XML). # legitimo que el modelo escribio antes del XML).
prev = text_buffer[:m.start()] prev = text_buffer[:m.start()]
if prev: if prev:
yield StreamChunk(delta=prev) yield StreamChunk(
delta=prev,
block_type="text",
block_index=current_block_index,
)
in_xml_capture = True in_xml_capture = True
xml_buffer = text_buffer[m.start():] xml_buffer = text_buffer[m.start():]
text_buffer = "" text_buffer = ""
@@ -221,7 +394,11 @@ class ClaudeAdapter(ModelAdapter):
# esperamos al siguiente delta antes de emitir. # esperamos al siguiente delta antes de emitir.
safe = _safe_emit_split(text_buffer) safe = _safe_emit_split(text_buffer)
if safe: if safe:
yield StreamChunk(delta=safe) yield StreamChunk(
delta=safe,
block_type="text",
block_index=current_block_index,
)
text_buffer = text_buffer[len(safe):] text_buffer = text_buffer[len(safe):]
elif delta.type == "input_json_delta": elif delta.type == "input_json_delta":
accumulated_args += delta.partial_json accumulated_args += delta.partial_json
@@ -229,26 +406,81 @@ class ClaudeAdapter(ModelAdapter):
tool_call_id=current_tool_id, tool_call_id=current_tool_id,
tool_name=current_tool_name, tool_name=current_tool_name,
tool_arguments=delta.partial_json, tool_arguments=delta.partial_json,
block_type="tool_use",
block_index=current_block_index,
) )
elif delta.type == "thinking_delta":
txt = getattr(delta, "thinking", "") or ""
current_thinking_chars += len(txt)
yield StreamChunk(
thinking_delta=txt,
block_type="thinking",
block_index=current_block_index,
)
elif delta.type == "signature_delta":
sig = getattr(delta, "signature", "") or ""
if sig:
current_thinking_sig_emitted = True
yield StreamChunk(
thinking_signature=sig,
block_type="thinking",
block_index=current_block_index,
)
continue continue
if event.type == "content_block_stop": if event.type == "content_block_stop":
# Si el bloque cerrado es thinking y el signature
# no llego como signature_delta, intentar leerlo
# del content_block ya completo (algunos SDK lo
# exponen aqui).
if current_block_type == "thinking":
if not current_thinking_sig_emitted:
cb = getattr(event, "content_block", None)
sig = getattr(cb, "signature", "") if cb else ""
if sig:
yield StreamChunk(
thinking_signature=sig,
block_type="thinking",
block_index=current_block_index,
)
current_thinking_sig_emitted = True
else:
logger.warning(
"Thinking block #%d cerrado sin signature (%d chars). "
"MiniMax rechazara el siguiente turno si lo reenviamos.",
current_block_index, current_thinking_chars,
)
logger.info(
"[adapter] thinking block #%d: %d chars, sig=%s",
current_block_index, current_thinking_chars,
"yes" if current_thinking_sig_emitted else "MISSING",
)
if current_tool_id and accumulated_args: if current_tool_id and accumulated_args:
yield StreamChunk( yield StreamChunk(
tool_call_id=current_tool_id, tool_call_id=current_tool_id,
tool_name=current_tool_name, tool_name=current_tool_name,
tool_arguments=accumulated_args, tool_arguments=accumulated_args,
finish_reason="tool_use", finish_reason="tool_use",
block_type="tool_use",
block_index=current_block_index,
) )
current_tool_id = "" current_tool_id = ""
current_tool_name = "" current_tool_name = ""
accumulated_args = "" accumulated_args = ""
current_block_type = ""
current_thinking_chars = 0
current_thinking_sig_emitted = False
continue continue
if event.type == "message_delta": if event.type == "message_delta":
# Antes de cerrar, vaciar buffers. # Antes de cerrar, vaciar buffers.
if in_xml_capture and xml_buffer: if in_xml_capture and xml_buffer:
# Parsear el XML capturado y emitir tool_use sinteticos. # Parsear el XML capturado y emitir tool_use sinteticos.
# Asignamos block_index sintetico a cada XML tool call
# para que el orquestador pueda registrarlo en
# turn_blocks_by_index. Si no, el assistant message
# iria sin el tool_use pero el tool_result sí lo
# referenciaria → MiniMax devuelve 400.
calls = _parse_xml_tool_calls(xml_buffer) calls = _parse_xml_tool_calls(xml_buffer)
if calls: if calls:
logger.info( logger.info(
@@ -256,24 +488,38 @@ class ClaudeAdapter(ModelAdapter):
len(calls), len(calls),
) )
for c in calls: for c in calls:
current_block_index += 1
synthetic_idx = current_block_index
yield StreamChunk( yield StreamChunk(
tool_call_id=c["id"], tool_call_id=c["id"],
tool_name=c["name"], tool_name=c["name"],
block_type="tool_use",
block_index=synthetic_idx,
) )
yield StreamChunk( yield StreamChunk(
tool_call_id=c["id"], tool_call_id=c["id"],
tool_name=c["name"], tool_name=c["name"],
tool_arguments=json.dumps(c["arguments"]), tool_arguments=json.dumps(c["arguments"]),
finish_reason="tool_use", finish_reason="tool_use",
block_type="tool_use",
block_index=synthetic_idx,
) )
else: else:
# No se pudo parsear — devolver al usuario el # No se pudo parsear — devolver al usuario el
# texto crudo para no perderlo silenciosamente. # texto crudo para no perderlo silenciosamente.
yield StreamChunk(delta=xml_buffer) yield StreamChunk(
delta=xml_buffer,
block_type="text",
block_index=current_block_index,
)
xml_buffer = "" xml_buffer = ""
in_xml_capture = False in_xml_capture = False
elif text_buffer: elif text_buffer:
yield StreamChunk(delta=text_buffer) yield StreamChunk(
delta=text_buffer,
block_type="text",
block_index=current_block_index,
)
text_buffer = "" text_buffer = ""
output_tokens = getattr(event.usage, "output_tokens", 0) if event.usage else 0 output_tokens = getattr(event.usage, "output_tokens", 0) if event.usage else 0
# Si convertimos XML a tool_use, override el stop_reason. # Si convertimos XML a tool_use, override el stop_reason.
@@ -404,11 +650,33 @@ class ClaudeAdapter(ModelAdapter):
- role=tool → role=user with tool_result content blocks - role=tool → role=user with tool_result content blocks
- assistant with tool_calls → assistant with tool_use content blocks - assistant with tool_calls → assistant with tool_use content blocks
- Consecutive same-role messages get merged (Claude requires alternating) - Consecutive same-role messages get merged (Claude requires alternating)
- Fast-path: si content ya viene como list (Anthropic-style nativo, p.ej.
messages emitidos por BaseAgent con interleaved thinking de M2), pasa
tal cual y solo hace merge con el anterior si toca.
""" """
converted: list[dict[str, Any]] = [] converted: list[dict[str, Any]] = []
for m in messages: for m in messages:
role = m.get("role", "") role = m.get("role", "")
content = m.get("content")
# Fast-path Anthropic-style: content ya es lista de blocks.
if isinstance(content, list) and role in ("user", "assistant"):
if converted and converted[-1]["role"] == role:
prev = converted[-1]["content"]
if isinstance(prev, list):
prev.extend(content)
elif isinstance(prev, str):
merged: list[dict[str, Any]] = []
if prev:
merged.append({"type": "text", "text": prev})
merged.extend(content)
converted[-1]["content"] = merged
else:
converted[-1]["content"] = list(content)
else:
converted.append({"role": role, "content": list(content)})
continue
if role == "tool": if role == "tool":
# Convert to user message with tool_result block # Convert to user message with tool_result block

View File

@@ -187,19 +187,55 @@ class ContextCompactor:
(i for i, m in enumerate(compacted) if m.get("role") == "user"), (i for i, m in enumerate(compacted) if m.get("role") == "user"),
default=-1, default=-1,
) )
# Tool messages legacy (role=tool) y nuevos (role=user con tool_result blocks)
tool_indexes = [i for i, m in enumerate(compacted) if m.get("role") == "tool"] tool_indexes = [i for i, m in enumerate(compacted) if m.get("role") == "tool"]
# Indices de user messages que contienen tool_result blocks (Anthropic-style)
user_tool_result_indexes = [
i for i, m in enumerate(compacted)
if m.get("role") == "user"
and isinstance(m.get("content"), list)
and any(
isinstance(b, dict) and b.get("type") == "tool_result"
for b in m["content"]
)
]
# Combinamos para aplicar la misma politica de "preservar los ultimos N raw"
all_tool_carriers = tool_indexes + user_tool_result_indexes
all_tool_carriers.sort()
keep_raw_tool_indexes = ( keep_raw_tool_indexes = (
set(tool_indexes[-recent_raw_limit:]) set(all_tool_carriers[-recent_raw_limit:])
if recent_raw_limit > 0 if recent_raw_limit > 0
else set() else set()
) )
def _truncate_tool_result_blocks(msg: dict[str, Any], char_limit: int) -> bool:
"""Trunca el campo `content` de los tool_result blocks de un user
message con content list. Devuelve True si modifico algo."""
modified = False
content = msg.get("content")
if not isinstance(content, list):
return False
for block in content:
if not isinstance(block, dict) or block.get("type") != "tool_result":
continue
bc = block.get("content", "")
if isinstance(bc, str) and len(bc) > char_limit:
block["content"] = bc[:char_limit]
modified = True
return modified
for idx in keep_raw_tool_indexes: for idx in keep_raw_tool_indexes:
content = compacted[idx].get("content", "") msg = compacted[idx]
content = msg.get("content", "")
if isinstance(content, str) and content: if isinstance(content, str) and content:
truncated = content[:raw_char_limit] truncated = content[:raw_char_limit]
if truncated != content: if truncated != content:
compacted[idx]["content"] = truncated msg["content"] = truncated
meta["messages_compacted"] += 1
meta["tool_messages_compacted"] += 1
meta["raw_tool_results_kept"] += 1
elif isinstance(content, list):
if _truncate_tool_result_blocks(msg, raw_char_limit):
meta["messages_compacted"] += 1 meta["messages_compacted"] += 1
meta["tool_messages_compacted"] += 1 meta["tool_messages_compacted"] += 1
meta["raw_tool_results_kept"] += 1 meta["raw_tool_results_kept"] += 1
@@ -271,20 +307,49 @@ class ContextCompactor:
if total <= max_tokens: if total <= max_tokens:
break break
# Last-resort: drop thinking blocks (M2 interleaved) de assistant
# messages que NO sean los 2 ultimos turnos. Ahorra muchisimo sin
# perder utilidad — los thinking de turnos lejanos ya cumplieron.
if total > max_tokens:
assistant_indexes = [
i for i, m in enumerate(compacted)
if m.get("role") == "assistant" and isinstance(m.get("content"), list)
]
# Conservar los thinking de los ultimos 2 assistants; descartar el resto.
droppable = assistant_indexes[:-2] if len(assistant_indexes) > 2 else []
for idx in droppable:
content = compacted[idx]["content"]
new_content = [b for b in content if not (isinstance(b, dict) and b.get("type") == "thinking")]
if len(new_content) != len(content):
compacted[idx]["content"] = new_content
meta["messages_compacted"] += 1
meta["assistant_messages_compacted"] += 1
total = sum(self._estimate_message_tokens(m) for m in compacted)
if total <= max_tokens:
break
if total > max_tokens: if total > max_tokens:
for idx, message in enumerate(compacted): for idx, message in enumerate(compacted):
if idx == last_user_idx: if idx == last_user_idx:
continue continue
role = message.get("role", "") role = message.get("role", "")
content = message.get("content", "") content = message.get("content", "")
if not isinstance(content, str) or not content: if isinstance(content, str) and content:
if role == "tool":
message["content"] = "[TOOL RESULT COMPACTADO]"
elif role == "assistant":
message["content"] = "[ASSISTANT COMPACTADO]"
elif role == "user":
message["content"] = "[USER CONTEXT COMPACTADO]"
elif isinstance(content, list) and content:
# Anthropic-style: reemplazar lista entera por placeholder string.
# Nota: pierde tool_use ids — solo aplicar al final como ultimo recurso.
if role == "assistant":
message["content"] = "[ASSISTANT COMPACTADO]"
elif role == "user":
message["content"] = "[USER CONTEXT COMPACTADO]"
else:
continue continue
if role == "tool":
message["content"] = "[TOOL RESULT COMPACTADO]"
elif role == "assistant":
message["content"] = "[ASSISTANT COMPACTADO]"
elif role == "user":
message["content"] = "[USER CONTEXT COMPACTADO]"
total = sum(self._estimate_message_tokens(m) for m in compacted) total = sum(self._estimate_message_tokens(m) for m in compacted)
if total <= max_tokens: if total <= max_tokens:
break break
@@ -575,7 +640,30 @@ class ContextCompactor:
@staticmethod @staticmethod
def _estimate_message_tokens(message: dict[str, Any]) -> int: def _estimate_message_tokens(message: dict[str, Any]) -> int:
content = message.get("content", "") content = message.get("content", "")
tokens = estimate_tokens(content if isinstance(content, str) else str(content)) if isinstance(content, str):
tokens = estimate_tokens(content)
elif isinstance(content, list):
# Anthropic-style content blocks (interleaved thinking M2).
tokens = 0
for block in content:
if not isinstance(block, dict):
tokens += estimate_tokens(str(block))
continue
btype = block.get("type", "")
if btype == "text":
tokens += estimate_tokens(block.get("text", ""))
elif btype == "thinking":
tokens += estimate_tokens(block.get("thinking", ""))
elif btype == "tool_use":
tokens += estimate_tokens(block.get("name", ""))
tokens += estimate_tokens(str(block.get("input", "")))
elif btype == "tool_result":
tc = block.get("content", "")
tokens += estimate_tokens(tc if isinstance(tc, str) else str(tc))
else:
tokens += estimate_tokens(str(block))
else:
tokens = estimate_tokens(str(content))
if message.get("tool_calls"): if message.get("tool_calls"):
tokens += estimate_tokens(json.dumps(message.get("tool_calls", []), ensure_ascii=False)) tokens += estimate_tokens(json.dumps(message.get("tool_calls", []), ensure_ascii=False))
return tokens return tokens

View File

@@ -825,7 +825,11 @@ class ContextEngine:
sanitized: dict[str, Any] = {"role": role} sanitized: dict[str, Any] = {"role": role}
content = message.get("content", "") content = message.get("content", "")
if isinstance(content, str) and content: # Anthropic-style content list (blocks: thinking/text/tool_use/tool_result)
# se preserva tal cual — necesario para interleaved thinking de M2.
if isinstance(content, list) and content:
sanitized["content"] = content
elif isinstance(content, str) and content:
sanitized["content"] = content sanitized["content"] = content
if role == "assistant": if role == "assistant":
@@ -848,6 +852,28 @@ class ContextEngine:
content = message.get("content", "") content = message.get("content", "")
if isinstance(content, str): if isinstance(content, str):
return estimate_tokens(content) return estimate_tokens(content)
if isinstance(content, list):
# Sumar tokens de cada bloque por su campo correspondiente.
total = 0
for block in content:
if not isinstance(block, dict):
total += estimate_tokens(str(block))
continue
btype = block.get("type", "")
if btype == "text":
total += estimate_tokens(block.get("text", ""))
elif btype == "thinking":
total += estimate_tokens(block.get("thinking", ""))
# signature es opaque — no cuenta tokens significativos
elif btype == "tool_use":
total += estimate_tokens(block.get("name", ""))
total += estimate_tokens(str(block.get("input", "")))
elif btype == "tool_result":
tc = block.get("content", "")
total += estimate_tokens(tc if isinstance(tc, str) else str(tc))
else:
total += estimate_tokens(str(block))
return total
return estimate_tokens(str(content)) return estimate_tokens(str(content))
@staticmethod @staticmethod

View File

@@ -89,6 +89,13 @@ class BaseAgent:
full_text = "" full_text = ""
tool_calls: list[dict[str, Any]] = [] tool_calls: list[dict[str, Any]] = []
active_tools: dict[str, dict[str, Any]] = {} active_tools: dict[str, dict[str, Any]] = {}
# Acumuladores Anthropic-style por turno (interleaved thinking M2).
# Por cada block_index guardamos un dict block parcial. Al cerrar el
# turno, lo serializamos en orden.
turn_blocks_by_index: dict[int, dict[str, Any]] = {}
# Cuando text_delta llega sin block_index (p.ej. via OpenAI adapter
# legacy), asignamos un sintetico para no perder el texto.
synthetic_text_idx = 10_000
async for chunk in self.model.stream( async for chunk in self.model.stream(
messages=ctx.to_messages(), messages=ctx.to_messages(),
@@ -97,6 +104,16 @@ class BaseAgent:
): ):
if chunk.delta: if chunk.delta:
full_text += chunk.delta full_text += chunk.delta
# Acumular por block_index para reconstruir blocks.
idx = chunk.block_index
if idx < 0:
idx = synthetic_text_idx
blk = turn_blocks_by_index.get(idx)
if blk is None:
blk = {"type": "text", "text": ""}
turn_blocks_by_index[idx] = blk
if blk.get("type") == "text":
blk["text"] = blk.get("text", "") + chunk.delta
if self.profile.stream_deltas: if self.profile.stream_deltas:
await self.sse.emit( await self.sse.emit(
EventType.AGENT_DELTA, EventType.AGENT_DELTA,
@@ -108,12 +125,31 @@ class BaseAgent:
session_id=session.session_id, session_id=session.session_id,
) )
# Thinking deltas (MiniMax M2 interleaved). El adapter ya viene
# con block_index correcto; solo acumulamos.
if chunk.thinking_delta and chunk.block_index >= 0:
blk = turn_blocks_by_index.get(chunk.block_index)
if blk is None:
blk = {"type": "thinking", "thinking": "", "signature": ""}
turn_blocks_by_index[chunk.block_index] = blk
if blk.get("type") == "thinking":
blk["thinking"] = blk.get("thinking", "") + chunk.thinking_delta
if chunk.thinking_signature and chunk.block_index >= 0:
blk = turn_blocks_by_index.get(chunk.block_index)
if blk is None:
blk = {"type": "thinking", "thinking": "", "signature": ""}
turn_blocks_by_index[chunk.block_index] = blk
if blk.get("type") == "thinking":
blk["signature"] = chunk.thinking_signature
if chunk.tool_name and chunk.tool_call_id: if chunk.tool_name and chunk.tool_call_id:
if chunk.tool_call_id not in active_tools: if chunk.tool_call_id not in active_tools:
active_tools[chunk.tool_call_id] = { active_tools[chunk.tool_call_id] = {
"id": chunk.tool_call_id, "id": chunk.tool_call_id,
"name": chunk.tool_name, "name": chunk.tool_name,
"arguments": "", "arguments": "",
"block_index": chunk.block_index,
} }
await self.sse.emit( await self.sse.emit(
EventType.TOOL_STARTED, EventType.TOOL_STARTED,
@@ -144,6 +180,7 @@ class BaseAgent:
"id": chunk.tool_call_id, "id": chunk.tool_call_id,
"name": chunk.tool_name or "", "name": chunk.tool_name or "",
"arguments": "", "arguments": "",
"block_index": chunk.block_index,
} }
final_args = tool["arguments"] or chunk.tool_arguments or "" final_args = tool["arguments"] or chunk.tool_arguments or ""
try: try:
@@ -168,6 +205,16 @@ class BaseAgent:
tool["parsed_arguments"] = args tool["parsed_arguments"] = args
tool_calls.append(tool) tool_calls.append(tool)
# Registrar tool_use block en su posicion del turno.
bidx = tool.get("block_index", -1)
if bidx >= 0:
turn_blocks_by_index[bidx] = {
"type": "tool_use",
"id": tool["id"],
"name": tool["name"],
"input": args,
}
# Accumulate token usage from any chunk that has it # Accumulate token usage from any chunk that has it
if chunk.usage: if chunk.usage:
total_input_tokens += chunk.usage.get("input_tokens", 0) total_input_tokens += chunk.usage.get("input_tokens", 0)
@@ -178,39 +225,80 @@ class BaseAgent:
accumulated_content += full_text accumulated_content += full_text
# Materializar blocks del turno en orden por block_index.
# Filtra thinking blocks sin signature: MiniMax los rechazaria al
# reenviarlos. Mejor descartar el thinking entero que mandar uno
# corrupto y ver un 400.
turn_blocks: list[dict[str, Any]] = []
for idx in sorted(turn_blocks_by_index.keys()):
b = turn_blocks_by_index[idx]
if b.get("type") == "thinking":
if not b.get("signature"):
logger.warning(
"Drop thinking block at idx=%d (no signature) — chars=%d",
idx, len(b.get("thinking", "")),
)
continue
# Limpiar texto vacio defensivo.
if not b.get("thinking"):
continue
turn_blocks.append(b)
# Backstop: garantizar que CADA tool_call tenga su tool_use block
# en turn_blocks. Si no lo tiene (chunks sin block_index, adapter
# legacy, etc.), apendearlo al final. Sin esto, MiniMax devuelve
# 400 ("tool result's tool id not found") en el siguiente request.
tool_use_ids_in_blocks = {
b.get("id") for b in turn_blocks
if b.get("type") == "tool_use" and b.get("id")
}
for tc in tool_calls:
if tc["id"] not in tool_use_ids_in_blocks:
turn_blocks.append({
"type": "tool_use",
"id": tc["id"],
"name": tc["name"],
"input": tc.get("parsed_arguments", {}),
})
tool_use_ids_in_blocks.add(tc["id"])
# If no tool calls, we're done # If no tool calls, we're done
if not tool_calls: if not tool_calls:
# Add final assistant message to conversation if turn_blocks:
if full_text: conversation.append({"role": "assistant", "content": turn_blocks})
elif full_text:
# Fallback (no debiera ocurrir si el adapter emite block_index).
conversation.append({"role": "assistant", "content": full_text}) conversation.append({"role": "assistant", "content": full_text})
break break
# Add assistant message with tool calls to conversation # Push del assistant turn con TODOS los blocks (thinking+text+tool_use).
# (OpenAI format: assistant message carries tool_calls) # Esto preserva la cadena de razonamiento de M2 entre turnos.
assistant_msg: dict[str, Any] = {"role": "assistant"} if turn_blocks:
if full_text: conversation.append({"role": "assistant", "content": turn_blocks})
assistant_msg["content"] = full_text else:
assistant_msg["tool_calls"] = [ # Fallback OpenAI-style si no hay blocks (modelo legacy o sin
{ # block_index). Mantenemos compat con OpenAIAdapter / cualquier
"id": tc["id"], # adapter que no propague block_index.
"type": "function", assistant_msg: dict[str, Any] = {"role": "assistant"}
"function": { if full_text:
"name": tc["name"], assistant_msg["content"] = full_text
"arguments": json.dumps(tc.get("parsed_arguments", {})), assistant_msg["tool_calls"] = [
}, {
} "id": tc["id"],
for tc in tool_calls "type": "function",
] "function": {
conversation.append(assistant_msg) "name": tc["name"],
"arguments": json.dumps(tc.get("parsed_arguments", {})),
},
}
for tc in tool_calls
]
conversation.append(assistant_msg)
# Execute tool calls and add COMPLETE results to conversation. # Execute tool calls. Los results se agrupan en UN solo user message
# Antes habia dos capas anti-duplicado: (a) cachear resultado y # con array de tool_result blocks (formato Anthropic). Anteriormente
# devolver "[DUPLICADO]" en lugar de re-ejecutar y (b) cortar el # se hacian N appends `{"role":"tool",...}` en formato OpenAI.
# step si TODAS las llamadas del paso eran duplicadas. Las quitamos tool_result_blocks: list[dict[str, Any]] = []
# porque en conversaciones largas el agente puede LEGITIMAMENTE
# repetir una llamada (p.ej. re-leer un fichero tras editarlo) y
# las heuristicas bloqueaban acciones validas. El usuario prefiere
# libertad — runaway loops se mitigan con limit de steps externo.
for tc in tool_calls: for tc in tool_calls:
# Si los args no se pudieron parsear (p.ej. truncados por max_tokens), # Si los args no se pudieron parsear (p.ej. truncados por max_tokens),
# NO ejecutamos la tool. En su lugar devolvemos un mensaje al modelo # NO ejecutamos la tool. En su lugar devolvemos un mensaje al modelo
@@ -218,9 +306,9 @@ class BaseAgent:
# (dividir el contenido, acortar, etc.). # (dividir el contenido, acortar, etc.).
if tc.get("parse_error"): if tc.get("parse_error"):
pe = tc["parse_error"] pe = tc["parse_error"]
conversation.append({ tool_result_blocks.append({
"role": "tool", "type": "tool_result",
"tool_call_id": tc["id"], "tool_use_id": tc["id"],
"content": ( "content": (
f"[ERROR] No se pudieron parsear los argumentos del tool " f"[ERROR] No se pudieron parsear los argumentos del tool "
f"'{tc['name']}'. Los argumentos llegaron truncados o mal " f"'{tc['name']}'. Los argumentos llegaron truncados o mal "
@@ -230,6 +318,7 @@ class BaseAgent:
f"Reintenta dividiendo el contenido en varios tool calls mas " f"Reintenta dividiendo el contenido en varios tool calls mas "
f"pequenos o reduciendo el tamano del argumento 'content'." f"pequenos o reduciendo el tamano del argumento 'content'."
), ),
"is_error": True,
}) })
continue continue
@@ -242,10 +331,9 @@ class BaseAgent:
) )
tool_executions.append(tool_exec) tool_executions.append(tool_exec)
# COMPLETE result in conversation (truncated to safe limit) tool_result_blocks.append({
conversation.append({ "type": "tool_result",
"role": "tool", "tool_use_id": tc["id"],
"tool_call_id": tc["id"],
"content": ( "content": (
tool_exec.raw_output[:settings.tool_raw_output_max_chars] tool_exec.raw_output[:settings.tool_raw_output_max_chars]
if tool_exec.raw_output if tool_exec.raw_output
@@ -253,6 +341,9 @@ class BaseAgent:
), ),
}) })
if tool_result_blocks:
conversation.append({"role": "user", "content": tool_result_blocks})
return { return {
"content": accumulated_content, "content": accumulated_content,
"artifacts": artifacts, "artifacts": artifacts,
@@ -285,9 +376,20 @@ class BaseAgent:
start = time.monotonic() start = time.monotonic()
try: try:
if self.mcp.is_running and tool_name in self.mcp.tools: if self.mcp.is_running:
result = await self.mcp.call_tool(tool_name, arguments) # Intentar llamada directa: call_tool tiene fallback bare-name
raw_output = self._extract_mcp_output(result) # via _resolve_tool, asi que aunque venga sin prefijo
# `acai_code__` (caso comun cuando el modelo emite XML inline)
# se resuelve solo. El check `tool_name in self.mcp.tools` que
# haciamos antes era demasiado estricto y rechazaba bare names.
try:
result = await self.mcp.call_tool(tool_name, arguments)
raw_output = self._extract_mcp_output(result)
except Exception as resolve_err:
raw_output = (
f"Tool '{tool_name}' no disponible o fallo al resolver: "
f"{str(resolve_err)[:200]}"
)
else: else:
raw_output = f"Tool '{tool_name}' not available via MCP." raw_output = f"Tool '{tool_name}' not available via MCP."

View File

@@ -253,7 +253,10 @@ class OrchestratorEngine:
sanitized: dict[str, Any] = {"role": role} sanitized: dict[str, Any] = {"role": role}
content = message.get("content", "") content = message.get("content", "")
if isinstance(content, str) and content: # Anthropic-style content list (interleaved thinking) → preservar tal cual.
if isinstance(content, list) and content:
sanitized["content"] = content
elif isinstance(content, str) and content:
sanitized["content"] = content sanitized["content"] = content
if role == "assistant": if role == "assistant":