This commit is contained in:
Jordan Diaz
2026-05-08 21:31:28 +00:00
parent 0dabba5442
commit 44cb956f95
37 changed files with 2120 additions and 251 deletions

View File

@@ -19,6 +19,9 @@ from ...models.artifacts import ArtifactSummary
from ...models.session import SessionState
from ...models.tools import ToolExecution, ToolExecutionStatus
from ...streaming.sse import SSEEmitter, EventType
from ..planner import run_planner_subloop
from ..plan_judge import judge_plan_progress
from ..tool_groups import is_plan_internal_tool, strip_namespace
logger = logging.getLogger(__name__)
@@ -64,6 +67,10 @@ class BaseAgent:
total_output_tokens = 0
# Real conversation history: assistant messages + tool results
conversation: list[dict[str, Any]] = []
# Expuesta para que las tools internas (acai_plan) puedan resumir
# el thinking acumulado del agente principal sin que tengamos que
# pasarlo explicitamente por cada llamada a `_execute_tool`.
self._current_conversation = conversation
for step in range(max_steps):
# Build context with real conversation
@@ -86,6 +93,11 @@ class BaseAgent:
temperature=self.profile.temperature or 0.3,
)
# Snapshot del numero de tool_executions ya acumulados ANTES del
# step. El judge solo necesita las del step actual; el slice
# `tool_executions[exec_offset:]` da exactamente ese delta.
exec_offset = len(tool_executions)
full_text = ""
tool_calls: list[dict[str, Any]] = []
active_tools: dict[str, dict[str, Any]] = {}
@@ -269,6 +281,18 @@ class BaseAgent:
elif full_text:
# Fallback (no debiera ocurrir si el adapter emite block_index).
conversation.append({"role": "assistant", "content": full_text})
# The agent finished without further tool calls. The judge is not called
# (there are no tools to evaluate). With `no_tool_calls_this_step=True`
# the plan is closed as done only when every step was already completed;
# pending steps are left pending and the plan stays active.
try:
await self._auto_advance_plan_cursor(
session,
[],
no_tool_calls_this_step=True,
)
except Exception as e:
logger.warning("[plan-advance] failed at end_turn: %s", e)
break
# Push del assistant turn con TODOS los blocks (thinking+text+tool_use).
@@ -344,6 +368,17 @@ class BaseAgent:
if tool_result_blocks:
conversation.append({"role": "user", "content": tool_result_blocks})
# Auto-avance del cursor del plan TRAS CADA STEP INTERNO (no solo
# al final del turno). Asi el frontend ve los `✓` aparecer en vivo
# conforme el agente ejecuta tools, no de golpe al final.
try:
await self._auto_advance_plan_cursor(
session,
tool_executions[exec_offset:],
)
except Exception as e:
logger.warning("Auto-advance plan cursor failed: %s", e)
return {
"content": accumulated_content,
"artifacts": artifacts,
@@ -374,6 +409,20 @@ class BaseAgent:
logger.info("Tool call: %s(%s)", tool_name, json.dumps(arguments)[:200])
# Intercepcion: tools internas del orquestador (Fase 5: acai_plan).
# No atraviesan MCP — se ejecutan en Python directamente.
if is_plan_internal_tool(tool_name):
raw_name = strip_namespace(tool_name)
await self.sse.emit(
EventType.TOOL_STARTED,
{"tool": raw_name, "tool_call_id": tool_call_id},
session_id=session.session_id,
)
if raw_name == "acai_plan":
return await self._execute_acai_plan(session, arguments, tool_call_id, tool_exec)
if raw_name == "acai_plan_advance":
return await self._execute_acai_plan_advance(session, arguments, tool_call_id, tool_exec)
start = time.monotonic()
try:
if self.mcp.is_running:
@@ -439,25 +488,554 @@ class BaseAgent:
return tool_exec
# ---- Tools internas del orquestador (Fase 5) -----------------------------
@staticmethod
def _summarize_parent_thinking(conversation: list[dict[str, Any]], max_chars: int = 1200) -> str:
"""Resumen del thinking acumulado del agente principal hasta este turno.
Recorre los assistants Anthropic-style con content blocks `type=thinking`,
junta los textos y trunca a `max_chars`. Se usa para pasar contexto
comprimido al planner sub-loop sin contaminarlo con el thinking entero.
"""
chunks: list[str] = []
total = 0
for msg in reversed(conversation):
if msg.get("role") != "assistant":
continue
content = msg.get("content")
if not isinstance(content, list):
continue
for block in content:
if isinstance(block, dict) and block.get("type") == "thinking":
txt = block.get("thinking", "") or ""
if not txt:
continue
chunks.append(txt)
total += len(txt)
if total >= max_chars:
break
if total >= max_chars:
break
# Concatenamos del mas viejo al mas reciente para mantener orden logico.
joined = "\n---\n".join(reversed(chunks))
if len(joined) > max_chars:
joined = "[...] " + joined[-max_chars:]
return joined
async def _execute_acai_plan(
    self,
    session: SessionState,
    arguments: dict[str, Any],
    tool_call_id: str,
    tool_exec: ToolExecution,
) -> ToolExecution:
    """Run the synthetic `acai_plan` tool.

    Calls `run_planner_subloop` and, when it yields a plan, stores it in
    `session.metadata["current_plan"]` with `cursor=0`, an empty
    `completed_step_ids`, `status="active"` and a `created_at` timestamp.
    A previously active plan is marked `superseded` and appended to
    `session.metadata["plan_history"]`.

    Every exit path fills `tool_exec` and emits `TOOL_COMPLETED`; a
    successful run additionally emits `PLAN_CREATED` for the plan UI.

    NOTE(review): the original docstring states the sub-loop runs with the
    `system.planner.md` prompt and read-only tools; that is decided inside
    `run_planner_subloop` and cannot be confirmed from this method.

    Args:
        session: Session whose `metadata` holds the plan state.
        arguments: Tool arguments; `objective` is required, `scope` is
            optional.
        tool_call_id: Id echoed back in the emitted events.
        tool_exec: Execution record to fill in.

    Returns:
        The same `tool_exec`, with status, summary and raw output set.
    """
    # Per-turn cap: at most 2 invocations. After that the model must
    # execute directly or abandon the plan.
    count = int(session.metadata.get("plan_call_count_in_turn") or 0)
    if count >= 2:
        tool_exec.status = ToolExecutionStatus.COMPLETED
        tool_exec.result_summary = (
            "Ya invocaste acai_plan dos veces este turno. "
            "Ejecuta directo o usa acai_plan_advance({abandon:true}) para resetear."
        )
        tool_exec.raw_output = json.dumps({"error": "max_plan_calls_per_turn"})
        await self.sse.emit(
            EventType.TOOL_COMPLETED,
            {"tool": "acai_plan", "status": "completed", "summary": tool_exec.result_summary, "tool_call_id": tool_call_id},
            session_id=session.session_id,
        )
        return tool_exec
    session.metadata["plan_call_count_in_turn"] = count + 1

    objective = str(arguments.get("objective") or "").strip()
    scope = str(arguments.get("scope") or "").strip()
    if not objective:
        tool_exec.status = ToolExecutionStatus.FAILED
        tool_exec.error = "Falta el campo 'objective'"
        tool_exec.result_summary = "acai_plan FALLO: falta objective."
        tool_exec.raw_output = json.dumps({"error": "missing_objective"})
        await self.sse.emit(
            EventType.TOOL_COMPLETED,
            {"tool": "acai_plan", "status": "failed", "error": tool_exec.error, "tool_call_id": tool_call_id},
            session_id=session.session_id,
        )
        return tool_exec

    # Summary of the thinking accumulated so far in the current turn.
    # `self._current_conversation` is read defensively with getattr so a
    # missing attribute degrades to "no summary" instead of failing.
    parent_summary = self._summarize_parent_thinking(
        getattr(self, "_current_conversation", []) or [],
    )

    start = time.monotonic()
    try:
        result = await run_planner_subloop(
            objective=objective,
            scope=scope,
            agent_profile=self.profile,
            model_adapter=self.model,
            mcp=self.mcp,
            parent_thinking_summary=parent_summary,
        )
    except Exception as e:
        # Tool boundary: report the failure to the model instead of
        # aborting the turn. `exception` keeps the traceback in the logs.
        logger.exception("Planner sub-loop crashed: %s", e)
        tool_exec.status = ToolExecutionStatus.FAILED
        tool_exec.error = str(e)
        tool_exec.duration_ms = (time.monotonic() - start) * 1000
        tool_exec.result_summary = f"acai_plan FALLO: {str(e)[:200]}"
        tool_exec.raw_output = json.dumps({"error": str(e)[:500]})
        await self.sse.emit(
            EventType.TOOL_COMPLETED,
            {"tool": "acai_plan", "status": "failed", "error": str(e), "tool_call_id": tool_call_id},
            session_id=session.session_id,
        )
        return tool_exec

    tool_exec.duration_ms = (time.monotonic() - start) * 1000

    if not result.plan:
        err = result.error or "Plan vacio"
        logger.warning(
            "[acai_plan] Plan FAILED: %s (raw_preview=%r)",
            err, (result.raw_text or "")[:200],
        )
        tool_exec.status = ToolExecutionStatus.FAILED
        tool_exec.error = err
        tool_exec.result_summary = (
            f"acai_plan FALLO: {err}. Procede en modo directo o reintenta con scope distinto."
        )
        tool_exec.raw_output = json.dumps({
            "error": err,
            "raw_text_preview": (result.raw_text or "")[:500],
        })
        await self.sse.emit(
            EventType.TOOL_COMPLETED,
            {"tool": "acai_plan", "status": "failed", "error": err, "tool_call_id": tool_call_id},
            session_id=session.session_id,
        )
        return tool_exec

    # Valid plan: persist it in metadata. A previously active plan is
    # moved to the history as `superseded`.
    old_plan = session.metadata.get("current_plan")
    if old_plan and old_plan.get("status") == "active":
        old_plan["status"] = "superseded"
        session.metadata.setdefault("plan_history", []).append(old_plan)

    plan = dict(result.plan)
    plan["cursor"] = 0
    plan["completed_step_ids"] = []
    plan["status"] = "active"
    plan["created_at"] = int(time.time())
    session.metadata["current_plan"] = plan

    # The plan is model-generated, so fields may be missing or None. From
    # here on nothing may raise: the plan is already persisted and the
    # completion events below still have to go out.
    steps = [s for s in (plan.get("steps") or []) if isinstance(s, dict)]
    risks = plan.get("risks") or []
    next_desc = str(steps[0].get("description") or "") if steps else "(plan vacio)"
    n_steps = len(steps)
    n_risks = len(risks)
    tool_exec.status = ToolExecutionStatus.COMPLETED
    tool_exec.result_summary = (
        f"Plan generado: {n_steps} step(s), {n_risks} risk(s). "
        f"Proximo: step 1 — {next_desc[:200]}"
    )
    logger.info(
        "[acai_plan] Plan persisted: %d steps, %d risks, objective=%r",
        n_steps, n_risks, objective[:120],
    )

    # raw_output for the model: the plan's full JSON, cut at 4000 chars
    # (a cut payload is no longer valid JSON; the marker makes that explicit).
    plan_json = json.dumps(plan, ensure_ascii=False)
    if len(plan_json) > 4000:
        tool_exec.raw_output = plan_json[:4000] + "\n[...truncated]"
    else:
        tool_exec.raw_output = plan_json

    await self.sse.emit(
        EventType.TOOL_COMPLETED,
        {
            "tool": "acai_plan",
            "status": "completed",
            "summary": tool_exec.result_summary[:200],
            "raw_output": tool_exec.raw_output[:4000],
            "tool_call_id": tool_call_id,
        },
        session_id=session.session_id,
    )
    # Plan stepper UI: tell the frontend there is a new active plan.
    await self.sse.emit(
        EventType.PLAN_CREATED,
        {
            "objective": plan.get("objective", ""),
            "steps": [
                {
                    "id": s.get("id"),
                    "description": str(s.get("description") or "")[:300],
                    "agent_action": str(s.get("agent_action") or "")[:200],
                    "files_touched": (s.get("files_touched") or [])[:10],
                    "tables_touched": (s.get("tables_touched") or [])[:10],
                }
                for s in steps
            ],
            "risks": risks[:10],
            "cursor": plan.get("cursor", 0),
            "completed_step_ids": plan.get("completed_step_ids", []),
            "status": plan.get("status", "active"),
        },
        session_id=session.session_id,
    )
    return tool_exec
async def _execute_acai_plan_advance(
    self,
    session: SessionState,
    arguments: dict[str, Any],
    tool_call_id: str,
    tool_exec: ToolExecution,
) -> ToolExecution:
    """Run the synthetic `acai_plan_advance` tool: advance or abandon the plan.

    Behaviour, in order:
      * no active plan in `session.metadata["current_plan"]`: reports
        "no_active_plan" and changes nothing;
      * `abandon` truthy: marks the plan `abandoned`, moves it to
        `plan_history`, clears `current_plan` and emits `PLAN_ENDED`;
      * otherwise: records `completed_ids`, then moves the cursor either
        to `next_cursor` (clamped to `0..len(steps)`) or to the first step
        not yet completed. If every step is completed the plan becomes
        `done` and `PLAN_ENDED` is emitted, else `PLAN_ADVANCED`.

    Every path fills `tool_exec` and emits `TOOL_COMPLETED`.

    Args:
        session: Session whose `metadata` holds the plan state.
        arguments: Tool arguments (`abandon`, `completed_ids`, `next_cursor`).
        tool_call_id: Id echoed back in the emitted events.
        tool_exec: Execution record to fill in.

    Returns:
        The same `tool_exec`, with status, summary and raw output set.
    """
    plan = session.metadata.get("current_plan")
    if not plan or plan.get("status") != "active":
        tool_exec.status = ToolExecutionStatus.COMPLETED
        tool_exec.result_summary = "No hay plan activo."
        tool_exec.raw_output = json.dumps({"status": "no_active_plan"})
        await self.sse.emit(
            EventType.TOOL_COMPLETED,
            {"tool": "acai_plan_advance", "status": "completed", "summary": tool_exec.result_summary, "tool_call_id": tool_call_id},
            session_id=session.session_id,
        )
        return tool_exec

    if arguments.get("abandon"):
        plan["status"] = "abandoned"
        session.metadata.setdefault("plan_history", []).append(plan)
        session.metadata["current_plan"] = None
        tool_exec.status = ToolExecutionStatus.COMPLETED
        tool_exec.result_summary = "Plan abandonado."
        tool_exec.raw_output = json.dumps({"status": "abandoned"})
        await self.sse.emit(
            EventType.TOOL_COMPLETED,
            {"tool": "acai_plan_advance", "status": "completed", "summary": tool_exec.result_summary, "tool_call_id": tool_call_id},
            session_id=session.session_id,
        )
        await self.sse.emit(
            EventType.PLAN_ENDED,
            {"status": "abandoned", "objective": plan.get("objective", "")},
            session_id=session.session_id,
        )
        return tool_exec

    # Apply completed_ids. Arguments come from the model, so anything that
    # is not a list of real integers is ignored rather than trusted.
    completed_in = arguments.get("completed_ids") or []
    if not isinstance(completed_in, (list, tuple)):
        completed_in = []
    completed_set = set(plan.get("completed_step_ids", []))
    for cid in completed_in:
        # bool is a subclass of int; True/False are not step ids.
        if isinstance(cid, bool) or not isinstance(cid, int):
            continue
        if cid not in completed_set:
            plan.setdefault("completed_step_ids", []).append(cid)
            completed_set.add(cid)

    # Apply the cursor. An unparsable `next_cursor` is treated as absent
    # so the call still advances instead of raising out of the tool.
    steps = plan.get("steps") or []
    next_cursor: int | None = None
    if "next_cursor" in arguments:
        try:
            next_cursor = int(arguments["next_cursor"])
        except (TypeError, ValueError, OverflowError):
            logger.warning(
                "[acai_plan_advance] ignoring invalid next_cursor=%r",
                arguments["next_cursor"],
            )
    if next_cursor is not None:
        plan["cursor"] = max(0, min(next_cursor, len(steps)))
    else:
        # Auto-advance to the first step that is not completed yet.
        for i, st in enumerate(steps):
            if st.get("id") not in completed_set:
                plan["cursor"] = i
                break
        else:
            plan["status"] = "done"
            # Keep the cursor consistent with the automatic advance, which
            # parks it past the last step once the plan is done.
            plan["cursor"] = len(steps)

    cursor = plan.get("cursor", 0)
    # NOTE(review): an explicit next_cursor == len(steps) reports the plan
    # as completed here while its status stays "active" (PLAN_ADVANCED is
    # emitted below). Kept as-is; confirm whether that should end the plan.
    if plan.get("status") == "done" or cursor >= len(steps):
        tool_exec.result_summary = f"Plan completado ({len(completed_set)}/{len(steps)} steps)."
    else:
        next_desc = str(steps[cursor].get("description") or "(?)")
        tool_exec.result_summary = (
            f"Plan avanzado a step {cursor + 1}/{len(steps)}: {next_desc[:200]}"
        )
    tool_exec.status = ToolExecutionStatus.COMPLETED
    tool_exec.raw_output = json.dumps({
        "cursor": plan.get("cursor", 0),
        "completed_step_ids": plan.get("completed_step_ids", []),
        "status": plan.get("status", "active"),
    })
    await self.sse.emit(
        EventType.TOOL_COMPLETED,
        {"tool": "acai_plan_advance", "status": "completed", "summary": tool_exec.result_summary, "tool_call_id": tool_call_id},
        session_id=session.session_id,
    )
    # Emit PLAN_ENDED or PLAN_ADVANCED depending on the outcome.
    if plan.get("status") == "done":
        await self.sse.emit(
            EventType.PLAN_ENDED,
            {"status": "done", "objective": plan.get("objective", "")},
            session_id=session.session_id,
        )
    else:
        await self.sse.emit(
            EventType.PLAN_ADVANCED,
            {
                "cursor": plan.get("cursor", 0),
                "completed_step_ids": plan.get("completed_step_ids", []),
                "status": plan.get("status", "active"),
            },
            session_id=session.session_id,
        )
    return tool_exec
@staticmethod
def _match_step_to_executions(
step: dict[str, Any],
tool_executions: list[ToolExecution],
) -> bool:
"""Heuristica: matchea step.agent_action con tool calls reales.
Marca el step como completado si alguna de las tools ejecutadas
coincide con el `agent_action` del step. Compara:
1) nombre de la tool (normalizando guion/underscore: `acai-write`
matchea con `acai_write`).
2) si action menciona algun `files_touched` y la tool ejecutada
tiene ese path en sus argumentos.
3) si action menciona algun `tables_touched` y la tool ejecutada
tiene ese tableName en sus argumentos.
"""
action = (step.get("agent_action") or "").lower()
files_touched = [str(f).lower() for f in (step.get("files_touched") or [])]
tables_touched = [str(t).lower() for t in (step.get("tables_touched") or [])]
if not action and not files_touched and not tables_touched:
return False
for te in tool_executions:
if te.status != ToolExecutionStatus.COMPLETED:
continue
raw_name = strip_namespace(te.tool_name).lower()
# Normaliza guiones/underscores para matching tool name <-> action.
tool_variants = {raw_name, raw_name.replace("-", "_"), raw_name.replace("_", "-")}
# Match 1: nombre de la tool aparece en action
if any(v and v in action for v in tool_variants):
return True
# Match 2/3: path o tableName en los args de la tool
try:
args_str = json.dumps(te.arguments or {}, ensure_ascii=False).lower()
except Exception:
args_str = str(te.arguments or "").lower()
for f in files_touched:
if f and f in args_str:
return True
for t in tables_touched:
if t and t in args_str:
return True
return False
async def _auto_advance_plan_cursor(
    self,
    session: SessionState,
    tool_executions_this_step: list[ToolExecution],
    no_tool_calls_this_step: bool = False,
) -> None:
    """Advance the active plan's cursor after one internal agent step.

    Does nothing unless `session.metadata["current_plan"]` is active.

    With tool executions, `judge_plan_progress` (LLM-as-judge) is asked
    which plan steps they completed. If the judge fails the plan is left
    exactly as it was: no heuristic fallback, to avoid false positives.

    With `no_tool_calls_this_step=True` (the agent ended its turn) the
    judge is not called and pending steps are NOT marked completed; the
    plan is only closed if the steps completed earlier already cover it.

    The cursor is then set to the first step whose id is not completed
    (`len(steps)` when none is left). When a non-empty plan has no step
    left its status becomes `done` and `PLAN_ENDED` is emitted; any other
    change emits `PLAN_ADVANCED`. Emit failures are logged, not raised.

    Args:
        session: Session whose `metadata` holds the plan state.
        tool_executions_this_step: Executions produced by the current step.
        no_tool_calls_this_step: True when the agent finished without tools.
    """
    plan = session.metadata.get("current_plan")
    if not plan or plan.get("status") != "active":
        return

    steps = plan.get("steps") or []
    prev_cursor = int(plan.get("cursor") or 0)
    prev_completed = list(plan.get("completed_step_ids", []))
    completed_set = set(prev_completed)
    rationale = ""

    if no_tool_calls_this_step:
        # Ending the turn without tools must not complete pending steps:
        # that would be a false positive (real case: the agent gets stuck
        # and answers with a chat message without doing the task). The
        # completed set is left as the judge built it in earlier steps.
        if steps and len(completed_set) >= len(steps):
            rationale = "agente termino el turn; todos los steps ya completados"
        else:
            rationale = "agente termino el turn con steps pendientes (no cerrado)"
    elif tool_executions_this_step:
        # Ask the judge which steps were just completed.
        try:
            completed_ids, judge_rationale = await judge_plan_progress(
                plan=plan,
                tool_executions_this_step=tool_executions_this_step,
                model_adapter=self.model,
                model_id=self.profile.model_id,
            )
            # Ids that match no plan step cannot move the cursor and, with
            # a stray type, would break the sort below; drop them.
            known_ids = {step.get("id") for step in steps}
            for cid in completed_ids:
                if cid in known_ids:
                    completed_set.add(cid)
            rationale = str(judge_rationale or "")
        except Exception as e:
            logger.warning("[plan-judge] failed, no advance this step: %s", e)
            # Without a verdict the plan stays untouched.
            return

    # Cursor: first step NOT completed; past the end when all are done.
    cursor = len(steps)
    for i, step in enumerate(steps):
        if step.get("id") not in completed_set:
            cursor = i
            break

    plan["cursor"] = cursor
    plan["completed_step_ids"] = sorted(completed_set)
    ended = bool(steps) and cursor >= len(steps)
    if ended:
        plan["status"] = "done"

    # The plan was active on entry, so `ended` is always a real transition
    # and must be announced even when cursor and completed ids were already
    # at their final values (e.g. set earlier by `acai_plan_advance`).
    changed = ended or cursor != prev_cursor or completed_set != set(prev_completed)
    logger.info(
        "[plan-advance] tools_in_step=%d prev_cursor=%d new_cursor=%d completed=%s changed=%s rationale=%r",
        len(tool_executions_this_step), prev_cursor, cursor,
        plan["completed_step_ids"], changed, rationale[:160],
    )
    if not changed:
        return

    try:
        if ended:
            await self.sse.emit(
                EventType.PLAN_ENDED,
                {"status": "done", "objective": plan.get("objective", "")},
                session_id=session.session_id,
            )
        else:
            await self.sse.emit(
                EventType.PLAN_ADVANCED,
                {
                    "cursor": plan["cursor"],
                    "completed_step_ids": plan["completed_step_ids"],
                    "status": plan.get("status", "active"),
                },
                session_id=session.session_id,
            )
    except Exception as e:
        # Best effort: a UI notification must not break the agent loop.
        logger.warning("PLAN_ADVANCED/ENDED emit failed: %s", e)
# ---- Allowed tools --------------------------------------------------------
def _get_allowed_tools(self, followup_mode: str = "none") -> list[dict[str, Any]]:
"""Return tool definitions filtered by this agent's allowed_tools."""
"""Return tool definitions filtered by this agent's allowed_tools.
Si el agente tiene `has_planner_tool=True`, anade definiciones sinteticas
de `acai_plan` y `acai_plan_advance` (Fase 5: la tool interna no
atraviesa MCP — se intercepta en `_execute_tool`).
"""
if followup_mode == "transform":
return []
if not self.mcp.is_running:
return []
all_tools = self.mcp.get_tool_definitions()
if not self.profile.allowed_tools:
return all_tools # No filter → all tools
return [t for t in all_tools if t["name"] in self.profile.allowed_tools]
if self.profile.allowed_tools:
tool_defs = [t for t in all_tools if t["name"] in self.profile.allowed_tools]
else:
tool_defs = list(all_tools)
if self.profile.has_planner_tool:
tool_defs.append({
"name": "acai_plan",
"description": (
"Genera un plan estructurado de ejecucion. Usa esta tool al recibir "
"una peticion compuesta (landing entera, tienda, refactor amplio, modulo "
"con tabla+hook+frontend). NO la uses para tareas triviales (cambiar un titulo, "
"ajustar un color, leer datos). Devuelve JSON con steps, risks, files_touched, "
"tables_touched."
),
"input_schema": {
"type": "object",
"required": ["objective"],
"properties": {
"objective": {
"type": "string",
"description": "Descripcion en español de lo que hay que conseguir.",
},
"scope": {
"type": "string",
"description": "Restricciones opcionales (ej. 'no toques el header').",
},
},
},
})
tool_defs.append({
"name": "acai_plan_advance",
"description": (
"Avanza/abandona el plan activo. Llama con `abandon: true` si el "
"usuario corrige y el plan ya no es valido, o con `next_cursor` para "
"saltar al siguiente step pendiente."
),
"input_schema": {
"type": "object",
"properties": {
"abandon": {"type": "boolean"},
"completed_ids": {"type": "array", "items": {"type": "integer"}},
"next_cursor": {"type": "integer"},
},
},
})
return tool_defs
@staticmethod
def _extract_mcp_output(result: dict[str, Any]) -> str:
"""Extract text content from MCP tool result."""
"""Extract text content from MCP tool result.
El modelo (MiniMax M2.7) es text-only — los blocks `type=image` no
pueden reenviarse. En lugar de descartar silenciosamente (lo que dejaba
al agente con un tool_result vacio y le hacia repetir la llamada),
emitimos un placeholder explicito que le dice que use `browser_snapshot`
si quiere inspeccionar la pagina.
"""
content = result.get("content", [])
if isinstance(content, list):
parts: list[str] = []
image_count = 0
for item in content:
if isinstance(item, dict) and item.get("type") == "text":
if not isinstance(item, dict):
continue
itype = item.get("type")
if itype == "text":
parts.append(item.get("text", ""))
elif itype == "image":
image_count += 1
if image_count and not parts:
return (
f"[{image_count} imagen(es) no procesada(s) — el modelo es "
f"text-only. Para inspeccionar la pagina usa "
f"`browser_snapshot` (devuelve accessibility tree en texto). "
f"`browser_take_screenshot` solo sirve para que el usuario "
f"vea la captura, no para tu analisis.]"
)
if image_count and parts:
parts.append(
f"\n[Adicionalmente {image_count} imagen(es) no incluida(s): "
f"el modelo no las procesa.]"
)
return "\n".join(parts) if parts else json.dumps(result)
return str(content)