"""Base subagent class with shared execution logic.""" from __future__ import annotations import hashlib import json import logging import time import uuid from typing import Any, AsyncIterator from ...adapters.base import ModelAdapter, ModelConfig, StreamChunk from ...config import settings from ...context.engine import ContextEngine from ...mcp.manager import MCPManager from ...memory.store import MemoryStore from ...models.agent import AgentProfile from ...models.artifacts import ArtifactSummary from ...models.session import SessionState from ...models.tools import ToolExecution, ToolExecutionStatus from ...streaming.sse import SSEEmitter, EventType from ..planner import run_planner_subloop from ..plan_judge import judge_plan_progress from ..tool_groups import is_plan_internal_tool, strip_namespace logger = logging.getLogger(__name__) class BaseAgent: """Base class for all subagents.""" def __init__( self, profile: AgentProfile, model_adapter: ModelAdapter, context_engine: ContextEngine, mcp_client: MCPManager, memory_store: MemoryStore, sse_emitter: SSEEmitter, ) -> None: self.profile = profile self.model = model_adapter self.context = context_engine self.mcp = mcp_client self.memory = memory_store self.sse = sse_emitter async def execute( self, session: SessionState, max_steps: int = 30, ) -> dict[str, Any]: """Run the agent's execution loop. Uses real conversation messages with complete tool results, like professional agentic tools (Claude Code, Cursor). Compaction happens at the step level, not per tool result. Returns a result dict with keys: content, artifacts, tool_executions. """ artifacts: list[ArtifactSummary] = await self.memory.list_artifacts( session.session_id ) tool_executions: list[ToolExecution] = [] accumulated_content = "" total_input_tokens = 0 total_output_tokens = 0 # Real conversation history: assistant messages + tool results conversation: list[dict[str, Any]] = [] # Expuesta para que las tools internas (acai_plan) puedan resumir # el thinking acumulado del agente principal sin que tengamos que # pasarlo explicitamente por cada llamada a `_execute_tool`. self._current_conversation = conversation for step in range(max_steps): # Build context with real conversation ctx = await self.context.build_context( session=session, agent=self.profile, artifacts=artifacts, conversation=conversation, ) # Prepare tool definitions. plan_mode "off" oculta acai_plan al # modelo (toggle del UI desactivado). "force" la expone normalmente. tool_defs = self._get_allowed_tools( followup_mode=str(session.metadata.get("followup_mode", "none")), plan_mode=str(session.metadata.get("plan_mode", "off") or "off"), ) # Stream model response config = ModelConfig( model_id=self.profile.model_id or "", max_tokens=self.profile.max_tokens or 4096, temperature=self.profile.temperature or 0.3, ) # Snapshot del numero de tool_executions ya acumulados ANTES del # step. El judge solo necesita las del step actual; el slice # `tool_executions[exec_offset:]` da exactamente ese delta. exec_offset = len(tool_executions) full_text = "" tool_calls: list[dict[str, Any]] = [] active_tools: dict[str, dict[str, Any]] = {} # Acumuladores Anthropic-style por turno (interleaved thinking M2). # Por cada block_index guardamos un dict block parcial. Al cerrar el # turno, lo serializamos en orden. turn_blocks_by_index: dict[int, dict[str, Any]] = {} # Cuando text_delta llega sin block_index (p.ej. via OpenAI adapter # legacy), asignamos un sintetico para no perder el texto. synthetic_text_idx = 10_000 async for chunk in self.model.stream( messages=ctx.to_messages(), tools=tool_defs if tool_defs else None, config=config, ): if chunk.delta: full_text += chunk.delta # Acumular por block_index para reconstruir blocks. idx = chunk.block_index if idx < 0: idx = synthetic_text_idx blk = turn_blocks_by_index.get(idx) if blk is None: blk = {"type": "text", "text": ""} turn_blocks_by_index[idx] = blk if blk.get("type") == "text": blk["text"] = blk.get("text", "") + chunk.delta if self.profile.stream_deltas: await self.sse.emit( EventType.AGENT_DELTA, { "agent": self.profile.role, "delta": chunk.delta, "step": step, }, session_id=session.session_id, ) # Thinking deltas (MiniMax M2 interleaved). El adapter ya viene # con block_index correcto; solo acumulamos. if chunk.thinking_delta and chunk.block_index >= 0: blk = turn_blocks_by_index.get(chunk.block_index) if blk is None: blk = {"type": "thinking", "thinking": "", "signature": ""} turn_blocks_by_index[chunk.block_index] = blk if blk.get("type") == "thinking": blk["thinking"] = blk.get("thinking", "") + chunk.thinking_delta if self.profile.stream_deltas: await self.sse.emit( EventType.AGENT_DELTA, { "agent": self.profile.role, "thinking_delta": chunk.thinking_delta, "block_index": chunk.block_index, "step": step, }, session_id=session.session_id, ) if chunk.thinking_signature and chunk.block_index >= 0: blk = turn_blocks_by_index.get(chunk.block_index) if blk is None: blk = {"type": "thinking", "thinking": "", "signature": ""} turn_blocks_by_index[chunk.block_index] = blk if blk.get("type") == "thinking": blk["signature"] = chunk.thinking_signature if chunk.tool_name and chunk.tool_call_id: if chunk.tool_call_id not in active_tools: active_tools[chunk.tool_call_id] = { "id": chunk.tool_call_id, "name": chunk.tool_name, "arguments": "", "block_index": chunk.block_index, } await self.sse.emit( EventType.TOOL_STARTED, {"tool": chunk.tool_name, "tool_call_id": chunk.tool_call_id, "step": step}, session_id=session.session_id, ) if chunk.tool_arguments and chunk.tool_call_id and not chunk.finish_reason: tool = active_tools.get(chunk.tool_call_id) if tool: tool["arguments"] += chunk.tool_arguments await self.sse.emit( EventType.AGENT_DELTA, { "agent": self.profile.role, "delta": "", "tool_arguments": chunk.tool_arguments, "tool_call_id": chunk.tool_call_id, "step": step, }, session_id=session.session_id, ) if chunk.finish_reason == "tool_use" and chunk.tool_call_id: tool = active_tools.pop(chunk.tool_call_id, None) if not tool: tool = { "id": chunk.tool_call_id, "name": chunk.tool_name or "", "arguments": "", "block_index": chunk.block_index, } final_args = tool["arguments"] or chunk.tool_arguments or "" try: args = json.loads(final_args) if final_args else {} tool["parse_error"] = None except json.JSONDecodeError as e: # Args truncados o malformados — causa tipica: el modelo # excedio max_tokens a mitad de la serializacion JSON # del tool_use (ej. escribiendo un fichero grande). logger.warning( "Failed to parse tool args for %s (%d chars): %s... | err: %s", tool.get("name", "?"), len(final_args), final_args[:200], str(e)[:100], ) args = {} # Guardamos el raw para poder generar un fingerprint distinto # al de otros fallos y un mensaje util para el modelo. tool["parse_error"] = { "raw": final_args, "raw_hash": hashlib.md5(final_args.encode()).hexdigest()[:8], "message": str(e)[:200], } tool["parsed_arguments"] = args tool_calls.append(tool) # Registrar tool_use block en su posicion del turno. bidx = tool.get("block_index", -1) if bidx >= 0: turn_blocks_by_index[bidx] = { "type": "tool_use", "id": tool["id"], "name": tool["name"], "input": args, } # Accumulate token usage from any chunk that has it if chunk.usage: total_input_tokens += chunk.usage.get("input_tokens", 0) total_output_tokens += chunk.usage.get("output_tokens", 0) if chunk.finish_reason == "end_turn": break accumulated_content += full_text # Materializar blocks del turno en orden por block_index. # Filtra thinking blocks sin signature: MiniMax los rechazaria al # reenviarlos. Mejor descartar el thinking entero que mandar uno # corrupto y ver un 400. turn_blocks: list[dict[str, Any]] = [] for idx in sorted(turn_blocks_by_index.keys()): b = turn_blocks_by_index[idx] if b.get("type") == "thinking": if not b.get("signature"): logger.warning( "Drop thinking block at idx=%d (no signature) — chars=%d", idx, len(b.get("thinking", "")), ) continue # Limpiar texto vacio defensivo. if not b.get("thinking"): continue turn_blocks.append(b) # Backstop: garantizar que CADA tool_call tenga su tool_use block # en turn_blocks. Si no lo tiene (chunks sin block_index, adapter # legacy, etc.), apendearlo al final. Sin esto, MiniMax devuelve # 400 ("tool result's tool id not found") en el siguiente request. tool_use_ids_in_blocks = { b.get("id") for b in turn_blocks if b.get("type") == "tool_use" and b.get("id") } for tc in tool_calls: if tc["id"] not in tool_use_ids_in_blocks: turn_blocks.append({ "type": "tool_use", "id": tc["id"], "name": tc["name"], "input": tc.get("parsed_arguments", {}), }) tool_use_ids_in_blocks.add(tc["id"]) # If no tool calls, we're done if not tool_calls: # Quirk DeepSeek thinking: a veces el modelo emite TODA su # respuesta como reasoning y cierra el turno sin text ni # tool_use. Si el turno termina SOLO con bloques thinking, # promovemos el thinking a un bloque text en el snapshot que # se persiste — asi el UI no lo muestra como "pensando" al # recargar y el siguiente turno no rompe con # "content or tool_calls must be set". if turn_blocks and all(b.get("type") == "thinking" for b in turn_blocks): promoted = "\n".join( b.get("thinking", "") for b in turn_blocks if b.get("thinking") ) turn_blocks = [{"type": "text", "text": promoted}] accumulated_content += promoted if promoted and self.profile.stream_deltas: # Emision en vivo via AGENT_DELTA normal: el # ClaudeFormatEmitter cierra el thinking block abierto # (content_block_stop) y abre un text block nuevo con # su propio indice (start/delta/stop), asi que el # protocolo de bloques no se rompe. await self.sse.emit( EventType.AGENT_DELTA, { "agent": self.profile.role, "delta": promoted, "step": step, }, session_id=session.session_id, ) if turn_blocks: conversation.append({"role": "assistant", "content": turn_blocks}) elif full_text: # Fallback (no debiera ocurrir si el adapter emite block_index). conversation.append({"role": "assistant", "content": full_text}) # El agente termino sin mas tool calls: cerramos el plan si # estaba activo. El judge no se llama (no hay tools que evaluar); # el flag `no_tool_calls_this_step=True` marca todos los pendientes # como completados. try: await self._auto_advance_plan_cursor( session, [], no_tool_calls_this_step=True, ) except Exception as e: logger.warning("[plan-advance] failed at end_turn: %s", e) break # Push del assistant turn con TODOS los blocks (thinking+text+tool_use). # Esto preserva la cadena de razonamiento de M2 entre turnos. if turn_blocks: conversation.append({"role": "assistant", "content": turn_blocks}) else: # Fallback OpenAI-style si no hay blocks (modelo legacy o sin # block_index). Mantenemos compat con OpenAIAdapter / cualquier # adapter que no propague block_index. assistant_msg: dict[str, Any] = {"role": "assistant"} if full_text: assistant_msg["content"] = full_text assistant_msg["tool_calls"] = [ { "id": tc["id"], "type": "function", "function": { "name": tc["name"], "arguments": json.dumps(tc.get("parsed_arguments", {})), }, } for tc in tool_calls ] conversation.append(assistant_msg) # Execute tool calls. Los results se agrupan en UN solo user message # con array de tool_result blocks (formato Anthropic). Anteriormente # se hacian N appends `{"role":"tool",...}` en formato OpenAI. tool_result_blocks: list[dict[str, Any]] = [] for tc in tool_calls: # Si los args no se pudieron parsear (p.ej. truncados por max_tokens), # NO ejecutamos la tool. En su lugar devolvemos un mensaje al modelo # explicando el problema para que pueda ajustar el siguiente intento # (dividir el contenido, acortar, etc.). if tc.get("parse_error"): pe = tc["parse_error"] tool_result_blocks.append({ "type": "tool_result", "tool_use_id": tc["id"], "content": ( f"[ERROR] No se pudieron parsear los argumentos del tool " f"'{tc['name']}'. Los argumentos llegaron truncados o mal " f"formados (probablemente excediste el limite de max_tokens " f"al serializar el tool_use). Recibido {len(pe['raw'])} chars. " f"Error: {pe['message']}. " f"Reintenta dividiendo el contenido en varios tool calls mas " f"pequenos o reduciendo el tamano del argumento 'content'." ), "is_error": True, }) continue tool_exec = await self._execute_tool( session=session, tool_name=tc["name"], arguments=tc.get("parsed_arguments", {}), artifacts=artifacts, tool_call_id=tc["id"], ) tool_executions.append(tool_exec) tool_result_blocks.append({ "type": "tool_result", "tool_use_id": tc["id"], "content": ( tool_exec.raw_output[:settings.tool_raw_output_max_chars] if tool_exec.raw_output else tool_exec.result_summary ), }) if tool_result_blocks: conversation.append({"role": "user", "content": tool_result_blocks}) # Auto-avance del cursor del plan TRAS CADA STEP INTERNO (no solo # al final del turno). Asi el frontend ve los `✓` aparecer en vivo # conforme el agente ejecuta tools, no de golpe al final. try: await self._auto_advance_plan_cursor( session, tool_executions[exec_offset:], ) except Exception as e: logger.warning("Auto-advance plan cursor failed: %s", e) return { "content": accumulated_content, "artifacts": artifacts, "tool_executions": tool_executions, "conversation": conversation, "usage": { "input_tokens": total_input_tokens, "output_tokens": total_output_tokens, }, } async def _execute_tool( self, session: SessionState, tool_name: str, arguments: dict[str, Any], artifacts: list[ArtifactSummary], tool_call_id: str = "", ) -> ToolExecution: """Execute a tool and summarise the result.""" exec_id = uuid.uuid4().hex[:12] tool_exec = ToolExecution( execution_id=exec_id, tool_name=tool_name, arguments=arguments, status=ToolExecutionStatus.RUNNING, ) logger.info("Tool call: %s(%s)", tool_name, json.dumps(arguments)[:200]) # Intercepcion: tools internas del orquestador (Fase 5: acai_plan). # No atraviesan MCP — se ejecutan en Python directamente. if is_plan_internal_tool(tool_name): raw_name = strip_namespace(tool_name) await self.sse.emit( EventType.TOOL_STARTED, {"tool": raw_name, "tool_call_id": tool_call_id}, session_id=session.session_id, ) if raw_name == "acai_plan": return await self._execute_acai_plan(session, arguments, tool_call_id, tool_exec) if raw_name == "acai_plan_advance": return await self._execute_acai_plan_advance(session, arguments, tool_call_id, tool_exec) start = time.monotonic() try: if self.mcp.is_running: # Intentar llamada directa: call_tool tiene fallback bare-name # via _resolve_tool, asi que aunque venga sin prefijo # `acai_code__` (caso comun cuando el modelo emite XML inline) # se resuelve solo. El check `tool_name in self.mcp.tools` que # haciamos antes era demasiado estricto y rechazaba bare names. try: result = await self.mcp.call_tool(tool_name, arguments) raw_output = self._extract_mcp_output(result) except Exception as resolve_err: raw_output = ( f"Tool '{tool_name}' no disponible o fallo al resolver: " f"{str(resolve_err)[:200]}" ) else: raw_output = f"Tool '{tool_name}' not available via MCP." duration = (time.monotonic() - start) * 1000 # Summarise — raw output NEVER enters context task_id = session.current_task.task_id if session.current_task else "none" artifact = self.context.summarize_tool_output( tool_name=tool_name, raw_output=raw_output, session_id=session.session_id, task_id=task_id, ) # Store artifact await self.memory.store_artifact(session.session_id, artifact) artifacts.append(artifact) tool_exec.status = ToolExecutionStatus.COMPLETED tool_exec.result_summary = artifact.summary tool_exec.raw_output = raw_output[:settings.tool_raw_output_max_chars] tool_exec.duration_ms = duration await self.sse.emit( EventType.TOOL_COMPLETED, { "tool": tool_name, "status": "completed", "summary": artifact.summary[:200], "raw_output": raw_output[:min(4000, settings.tool_raw_output_max_chars)], "tool_call_id": tool_call_id, }, session_id=session.session_id, ) except Exception as e: tool_exec.status = ToolExecutionStatus.FAILED tool_exec.error = str(e) tool_exec.duration_ms = (time.monotonic() - start) * 1000 logger.error("Tool execution failed: %s — %s", tool_name, e) await self.sse.emit( EventType.TOOL_COMPLETED, {"tool": tool_name, "status": "failed", "error": str(e), "tool_call_id": tool_call_id}, session_id=session.session_id, ) return tool_exec # ---- Tools internas del orquestador (Fase 5) ----------------------------- @staticmethod def _summarize_parent_thinking(conversation: list[dict[str, Any]], max_chars: int = 1200) -> str: """Resumen del thinking acumulado del agente principal hasta este turno. Recorre los assistants Anthropic-style con content blocks `type=thinking`, junta los textos y trunca a `max_chars`. Se usa para pasar contexto comprimido al planner sub-loop sin contaminarlo con el thinking entero. """ chunks: list[str] = [] total = 0 for msg in reversed(conversation): if msg.get("role") != "assistant": continue content = msg.get("content") if not isinstance(content, list): continue for block in content: if isinstance(block, dict) and block.get("type") == "thinking": txt = block.get("thinking", "") or "" if not txt: continue chunks.append(txt) total += len(txt) if total >= max_chars: break if total >= max_chars: break # Concatenamos del mas viejo al mas reciente para mantener orden logico. joined = "\n---\n".join(reversed(chunks)) if len(joined) > max_chars: joined = "[...] " + joined[-max_chars:] return joined async def _execute_acai_plan( self, session: SessionState, arguments: dict[str, Any], tool_call_id: str, tool_exec: ToolExecution, ) -> ToolExecution: """Implementacion de la tool sintetica `acai_plan`. Lanza un sub-loop con `system.planner.md` y solo tools de lectura. Persiste el plan resultante en `session.metadata["current_plan"]`. """ # Limite de invocaciones por turno: maximo 2. Tras eso, el modelo debe # ejecutar directo o abandonar. count = int(session.metadata.get("plan_call_count_in_turn", 0)) if count >= 2: tool_exec.status = ToolExecutionStatus.COMPLETED tool_exec.result_summary = ( "Ya invocaste acai_plan dos veces este turno. " "Ejecuta directo o usa acai_plan_advance({abandon:true}) para resetear." ) tool_exec.raw_output = json.dumps({"error": "max_plan_calls_per_turn"}) await self.sse.emit( EventType.TOOL_COMPLETED, {"tool": "acai_plan", "status": "completed", "summary": tool_exec.result_summary, "tool_call_id": tool_call_id}, session_id=session.session_id, ) return tool_exec session.metadata["plan_call_count_in_turn"] = count + 1 objective = str(arguments.get("objective") or "").strip() scope = str(arguments.get("scope") or "").strip() if not objective: tool_exec.status = ToolExecutionStatus.FAILED tool_exec.error = "Falta el campo 'objective'" tool_exec.result_summary = "acai_plan FALLO: falta objective." tool_exec.raw_output = json.dumps({"error": "missing_objective"}) await self.sse.emit( EventType.TOOL_COMPLETED, {"tool": "acai_plan", "status": "failed", "error": tool_exec.error, "tool_call_id": tool_call_id}, session_id=session.session_id, ) return tool_exec # Resumen del thinking acumulado en el turno actual (si lo hay). # `self._current_conversation` se setea al inicio de execute() — ver mas abajo. parent_summary = self._summarize_parent_thinking( getattr(self, "_current_conversation", []) or [], ) start = time.monotonic() try: result = await run_planner_subloop( objective=objective, scope=scope, agent_profile=self.profile, model_adapter=self.model, mcp=self.mcp, parent_thinking_summary=parent_summary, ) except Exception as e: logger.error("Planner sub-loop crashed: %s", e) tool_exec.status = ToolExecutionStatus.FAILED tool_exec.error = str(e) tool_exec.duration_ms = (time.monotonic() - start) * 1000 tool_exec.result_summary = f"acai_plan FALLO: {str(e)[:200]}" tool_exec.raw_output = json.dumps({"error": str(e)[:500]}) await self.sse.emit( EventType.TOOL_COMPLETED, {"tool": "acai_plan", "status": "failed", "error": str(e), "tool_call_id": tool_call_id}, session_id=session.session_id, ) return tool_exec tool_exec.duration_ms = (time.monotonic() - start) * 1000 if not result.plan: err = result.error or "Plan vacio" logger.warning( "[acai_plan] Plan FAILED: %s (raw_preview=%r)", err, (result.raw_text or "")[:200], ) tool_exec.status = ToolExecutionStatus.FAILED tool_exec.error = err tool_exec.result_summary = ( f"acai_plan FALLO: {err}. Procede en modo directo o reintenta con scope distinto." ) tool_exec.raw_output = json.dumps({ "error": err, "raw_text_preview": (result.raw_text or "")[:500], }) await self.sse.emit( EventType.TOOL_COMPLETED, {"tool": "acai_plan", "status": "failed", "error": err, "tool_call_id": tool_call_id}, session_id=session.session_id, ) return tool_exec # Plan valido: persistir en metadata. Si habia un plan activo previo, # moverlo a history como `superseded`. old_plan = session.metadata.get("current_plan") if old_plan and old_plan.get("status") == "active": old_plan["status"] = "superseded" session.metadata.setdefault("plan_history", []).append(old_plan) plan = dict(result.plan) plan["cursor"] = 0 plan["completed_step_ids"] = [] plan["status"] = "active" plan["created_at"] = int(time.time()) session.metadata["current_plan"] = plan steps = plan.get("steps") or [] next_desc = steps[0]["description"] if steps else "(plan vacio)" n_steps = len(steps) n_risks = len(plan.get("risks") or []) tool_exec.status = ToolExecutionStatus.COMPLETED tool_exec.result_summary = ( f"Plan generado: {n_steps} step(s), {n_risks} risk(s). " f"Proximo: step 1 — {next_desc[:200]}" ) logger.info( "[acai_plan] Plan persisted: %d steps, %d risks, objective=%r", n_steps, n_risks, objective[:120], ) # raw_output al modelo: el JSON completo del plan (truncado a 4000 chars). plan_json = json.dumps(plan, ensure_ascii=False) if len(plan_json) > 4000: tool_exec.raw_output = plan_json[:4000] + "\n[...truncated]" else: tool_exec.raw_output = plan_json await self.sse.emit( EventType.TOOL_COMPLETED, { "tool": "acai_plan", "status": "completed", "summary": tool_exec.result_summary[:200], "raw_output": tool_exec.raw_output[:4000], "tool_call_id": tool_call_id, }, session_id=session.session_id, ) # PlanStepper UI: notifica al frontend que hay un plan nuevo activo. await self.sse.emit( EventType.PLAN_CREATED, { "objective": plan.get("objective", ""), "steps": [ { "id": s.get("id"), "description": s.get("description", "")[:300], "agent_action": s.get("agent_action", "")[:200], "files_touched": s.get("files_touched", [])[:10], "tables_touched": s.get("tables_touched", [])[:10], } for s in plan.get("steps", []) ], "risks": plan.get("risks", [])[:10], "cursor": plan.get("cursor", 0), "completed_step_ids": plan.get("completed_step_ids", []), "status": plan.get("status", "active"), }, session_id=session.session_id, ) return tool_exec async def _execute_acai_plan_advance( self, session: SessionState, arguments: dict[str, Any], tool_call_id: str, tool_exec: ToolExecution, ) -> ToolExecution: """Avanza/abandona el plan activo.""" plan = session.metadata.get("current_plan") if not plan or plan.get("status") != "active": tool_exec.status = ToolExecutionStatus.COMPLETED tool_exec.result_summary = "No hay plan activo." tool_exec.raw_output = json.dumps({"status": "no_active_plan"}) await self.sse.emit( EventType.TOOL_COMPLETED, {"tool": "acai_plan_advance", "status": "completed", "summary": tool_exec.result_summary, "tool_call_id": tool_call_id}, session_id=session.session_id, ) return tool_exec if arguments.get("abandon"): plan["status"] = "abandoned" session.metadata.setdefault("plan_history", []).append(plan) session.metadata["current_plan"] = None tool_exec.status = ToolExecutionStatus.COMPLETED tool_exec.result_summary = "Plan abandonado." tool_exec.raw_output = json.dumps({"status": "abandoned"}) await self.sse.emit( EventType.TOOL_COMPLETED, {"tool": "acai_plan_advance", "status": "completed", "summary": tool_exec.result_summary, "tool_call_id": tool_call_id}, session_id=session.session_id, ) await self.sse.emit( EventType.PLAN_ENDED, {"status": "abandoned", "objective": plan.get("objective", "")}, session_id=session.session_id, ) return tool_exec # Aplicar completed_ids completed_in = arguments.get("completed_ids") or [] completed_set = set(plan.get("completed_step_ids", [])) for cid in completed_in: if isinstance(cid, int) and cid not in completed_set: plan.setdefault("completed_step_ids", []).append(cid) completed_set.add(cid) # Aplicar cursor steps = plan.get("steps") or [] if "next_cursor" in arguments: plan["cursor"] = max(0, min(int(arguments["next_cursor"]), len(steps))) else: # Auto-avanzar al primer step no completado. for i, st in enumerate(steps): if st.get("id") not in completed_set: plan["cursor"] = i break else: plan["status"] = "done" cursor = plan.get("cursor", 0) if plan.get("status") == "done" or cursor >= len(steps): tool_exec.result_summary = f"Plan completado ({len(completed_set)}/{len(steps)} steps)." else: next_desc = steps[cursor].get("description", "(?)") if cursor < len(steps) else "(?)" tool_exec.result_summary = ( f"Plan avanzado a step {cursor + 1}/{len(steps)}: {next_desc[:200]}" ) tool_exec.status = ToolExecutionStatus.COMPLETED tool_exec.raw_output = json.dumps({ "cursor": plan.get("cursor", 0), "completed_step_ids": plan.get("completed_step_ids", []), "status": plan.get("status", "active"), }) await self.sse.emit( EventType.TOOL_COMPLETED, {"tool": "acai_plan_advance", "status": "completed", "summary": tool_exec.result_summary, "tool_call_id": tool_call_id}, session_id=session.session_id, ) # Emitir PLAN_ADVANCED o PLAN_ENDED segun el resultado. if plan.get("status") == "done": await self.sse.emit( EventType.PLAN_ENDED, {"status": "done", "objective": plan.get("objective", "")}, session_id=session.session_id, ) else: await self.sse.emit( EventType.PLAN_ADVANCED, { "cursor": plan.get("cursor", 0), "completed_step_ids": plan.get("completed_step_ids", []), "status": plan.get("status", "active"), }, session_id=session.session_id, ) return tool_exec @staticmethod def _match_step_to_executions( step: dict[str, Any], tool_executions: list[ToolExecution], ) -> bool: """Heuristica: matchea step.agent_action con tool calls reales. Marca el step como completado si alguna de las tools ejecutadas coincide con el `agent_action` del step. Compara: 1) nombre de la tool (normalizando guion/underscore: `acai-write` matchea con `acai_write`). 2) si action menciona algun `files_touched` y la tool ejecutada tiene ese path en sus argumentos. 3) si action menciona algun `tables_touched` y la tool ejecutada tiene ese tableName en sus argumentos. """ action = (step.get("agent_action") or "").lower() files_touched = [str(f).lower() for f in (step.get("files_touched") or [])] tables_touched = [str(t).lower() for t in (step.get("tables_touched") or [])] if not action and not files_touched and not tables_touched: return False for te in tool_executions: if te.status != ToolExecutionStatus.COMPLETED: continue raw_name = strip_namespace(te.tool_name).lower() # Normaliza guiones/underscores para matching tool name <-> action. tool_variants = {raw_name, raw_name.replace("-", "_"), raw_name.replace("_", "-")} # Match 1: nombre de la tool aparece en action if any(v and v in action for v in tool_variants): return True # Match 2/3: path o tableName en los args de la tool try: args_str = json.dumps(te.arguments or {}, ensure_ascii=False).lower() except Exception: args_str = str(te.arguments or "").lower() for f in files_touched: if f and f in args_str: return True for t in tables_touched: if t and t in args_str: return True return False async def _auto_advance_plan_cursor( self, session: SessionState, tool_executions_this_step: list[ToolExecution], no_tool_calls_this_step: bool = False, ) -> None: """Avanza el cursor del plan tras un step interno del agente. Usa LLM-as-judge (`plan_judge.judge_plan_progress`) para decidir que steps del plan se acaban de completar con las tool_executions del step actual. Mas robusto que el matching string heuristico anterior. Si `no_tool_calls_this_step=True` y hay un plan active, marcamos el plan como `done` — el agente decidio terminar (end_turn) sin mas tools, asi que confiamos en su criterio. Esto cierra el plan visualmente cuando el agente acaba. """ plan = session.metadata.get("current_plan") if not plan or plan.get("status") != "active": return steps = plan.get("steps") or [] prev_cursor = int(plan.get("cursor", 0)) prev_completed = list(plan.get("completed_step_ids", [])) completed_set = set(prev_completed) rationale = "" # Si el agente termino el turn sin tools, NO marcamos los pendientes # como completados — seria un falso positivo (caso real: agente se # queda atascado y devuelve mensaje de chat sin haber hecho la tarea). # Solo si el `completed_set` previo ya cubre todos los steps cerramos # como done; si quedan pendientes, dejamos `active`. if no_tool_calls_this_step: if steps and len(completed_set) >= len(steps): rationale = "agente termino el turn; todos los steps ya completados" else: rationale = "agente termino el turn con steps pendientes (no cerrado)" # No tocar completed_set: respetamos lo que el judge dijo en steps previos elif tool_executions_this_step: # Pregunta al judge que steps acaba de completar. try: completed_ids, judge_rationale = await judge_plan_progress( plan=plan, tool_executions_this_step=tool_executions_this_step, model_adapter=self.model, model_id=self.profile.model_id, ) for cid in completed_ids: completed_set.add(cid) rationale = judge_rationale except Exception as e: logger.warning("[plan-judge] failed, no advance this step: %s", e) # Sin judge, no avanzamos el cursor — preferimos dejar el plan # como esta antes que falsos positivos heuristicos. return # Cursor: primer step NO completado. Si todos completados → done. cursor = len(steps) for i, step in enumerate(steps): if step.get("id") not in completed_set: cursor = i break plan["cursor"] = cursor plan["completed_step_ids"] = sorted(completed_set) ended = False if cursor >= len(steps) and steps: plan["status"] = "done" ended = True # Solo emitimos si hubo cambio real. changed = cursor != prev_cursor or set(plan["completed_step_ids"]) != set(prev_completed) logger.info( "[plan-advance] tools_in_step=%d prev_cursor=%d new_cursor=%d completed=%s changed=%s rationale=%r", len(tool_executions_this_step), prev_cursor, cursor, plan["completed_step_ids"], changed, rationale[:160], ) if not changed: return try: if ended: await self.sse.emit( EventType.PLAN_ENDED, {"status": "done", "objective": plan.get("objective", "")}, session_id=session.session_id, ) else: await self.sse.emit( EventType.PLAN_ADVANCED, { "cursor": plan["cursor"], "completed_step_ids": plan["completed_step_ids"], "status": plan.get("status", "active"), }, session_id=session.session_id, ) except Exception as e: logger.warning("PLAN_ADVANCED/ENDED emit failed: %s", e) # ---- Allowed tools -------------------------------------------------------- def _get_allowed_tools( self, followup_mode: str = "none", plan_mode: str = "force", ) -> list[dict[str, Any]]: """Return tool definitions filtered by this agent's allowed_tools. Si el agente tiene `has_planner_tool=True` Y `plan_mode == "force"`, anade definiciones sinteticas de `acai_plan` y `acai_plan_advance` (la tool interna no atraviesa MCP — se intercepta en `_execute_tool`). Cuando `plan_mode != "force"` (toggle del UI desactivado), las tools del planner NO se exponen y el agente ejecuta directo. """ if followup_mode == "transform": return [] if not self.mcp.is_running: return [] all_tools = self.mcp.get_tool_definitions() if self.profile.allowed_tools: tool_defs = [t for t in all_tools if t["name"] in self.profile.allowed_tools] else: tool_defs = list(all_tools) if self.profile.has_planner_tool and plan_mode == "force": tool_defs.append({ "name": "acai_plan", "description": ( "Genera un plan estructurado de ejecucion. Usa esta tool al recibir " "una peticion compuesta (landing entera, tienda, refactor amplio, modulo " "con tabla+hook+frontend). NO la uses para tareas triviales (cambiar un titulo, " "ajustar un color, leer datos). Devuelve JSON con steps, risks, files_touched, " "tables_touched." ), "input_schema": { "type": "object", "required": ["objective"], "properties": { "objective": { "type": "string", "description": "Descripcion en español de lo que hay que conseguir.", }, "scope": { "type": "string", "description": "Restricciones opcionales (ej. 'no toques el header').", }, }, }, }) tool_defs.append({ "name": "acai_plan_advance", "description": ( "Avanza/abandona el plan activo. Llama con `abandon: true` si el " "usuario corrige y el plan ya no es valido, o con `next_cursor` para " "saltar al siguiente step pendiente." ), "input_schema": { "type": "object", "properties": { "abandon": {"type": "boolean"}, "completed_ids": {"type": "array", "items": {"type": "integer"}}, "next_cursor": {"type": "integer"}, }, }, }) return tool_defs @staticmethod def _extract_mcp_output(result: dict[str, Any]) -> str: """Extract text content from MCP tool result. El modelo (MiniMax M2.7) es text-only — los blocks `type=image` no pueden reenviarse. En lugar de descartar silenciosamente (lo que dejaba al agente con un tool_result vacio y le hacia repetir la llamada), emitimos un placeholder explicito que le dice que use `browser_snapshot` si quiere inspeccionar la pagina. """ content = result.get("content", []) if isinstance(content, list): parts: list[str] = [] image_count = 0 for item in content: if not isinstance(item, dict): continue itype = item.get("type") if itype == "text": parts.append(item.get("text", "")) elif itype == "image": image_count += 1 if image_count and not parts: return ( f"[{image_count} imagen(es) no procesada(s) — el modelo es " f"text-only. Para inspeccionar la pagina usa " f"`browser_snapshot` (devuelve accessibility tree en texto). " f"`browser_take_screenshot` solo sirve para que el usuario " f"vea la captura, no para tu analisis.]" ) if image_count and parts: parts.append( f"\n[Adicionalmente {image_count} imagen(es) no incluida(s): " f"el modelo no las procesa.]" ) return "\n".join(parts) if parts else json.dumps(result) return str(content)