Rediseño tool results + compactación por step + integración Docker

- Tool results completos en conversación (como Claude Code/Cursor)
  en vez de resúmenes en system prompt
- Parser multi-tool: trackea tool calls por tool_call_id para
  OpenAI streaming interleaved
- Deduplicación por fingerprint + detección de loop cuando todos
  los calls de un step son duplicados
- Compactación inteligente por step: el orquestador decide cuándo
  comprimir steps anteriores (cambio de agente o >3 steps)
- stdio.js lee URLs del .acai como fallback (local_web_url, local_forge_host)
- Buffer MCP aumentado a 1MB para respuestas grandes
- Dockerfile adaptado para build context desde raíz del proyecto

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Jordan Diaz
2026-04-03 12:09:08 +00:00
parent 0dd3adbebd
commit b88917c18d
7 changed files with 206 additions and 91 deletions

View File

@@ -2,6 +2,7 @@
from __future__ import annotations
import hashlib
import json
import logging
import time
@@ -47,6 +48,10 @@ class BaseAgent:
) -> dict[str, Any]:
"""Run the agent's execution loop.
Uses real conversation messages with complete tool results,
like professional agentic tools (Claude Code, Cursor).
Compaction happens at the step level, not per tool result.
Returns a result dict with keys: content, artifacts, tool_executions.
"""
artifacts: list[ArtifactSummary] = await self.memory.list_artifacts(
@@ -54,15 +59,18 @@ class BaseAgent:
)
tool_executions: list[ToolExecution] = []
accumulated_content = ""
working_items: list[dict[str, Any]] = []
# Real conversation history: assistant messages + tool results
conversation: list[dict[str, Any]] = []
tool_fingerprints: dict[str, ToolExecution] = {}
all_duplicates_streak = 0 # consecutive steps where ALL calls are duplicates
for step in range(max_steps):
# Build context — NEVER includes raw tool output
# Build context with real conversation
ctx = await self.context.build_context(
session=session,
agent=self.profile,
artifacts=artifacts,
working_items=working_items,
conversation=conversation,
)
# Prepare tool definitions
@@ -77,7 +85,7 @@ class BaseAgent:
full_text = ""
tool_calls: list[dict[str, Any]] = []
current_tool: dict[str, Any] | None = None
active_tools: dict[str, dict[str, Any]] = {}
async for chunk in self.model.stream(
messages=ctx.to_messages(),
@@ -96,35 +104,40 @@ class BaseAgent:
session_id=session.session_id,
)
if chunk.tool_name and (current_tool is None or not current_tool.get("name")):
current_tool = {
"id": chunk.tool_call_id,
"name": chunk.tool_name,
"arguments": "",
}
await self.sse.emit(
EventType.TOOL_STARTED,
{"tool": chunk.tool_name, "step": step},
session_id=session.session_id,
)
if chunk.tool_name and chunk.tool_call_id:
if chunk.tool_call_id not in active_tools:
active_tools[chunk.tool_call_id] = {
"id": chunk.tool_call_id,
"name": chunk.tool_name,
"arguments": "",
}
await self.sse.emit(
EventType.TOOL_STARTED,
{"tool": chunk.tool_name, "step": step},
session_id=session.session_id,
)
if chunk.tool_arguments and current_tool is not None and not chunk.finish_reason:
# Accumulate partial argument chunks (NOT the final one)
current_tool["arguments"] += chunk.tool_arguments
if chunk.tool_arguments and chunk.tool_call_id and not chunk.finish_reason:
tool = active_tools.get(chunk.tool_call_id)
if tool:
tool["arguments"] += chunk.tool_arguments
if chunk.finish_reason == "tool_use" and current_tool is not None and current_tool.get("name"):
# Final chunk carries complete arguments — use those if
# partial accumulation is empty, otherwise use accumulated
final_args = current_tool["arguments"] or chunk.tool_arguments or ""
if chunk.finish_reason == "tool_use" and chunk.tool_call_id:
tool = active_tools.pop(chunk.tool_call_id, None)
if not tool:
tool = {
"id": chunk.tool_call_id,
"name": chunk.tool_name or "",
"arguments": "",
}
final_args = tool["arguments"] or chunk.tool_arguments or ""
try:
args = json.loads(final_args) if final_args else {}
except json.JSONDecodeError:
logger.warning("Failed to parse tool args: %s", final_args[:200])
args = {}
current_tool["parsed_arguments"] = args
logger.debug("Tool call finalized: %s args=%s", current_tool["name"], json.dumps(args)[:200])
tool_calls.append(current_tool)
current_tool = None
tool["parsed_arguments"] = args
tool_calls.append(tool)
if chunk.finish_reason == "end_turn":
break
@@ -133,24 +146,90 @@ class BaseAgent:
# If no tool calls, we're done
if not tool_calls:
# Add final assistant message to conversation
if full_text:
conversation.append({"role": "assistant", "content": full_text})
break
# Execute tool calls
# Add assistant message with tool calls to conversation
# (OpenAI format: assistant message carries tool_calls)
assistant_msg: dict[str, Any] = {"role": "assistant"}
if full_text:
assistant_msg["content"] = full_text
assistant_msg["tool_calls"] = [
{
"id": tc["id"],
"type": "function",
"function": {
"name": tc["name"],
"arguments": json.dumps(tc.get("parsed_arguments", {})),
},
}
for tc in tool_calls
]
conversation.append(assistant_msg)
# Execute tool calls and add COMPLETE results to conversation
duplicates_this_step = 0
for tc in tool_calls:
fp_raw = f"{tc['name']}:{json.dumps(tc.get('parsed_arguments', {}), sort_keys=True)}"
fp = hashlib.md5(fp_raw.encode()).hexdigest()
if fp in tool_fingerprints:
prev_exec = tool_fingerprints[fp]
tool_executions.append(prev_exec)
duplicates_this_step += 1
# Return cached result as tool message
conversation.append({
"role": "tool",
"tool_call_id": tc["id"],
"content": f"[DUPLICADO] Ya ejecutada con mismos argumentos. Resultado: {prev_exec.raw_output[:2000]}",
})
logger.warning("Duplicate tool call skipped: %s (fingerprint: %s)", tc["name"], fp[:8])
continue
tool_exec = await self._execute_tool(
session=session,
tool_name=tc["name"],
arguments=tc.get("parsed_arguments", {}),
artifacts=artifacts,
)
tool_fingerprints[fp] = tool_exec
tool_executions.append(tool_exec)
# Add summarised result to working context (NEVER raw)
working_items.append({
"role": "tool_result",
"content": f"[{tc['name']}] {tool_exec.result_summary}",
# COMPLETE result in conversation (truncated to safe limit)
conversation.append({
"role": "tool",
"tool_call_id": tc["id"],
"content": tool_exec.raw_output[:8000] if tool_exec.raw_output else tool_exec.result_summary,
})
# Loop detection: if ALL tool calls in this step were duplicates
if duplicates_this_step == len(tool_calls):
all_duplicates_streak += 1
if all_duplicates_streak >= 2:
logger.warning("Loop detected: %d consecutive steps with all duplicate calls. Breaking.", all_duplicates_streak)
conversation.append({
"role": "user",
"content": "[SISTEMA] Se detectaron llamadas repetidas. Ya tienes toda la información necesaria. Genera tu respuesta final ahora.",
})
# One more chance to generate a final response
ctx = await self.context.build_context(
session=session, agent=self.profile,
artifacts=artifacts, conversation=conversation,
)
async for chunk in self.model.stream(
messages=ctx.to_messages(),
config=config,
):
if chunk.delta:
accumulated_content += chunk.delta
if chunk.finish_reason:
break
break
else:
all_duplicates_streak = 0
return {
"content": accumulated_content,
"artifacts": artifacts,
@@ -200,6 +279,7 @@ class BaseAgent:
tool_exec.status = ToolExecutionStatus.COMPLETED
tool_exec.result_summary = artifact.summary
tool_exec.raw_output = raw_output[:8000]
tool_exec.duration_ms = duration
await self.sse.emit(

View File

@@ -16,7 +16,7 @@ from ..context.engine import ContextEngine
from ..mcp.manager import MCPManager
from ..memory.store import MemoryStore
from ..models.agent import AgentRole
from ..models.session import SessionState, SessionStatus, TaskStatus
from ..models.session import SessionState, SessionStatus, TaskState, TaskStatus
from ..streaming.sse import SSEEmitter, EventType
from .agents.coder import CoderAgent, create_coder_profile
from .agents.collector import CollectorAgent, create_collector_profile
@@ -181,6 +181,10 @@ class OrchestratorEngine:
for artifact in step_result.get("artifacts", []):
task.facts_extracted.extend(artifact.facts[:5])
# Decide if previous steps should be compacted
if i > 0:
self._maybe_compact_previous_steps(task, current_index=i)
except Exception as e:
logger.error("Step %d failed: %s", i + 1, e)
step.status = TaskStatus.FAILED
@@ -323,6 +327,35 @@ class OrchestratorEngine:
task.task_id, len(task.facts_extracted), len(tools_used), len(task_artifacts),
)
def _maybe_compact_previous_steps(
    self, task: TaskState, current_index: int
) -> None:
    """Mark earlier completed plan steps as compacted, using deterministic rules.

    Rule 1: a completed, non-compacted step executed by a different agent
    role than the current step is compacted — it belongs to a different
    focus and its detail is no longer needed verbatim.
    Rule 2: after Rule 1, if more than 3 completed steps remain
    non-compacted, the oldest ones are compacted until at most 3 remain,
    keeping the context within budget.

    NOTE: the previous version evaluated Rule 2 inside Rule 1's loop,
    recomputing the survivor list every iteration (O(n^2)) and counting
    different-role steps that Rule 1 was about to compact anyway, which
    could compact more steps than the budget requires. Rule 2 is now
    applied once, after Rule 1 has fully run.

    Args:
        task: Task whose plan steps are inspected and mutated in place.
        current_index: Index of the step currently executing; only steps
            before it are considered.
    """
    current_step = task.plan[current_index]

    # Rule 1: change of agent role → previous steps are a different focus.
    for i in range(current_index):
        prev = task.plan[i]
        if prev.compacted or prev.status != TaskStatus.COMPLETED:
            continue
        if prev.agent_role != current_step.agent_role:
            prev.compacted = True
            logger.info(
                "Compacted step %d (%s) — agent changed to %s",
                i + 1, prev.agent_role, current_step.agent_role,
            )

    # Rule 2: more than 3 completed non-compacted steps → compact oldest
    # first until only the 3 most recent survivors remain.
    survivors = [
        s for s in task.plan[:current_index]
        if s.status == TaskStatus.COMPLETED and not s.compacted
    ]
    excess = len(survivors) - 3
    for old_step in survivors[:max(0, excess)]:
        old_step.compacted = True
        logger.info("Compacted oldest step to stay within budget")
def _create_agent(self, role: AgentRole) -> PlannerAgent | CoderAgent | CollectorAgent | ReviewerAgent:
"""Instantiate a subagent for the given role."""
profile = self._profiles[role]