Rediseño tool results + compactación por step + integración Docker

- Tool results completos en conversación (como Claude Code/Cursor)
  en vez de resúmenes en system prompt
- Parser multi-tool: trackea tool calls por tool_call_id para
  OpenAI streaming interleaved
- Deduplicación por fingerprint + detección de loop cuando todos
  los calls de un step son duplicados
- Compactación inteligente por step: el orquestador decide cuándo
  comprimir steps anteriores (cambio de agente o >3 steps)
- stdio.js lee URLs del .acai como fallback (local_web_url, local_forge_host)
- Buffer MCP aumentado a 1MB para respuestas grandes
- Dockerfile adaptado para build context desde raíz del proyecto

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Jordan Diaz
2026-04-03 12:09:08 +00:00
parent 0dd3adbebd
commit b88917c18d
7 changed files with 206 additions and 91 deletions

View File

@@ -2,6 +2,7 @@
from __future__ import annotations
import hashlib
import json
import logging
import time
@@ -47,6 +48,10 @@ class BaseAgent:
) -> dict[str, Any]:
"""Run the agent's execution loop.
Uses real conversation messages with complete tool results,
like professional agentic tools (Claude Code, Cursor).
Compaction happens at the step level, not per tool result.
Returns a result dict with keys: content, artifacts, tool_executions.
"""
artifacts: list[ArtifactSummary] = await self.memory.list_artifacts(
@@ -54,15 +59,18 @@ class BaseAgent:
)
tool_executions: list[ToolExecution] = []
accumulated_content = ""
working_items: list[dict[str, Any]] = []
# Real conversation history: assistant messages + tool results
conversation: list[dict[str, Any]] = []
tool_fingerprints: dict[str, ToolExecution] = {}
all_duplicates_streak = 0 # consecutive steps where ALL calls are duplicates
for step in range(max_steps):
# Build context — NEVER includes raw tool output
# Build context with real conversation
ctx = await self.context.build_context(
session=session,
agent=self.profile,
artifacts=artifacts,
working_items=working_items,
conversation=conversation,
)
# Prepare tool definitions
@@ -77,7 +85,7 @@ class BaseAgent:
full_text = ""
tool_calls: list[dict[str, Any]] = []
current_tool: dict[str, Any] | None = None
active_tools: dict[str, dict[str, Any]] = {}
async for chunk in self.model.stream(
messages=ctx.to_messages(),
@@ -96,35 +104,40 @@ class BaseAgent:
session_id=session.session_id,
)
if chunk.tool_name and (current_tool is None or not current_tool.get("name")):
current_tool = {
"id": chunk.tool_call_id,
"name": chunk.tool_name,
"arguments": "",
}
await self.sse.emit(
EventType.TOOL_STARTED,
{"tool": chunk.tool_name, "step": step},
session_id=session.session_id,
)
if chunk.tool_name and chunk.tool_call_id:
if chunk.tool_call_id not in active_tools:
active_tools[chunk.tool_call_id] = {
"id": chunk.tool_call_id,
"name": chunk.tool_name,
"arguments": "",
}
await self.sse.emit(
EventType.TOOL_STARTED,
{"tool": chunk.tool_name, "step": step},
session_id=session.session_id,
)
if chunk.tool_arguments and current_tool is not None and not chunk.finish_reason:
# Accumulate partial argument chunks (NOT the final one)
current_tool["arguments"] += chunk.tool_arguments
if chunk.tool_arguments and chunk.tool_call_id and not chunk.finish_reason:
tool = active_tools.get(chunk.tool_call_id)
if tool:
tool["arguments"] += chunk.tool_arguments
if chunk.finish_reason == "tool_use" and current_tool is not None and current_tool.get("name"):
# Final chunk carries complete arguments — use those if
# partial accumulation is empty, otherwise use accumulated
final_args = current_tool["arguments"] or chunk.tool_arguments or ""
if chunk.finish_reason == "tool_use" and chunk.tool_call_id:
tool = active_tools.pop(chunk.tool_call_id, None)
if not tool:
tool = {
"id": chunk.tool_call_id,
"name": chunk.tool_name or "",
"arguments": "",
}
final_args = tool["arguments"] or chunk.tool_arguments or ""
try:
args = json.loads(final_args) if final_args else {}
except json.JSONDecodeError:
logger.warning("Failed to parse tool args: %s", final_args[:200])
args = {}
current_tool["parsed_arguments"] = args
logger.debug("Tool call finalized: %s args=%s", current_tool["name"], json.dumps(args)[:200])
tool_calls.append(current_tool)
current_tool = None
tool["parsed_arguments"] = args
tool_calls.append(tool)
if chunk.finish_reason == "end_turn":
break
@@ -133,24 +146,90 @@ class BaseAgent:
# If no tool calls, we're done
if not tool_calls:
# Add final assistant message to conversation
if full_text:
conversation.append({"role": "assistant", "content": full_text})
break
# Execute tool calls
# Add assistant message with tool calls to conversation
# (OpenAI format: assistant message carries tool_calls)
assistant_msg: dict[str, Any] = {"role": "assistant"}
if full_text:
assistant_msg["content"] = full_text
assistant_msg["tool_calls"] = [
{
"id": tc["id"],
"type": "function",
"function": {
"name": tc["name"],
"arguments": json.dumps(tc.get("parsed_arguments", {})),
},
}
for tc in tool_calls
]
conversation.append(assistant_msg)
# Execute tool calls and add COMPLETE results to conversation
duplicates_this_step = 0
for tc in tool_calls:
fp_raw = f"{tc['name']}:{json.dumps(tc.get('parsed_arguments', {}), sort_keys=True)}"
fp = hashlib.md5(fp_raw.encode()).hexdigest()
if fp in tool_fingerprints:
prev_exec = tool_fingerprints[fp]
tool_executions.append(prev_exec)
duplicates_this_step += 1
# Return cached result as tool message
conversation.append({
"role": "tool",
"tool_call_id": tc["id"],
"content": f"[DUPLICADO] Ya ejecutada con mismos argumentos. Resultado: {prev_exec.raw_output[:2000]}",
})
logger.warning("Duplicate tool call skipped: %s (fingerprint: %s)", tc["name"], fp[:8])
continue
tool_exec = await self._execute_tool(
session=session,
tool_name=tc["name"],
arguments=tc.get("parsed_arguments", {}),
artifacts=artifacts,
)
tool_fingerprints[fp] = tool_exec
tool_executions.append(tool_exec)
# Add summarised result to working context (NEVER raw)
working_items.append({
"role": "tool_result",
"content": f"[{tc['name']}] {tool_exec.result_summary}",
# COMPLETE result in conversation (truncated to safe limit)
conversation.append({
"role": "tool",
"tool_call_id": tc["id"],
"content": tool_exec.raw_output[:8000] if tool_exec.raw_output else tool_exec.result_summary,
})
# Loop detection: if ALL tool calls in this step were duplicates
if duplicates_this_step == len(tool_calls):
all_duplicates_streak += 1
if all_duplicates_streak >= 2:
logger.warning("Loop detected: %d consecutive steps with all duplicate calls. Breaking.", all_duplicates_streak)
conversation.append({
"role": "user",
"content": "[SISTEMA] Se detectaron llamadas repetidas. Ya tienes toda la información necesaria. Genera tu respuesta final ahora.",
})
# One more chance to generate a final response
ctx = await self.context.build_context(
session=session, agent=self.profile,
artifacts=artifacts, conversation=conversation,
)
async for chunk in self.model.stream(
messages=ctx.to_messages(),
config=config,
):
if chunk.delta:
accumulated_content += chunk.delta
if chunk.finish_reason:
break
break
else:
all_duplicates_streak = 0
return {
"content": accumulated_content,
"artifacts": artifacts,
@@ -200,6 +279,7 @@ class BaseAgent:
tool_exec.status = ToolExecutionStatus.COMPLETED
tool_exec.result_summary = artifact.summary
tool_exec.raw_output = raw_output[:8000]
tool_exec.duration_ms = duration
await self.sse.emit(

View File

@@ -16,7 +16,7 @@ from ..context.engine import ContextEngine
from ..mcp.manager import MCPManager
from ..memory.store import MemoryStore
from ..models.agent import AgentRole
from ..models.session import SessionState, SessionStatus, TaskStatus
from ..models.session import SessionState, SessionStatus, TaskState, TaskStatus
from ..streaming.sse import SSEEmitter, EventType
from .agents.coder import CoderAgent, create_coder_profile
from .agents.collector import CollectorAgent, create_collector_profile
@@ -181,6 +181,10 @@ class OrchestratorEngine:
for artifact in step_result.get("artifacts", []):
task.facts_extracted.extend(artifact.facts[:5])
# Decide if previous steps should be compacted
if i > 0:
self._maybe_compact_previous_steps(task, current_index=i)
except Exception as e:
logger.error("Step %d failed: %s", i + 1, e)
step.status = TaskStatus.FAILED
@@ -323,6 +327,35 @@ class OrchestratorEngine:
task.task_id, len(task.facts_extracted), len(tools_used), len(task_artifacts),
)
def _maybe_compact_previous_steps(
    self, task: TaskState, current_index: int
) -> None:
    """Mark earlier completed plan steps as compacted, using deterministic rules.

    Rule 1: a completed, non-compacted step executed by a different agent
    role than the current step is compacted — it belongs to a different
    focus and its detail is no longer needed verbatim.
    Rule 2: after Rule 1, if more than 3 completed steps remain
    non-compacted, the oldest ones are compacted until at most 3 remain,
    keeping the context within budget.

    NOTE: the previous version evaluated Rule 2 inside Rule 1's loop,
    recomputing the survivor list every iteration (O(n^2)) and counting
    different-role steps that Rule 1 was about to compact anyway, which
    could compact more steps than the budget requires. Rule 2 is now
    applied once, after Rule 1 has fully run.

    Args:
        task: Task whose plan steps are inspected and mutated in place.
        current_index: Index of the step currently executing; only steps
            before it are considered.
    """
    current_step = task.plan[current_index]

    # Rule 1: change of agent role → previous steps are a different focus.
    for i in range(current_index):
        prev = task.plan[i]
        if prev.compacted or prev.status != TaskStatus.COMPLETED:
            continue
        if prev.agent_role != current_step.agent_role:
            prev.compacted = True
            logger.info(
                "Compacted step %d (%s) — agent changed to %s",
                i + 1, prev.agent_role, current_step.agent_role,
            )

    # Rule 2: more than 3 completed non-compacted steps → compact oldest
    # first until only the 3 most recent survivors remain.
    survivors = [
        s for s in task.plan[:current_index]
        if s.status == TaskStatus.COMPLETED and not s.compacted
    ]
    excess = len(survivors) - 3
    for old_step in survivors[:max(0, excess)]:
        old_step.compacted = True
        logger.info("Compacted oldest step to stay within budget")
def _create_agent(self, role: AgentRole) -> PlannerAgent | CoderAgent | CollectorAgent | ReviewerAgent:
"""Instantiate a subagent for the given role."""
profile = self._profiles[role]