Ajustes
This commit is contained in:
@@ -586,6 +586,15 @@ class ClaudeAdapter(ModelAdapter):
|
||||
if force_tool:
|
||||
kwargs["tool_choice"] = {"type": "tool", "name": force_tool}
|
||||
|
||||
# Permite desactivar thinking para llamadas que no lo necesitan (p.ej.
|
||||
# plan_judge: solo evalua, no razona). MiniMax M2.7 acepta el parametro
|
||||
# Anthropic-style `thinking`. Aunque la implementacion no respeta del
|
||||
# todo el "disabled" (a veces sigue emitiendo thinking blocks), reduce
|
||||
# el consumo de tokens y deja mas espacio para el JSON output.
|
||||
thinking_cfg = (config.extra or {}).get("thinking")
|
||||
if thinking_cfg:
|
||||
kwargs["thinking"] = thinking_cfg
|
||||
|
||||
# Retry con backoff sobre errores transitorios (429/503/529). El proxy
|
||||
# MiniMax devuelve 529 overloaded_error con cierta frecuencia bajo carga.
|
||||
last_exc: Exception | None = None
|
||||
|
||||
@@ -46,6 +46,9 @@ class SendMessageRequest(BaseModel):
|
||||
message: str
|
||||
stream: bool = False
|
||||
agent_id: str | None = None
|
||||
# 'auto' = el agente decide (heuristica trivial-vs-complex). 'force' = forzar
|
||||
# acai_plan antes de cualquier ejecucion. UI: toggle en ChatPanel.
|
||||
plan_mode: str = "auto"
|
||||
|
||||
|
||||
class CompletionRequest(BaseModel):
|
||||
@@ -79,6 +82,8 @@ class SessionResponse(BaseModel):
|
||||
created_at: str
|
||||
updated_at: str
|
||||
agent_id: str = "acai"
|
||||
# Plan activo (Fase 5.5: PlanStepper UI). None si no hay plan en curso.
|
||||
current_plan: dict[str, Any] | None = None
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
@@ -290,6 +295,14 @@ async def send_message(
|
||||
if not agent_profile:
|
||||
agent_profile = agent_reg.get(agent_reg.default_agent_id)
|
||||
|
||||
# Plan mode controlado por el usuario desde el toggle del ChatPanel.
|
||||
# 'auto' (default): heuristica del modelo trivial-vs-complex.
|
||||
# 'force': el agente DEBE llamar acai_plan como primera accion.
|
||||
plan_mode = (body.plan_mode or "auto").lower()
|
||||
if plan_mode not in ("auto", "force"):
|
||||
plan_mode = "auto"
|
||||
session.metadata["plan_mode"] = plan_mode
|
||||
|
||||
from ..mcp.manager import MCPManager
|
||||
orchestrator = _build_orchestrator(mcp_manager or MCPManager(), agent_profile)
|
||||
|
||||
@@ -379,6 +392,27 @@ async def get_session(session_id: str) -> SessionResponse:
|
||||
if not session:
|
||||
raise HTTPException(status_code=404, detail="Session not found")
|
||||
|
||||
plan = session.metadata.get("current_plan")
|
||||
plan_payload = None
|
||||
if isinstance(plan, dict) and plan.get("status") == "active":
|
||||
plan_payload = {
|
||||
"objective": plan.get("objective", ""),
|
||||
"steps": [
|
||||
{
|
||||
"id": s.get("id"),
|
||||
"description": s.get("description", "")[:300],
|
||||
"agent_action": s.get("agent_action", "")[:200],
|
||||
"files_touched": s.get("files_touched", [])[:10],
|
||||
"tables_touched": s.get("tables_touched", [])[:10],
|
||||
}
|
||||
for s in (plan.get("steps") or [])
|
||||
],
|
||||
"risks": (plan.get("risks") or [])[:10],
|
||||
"cursor": plan.get("cursor", 0),
|
||||
"completed_step_ids": plan.get("completed_step_ids", []),
|
||||
"status": plan.get("status", "active"),
|
||||
}
|
||||
|
||||
return SessionResponse(
|
||||
session_id=session.session_id,
|
||||
status=session.status.value,
|
||||
@@ -388,6 +422,7 @@ async def get_session(session_id: str) -> SessionResponse:
|
||||
created_at=session.created_at.isoformat(),
|
||||
updated_at=session.updated_at.isoformat(),
|
||||
agent_id=session.agent_id,
|
||||
current_plan=plan_payload,
|
||||
)
|
||||
|
||||
|
||||
@@ -412,6 +447,41 @@ async def delete_session(session_id: str) -> dict[str, str]:
|
||||
return {"status": "deleted", "session_id": session_id}
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# POST /sessions/{id}/plan/abandon — cancela el plan activo (Fase 5.5)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@router.post("/sessions/{session_id}/plan/abandon")
async def abandon_plan(session_id: str) -> dict[str, Any]:
    """Cancel the session's active plan and notify listeners.

    Looks up the session; when its ``current_plan`` metadata entry is a dict
    whose ``status`` is ``"active"``, the plan is marked ``"abandoned"``,
    appended to the ``plan_history`` metadata list, ``current_plan`` is
    cleared and the session is persisted. A ``PLAN_ENDED`` event is then
    emitted; a failing emit is only logged and never fails the request.

    Args:
        session_id: Identifier of the session whose plan is abandoned.

    Returns:
        A dict with ``session_id`` and a ``status`` of ``"abandoned"``, or
        ``"no_active_plan"`` when there was nothing to cancel.

    Raises:
        HTTPException: 404 when no session exists for ``session_id``.
    """
    store = _get_storage()
    session = await store.get_session(session_id)
    if not session:
        raise HTTPException(status_code=404, detail="Session not found")

    active_plan = session.metadata.get("current_plan")
    has_active_plan = (
        isinstance(active_plan, dict) and active_plan.get("status") == "active"
    )
    if not has_active_plan:
        return {"status": "no_active_plan", "session_id": session_id}

    # Archive the plan first, then clear the active slot and persist, so the
    # stored session never references a plan that is neither active nor archived.
    active_plan["status"] = "abandoned"
    history = session.metadata.setdefault("plan_history", [])
    history.append(active_plan)
    session.metadata["current_plan"] = None
    await store.update_session(session)

    # Notify the frontend over SSE (best effort: the abandon is already saved).
    emitter = _get_sse()
    try:
        from ..streaming.sse import EventType as _ET

        payload = {
            "status": "abandoned",
            "objective": active_plan.get("objective", ""),
        }
        await emitter.emit(_ET.PLAN_ENDED, payload, session_id=session_id)
    except Exception:
        logger.warning("PLAN_ENDED emit failed on abandon", exc_info=True)

    return {"status": "abandoned", "session_id": session_id}
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# GET /sessions/{id}/events
|
||||
# ------------------------------------------------------------------
|
||||
@@ -520,34 +590,85 @@ async def _load_knowledge_from_dir(docs_path: str = "docs") -> dict[str, Any]:
|
||||
if not docs_dir.is_dir():
|
||||
return {"status": "error", "message": f"Directory not found: {docs_dir}"}
|
||||
|
||||
# Read all docs
|
||||
docs_data: list[tuple[str, str, str, str, list[str]]] = [] # (id, title, content, summary, tags)
|
||||
# Read all docs. Cada doc puede tener frontmatter YAML al inicio:
|
||||
# ---
|
||||
# title: "..."
|
||||
# tags: [a, b]
|
||||
# load_priority: 80
|
||||
# load_when: [always]
|
||||
# summary: "..."
|
||||
# ---
|
||||
# Si no hay frontmatter, se cae al modo legacy (heuristica sobre headings).
|
||||
import re as _re
|
||||
import yaml as _yaml
|
||||
_FM_RE = _re.compile(r"^---\s*\n(.*?)\n---\s*\n", _re.DOTALL)
|
||||
|
||||
# (id, title, content, summary, tags, priority, load_when)
|
||||
docs_data: list[tuple[str, str, str, str, list[str], int, list[str]]] = []
|
||||
for md_file in sorted(docs_dir.glob("*.md")):
|
||||
content = md_file.read_text(encoding="utf-8")
|
||||
raw = md_file.read_text(encoding="utf-8")
|
||||
doc_id = md_file.stem
|
||||
|
||||
# Defaults
|
||||
title = doc_id
|
||||
summary = ""
|
||||
tags: list[str] = []
|
||||
priority = 50
|
||||
load_when: list[str] = []
|
||||
|
||||
# Intentar parsear frontmatter
|
||||
fm_match = _FM_RE.match(raw)
|
||||
if fm_match:
|
||||
try:
|
||||
fm = _yaml.safe_load(fm_match.group(1)) or {}
|
||||
if isinstance(fm, dict):
|
||||
title = str(fm.get("title", title))
|
||||
summary = str(fm.get("summary", ""))[:500]
|
||||
fm_tags = fm.get("tags") or []
|
||||
if isinstance(fm_tags, list):
|
||||
tags = [str(t).lower()[:30] for t in fm_tags][:10]
|
||||
priority = int(fm.get("load_priority", 50))
|
||||
fm_load_when = fm.get("load_when") or []
|
||||
if isinstance(fm_load_when, list):
|
||||
load_when = [str(x).lower()[:30] for x in fm_load_when][:10]
|
||||
# Body sin frontmatter — no contamina embeddings ni cuenta
|
||||
# como contenido en el system prompt.
|
||||
content = raw[fm_match.end():]
|
||||
except _yaml.YAMLError:
|
||||
logger.warning("Frontmatter invalido en %s — fallback legacy", md_file.name)
|
||||
content = raw
|
||||
else:
|
||||
content = raw
|
||||
|
||||
# Fallback legacy: si no hubo frontmatter o falto algun campo,
|
||||
# derivar title/summary/tags del contenido.
|
||||
lines = content.strip().splitlines()
|
||||
title = lines[0].lstrip("#").strip() if lines else doc_id
|
||||
if title == doc_id and lines:
|
||||
title = lines[0].lstrip("#").strip() or doc_id
|
||||
if not summary:
|
||||
summary_lines: list[str] = []
|
||||
for line in lines[:30]:
|
||||
stripped = line.strip()
|
||||
if stripped and not stripped.startswith("#"):
|
||||
summary_lines.append(stripped)
|
||||
if len(" ".join(summary_lines)) > 500:
|
||||
break
|
||||
summary = " ".join(summary_lines)[:500]
|
||||
if not tags:
|
||||
for line in lines:
|
||||
if line.startswith("## "):
|
||||
tags.append(line.lstrip("#").strip().lower()[:30])
|
||||
tags = tags[:10]
|
||||
|
||||
summary_lines = []
|
||||
for line in lines[:30]:
|
||||
line = line.strip()
|
||||
if line and not line.startswith("#"):
|
||||
summary_lines.append(line)
|
||||
if len(" ".join(summary_lines)) > 500:
|
||||
break
|
||||
summary = " ".join(summary_lines)[:500]
|
||||
|
||||
tags = []
|
||||
for line in lines:
|
||||
if line.startswith("## "):
|
||||
tags.append(line.lstrip("#").strip().lower()[:30])
|
||||
|
||||
docs_data.append((doc_id, title, content, summary, tags[:10]))
|
||||
docs_data.append((doc_id, title, content, summary, tags, priority, load_when))
|
||||
|
||||
# Generate embeddings in batch
|
||||
from ..memory.embeddings import EmbeddingService
|
||||
embed_service = EmbeddingService()
|
||||
embed_texts = [f"{title}\n{summary}\n{content[:2000]}" for _, title, content, summary, _ in docs_data]
|
||||
embed_texts = [
|
||||
f"{title}\n{summary}\n{content[:2000]}"
|
||||
for _, title, content, summary, _, _, _ in docs_data
|
||||
]
|
||||
|
||||
try:
|
||||
embeddings = await embed_service.embed_batch(embed_texts)
|
||||
@@ -576,7 +697,7 @@ async def _load_knowledge_from_dir(docs_path: str = "docs") -> dict[str, Any]:
|
||||
|
||||
# Store docs + embeddings
|
||||
loaded = []
|
||||
for i, (doc_id, title, content, summary, tags) in enumerate(docs_data):
|
||||
for i, (doc_id, title, content, summary, tags, priority, load_when) in enumerate(docs_data):
|
||||
doc = MemoryDocument(
|
||||
memory_id=doc_id,
|
||||
memory_type=MemoryType.DOCUMENT,
|
||||
@@ -585,6 +706,8 @@ async def _load_knowledge_from_dir(docs_path: str = "docs") -> dict[str, Any]:
|
||||
content=content,
|
||||
summary=summary,
|
||||
tags=tags,
|
||||
priority=priority,
|
||||
load_when=load_when,
|
||||
)
|
||||
await memory.store_document(doc)
|
||||
|
||||
@@ -596,6 +719,8 @@ async def _load_knowledge_from_dir(docs_path: str = "docs") -> dict[str, Any]:
|
||||
"title": title,
|
||||
"chars": len(content),
|
||||
"tags": tags[:5],
|
||||
"priority": priority,
|
||||
"load_when": load_when,
|
||||
"embedded": embeddings[i] is not None,
|
||||
})
|
||||
|
||||
|
||||
@@ -45,7 +45,19 @@ class Settings(BaseSettings):
|
||||
compaction_threshold_ratio: float = 0.80
|
||||
context_reserve_ratio: float = 0.10
|
||||
artifact_summary_max_chars: int = 2000
|
||||
knowledge_base_max_tokens: int = 30_000
|
||||
# KB inyectada como system prompt. Default 4k (antes 30k) — la doc
|
||||
# oficial de M2.7 advierte que system prompts grandes degradan rendimiento.
|
||||
# Top-2 docs medianos + cheat sheet ≈ 4k tokens caben con margen.
|
||||
# Se sobrescribe per-agent via `agent.yaml.kb_max_tokens`.
|
||||
knowledge_base_max_tokens: int = 4_000
|
||||
# Cap absoluto del numero de docs incluidos (filtro tras ranking).
|
||||
kb_top_n_docs: int = 2
|
||||
# Penalty al `load_priority` de docs `load_when: [ranked]` para que
|
||||
# no entren "por defecto" en el branch top_n, solo si rankean muy alto.
|
||||
kb_ranked_penalty: int = 10
|
||||
# Umbral de similitud por debajo del cual el ranking no es confiable
|
||||
# y se usa el `load_priority` del frontmatter como tie-break.
|
||||
kb_similarity_floor: float = 0.6
|
||||
working_context_max_items: int = 20
|
||||
tool_raw_output_max_chars: int = 2000
|
||||
conversation_recent_raw_limit: int = 2
|
||||
|
||||
@@ -90,11 +90,15 @@ class ContextEngine:
|
||||
and ("artifact_memory" in allowed or "task_state" in allowed)
|
||||
)
|
||||
|
||||
# 3. Knowledge base — loaded from memory store
|
||||
# 3. Knowledge base — loaded from memory store. Strategy y budget
|
||||
# vienen del agent profile (Fase 1 refactor): cada agente decide
|
||||
# cuanto KB inyecta y como filtra (top_n / tags / cheatsheet_only / ...).
|
||||
if "knowledge_base" in allowed and self.memory:
|
||||
kb_budget = agent.kb_max_tokens or settings.knowledge_base_max_tokens
|
||||
kb_section = await self._build_knowledge_base(
|
||||
session,
|
||||
max_tokens=settings.knowledge_base_max_tokens,
|
||||
agent=agent,
|
||||
max_tokens=kb_budget,
|
||||
)
|
||||
if kb_section:
|
||||
sections.append(kb_section)
|
||||
@@ -113,6 +117,7 @@ class ContextEngine:
|
||||
sections.append(
|
||||
self._build_task_state(
|
||||
session.current_task,
|
||||
session=session,
|
||||
objective_override=base_user_content,
|
||||
resolved_context=resolved_followup_context,
|
||||
followup_mode=followup_mode,
|
||||
@@ -340,29 +345,16 @@ class ContextEngine:
|
||||
def _build_immutable_rules(
|
||||
self, session: SessionState, agent: AgentProfile
|
||||
) -> ContextSection:
|
||||
parts = [
|
||||
"# System Rules (Immutable)",
|
||||
"",
|
||||
agent.system_prompt,
|
||||
"",
|
||||
]
|
||||
# `agent.system_prompt` ya incluye el contrato compartido (concatenado
|
||||
# por el registry al cargar). Aqui solo se añaden reglas de sesion
|
||||
# cuando existen — el bloque hardcoded de "Contrato de Contexto" que
|
||||
# vivia aqui se ha movido a `agents/_shared/contract.md` (Fase 3).
|
||||
parts = [agent.system_prompt or ""]
|
||||
if session.immutable_rules:
|
||||
parts.append("## Session Rules")
|
||||
parts.append("\n\n## Session Rules\n")
|
||||
for rule in session.immutable_rules:
|
||||
parts.append(f"- {rule}")
|
||||
parts.extend(
|
||||
[
|
||||
"",
|
||||
"## Contrato de Contexto",
|
||||
"- Los resultados de herramientas se incluyen completos en la conversación.",
|
||||
"- Los steps anteriores pueden estar compactados como resúmenes.",
|
||||
"- Mantén las respuestas enfocadas en el paso actual.",
|
||||
"- Si ya tienes la información necesaria, genera tu respuesta final.",
|
||||
"- NO repitas llamadas a herramientas con los mismos argumentos.",
|
||||
"- Responde SIEMPRE en español.",
|
||||
]
|
||||
)
|
||||
content = "\n".join(parts)
|
||||
content = "\n".join(p for p in parts if p)
|
||||
return ContextSection(
|
||||
section_type=ContextSectionType.IMMUTABLE_RULES,
|
||||
content=content,
|
||||
@@ -388,14 +380,30 @@ class ContextEngine:
|
||||
async def _build_knowledge_base(
|
||||
self,
|
||||
session: SessionState,
|
||||
agent: AgentProfile,
|
||||
max_tokens: int,
|
||||
) -> ContextSection | None:
|
||||
"""Load relevant knowledge documents via semantic search.
|
||||
"""Carga el subset relevante de la KB segun `agent.kb_load_strategy`.
|
||||
|
||||
Uses embeddings to find the most relevant docs for the current
|
||||
task. Always includes a title index of ALL docs so the agent
|
||||
knows what exists and can request more.
|
||||
Estrategias soportadas:
|
||||
- `none`: no inyecta KB (devuelve None).
|
||||
- `cheatsheet_only`: solo docs con `load_when` que contiene "cheatsheet".
|
||||
- `glossary_only`: solo docs con `load_when` que contiene "glossary".
|
||||
- `planner_only`: docs con `load_when` que contiene "planner_only" |
|
||||
"cheatsheet" | "glossary". Usado por el sub-loop de `acai_plan`.
|
||||
- `tags`: filtra por interseccion con `agent.kb_tags`, ranking dentro.
|
||||
- `top_n` (default): ranking semantico sobre docs `always`/`ranked`,
|
||||
con penalty para `ranked` y tie-break por `priority` cuando la
|
||||
similitud cae bajo `kb_similarity_floor`.
|
||||
- `all` (legacy): comportamiento previo, todos los que quepan.
|
||||
|
||||
Siempre incluye al final un listado "Other Available Docs" para que
|
||||
el agente pueda pedirlos via `read_doc`.
|
||||
"""
|
||||
strategy = (agent.kb_load_strategy or "top_n").lower()
|
||||
if strategy == "none":
|
||||
return None
|
||||
|
||||
if not self.memory:
|
||||
return None
|
||||
|
||||
@@ -412,36 +420,124 @@ class ContextEngine:
|
||||
if not all_docs:
|
||||
return None
|
||||
|
||||
doc_map = {d.memory_id: d for d in all_docs}
|
||||
|
||||
# Rank docs by semantic similarity
|
||||
query = self._build_search_query(session)
|
||||
ranked_ids: list[str] = []
|
||||
|
||||
if query:
|
||||
ranked_ids = await self._semantic_rank(query)
|
||||
|
||||
if not ranked_ids:
|
||||
# No embeddings or no task — sort by size (smallest first)
|
||||
ranked_ids = [
|
||||
d.memory_id
|
||||
for d in sorted(all_docs, key=lambda d: len(d.content))
|
||||
# 1) Pre-filtrado segun strategy.
|
||||
candidates: list[MemoryDocument]
|
||||
if strategy == "cheatsheet_only":
|
||||
candidates = [d for d in all_docs if "cheatsheet" in (d.load_when or [])]
|
||||
elif strategy == "glossary_only":
|
||||
candidates = [d for d in all_docs if "glossary" in (d.load_when or [])]
|
||||
elif strategy == "planner_only":
|
||||
candidates = [
|
||||
d for d in all_docs
|
||||
if any(t in (d.load_when or []) for t in ("planner_only", "cheatsheet", "glossary"))
|
||||
]
|
||||
elif strategy == "tags":
|
||||
agent_tags = {t.lower() for t in (agent.kb_tags or [])}
|
||||
if not agent_tags:
|
||||
candidates = []
|
||||
else:
|
||||
candidates = [
|
||||
d for d in all_docs
|
||||
if agent_tags.intersection({t.lower() for t in (d.tags or [])})
|
||||
]
|
||||
elif strategy == "all":
|
||||
# Legacy / debugging — todos los docs.
|
||||
candidates = list(all_docs)
|
||||
else:
|
||||
# `top_n` (default): considera docs `always` y `ranked`. Si el
|
||||
# frontmatter no esta presente, los tratamos como `always` para
|
||||
# no excluirlos por accidente (modo legacy).
|
||||
def _eligible_top_n(d: MemoryDocument) -> bool:
|
||||
lw = d.load_when or []
|
||||
if not lw:
|
||||
return True # legacy: sin frontmatter → considerado
|
||||
return "always" in lw or "ranked" in lw
|
||||
candidates = [d for d in all_docs if _eligible_top_n(d)]
|
||||
|
||||
# Include ALL docs — 42K tokens fits well within model context (128K)
|
||||
if not candidates:
|
||||
# No hay docs aplicables al strategy. Devolvemos solo el indice
|
||||
# de "Other Available Docs" para que el agente pueda pedir on-demand.
|
||||
return self._build_kb_section_only_index(all_docs, full_docs=[])
|
||||
|
||||
# 2) Ranking. Para strategies "estaticas" (cheatsheet_only, glossary_only,
|
||||
# planner_only) ordenamos por priority desc — son sets pequenos y el
|
||||
# ranking semantico no aporta. Para `tags` y `top_n` aplicamos ranking
|
||||
# semantico cuando hay query, sino priority desc.
|
||||
candidate_ids = {d.memory_id for d in candidates}
|
||||
ordered: list[MemoryDocument]
|
||||
|
||||
if strategy in ("cheatsheet_only", "glossary_only", "planner_only"):
|
||||
ordered = sorted(candidates, key=lambda d: d.priority, reverse=True)
|
||||
else:
|
||||
query = self._build_search_query(session)
|
||||
ranked: list[tuple[str, float]] = []
|
||||
if query:
|
||||
ranked = await self._semantic_rank(query)
|
||||
ranked = [(did, s) for did, s in ranked if did in candidate_ids]
|
||||
ranked_map = {did: s for did, s in ranked}
|
||||
|
||||
def _score(d: MemoryDocument) -> tuple[float, int]:
|
||||
# Score combinado: similitud + priority/100 (peso bajo).
|
||||
# Si la similitud es < floor, fallback a priority pura.
|
||||
sim = ranked_map.get(d.memory_id, 0.0)
|
||||
prio = d.priority
|
||||
# Penalty para `ranked` (no entra "por defecto")
|
||||
if "ranked" in (d.load_when or []):
|
||||
prio -= settings.kb_ranked_penalty
|
||||
if sim < settings.kb_similarity_floor:
|
||||
return (prio / 100.0, prio)
|
||||
return (sim + prio / 1000.0, prio)
|
||||
|
||||
ordered = sorted(candidates, key=_score, reverse=True)
|
||||
|
||||
# 3) Cap por kb_max_tokens y kb_top_n.
|
||||
token_budget = max_tokens
|
||||
top_n_cap = agent.kb_top_n or settings.kb_top_n_docs
|
||||
full_docs: list[MemoryDocument] = []
|
||||
|
||||
for doc_id in ranked_ids:
|
||||
doc = doc_map.get(doc_id)
|
||||
if not doc:
|
||||
continue
|
||||
for doc in ordered:
|
||||
if len(full_docs) >= top_n_cap and strategy not in ("cheatsheet_only", "glossary_only", "planner_only"):
|
||||
break
|
||||
doc_tokens = estimate_tokens(doc.content)
|
||||
if doc_tokens <= token_budget:
|
||||
full_docs.append(doc)
|
||||
token_budget -= doc_tokens
|
||||
elif not full_docs:
|
||||
# Si el primer doc ya no cabe, se incluye truncado para tener
|
||||
# algo. Mejor un doc parcial que ningun doc.
|
||||
truncated = self._truncate_to_tokens(doc.content, token_budget)
|
||||
if truncated:
|
||||
full_docs.append(MemoryDocument(
|
||||
memory_id=doc.memory_id,
|
||||
memory_type=doc.memory_type,
|
||||
namespace=doc.namespace,
|
||||
title=doc.title,
|
||||
content=truncated + "\n\n[...] (doc truncado)",
|
||||
summary=doc.summary,
|
||||
tags=doc.tags,
|
||||
priority=doc.priority,
|
||||
load_when=doc.load_when,
|
||||
))
|
||||
break
|
||||
|
||||
# Build section — ALWAYS include title index of ALL docs
|
||||
return self._build_kb_section_only_index(all_docs, full_docs)
|
||||
|
||||
@staticmethod
def _truncate_to_tokens(text: str, max_tokens: int) -> str:
    """Cut ``text`` down to an approximate token budget.

    Uses the rough heuristic of 4 characters per token, so the result holds
    at most ``4 * max_tokens`` characters.

    Args:
        text: The string to shorten.
        max_tokens: Token budget; zero or negative yields an empty string.

    Returns:
        ``text`` unchanged when it already fits, otherwise its leading
        ``4 * max_tokens`` characters.
    """
    if max_tokens <= 0:
        return ""
    char_limit = max_tokens * 4
    return text if len(text) <= char_limit else text[:char_limit]
|
||||
|
||||
@staticmethod
|
||||
def _build_kb_section_only_index(
|
||||
all_docs: list[MemoryDocument],
|
||||
full_docs: list[MemoryDocument],
|
||||
) -> ContextSection:
|
||||
"""Construye la seccion KB final: docs cargados + indice del resto."""
|
||||
included_ids = {d.memory_id for d in full_docs}
|
||||
not_included = [d for d in all_docs if d.memory_id not in included_ids]
|
||||
|
||||
@@ -459,9 +555,9 @@ class ContextEngine:
|
||||
|
||||
if not_included:
|
||||
lines.append("## Other Available Docs")
|
||||
lines.append("_Ask for any of these if you need the full content:_")
|
||||
lines.append("_Pidelos con `read_doc({name: \"<id>\"})` cuando los necesites:_")
|
||||
for doc in not_included:
|
||||
lines.append(f"- **{doc.title}** ({doc.memory_id}): {doc.summary[:150]}")
|
||||
lines.append(f"- **{doc.title}** (`{doc.memory_id}`): {(doc.summary or '')[:150]}")
|
||||
lines.append("")
|
||||
|
||||
content = "\n".join(lines)
|
||||
@@ -472,8 +568,8 @@ class ContextEngine:
|
||||
token_estimate=estimate_tokens(content),
|
||||
)
|
||||
|
||||
async def _semantic_rank(self, query: str) -> list[str]:
|
||||
"""Rank knowledge docs by cosine similarity to the query."""
|
||||
async def _semantic_rank(self, query: str) -> list[tuple[str, float]]:
|
||||
"""Rank knowledge docs by cosine similarity. Returns (doc_id, score)."""
|
||||
try:
|
||||
if not self._embed_service:
|
||||
self._embed_service = EmbeddingService()
|
||||
@@ -484,7 +580,7 @@ class ContextEngine:
|
||||
namespace="knowledge",
|
||||
top_k=50,
|
||||
)
|
||||
return [doc_id for doc_id, _score in results]
|
||||
return list(results)
|
||||
|
||||
except Exception as e:
|
||||
logger.warning("Semantic search failed: %s — loading all docs", e)
|
||||
@@ -572,6 +668,7 @@ class ContextEngine:
|
||||
def _build_task_state(
|
||||
self,
|
||||
task: TaskState,
|
||||
session: SessionState | None = None,
|
||||
objective_override: str | None = None,
|
||||
resolved_context: str = "",
|
||||
followup_mode: str = "none",
|
||||
@@ -659,6 +756,37 @@ class ContextEngine:
|
||||
f" {marker} Step {i + 1} [{status_label}{compacted_label}]: {step.description}"
|
||||
)
|
||||
|
||||
# Active Plan (Fase 5: tool acai_plan). Si hay un plan activo en
|
||||
# session.metadata, lo renderizamos con cursor + completed marks.
|
||||
if session is not None:
|
||||
current_plan = session.metadata.get("current_plan")
|
||||
if isinstance(current_plan, dict) and current_plan.get("status") == "active":
|
||||
steps = current_plan.get("steps") or []
|
||||
cursor = int(current_plan.get("cursor", 0))
|
||||
completed_set = set(current_plan.get("completed_step_ids", []))
|
||||
lines.append("")
|
||||
lines.append("## Active Plan (acai_plan)")
|
||||
lines.append(f"**Objetivo**: {current_plan.get('objective', '')}")
|
||||
if steps:
|
||||
lines.append(f"**Cursor**: → step {min(cursor + 1, len(steps))}/{len(steps)}")
|
||||
lines.append("")
|
||||
for i, st in enumerate(steps):
|
||||
sid = st.get("id", i + 1)
|
||||
desc = st.get("description", "")
|
||||
if sid in completed_set:
|
||||
marker, label = "✓", "done"
|
||||
elif i == cursor:
|
||||
marker, label = "→", "pending"
|
||||
else:
|
||||
marker, label = "·", "pending"
|
||||
lines.append(f" {marker} Step {i + 1} [{label}]: {desc}")
|
||||
risks = current_plan.get("risks") or []
|
||||
if risks:
|
||||
lines.append("")
|
||||
lines.append("**Risks**:")
|
||||
for r in risks[:5]:
|
||||
lines.append(f"- {r}")
|
||||
|
||||
content = "\n".join(lines)
|
||||
return ContextSection(
|
||||
section_type=ContextSectionType.TASK_STATE,
|
||||
|
||||
@@ -31,6 +31,23 @@ class AgentProfile(BaseModel):
|
||||
)
|
||||
stream_deltas: bool = True # Si emite deltas por SSE al usuario
|
||||
|
||||
# KB load strategy (Fase 1 refactor): controla CUANTO y QUE de la KB se
|
||||
# inyecta como system prompt. Ver `_build_knowledge_base` en context/engine.py.
|
||||
# - `top_n` (default): ranking semantico, top-N docs hasta agotar budget.
|
||||
# - `tags`: filtra por interseccion con `kb_tags`, ranking dentro.
|
||||
# - `cheatsheet_only`: solo docs con `load_when: [cheatsheet]`.
|
||||
# - `glossary_only`: solo docs con `load_when: [glossary]`.
|
||||
# - `planner_only`: solo docs con `load_when: [planner_only|cheatsheet|glossary]`
|
||||
# (usado por la sub-llamada interna de `acai_plan`).
|
||||
# - `none`: no carga KB.
|
||||
# - `all` (legacy): comportamiento previo, todos los docs que quepan.
|
||||
kb_load_strategy: str = "top_n"
|
||||
kb_tags: list[str] = Field(default_factory=list)
|
||||
kb_max_tokens: int | None = None # override per-agent del default global
|
||||
kb_top_n: int | None = None # override per-agent del default global
|
||||
has_planner_tool: bool = False # si expone la tool interna `acai_plan`
|
||||
system_prompt_planner: str = "" # cargado de `system.planner.md` si existe
|
||||
|
||||
|
||||
class SubAgentDefinition(BaseModel):
|
||||
"""A runnable subagent configuration within the orchestrator."""
|
||||
|
||||
@@ -62,6 +62,10 @@ class MemoryDocument(BaseModel):
|
||||
content: str
|
||||
summary: str = ""
|
||||
tags: list[str] = Field(default_factory=list)
|
||||
# Frontmatter YAML del doc (Fase 4 refactor). Si el doc no tiene frontmatter
|
||||
# se quedan en defaults: priority=50, load_when=[].
|
||||
priority: int = 50
|
||||
load_when: list[str] = Field(default_factory=list)
|
||||
embedding: list[float] | None = None
|
||||
created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
|
||||
updated_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
|
||||
|
||||
@@ -19,6 +19,9 @@ from ...models.artifacts import ArtifactSummary
|
||||
from ...models.session import SessionState
|
||||
from ...models.tools import ToolExecution, ToolExecutionStatus
|
||||
from ...streaming.sse import SSEEmitter, EventType
|
||||
from ..planner import run_planner_subloop
|
||||
from ..plan_judge import judge_plan_progress
|
||||
from ..tool_groups import is_plan_internal_tool, strip_namespace
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -64,6 +67,10 @@ class BaseAgent:
|
||||
total_output_tokens = 0
|
||||
# Real conversation history: assistant messages + tool results
|
||||
conversation: list[dict[str, Any]] = []
|
||||
# Expuesta para que las tools internas (acai_plan) puedan resumir
|
||||
# el thinking acumulado del agente principal sin que tengamos que
|
||||
# pasarlo explicitamente por cada llamada a `_execute_tool`.
|
||||
self._current_conversation = conversation
|
||||
|
||||
for step in range(max_steps):
|
||||
# Build context with real conversation
|
||||
@@ -86,6 +93,11 @@ class BaseAgent:
|
||||
temperature=self.profile.temperature or 0.3,
|
||||
)
|
||||
|
||||
# Snapshot del numero de tool_executions ya acumulados ANTES del
|
||||
# step. El judge solo necesita las del step actual; el slice
|
||||
# `tool_executions[exec_offset:]` da exactamente ese delta.
|
||||
exec_offset = len(tool_executions)
|
||||
|
||||
full_text = ""
|
||||
tool_calls: list[dict[str, Any]] = []
|
||||
active_tools: dict[str, dict[str, Any]] = {}
|
||||
@@ -269,6 +281,18 @@ class BaseAgent:
|
||||
elif full_text:
|
||||
# Fallback (no debiera ocurrir si el adapter emite block_index).
|
||||
conversation.append({"role": "assistant", "content": full_text})
|
||||
# El agente termino sin mas tool calls: cerramos el plan si
|
||||
# estaba activo. El judge no se llama (no hay tools que evaluar);
|
||||
# el flag `no_tool_calls_this_step=True` marca todos los pendientes
|
||||
# como completados.
|
||||
try:
|
||||
await self._auto_advance_plan_cursor(
|
||||
session,
|
||||
[],
|
||||
no_tool_calls_this_step=True,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning("[plan-advance] failed at end_turn: %s", e)
|
||||
break
|
||||
|
||||
# Push del assistant turn con TODOS los blocks (thinking+text+tool_use).
|
||||
@@ -344,6 +368,17 @@ class BaseAgent:
|
||||
if tool_result_blocks:
|
||||
conversation.append({"role": "user", "content": tool_result_blocks})
|
||||
|
||||
# Auto-avance del cursor del plan TRAS CADA STEP INTERNO (no solo
|
||||
# al final del turno). Asi el frontend ve los `✓` aparecer en vivo
|
||||
# conforme el agente ejecuta tools, no de golpe al final.
|
||||
try:
|
||||
await self._auto_advance_plan_cursor(
|
||||
session,
|
||||
tool_executions[exec_offset:],
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning("Auto-advance plan cursor failed: %s", e)
|
||||
|
||||
return {
|
||||
"content": accumulated_content,
|
||||
"artifacts": artifacts,
|
||||
@@ -374,6 +409,20 @@ class BaseAgent:
|
||||
|
||||
logger.info("Tool call: %s(%s)", tool_name, json.dumps(arguments)[:200])
|
||||
|
||||
# Intercepcion: tools internas del orquestador (Fase 5: acai_plan).
|
||||
# No atraviesan MCP — se ejecutan en Python directamente.
|
||||
if is_plan_internal_tool(tool_name):
|
||||
raw_name = strip_namespace(tool_name)
|
||||
await self.sse.emit(
|
||||
EventType.TOOL_STARTED,
|
||||
{"tool": raw_name, "tool_call_id": tool_call_id},
|
||||
session_id=session.session_id,
|
||||
)
|
||||
if raw_name == "acai_plan":
|
||||
return await self._execute_acai_plan(session, arguments, tool_call_id, tool_exec)
|
||||
if raw_name == "acai_plan_advance":
|
||||
return await self._execute_acai_plan_advance(session, arguments, tool_call_id, tool_exec)
|
||||
|
||||
start = time.monotonic()
|
||||
try:
|
||||
if self.mcp.is_running:
|
||||
@@ -439,25 +488,554 @@ class BaseAgent:
|
||||
|
||||
return tool_exec
|
||||
|
||||
# ---- Tools internas del orquestador (Fase 5) -----------------------------
|
||||
|
||||
@staticmethod
def _summarize_parent_thinking(conversation: list[dict[str, Any]], max_chars: int = 1200) -> str:
    """Return a compact digest of the main agent's most recent thinking.

    Scans ``conversation`` from newest to oldest and collects the text of
    content blocks with ``type == "thinking"`` from assistant messages whose
    ``content`` is a list. Collection stops once at least ``max_chars``
    characters have been gathered. The chunks are then joined in
    chronological order, separated by a line containing ``---``.

    Args:
        conversation: Message dicts with ``role`` and ``content`` keys.
            Messages that are not assistant turns, or whose content is not
            a list of blocks, are ignored.
        max_chars: Character budget for the digest.

    Returns:
        The joined thinking text. When it exceeds ``max_chars`` only the
        trailing ``max_chars`` characters are kept, prefixed with
        ``"[...] "``. Returns an empty string when nothing was collected or
        when ``max_chars`` is not positive.
    """
    # A non-positive budget would make the tail slice below (`[-0:]`) return
    # the whole text instead of nothing, so bail out early.
    if max_chars <= 0:
        return ""

    newest_first: list[str] = []
    collected = 0
    for msg in reversed(conversation):
        if collected >= max_chars:
            break
        if msg.get("role") != "assistant":
            continue
        blocks = msg.get("content")
        if not isinstance(blocks, list):
            continue
        # Walk the blocks newest-first too. The list is reversed as a whole
        # below, so a forward walk here would invert the order of the blocks
        # inside each message and, when the budget runs out mid-message, keep
        # that message's oldest thinking instead of its most recent.
        for block in reversed(blocks):
            if not isinstance(block, dict) or block.get("type") != "thinking":
                continue
            text = block.get("thinking", "") or ""
            if not text:
                continue
            newest_first.append(text)
            collected += len(text)
            if collected >= max_chars:
                break

    # Join oldest to newest so the digest reads in logical order.
    joined = "\n---\n".join(reversed(newest_first))
    if len(joined) > max_chars:
        # Keep the tail: the most recent reasoning is the most relevant.
        joined = "[...] " + joined[-max_chars:]
    return joined
|
||||
|
||||
async def _execute_acai_plan(
    self,
    session: SessionState,
    arguments: dict[str, Any],
    tool_call_id: str,
    tool_exec: ToolExecution,
) -> ToolExecution:
    """Run the synthetic `acai_plan` tool.

    Launches the planner sub-loop (`run_planner_subloop`) and, when it
    yields a plan, stores it in `session.metadata["current_plan"]`. A plan
    that was still `active` is first moved to
    `session.metadata["plan_history"]` with status `superseded`.

    Every exit path fills in `tool_exec` and emits a `TOOL_COMPLETED` SSE
    event; the success path additionally emits `PLAN_CREATED`.

    Args:
        session: Session whose metadata holds the plan state and the
            per-turn invocation counter.
        arguments: Tool arguments; `objective` is required, `scope` is
            optional.
        tool_call_id: Identifier echoed back in the SSE payloads.
        tool_exec: Execution record that is mutated and returned.

    Returns:
        The same `tool_exec` instance with status, summary and raw output
        populated.
    """
    # Per-turn invocation limit: at most 2. After that the model is told to
    # execute directly or abandon the plan.
    count = int(session.metadata.get("plan_call_count_in_turn", 0))
    if count >= 2:
        tool_exec.status = ToolExecutionStatus.COMPLETED
        tool_exec.result_summary = (
            "Ya invocaste acai_plan dos veces este turno. "
            "Ejecuta directo o usa acai_plan_advance({abandon:true}) para resetear."
        )
        tool_exec.raw_output = json.dumps({"error": "max_plan_calls_per_turn"})
        await self.sse.emit(
            EventType.TOOL_COMPLETED,
            {"tool": "acai_plan", "status": "completed", "summary": tool_exec.result_summary, "tool_call_id": tool_call_id},
            session_id=session.session_id,
        )
        return tool_exec

    session.metadata["plan_call_count_in_turn"] = count + 1

    objective = str(arguments.get("objective") or "").strip()
    scope = str(arguments.get("scope") or "").strip()
    if not objective:
        tool_exec.status = ToolExecutionStatus.FAILED
        tool_exec.error = "Falta el campo 'objective'"
        tool_exec.result_summary = "acai_plan FALLO: falta objective."
        tool_exec.raw_output = json.dumps({"error": "missing_objective"})
        await self.sse.emit(
            EventType.TOOL_COMPLETED,
            {"tool": "acai_plan", "status": "failed", "error": tool_exec.error, "tool_call_id": tool_call_id},
            session_id=session.session_id,
        )
        return tool_exec

    # Compressed view of the thinking accumulated so far. The conversation is
    # read from `self._current_conversation` when that attribute exists;
    # otherwise the summary is empty.
    parent_summary = self._summarize_parent_thinking(
        getattr(self, "_current_conversation", []) or [],
    )

    start = time.monotonic()
    try:
        result = await run_planner_subloop(
            objective=objective,
            scope=scope,
            agent_profile=self.profile,
            model_adapter=self.model,
            mcp=self.mcp,
            parent_thinking_summary=parent_summary,
        )
    except Exception as e:
        # `logger.exception` keeps the traceback, which a plain
        # `logger.error("%s", e)` would throw away.
        logger.exception("Planner sub-loop crashed: %s", e)
        tool_exec.status = ToolExecutionStatus.FAILED
        tool_exec.error = str(e)
        tool_exec.duration_ms = (time.monotonic() - start) * 1000
        tool_exec.result_summary = f"acai_plan FALLO: {str(e)[:200]}"
        tool_exec.raw_output = json.dumps({"error": str(e)[:500]})
        await self.sse.emit(
            EventType.TOOL_COMPLETED,
            {"tool": "acai_plan", "status": "failed", "error": str(e), "tool_call_id": tool_call_id},
            session_id=session.session_id,
        )
        return tool_exec

    tool_exec.duration_ms = (time.monotonic() - start) * 1000

    if not result.plan:
        err = result.error or "Plan vacio"
        logger.warning(
            "[acai_plan] Plan FAILED: %s (raw_preview=%r)",
            err, (result.raw_text or "")[:200],
        )
        tool_exec.status = ToolExecutionStatus.FAILED
        tool_exec.error = err
        tool_exec.result_summary = (
            f"acai_plan FALLO: {err}. Procede en modo directo o reintenta con scope distinto."
        )
        tool_exec.raw_output = json.dumps({
            "error": err,
            "raw_text_preview": (result.raw_text or "")[:500],
        })
        await self.sse.emit(
            EventType.TOOL_COMPLETED,
            {"tool": "acai_plan", "status": "failed", "error": err, "tool_call_id": tool_call_id},
            session_id=session.session_id,
        )
        return tool_exec

    # Valid plan: persist it in metadata. If a previous plan was still
    # active, archive it in the history as `superseded`.
    old_plan = session.metadata.get("current_plan")
    if old_plan and old_plan.get("status") == "active":
        old_plan["status"] = "superseded"
        session.metadata.setdefault("plan_history", []).append(old_plan)

    plan = dict(result.plan)
    plan["cursor"] = 0
    plan["completed_step_ids"] = []
    plan["status"] = "active"
    plan["created_at"] = int(time.time())
    session.metadata["current_plan"] = plan

    # `or` (instead of a `.get` default) also covers keys that are present
    # but hold `None`, which would otherwise break the slicing below.
    steps = plan.get("steps") or []
    risks = plan.get("risks") or []
    next_desc = str(steps[0].get("description") or "") if steps else "(plan vacio)"
    n_steps = len(steps)
    n_risks = len(risks)

    tool_exec.status = ToolExecutionStatus.COMPLETED
    tool_exec.result_summary = (
        f"Plan generado: {n_steps} step(s), {n_risks} risk(s). "
        f"Proximo: step 1 — {next_desc[:200]}"
    )
    logger.info(
        "[acai_plan] Plan persisted: %d steps, %d risks, objective=%r",
        n_steps, n_risks, objective[:120],
    )
    # raw_output for the model: the full plan JSON, truncated to 4000 chars.
    plan_json = json.dumps(plan, ensure_ascii=False)
    if len(plan_json) > 4000:
        tool_exec.raw_output = plan_json[:4000] + "\n[...truncated]"
    else:
        tool_exec.raw_output = plan_json

    await self.sse.emit(
        EventType.TOOL_COMPLETED,
        {
            "tool": "acai_plan",
            "status": "completed",
            "summary": tool_exec.result_summary[:200],
            "raw_output": tool_exec.raw_output[:4000],
            "tool_call_id": tool_call_id,
        },
        session_id=session.session_id,
    )
    # PlanStepper UI: tell the frontend that a new plan is active.
    await self.sse.emit(
        EventType.PLAN_CREATED,
        {
            "objective": plan.get("objective", ""),
            "steps": [
                {
                    "id": s.get("id"),
                    "description": (s.get("description") or "")[:300],
                    "agent_action": (s.get("agent_action") or "")[:200],
                    "files_touched": (s.get("files_touched") or [])[:10],
                    "tables_touched": (s.get("tables_touched") or [])[:10],
                }
                for s in steps
            ],
            "risks": risks[:10],
            "cursor": plan.get("cursor", 0),
            "completed_step_ids": plan.get("completed_step_ids", []),
            "status": plan.get("status", "active"),
        },
        session_id=session.session_id,
    )
    return tool_exec
|
||||
|
||||
async def _execute_acai_plan_advance(
    self,
    session: SessionState,
    arguments: dict[str, Any],
    tool_call_id: str,
    tool_exec: ToolExecution,
) -> ToolExecution:
    """Advance or abandon the active plan stored in the session metadata.

    Behaviour, in order:
      * no active plan: report `no_active_plan` and return;
      * `abandon` truthy: archive the plan in `plan_history` as
        `abandoned`, clear `current_plan`, emit `PLAN_ENDED`;
      * otherwise: record `completed_ids`, then move the cursor either to
        the clamped `next_cursor` or to the first step that is not yet
        completed (marking the plan `done` when there is none).

    Args:
        session: Session whose metadata holds `current_plan`.
        arguments: Tool arguments (`abandon`, `completed_ids`,
            `next_cursor`), all optional. Malformed values are ignored
            instead of raising.
        tool_call_id: Identifier echoed back in the SSE payloads.
        tool_exec: Execution record that is mutated and returned.

    Returns:
        The same `tool_exec` instance.
    """
    plan = session.metadata.get("current_plan")
    if not plan or plan.get("status") != "active":
        tool_exec.status = ToolExecutionStatus.COMPLETED
        tool_exec.result_summary = "No hay plan activo."
        tool_exec.raw_output = json.dumps({"status": "no_active_plan"})
        await self.sse.emit(
            EventType.TOOL_COMPLETED,
            {"tool": "acai_plan_advance", "status": "completed", "summary": tool_exec.result_summary, "tool_call_id": tool_call_id},
            session_id=session.session_id,
        )
        return tool_exec

    if arguments.get("abandon"):
        plan["status"] = "abandoned"
        session.metadata.setdefault("plan_history", []).append(plan)
        session.metadata["current_plan"] = None
        tool_exec.status = ToolExecutionStatus.COMPLETED
        tool_exec.result_summary = "Plan abandonado."
        tool_exec.raw_output = json.dumps({"status": "abandoned"})
        await self.sse.emit(
            EventType.TOOL_COMPLETED,
            {"tool": "acai_plan_advance", "status": "completed", "summary": tool_exec.result_summary, "tool_call_id": tool_call_id},
            session_id=session.session_id,
        )
        await self.sse.emit(
            EventType.PLAN_ENDED,
            {"status": "abandoned", "objective": plan.get("objective", "")},
            session_id=session.session_id,
        )
        return tool_exec

    # Apply completed_ids. The stored list is normalised first so that a
    # missing or `None` value cannot break `set()` / `.append()`.
    stored_ids = plan.get("completed_step_ids")
    if not isinstance(stored_ids, list):
        stored_ids = list(stored_ids or [])
        plan["completed_step_ids"] = stored_ids
    completed_set = set(stored_ids)

    completed_in = arguments.get("completed_ids") or []
    if not isinstance(completed_in, list):
        # The arguments come from the model; tolerate a malformed value.
        completed_in = []
    for cid in completed_in:
        # `bool` is a subclass of `int`; `True` must not count as step id 1.
        if isinstance(cid, int) and not isinstance(cid, bool) and cid not in completed_set:
            stored_ids.append(cid)
            completed_set.add(cid)

    # Apply the cursor. An unparsable `next_cursor` (null, text, ...) falls
    # back to auto-advance instead of raising out of the tool call.
    steps = plan.get("steps") or []
    requested_cursor = None
    if "next_cursor" in arguments:
        try:
            requested_cursor = int(arguments["next_cursor"])
        except (TypeError, ValueError):
            requested_cursor = None

    if requested_cursor is not None:
        plan["cursor"] = max(0, min(requested_cursor, len(steps)))
    else:
        # Auto-advance to the first step that is not completed yet.
        for i, st in enumerate(steps):
            if st.get("id") not in completed_set:
                plan["cursor"] = i
                break
        else:
            plan["status"] = "done"

    cursor = plan.get("cursor", 0)
    if plan.get("status") == "done" or cursor >= len(steps):
        tool_exec.result_summary = f"Plan completado ({len(completed_set)}/{len(steps)} steps)."
    else:
        next_desc = steps[cursor].get("description", "(?)") if cursor < len(steps) else "(?)"
        tool_exec.result_summary = (
            f"Plan avanzado a step {cursor + 1}/{len(steps)}: {next_desc[:200]}"
        )
    tool_exec.status = ToolExecutionStatus.COMPLETED
    tool_exec.raw_output = json.dumps({
        "cursor": plan.get("cursor", 0),
        "completed_step_ids": plan.get("completed_step_ids", []),
        "status": plan.get("status", "active"),
    })
    await self.sse.emit(
        EventType.TOOL_COMPLETED,
        {"tool": "acai_plan_advance", "status": "completed", "summary": tool_exec.result_summary, "tool_call_id": tool_call_id},
        session_id=session.session_id,
    )
    # Emit PLAN_ENDED or PLAN_ADVANCED depending on the outcome.
    if plan.get("status") == "done":
        await self.sse.emit(
            EventType.PLAN_ENDED,
            {"status": "done", "objective": plan.get("objective", "")},
            session_id=session.session_id,
        )
    else:
        await self.sse.emit(
            EventType.PLAN_ADVANCED,
            {
                "cursor": plan.get("cursor", 0),
                "completed_step_ids": plan.get("completed_step_ids", []),
                "status": plan.get("status", "active"),
            },
            session_id=session.session_id,
        )
    return tool_exec
|
||||
|
||||
@staticmethod
def _match_step_to_executions(
    step: dict[str, Any],
    tool_executions: list[ToolExecution],
) -> bool:
    """Heuristically decide whether executed tools cover a plan step.

    Only executions with status COMPLETED are considered. The step matches
    as soon as one of them satisfies either check:
      1) the tool name (namespace stripped, also tried with `-` and `_`
         swapped, so `acai-write` equals `acai_write`) occurs as a
         substring of the step's lowercased `agent_action`;
      2) any entry of the step's `files_touched` or `tables_touched`
         occurs as a substring of the tool's lowercased, JSON-serialized
         arguments.

    Returns:
        True on the first match; False when nothing matches or when the
        step has neither an action nor any touched file/table.
    """
    wanted_action = (step.get("agent_action") or "").lower()
    targets = [
        str(item).lower()
        for key in ("files_touched", "tables_touched")
        for item in (step.get(key) or [])
    ]
    if not (wanted_action or targets):
        return False

    for execution in tool_executions:
        if execution.status != ToolExecutionStatus.COMPLETED:
            continue

        # Check 1: tool name, in any dash/underscore spelling, inside action.
        name = strip_namespace(execution.tool_name).lower()
        for variant in (name, name.replace("-", "_"), name.replace("_", "-")):
            if variant and variant in wanted_action:
                return True

        # Check 2: a touched path or table name inside the tool arguments.
        try:
            serialized = json.dumps(execution.arguments or {}, ensure_ascii=False)
        except Exception:
            # Arguments that cannot be JSON-encoded fall back to `str()`.
            serialized = str(execution.arguments or "")
        serialized = serialized.lower()
        if any(target and target in serialized for target in targets):
            return True

    return False
|
||||
|
||||
async def _auto_advance_plan_cursor(
    self,
    session: SessionState,
    tool_executions_this_step: list[ToolExecution],
    no_tool_calls_this_step: bool = False,
) -> None:
    """Advance the plan cursor after one internal step of the agent.

    When tools were executed, `judge_plan_progress` is asked which plan
    steps those executions completed and the returned ids are merged into
    the completed set. The cursor is then placed on the first step that is
    not completed; if every step is completed the plan becomes `done`.

    When `no_tool_calls_this_step` is True the judge is not consulted and
    no pending step is marked as completed: ending a turn without tools is
    not taken as proof that the work was done. The plan is only closed if
    the steps recorded earlier already cover the whole plan.

    Emits `PLAN_ENDED` when the plan becomes `done`, `PLAN_ADVANCED` when
    the cursor or the completed ids changed, and nothing otherwise. Emit
    failures are logged and swallowed.

    Args:
        session: Session whose metadata holds `current_plan`.
        tool_executions_this_step: Tool executions of the step just run.
        no_tool_calls_this_step: True when the agent ended the turn without
            calling any tool.
    """
    plan = session.metadata.get("current_plan")
    if not plan or plan.get("status") != "active":
        return

    steps = plan.get("steps") or []
    # `or` guards against keys that exist but hold `None`.
    prev_cursor = int(plan.get("cursor") or 0)
    prev_completed = list(plan.get("completed_step_ids") or [])
    completed_set = set(prev_completed)

    rationale = ""

    if no_tool_calls_this_step:
        # Do not touch `completed_set`: what the judge decided on earlier
        # steps is kept as is, and pending steps stay pending.
        if steps and len(completed_set) >= len(steps):
            rationale = "agente termino el turn; todos los steps ya completados"
        else:
            rationale = "agente termino el turn con steps pendientes (no cerrado)"
    elif tool_executions_this_step:
        # Ask the judge which steps were just completed.
        try:
            completed_ids, judge_rationale = await judge_plan_progress(
                plan=plan,
                tool_executions_this_step=tool_executions_this_step,
                model_adapter=self.model,
                model_id=self.profile.model_id,
            )
            for cid in completed_ids:
                completed_set.add(cid)
            rationale = judge_rationale
        except Exception as e:
            logger.warning("[plan-judge] failed, no advance this step: %s", e)
            # Without a verdict the plan is left untouched rather than
            # risking heuristic false positives.
            return

    # Cursor: first step NOT completed. If all are completed -> done.
    cursor = len(steps)
    for i, step in enumerate(steps):
        if step.get("id") not in completed_set:
            cursor = i
            break

    plan["cursor"] = cursor
    plan["completed_step_ids"] = sorted(completed_set)
    ended = False
    if cursor >= len(steps) and steps:
        plan["status"] = "done"
        ended = True

    # The active -> done transition counts as a change by itself; otherwise
    # a plan whose cursor and ids were already final would be closed here
    # without the frontend ever receiving PLAN_ENDED.
    changed = (
        ended
        or cursor != prev_cursor
        or set(plan["completed_step_ids"]) != set(prev_completed)
    )
    logger.info(
        "[plan-advance] tools_in_step=%d prev_cursor=%d new_cursor=%d completed=%s changed=%s rationale=%r",
        len(tool_executions_this_step), prev_cursor, cursor,
        plan["completed_step_ids"], changed, rationale[:160],
    )
    if not changed:
        return

    try:
        if ended:
            await self.sse.emit(
                EventType.PLAN_ENDED,
                {"status": "done", "objective": plan.get("objective", "")},
                session_id=session.session_id,
            )
        else:
            await self.sse.emit(
                EventType.PLAN_ADVANCED,
                {
                    "cursor": plan["cursor"],
                    "completed_step_ids": plan["completed_step_ids"],
                    "status": plan.get("status", "active"),
                },
                session_id=session.session_id,
            )
    except Exception as e:
        logger.warning("PLAN_ADVANCED/ENDED emit failed: %s", e)
|
||||
|
||||
# ---- Allowed tools --------------------------------------------------------
|
||||
|
||||
def _get_allowed_tools(self, followup_mode: str = "none") -> list[dict[str, Any]]:
|
||||
"""Return tool definitions filtered by this agent's allowed_tools."""
|
||||
"""Return tool definitions filtered by this agent's allowed_tools.
|
||||
|
||||
Si el agente tiene `has_planner_tool=True`, anade definiciones sinteticas
|
||||
de `acai_plan` y `acai_plan_advance` (Fase 5: la tool interna no
|
||||
atraviesa MCP — se intercepta en `_execute_tool`).
|
||||
"""
|
||||
if followup_mode == "transform":
|
||||
return []
|
||||
if not self.mcp.is_running:
|
||||
return []
|
||||
all_tools = self.mcp.get_tool_definitions()
|
||||
if not self.profile.allowed_tools:
|
||||
return all_tools # No filter → all tools
|
||||
return [t for t in all_tools if t["name"] in self.profile.allowed_tools]
|
||||
if self.profile.allowed_tools:
|
||||
tool_defs = [t for t in all_tools if t["name"] in self.profile.allowed_tools]
|
||||
else:
|
||||
tool_defs = list(all_tools)
|
||||
|
||||
if self.profile.has_planner_tool:
|
||||
tool_defs.append({
|
||||
"name": "acai_plan",
|
||||
"description": (
|
||||
"Genera un plan estructurado de ejecucion. Usa esta tool al recibir "
|
||||
"una peticion compuesta (landing entera, tienda, refactor amplio, modulo "
|
||||
"con tabla+hook+frontend). NO la uses para tareas triviales (cambiar un titulo, "
|
||||
"ajustar un color, leer datos). Devuelve JSON con steps, risks, files_touched, "
|
||||
"tables_touched."
|
||||
),
|
||||
"input_schema": {
|
||||
"type": "object",
|
||||
"required": ["objective"],
|
||||
"properties": {
|
||||
"objective": {
|
||||
"type": "string",
|
||||
"description": "Descripcion en español de lo que hay que conseguir.",
|
||||
},
|
||||
"scope": {
|
||||
"type": "string",
|
||||
"description": "Restricciones opcionales (ej. 'no toques el header').",
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
tool_defs.append({
|
||||
"name": "acai_plan_advance",
|
||||
"description": (
|
||||
"Avanza/abandona el plan activo. Llama con `abandon: true` si el "
|
||||
"usuario corrige y el plan ya no es valido, o con `next_cursor` para "
|
||||
"saltar al siguiente step pendiente."
|
||||
),
|
||||
"input_schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"abandon": {"type": "boolean"},
|
||||
"completed_ids": {"type": "array", "items": {"type": "integer"}},
|
||||
"next_cursor": {"type": "integer"},
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
return tool_defs
|
||||
|
||||
@staticmethod
|
||||
def _extract_mcp_output(result: dict[str, Any]) -> str:
|
||||
"""Extract text content from MCP tool result."""
|
||||
"""Extract text content from MCP tool result.
|
||||
|
||||
El modelo (MiniMax M2.7) es text-only — los blocks `type=image` no
|
||||
pueden reenviarse. En lugar de descartar silenciosamente (lo que dejaba
|
||||
al agente con un tool_result vacio y le hacia repetir la llamada),
|
||||
emitimos un placeholder explicito que le dice que use `browser_snapshot`
|
||||
si quiere inspeccionar la pagina.
|
||||
"""
|
||||
content = result.get("content", [])
|
||||
if isinstance(content, list):
|
||||
parts: list[str] = []
|
||||
image_count = 0
|
||||
for item in content:
|
||||
if isinstance(item, dict) and item.get("type") == "text":
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
itype = item.get("type")
|
||||
if itype == "text":
|
||||
parts.append(item.get("text", ""))
|
||||
elif itype == "image":
|
||||
image_count += 1
|
||||
if image_count and not parts:
|
||||
return (
|
||||
f"[{image_count} imagen(es) no procesada(s) — el modelo es "
|
||||
f"text-only. Para inspeccionar la pagina usa "
|
||||
f"`browser_snapshot` (devuelve accessibility tree en texto). "
|
||||
f"`browser_take_screenshot` solo sirve para que el usuario "
|
||||
f"vea la captura, no para tu analisis.]"
|
||||
)
|
||||
if image_count and parts:
|
||||
parts.append(
|
||||
f"\n[Adicionalmente {image_count} imagen(es) no incluida(s): "
|
||||
f"el modelo no las procesa.]"
|
||||
)
|
||||
return "\n".join(parts) if parts else json.dumps(result)
|
||||
return str(content)
|
||||
|
||||
@@ -99,10 +99,27 @@ class OrchestratorEngine:
|
||||
session_id=session.session_id,
|
||||
)
|
||||
|
||||
# Plan mode 'force': el usuario ha pulsado el toggle Plan en el chat.
|
||||
# Prependeamos una directiva al mensaje para que el agente llame
|
||||
# acai_plan ANTES de ejecutar nada. El system prompt ya conoce la tool;
|
||||
# esto solo bypassa la heuristica trivial-vs-complex.
|
||||
plan_mode = (session.metadata.get("plan_mode") or "auto").lower()
|
||||
if plan_mode == "force":
|
||||
message = (
|
||||
"[modo Plan activo por el usuario] Tu PRIMERA accion debe ser "
|
||||
"llamar a la tool `acai_plan` con un plan detallado del trabajo "
|
||||
"que vas a hacer. No ejecutes ninguna otra tool antes. Despues "
|
||||
"del plan, procede con la ejecucion normal.\n\n"
|
||||
f"Peticion del usuario:\n{message}"
|
||||
)
|
||||
|
||||
# Create task
|
||||
task = session.begin_task(objective=message)
|
||||
task.status = TaskStatus.EXECUTING
|
||||
|
||||
# Reset del contador de invocaciones de `acai_plan` por turno (Fase 5).
|
||||
session.metadata["plan_call_count_in_turn"] = 0
|
||||
|
||||
# Execute with the selected agent
|
||||
agent = BaseAgent(
|
||||
profile=self.agent_profile,
|
||||
|
||||
206
src/orchestrator/plan_judge.py
Normal file
206
src/orchestrator/plan_judge.py
Normal file
@@ -0,0 +1,206 @@
|
||||
"""LLM-as-judge para tracking del progreso del plan.
|
||||
|
||||
Sustituye la heuristica string-matching de `_match_step_to_executions` por
|
||||
una llamada al modelo que entiende semantica. Tras cada batch de tool calls
|
||||
del agente principal, le preguntamos al judge "que steps acaba de completar"
|
||||
con el plan + las tools como input. Devuelve JSON con `completed_ids`.
|
||||
|
||||
Diseno:
|
||||
- Una sola llamada non-streaming, ~300 tokens output max.
|
||||
- Solo evalua steps PENDIENTES (los ya completados no se envian — ahorra tokens).
|
||||
- Falla en silencio si el modelo no devuelve JSON parseable. El caller decide
|
||||
si caer al matcher heuristico o no avanzar el cursor.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from typing import Any
|
||||
|
||||
from ..adapters.base import ModelAdapter, ModelConfig
|
||||
from ..models.tools import ToolExecution, ToolExecutionStatus
|
||||
from .tool_groups import strip_namespace
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
_SYSTEM_PROMPT = """\
|
||||
Eres un revisor de progreso de un plan de ejecucion. Recibes:
|
||||
1. El plan con sus steps PENDIENTES (id, description, agent_action, tables_touched, files_touched).
|
||||
2. Las herramientas que el agente principal acaba de ejecutar en este step (nombre, args, success).
|
||||
|
||||
Tu unica salida es un objeto JSON con esta forma exacta:
|
||||
|
||||
{
|
||||
"completed_ids": [1, 4],
|
||||
"rationale": "una frase corta explicando por que"
|
||||
}
|
||||
|
||||
Reglas:
|
||||
- `completed_ids` contiene los IDs de los steps que han sido COMPLETAMENTE realizados por las tools ejecutadas en este step.
|
||||
- Sé estricto: si un step requiere `create_or_update_record en builder_custom` y la tool ejecutada fue `create_or_update_record en apartados`, NO esta hecho.
|
||||
- Si un step requiere `acai-write template/estandar/modulos/X/index-base.tpl` y la tool fue `acai-write` con un path distinto, NO esta hecho.
|
||||
- Si un step menciona varias tools (ej. "create_or_update_record + add_module_to_record") solo lo marcas como done si TODAS las tools necesarias se ejecutaron.
|
||||
- Si un step usa `ask_user` como agent_action, NUNCA lo marques como done — el agente debe preguntarle al usuario manualmente.
|
||||
- Si dudas, NO incluyas el id. Mejor un falso negativo (que pase a otro step) que un falso positivo (que marque algo no hecho).
|
||||
- Si ninguna tool corresponde a ningun step pendiente, devuelve `"completed_ids": []`.
|
||||
- `rationale`: una frase concisa en español, max 200 chars.
|
||||
|
||||
Devuelve SOLO el JSON, sin texto alrededor."""
|
||||
|
||||
|
||||
_FENCE_RE = re.compile(r"```(?:json)?\s*(\{.*?\})\s*```", re.DOTALL | re.IGNORECASE)
|
||||
|
||||
|
||||
def _parse_judge_output(raw: str) -> dict[str, Any] | None:
|
||||
"""Extrae el JSON del output del judge. Tolerante a fences y texto extra."""
|
||||
if not raw:
|
||||
return None
|
||||
|
||||
# Path 1: fence
|
||||
m = _FENCE_RE.search(raw)
|
||||
if m:
|
||||
try:
|
||||
return json.loads(m.group(1))
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
# Path 2: balanced braces
|
||||
start = raw.find("{")
|
||||
if start < 0:
|
||||
return None
|
||||
depth = 0
|
||||
in_str = False
|
||||
escape = False
|
||||
for i in range(start, len(raw)):
|
||||
c = raw[i]
|
||||
if escape:
|
||||
escape = False
|
||||
continue
|
||||
if c == "\\":
|
||||
escape = True
|
||||
continue
|
||||
if c == '"' and not escape:
|
||||
in_str = not in_str
|
||||
continue
|
||||
if in_str:
|
||||
continue
|
||||
if c == "{":
|
||||
depth += 1
|
||||
elif c == "}":
|
||||
depth -= 1
|
||||
if depth == 0:
|
||||
candidate = raw[start:i + 1]
|
||||
try:
|
||||
return json.loads(candidate)
|
||||
except json.JSONDecodeError:
|
||||
return None
|
||||
return None
|
||||
|
||||
|
||||
def _serialize_tool_execs(tool_executions: list[ToolExecution]) -> list[dict[str, Any]]:
    """Reduce tool executions to the fields the judge receives.

    Executions whose status is neither COMPLETED nor FAILED are dropped.
    Each remaining one becomes a dict with the namespace-stripped tool
    name, its arguments (`{}` when empty) and a `success` flag that is
    True only for COMPLETED.
    """
    finished = (ToolExecutionStatus.COMPLETED, ToolExecutionStatus.FAILED)
    return [
        {
            "tool": strip_namespace(execution.tool_name),
            "args": execution.arguments or {},
            "success": execution.status == ToolExecutionStatus.COMPLETED,
        }
        for execution in tool_executions
        if execution.status in finished
    ]
|
||||
|
||||
|
||||
def _serialize_pending_steps(plan: dict[str, Any]) -> list[dict[str, Any]]:
|
||||
"""Solo los steps que aun no estan completados."""
|
||||
completed = set(plan.get("completed_step_ids") or [])
|
||||
out: list[dict[str, Any]] = []
|
||||
for s in plan.get("steps") or []:
|
||||
sid = s.get("id")
|
||||
if sid in completed:
|
||||
continue
|
||||
out.append({
|
||||
"id": sid,
|
||||
"description": (s.get("description") or "")[:300],
|
||||
"agent_action": (s.get("agent_action") or "")[:300],
|
||||
"files_touched": s.get("files_touched") or [],
|
||||
"tables_touched": s.get("tables_touched") or [],
|
||||
})
|
||||
return out
|
||||
|
||||
|
||||
async def judge_plan_progress(
    plan: dict[str, Any],
    tool_executions_this_step: list[ToolExecution],
    model_adapter: ModelAdapter,
    model_id: str | None = None,
) -> tuple[list[int], str]:
    """Ask the model which pending plan steps this batch of tools completed.

    Sends the pending steps and the serialized tool executions as a single
    JSON user message and parses the JSON object the model answers with.

    Args:
        plan: Plan dict with `steps` and `completed_step_ids`.
        tool_executions_this_step: Tool executions of the step just run.
        model_adapter: Adapter whose `complete()` performs the call.
        model_id: Model identifier; an empty string is sent when None.

    Returns:
        `(completed_ids, rationale)`. When there is nothing to judge, or on
        a model/parse error, the list is empty and the rationale explains
        why (`"judge_error: ..."` for errors). Only ids that were pending
        are returned, each at most once.
    """
    pending = _serialize_pending_steps(plan)
    if not pending:
        return [], "no pending steps"

    tools_payload = _serialize_tool_execs(tool_executions_this_step)
    if not tools_payload:
        return [], "no tools executed"

    user_msg = json.dumps({
        "plan_pending_steps": pending,
        "tools_executed_this_step": tools_payload,
    }, ensure_ascii=False)

    # Generous max_tokens: the model may still emit thinking blocks although
    # `disabled` is requested, and the JSON answer must not get truncated.
    config = ModelConfig(
        model_id=model_id or "",
        max_tokens=1500,
        temperature=0.0,
        extra={"thinking": {"type": "disabled"}},
    )

    # Single non-streaming call; `response.content` is read as the text.
    try:
        response = await model_adapter.complete(
            messages=[
                {"role": "system", "content": _SYSTEM_PROMPT},
                {"role": "user", "content": user_msg},
            ],
            tools=None,
            config=config,
        )
    except Exception as e:
        logger.warning("[plan_judge] model call failed: %s", e)
        return [], f"judge_error: {str(e)[:120]}"

    raw_text = (response.content or "").strip()
    parsed = _parse_judge_output(raw_text)
    if not parsed or not isinstance(parsed, dict):
        logger.warning("[plan_judge] could not parse JSON: %r", raw_text[:200])
        return [], "judge_error: parse_failed"

    raw_ids = parsed.get("completed_ids") or []
    if not isinstance(raw_ids, list):
        return [], "judge_error: completed_ids not a list"

    pending_ids = {s["id"] for s in pending}
    completed_ids: list[int] = []
    for cid in raw_ids:
        # `int()` would silently turn `True` into 1 and `2.9` into 2, and
        # raises OverflowError on Infinity: reject those instead of
        # inventing a step id from them.
        if isinstance(cid, bool):
            continue
        if isinstance(cid, float) and not cid.is_integer():
            continue
        try:
            cid_int = int(cid)
        except (TypeError, ValueError):
            continue
        # Only accept ids that were pending (defence against hallucination)
        # and report each of them once.
        if cid_int in pending_ids and cid_int not in completed_ids:
            completed_ids.append(cid_int)

    rationale = str(parsed.get("rationale") or "")[:300]
    return completed_ids, rationale
|
||||
355
src/orchestrator/planner.py
Normal file
355
src/orchestrator/planner.py
Normal file
@@ -0,0 +1,355 @@
|
||||
"""Sub-loop del planner — implementacion de la tool interna `acai_plan`.
|
||||
|
||||
La tool `acai_plan` se intercepta en `BaseAgent._execute_tool`. Cuando el
|
||||
agente principal la llama, lanzamos `run_planner_subloop` que abre una
|
||||
mini-conversacion con el modelo usando `system.planner.md` y solo tools de
|
||||
lectura. Devuelve un plan JSON estructurado.
|
||||
|
||||
Diseno:
|
||||
- El planner NO ve el thinking del agente principal directamente — recibe
|
||||
un `parent_thinking_summary` reducido (~300 tokens) para no contaminar.
|
||||
- max_steps=3 turnos del modelo. Suficiente para 1-2 lookups + emitir JSON.
|
||||
- La salida es texto que se parsea a JSON. Si falla, retornamos error y
|
||||
el agente principal decide si reintenta o pasa a modo directo.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
import logging
import re
from dataclasses import dataclass, field
from typing import Any

from ..adapters.base import ModelAdapter, ModelConfig
from ..mcp.manager import MCPManager
from ..models.agent import AgentProfile
from .tool_groups import PLANNER_TOOLS, strip_namespace
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class PlannerResult:
    """Result of the planner sub-loop."""

    # Parsed plan, or None when the planner produced no usable plan.
    plan: dict[str, Any] | None
    # Error description; empty when there is none.
    error: str = ""
    # Raw text associated with the result; empty when there is none.
    raw_text: str = ""
    # Tool execution records; every instance gets its own fresh list.
    tool_executions: list[dict[str, Any]] = field(default_factory=list)

    def __post_init__(self) -> None:
        # Callers that still pass `tool_executions=None` explicitly keep
        # getting an empty list, as before.
        if self.tool_executions is None:
            self.tool_executions = []
|
||||
|
||||
|
||||
# Regex that extracts a JSON object wrapped in a ```json ... ``` fence
# (the `json` tag is optional). Unfenced JSON is handled by the brace scan
# in `parse_plan`.
_FENCE_RE = re.compile(r"```(?:json)?\s*(\{.*?\})\s*```", re.DOTALL | re.IGNORECASE)


def _matching_brace_index(text: str, start: int) -> int:
    """Return the index of the `}` that closes the `{` at `start`, or -1.

    Braces inside double-quoted strings are ignored, and a backslash is only
    treated as an escape while inside such a string.
    """
    depth = 0
    in_str = False
    escaped = False
    for i in range(start, len(text)):
        ch = text[i]
        if in_str:
            if escaped:
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == '"':
                in_str = False
            continue
        if ch == '"':
            in_str = True
        elif ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth == 0:
                return i
    return -1


def parse_plan(raw_text: str) -> dict[str, Any] | None:
    """Robustly extract the plan JSON from the planner output.

    Strategy:
      1) Try a ```json ... ``` fence.
      2) Otherwise try each top-level balanced `{...}` group in order and
         return the first one that `json.loads` accepts, so a stray `{x}`
         in the prose no longer hides the real object that follows it.

    Returns:
        The parsed object, or None for empty input, when no group parses,
        or when a `{` is never closed (e.g. truncated output). The interior
        of a failed group is never searched, so a truncated plan cannot be
        mistaken for one of its nested objects.
    """
    if not raw_text:
        return None

    # Path 1: fence
    fenced = _FENCE_RE.search(raw_text)
    if fenced:
        try:
            return json.loads(fenced.group(1))
        except json.JSONDecodeError:
            pass

    # Path 2: balanced braces, one top-level group at a time.
    start = raw_text.find("{")
    while start >= 0:
        end = _matching_brace_index(raw_text, start)
        if end < 0:
            # Unbalanced from here on: nothing after it can be trusted.
            return None
        try:
            return json.loads(raw_text[start:end + 1])
        except json.JSONDecodeError:
            start = raw_text.find("{", end + 1)
    return None
|
||||
|
||||
|
||||
def _normalize_plan(plan: dict[str, Any], objective: str) -> dict[str, Any]:
|
||||
"""Asegura los campos esperados con defaults razonables."""
|
||||
out: dict[str, Any] = {
|
||||
"objective": str(plan.get("objective") or objective)[:500],
|
||||
"steps": [],
|
||||
"risks": [],
|
||||
"files_touched": [],
|
||||
"tables_touched": [],
|
||||
"estimated_steps": 0,
|
||||
"notes": "",
|
||||
}
|
||||
raw_steps = plan.get("steps") or []
|
||||
if isinstance(raw_steps, list):
|
||||
for i, s in enumerate(raw_steps):
|
||||
if not isinstance(s, dict):
|
||||
continue
|
||||
step = {
|
||||
"id": int(s.get("id") or i + 1),
|
||||
"description": str(s.get("description") or "")[:500],
|
||||
"agent_action": str(s.get("agent_action") or "")[:500],
|
||||
"files_touched": [str(x) for x in (s.get("files_touched") or []) if x][:20],
|
||||
"tables_touched": [str(x) for x in (s.get("tables_touched") or []) if x][:20],
|
||||
"depends_on": [int(x) for x in (s.get("depends_on") or []) if isinstance(x, (int, str)) and str(x).isdigit()][:10],
|
||||
}
|
||||
out["steps"].append(step)
|
||||
out["risks"] = [str(r)[:300] for r in (plan.get("risks") or []) if r][:10]
|
||||
out["files_touched"] = list({f for s in out["steps"] for f in s["files_touched"]})[:30]
|
||||
out["tables_touched"] = list({t for s in out["steps"] for t in s["tables_touched"]})[:30]
|
||||
out["estimated_steps"] = int(plan.get("estimated_steps") or len(out["steps"]))
|
||||
out["notes"] = str(plan.get("notes") or "")[:500]
|
||||
return out
|
||||
|
||||
|
||||
def _build_planner_tools(mcp: MCPManager | None) -> list[dict[str, Any]]:
|
||||
"""Devuelve solo las definiciones de tools de lectura."""
|
||||
if not mcp or not mcp.is_running:
|
||||
return []
|
||||
out: list[dict[str, Any]] = []
|
||||
for tool in mcp.get_tool_definitions():
|
||||
if strip_namespace(tool["name"]) in PLANNER_TOOLS:
|
||||
out.append(tool)
|
||||
return out
|
||||
|
||||
|
||||
async def _execute_planner_tool(
    tc: dict[str, Any],
    mcp: MCPManager | None,
    executions_log: list[dict[str, Any]],
) -> dict[str, Any]:
    """Run one planner tool call and return its `tool_result` block.

    Only tools whose bare name is in `PLANNER_TOOLS` are executed; any other
    tool yields an error block instead of being called. A successful call is
    also appended to *executions_log*. Failures are logged and reported back
    to the model as an error block rather than raised.
    """
    tool_name_raw = tc["name"]
    if strip_namespace(tool_name_raw) not in PLANNER_TOOLS:
        return {
            "type": "tool_result",
            "tool_use_id": tc["id"],
            "content": f"[ERROR planner] tool '{tool_name_raw}' no permitida en planner sub-loop (solo lectura).",
            "is_error": True,
        }
    try:
        if not mcp or not mcp.is_running:
            raise RuntimeError("MCP no disponible")
        result = await mcp.call_tool(tool_name_raw, tc.get("parsed_arguments", {}))
        # Keep only the text parts of the MCP result; fall back to the raw
        # JSON when there are none.
        content_parts = [
            c.get("text", "")
            for c in (result.get("content") or [])
            if isinstance(c, dict) and c.get("type") == "text"
        ]
        raw_output = "\n".join(content_parts) if content_parts else json.dumps(result)
        executions_log.append({
            "name": tool_name_raw,
            "arguments": tc.get("parsed_arguments", {}),
            "raw_output_preview": raw_output[:300],
        })
        return {
            "type": "tool_result",
            "tool_use_id": tc["id"],
            "content": raw_output[:4000],
        }
    except Exception as e:  # Best effort: the model sees the error and may adapt.
        logger.warning("Planner tool %s failed: %s", tool_name_raw, e)
        return {
            "type": "tool_result",
            "tool_use_id": tc["id"],
            "content": f"[ERROR] {e}",
            "is_error": True,
        }


async def run_planner_subloop(
    *,
    objective: str,
    scope: str,
    agent_profile: AgentProfile,
    model_adapter: ModelAdapter,
    mcp: MCPManager | None,
    parent_thinking_summary: str = "",
    max_subloop_steps: int = 6,
) -> PlannerResult:
    """Run a short conversation with the model to produce a plan.

    Each turn streams a model response. If the model requested tools, the
    whitelisted ones are executed and their results are fed back. Otherwise
    the text is parsed as the plan JSON; on a parse failure the model is
    asked once more to emit only the JSON, until the turn budget runs out.

    This function emits no user-facing events and persists nothing; the
    caller integrates the returned result.

    Args:
        objective: What the plan must achieve.
        scope: Optional scope restriction; ignored when blank.
        agent_profile: Supplies the planner system prompt, model id and
            max tokens.
        model_adapter: Adapter used to stream the model response.
        mcp: MCP manager used to list and call tools; may be None.
        parent_thinking_summary: Optional summary of the calling agent's
            reasoning, passed to the model as context.
        max_subloop_steps: Maximum number of model turns.

    Returns:
        A PlannerResult with `plan` set on success, or with `plan=None` and
        `error` describing the failure.
    """
    system_prompt = agent_profile.system_prompt_planner or ""
    if not system_prompt.strip():
        return PlannerResult(plan=None, error="planner system prompt vacio")

    user_msg_parts = [f"Objetivo: {objective}"]
    if scope.strip():
        user_msg_parts.append(f"Scope: {scope}")
    if parent_thinking_summary.strip():
        user_msg_parts.append(f"Contexto previo (resumen del thinking del agente principal):\n{parent_thinking_summary}")
    user_msg_parts.append("Produce el plan JSON segun la especificacion.")
    user_message = "\n\n".join(user_msg_parts)

    messages: list[dict[str, Any]] = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_message},
    ]

    config = ModelConfig(
        model_id=agent_profile.model_id or "",
        max_tokens=agent_profile.max_tokens or 4096,
        # Lower temperature than the main agent: we want clean JSON.
        temperature=0.1,
    )

    tool_defs = _build_planner_tools(mcp)
    tool_executions_log: list[dict[str, Any]] = []

    accumulated_text = ""
    accumulated_thinking = ""

    for sub_step in range(max_subloop_steps):
        full_text = ""
        # Tool calls still receiving argument fragments, keyed by call id.
        active_tools: dict[str, dict[str, Any]] = {}
        tool_calls_this_step: list[dict[str, Any]] = []

        async for chunk in model_adapter.stream(
            messages=messages,
            tools=tool_defs if tool_defs else None,
            config=config,
        ):
            if chunk.delta:
                full_text += chunk.delta

            if chunk.thinking_delta:
                accumulated_thinking += chunk.thinking_delta

            if chunk.tool_name and chunk.tool_call_id:
                if chunk.tool_call_id not in active_tools:
                    active_tools[chunk.tool_call_id] = {
                        "id": chunk.tool_call_id,
                        "name": chunk.tool_name,
                        "arguments": "",
                    }

            if chunk.tool_arguments and chunk.tool_call_id and not chunk.finish_reason:
                tool = active_tools.get(chunk.tool_call_id)
                if tool:
                    tool["arguments"] += chunk.tool_arguments

            if chunk.finish_reason == "tool_use" and chunk.tool_call_id:
                tool = active_tools.pop(chunk.tool_call_id, None)
                if tool:
                    # Prefer the streamed fragments; fall back to arguments
                    # carried on the closing chunk.
                    final_args = tool["arguments"] or chunk.tool_arguments or ""
                    try:
                        tool["parsed_arguments"] = json.loads(final_args) if final_args else {}
                    except json.JSONDecodeError:
                        logger.warning(
                            "Planner tool %s sent unparseable arguments; using {}",
                            tool["name"],
                        )
                        tool["parsed_arguments"] = {}
                    tool_calls_this_step.append(tool)

            if chunk.finish_reason in ("end_turn", "stop_sequence"):
                break

        accumulated_text += full_text

        # No tool calls this turn: the text should be the plan.
        if not tool_calls_this_step:
            plan_raw = parse_plan(full_text or accumulated_text)
            if plan_raw is not None:
                normalized = _normalize_plan(plan_raw, objective)
                # Use the model's own thinking as `notes` when it gave none.
                if not normalized.get("notes") and accumulated_thinking:
                    normalized["notes"] = accumulated_thinking[:300]
                return PlannerResult(
                    plan=normalized,
                    raw_text=full_text,
                    tool_executions=tool_executions_log,
                )
            # Last turn and still no parseable plan: give up.
            if sub_step >= max_subloop_steps - 1:
                return PlannerResult(
                    plan=None,
                    error="No se pudo parsear el JSON del plan",
                    raw_text=full_text or accumulated_text,
                    tool_executions=tool_executions_log,
                )
            # Retry with an explicit correction message. The previous output
            # is echoed back only when there is some: an assistant turn with
            # empty content would be an invalid message.
            previous_output = full_text or accumulated_text
            if previous_output:
                messages.append({"role": "assistant", "content": previous_output})
            messages.append({
                "role": "user",
                "content": (
                    "Tu output anterior no contenia un JSON parseable. "
                    "Emite UNICAMENTE el plan JSON segun la especificacion, "
                    "sin texto adicional alrededor."
                ),
            })
            continue

        # Tool calls requested: record the assistant turn (text + tool_use
        # blocks), execute the tools, and feed the results back.
        assistant_blocks: list[dict[str, Any]] = []
        if full_text:
            assistant_blocks.append({"type": "text", "text": full_text})
        for tc in tool_calls_this_step:
            assistant_blocks.append({
                "type": "tool_use",
                "id": tc["id"],
                "name": tc["name"],
                "input": tc.get("parsed_arguments", {}),
            })
        messages.append({"role": "assistant", "content": assistant_blocks})

        tool_result_blocks = [
            await _execute_planner_tool(tc, mcp, tool_executions_log)
            for tc in tool_calls_this_step
        ]
        messages.append({"role": "user", "content": tool_result_blocks})

        # On the last two turns, tell the model to stop investigating and
        # emit the JSON; otherwise it may keep requesting tools until the
        # budget is exhausted.
        if sub_step >= max_subloop_steps - 2:
            messages.append({
                "role": "user",
                "content": (
                    "PARA. No llames mas tools. Ya tienes lo necesario. "
                    "Emite AHORA el plan JSON segun la especificacion del system prompt. "
                    "Solo el JSON, sin texto alrededor."
                ),
            })

    # Turn budget exhausted without a plan.
    logger.warning(
        "Planner agotado: %d steps, %d tool calls totales, accumulated_text=%r",
        max_subloop_steps,
        len(tool_executions_log),
        accumulated_text[:300],
    )
    return PlannerResult(
        plan=None,
        error=f"Planner agotado tras {max_subloop_steps} steps sin emitir JSON",
        raw_text=accumulated_text,
        tool_executions=tool_executions_log,
    )
|
||||
@@ -25,6 +25,15 @@ class AgentRegistry:
|
||||
self._agents: dict[str, AgentProfile] = {}
|
||||
self._metadata: dict[str, dict[str, Any]] = {}
|
||||
self._agents_dir = agents_dir
|
||||
self._contract: str = ""
|
||||
|
||||
def _load_contract(self) -> str:
|
||||
"""Lee el contrato compartido (`_shared/contract.md`) que se concatena
|
||||
al system prompt de cada agente. Si no existe, devuelve string vacio."""
|
||||
contract_path = self._agents_dir / "_shared" / "contract.md"
|
||||
if contract_path.is_file():
|
||||
return contract_path.read_text(encoding="utf-8")
|
||||
return ""
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Carga
|
||||
@@ -34,6 +43,7 @@ class AgentRegistry:
|
||||
"""Escanea agents_dir y carga todos los agentes encontrados."""
|
||||
self._agents.clear()
|
||||
self._metadata.clear()
|
||||
self._contract = self._load_contract()
|
||||
|
||||
if not self._agents_dir.is_dir():
|
||||
logger.warning("Agents directory not found: %s", self._agents_dir)
|
||||
@@ -42,6 +52,9 @@ class AgentRegistry:
|
||||
for agent_dir in sorted(self._agents_dir.iterdir()):
|
||||
if not agent_dir.is_dir():
|
||||
continue
|
||||
# Skip directorios especiales (`_shared`, etc).
|
||||
if agent_dir.name.startswith("_"):
|
||||
continue
|
||||
|
||||
yaml_path = agent_dir / "agent.yaml"
|
||||
prompt_path = agent_dir / "system.md"
|
||||
@@ -60,6 +73,26 @@ class AgentRegistry:
|
||||
|
||||
agent_id = meta.get("name", agent_dir.name)
|
||||
|
||||
# Concatena contract.md al system prompt del agente
|
||||
# (Fase 3: las reglas comunes viven en _shared/contract.md).
|
||||
# La identidad del agente va PRIMERO, las reglas de ambiente
|
||||
# despues — separadas por linea horizontal.
|
||||
if self._contract:
|
||||
if system_prompt:
|
||||
system_prompt = system_prompt.rstrip() + "\n\n---\n\n" + self._contract
|
||||
else:
|
||||
system_prompt = self._contract
|
||||
|
||||
# Planner system prompt (opcional, usado por la tool
|
||||
# interna `acai_plan` cuando el agente lo expone).
|
||||
# El planner tambien recibe el contract.
|
||||
planner_path = agent_dir / "system.planner.md"
|
||||
planner_prompt = ""
|
||||
if planner_path.exists():
|
||||
planner_prompt = planner_path.read_text(encoding="utf-8")
|
||||
if self._contract:
|
||||
planner_prompt = planner_prompt.rstrip() + "\n\n---\n\n" + self._contract
|
||||
|
||||
profile = AgentProfile(
|
||||
role=agent_id,
|
||||
name=agent_id,
|
||||
@@ -79,6 +112,12 @@ class AgentRegistry:
|
||||
"task_state",
|
||||
]),
|
||||
stream_deltas=meta.get("stream_deltas", True),
|
||||
kb_load_strategy=meta.get("kb_load_strategy", "top_n"),
|
||||
kb_tags=meta.get("kb_tags", []),
|
||||
kb_max_tokens=meta.get("kb_max_tokens"),
|
||||
kb_top_n=meta.get("kb_top_n"),
|
||||
has_planner_tool=meta.get("has_planner_tool", False),
|
||||
system_prompt_planner=planner_prompt,
|
||||
)
|
||||
|
||||
self._agents[agent_id] = profile
|
||||
|
||||
63
src/orchestrator/tool_groups.py
Normal file
63
src/orchestrator/tool_groups.py
Normal file
@@ -0,0 +1,63 @@
|
||||
"""Grupos de tools utilizados por el orquestador.
|
||||
|
||||
`READ_TOOL_NAMES`: tools de solo lectura. Son seguras de exponer en sub-loops
|
||||
(p.ej. el planner) porque NO modifican estado del proyecto.
|
||||
|
||||
`PLANNER_TOOLS`: alias de READ_TOOL_NAMES — el planner SOLO investiga.
|
||||
|
||||
`PLAN_INTERNAL_TOOL_NAMES`: tools sinteticas implementadas por el orquestador
|
||||
Python (no atraviesan MCP). Se interceptan en `BaseAgent._execute_tool`.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
# Whitelist of read-only tools. A tool name is checked against this set by
# exact match after its `<server>__` namespace prefix has been removed
# (e.g. `acai_code__list_tables` is looked up as `list_tables`).
READ_TOOL_NAMES: frozenset[str] = frozenset({
    # Files (read/search)
    "acai-glob", "acai-grep", "acai-view",
    # Records (read)
    "list_table_records", "get_record",
    "list_page_modules", "get_module_config_vars",
    "list_record_uploads",
    # Schema / tables (read)
    "list_tables", "get_table_schema",
    # Layout / libraries (read)
    "get_layout_field", "list_global_libraries",
    # Hooks (read)
    "get_hook_middleware",
    # Project / web (read)
    "get_web_url",
    # Git (read)
    "list_git_log",
    # Docs (read)
    "list_docs", "read_doc",
})

# The planner is given exactly the read-only set (same object, not a copy).
PLANNER_TOOLS: frozenset[str] = READ_TOOL_NAMES

# Names of the plan tools that `is_plan_internal_tool` recognizes.
PLAN_INTERNAL_TOOL_NAMES: frozenset[str] = frozenset({
    "acai_plan",
    "acai_plan_advance",
})
|
||||
|
||||
|
||||
def strip_namespace(tool_name: str) -> str:
    """Return *tool_name* without its leading `<server>__` namespace.

    Everything up to and including the first `__` is dropped; a name that
    contains no `__` is returned unchanged.
    """
    _prefix, separator, bare_name = tool_name.partition("__")
    return bare_name if separator else tool_name
|
||||
|
||||
|
||||
def is_read_tool(tool_name: str) -> bool:
    """Tell whether *tool_name*, without its namespace prefix, is in `READ_TOOL_NAMES`."""
    bare_name = strip_namespace(tool_name)
    return bare_name in READ_TOOL_NAMES
|
||||
|
||||
|
||||
def is_plan_internal_tool(tool_name: str) -> bool:
    """Tell whether *tool_name*, without its namespace prefix, is in `PLAN_INTERNAL_TOOL_NAMES`."""
    bare_name = strip_namespace(tool_name)
    return bare_name in PLAN_INTERNAL_TOOL_NAMES
|
||||
@@ -207,6 +207,29 @@ class ClaudeFormatEmitter:
|
||||
# Emit assistant snapshot for reconciliation
|
||||
self._push(session_id, self._build_assistant_snapshot(session_id))
|
||||
|
||||
elif event_type == EventType.PLAN_CREATED:
|
||||
# Fase 5.5: PlanStepper UI. Reenviamos los datos del plan al
|
||||
# frontend como evento custom "plan.created".
|
||||
self._push(session_id, {
|
||||
"type": "plan.created",
|
||||
"plan": data,
|
||||
})
|
||||
|
||||
elif event_type == EventType.PLAN_ADVANCED:
|
||||
self._push(session_id, {
|
||||
"type": "plan.advanced",
|
||||
"cursor": data.get("cursor", 0),
|
||||
"completed_step_ids": data.get("completed_step_ids", []),
|
||||
"status": data.get("status", "active"),
|
||||
})
|
||||
|
||||
elif event_type == EventType.PLAN_ENDED:
|
||||
self._push(session_id, {
|
||||
"type": "plan.ended",
|
||||
"status": data.get("status", "done"),
|
||||
"objective": data.get("objective", ""),
|
||||
})
|
||||
|
||||
elif event_type == EventType.EXECUTION_COMPLETED:
|
||||
# Close any open text block
|
||||
self._close_text_block(session_id)
|
||||
|
||||
@@ -27,6 +27,11 @@ class EventType(StrEnum):
|
||||
TOOL_COMPLETED = "tool.completed"
|
||||
SUBAGENT_ASSIGNED = "subagent.assigned"
|
||||
EXECUTION_COMPLETED = "execution.completed"
|
||||
# Plan lifecycle (Fase 5.5: PlanStepper UI). Emitidos por BaseAgent
|
||||
# cuando la tool interna `acai_plan` produce/avanza/cierra un plan.
|
||||
PLAN_CREATED = "plan.created"
|
||||
PLAN_ADVANCED = "plan.advanced"
|
||||
PLAN_ENDED = "plan.ended"
|
||||
ERROR = "error"
|
||||
KEEPALIVE = "keepalive"
|
||||
|
||||
|
||||
Reference in New Issue
Block a user