This commit is contained in:
Jordan Diaz
2026-04-09 20:46:03 +00:00
parent 4c73d848bb
commit 237dc00379
10 changed files with 1049 additions and 1216 deletions

View File

@@ -7,6 +7,7 @@ while preserving the most important information.
from __future__ import annotations
import hashlib
import json
import logging
import re
from typing import Any
@@ -157,6 +158,140 @@ class ContextCompactor:
break
return "\n".join(lines)
def compact_conversation(
    self,
    messages: list[dict[str, Any]],
    max_tokens: int,
    recent_raw_limit: int = 2,
    raw_char_limit: int = 2000,
) -> tuple[list[dict[str, Any]], dict[str, Any]]:
    """Compact conversation history while preserving the latest user turn.

    Applies progressively more aggressive phases, stopping as soon as the
    estimated token total fits ``max_tokens``:

    1. Truncate the most recent ``recent_raw_limit`` tool results to
       ``raw_char_limit`` characters (these stay "raw").
    2. Summarize older tool results.
    3. Summarize assistant messages.
    4. Summarize user messages (never the latest user turn).
    5. Re-summarize the kept raw tool results more aggressively.
    6. Replace remaining message contents with bare placeholders.

    Args:
        messages: Conversation messages as role/content dicts.
        max_tokens: Token budget the compacted conversation must fit.
        recent_raw_limit: How many trailing tool results keep raw content.
        raw_char_limit: Character cap applied to raw tool results.

    Returns:
        ``(compacted_messages, meta)`` where ``meta`` records budgets,
        token counts and per-role compaction counters. Messages are never
        dropped, only rewritten, so ``messages_output`` equals
        ``messages_input``.
    """
    total = sum(self._estimate_message_tokens(m) for m in messages)
    meta = {
        "budget_tokens": max_tokens,
        "input_tokens": total,
        "output_tokens": total,
        "messages_input": len(messages),
        "messages_output": len(messages),
        "messages_compacted": 0,
        "tool_messages_compacted": 0,
        "assistant_messages_compacted": 0,
        "user_messages_compacted": 0,
        "raw_tool_results_kept": 0,
    }
    # Fast path: already within budget — return the caller's list untouched.
    if total <= max_tokens:
        return messages, meta
    # Shallow-copy each message so the caller's dicts are never mutated.
    compacted = [dict(m) for m in messages]
    # The latest user turn is exempt from every compaction phase.
    last_user_idx = max(
        (i for i, m in enumerate(compacted) if m.get("role") == "user"),
        default=-1,
    )
    tool_indexes = [i for i, m in enumerate(compacted) if m.get("role") == "tool"]
    keep_raw_tool_indexes = (
        set(tool_indexes[-recent_raw_limit:])
        if recent_raw_limit > 0
        else set()
    )
    # Phase 1: cap the most recent tool results at raw_char_limit but keep
    # them otherwise verbatim.
    for idx in keep_raw_tool_indexes:
        content = compacted[idx].get("content", "")
        if isinstance(content, str) and content:
            truncated = content[:raw_char_limit]
            if truncated != content:
                compacted[idx]["content"] = truncated
                meta["messages_compacted"] += 1
                meta["tool_messages_compacted"] += 1
            meta["raw_tool_results_kept"] += 1
    total = sum(self._estimate_message_tokens(m) for m in compacted)
    # Phase 2: summarize the older (non-kept) tool results.
    if total > max_tokens:
        for idx in tool_indexes:
            if idx in keep_raw_tool_indexes:
                continue
            content = compacted[idx].get("content", "")
            if not isinstance(content, str) or not content:
                continue
            compacted[idx]["content"] = self._summarize_message_content(
                content,
                prefix="[TOOL RESULT COMPACTADO]",
                max_chars=max(180, raw_char_limit // 4),
            )
            meta["messages_compacted"] += 1
            meta["tool_messages_compacted"] += 1
            total = sum(self._estimate_message_tokens(m) for m in compacted)
            if total <= max_tokens:
                break
    # Phase 3: summarize assistant messages.
    if total > max_tokens:
        for idx, message in enumerate(compacted):
            if idx == last_user_idx or message.get("role") != "assistant":
                continue
            content = message.get("content", "")
            if not isinstance(content, str) or not content:
                continue
            message["content"] = self._summarize_message_content(
                content,
                prefix="[ASSISTANT COMPACTADO]",
                max_chars=max(240, raw_char_limit // 3),
            )
            meta["messages_compacted"] += 1
            meta["assistant_messages_compacted"] += 1
            total = sum(self._estimate_message_tokens(m) for m in compacted)
            if total <= max_tokens:
                break
    # Phase 4: summarize older user messages (still never the latest turn).
    if total > max_tokens:
        for idx, message in enumerate(compacted):
            if idx == last_user_idx or message.get("role") != "user":
                continue
            content = message.get("content", "")
            if not isinstance(content, str) or not content:
                continue
            message["content"] = self._summarize_message_content(
                content,
                prefix="[USER CONTEXT COMPACTADO]",
                max_chars=max(220, raw_char_limit // 3),
            )
            meta["messages_compacted"] += 1
            meta["user_messages_compacted"] += 1
            total = sum(self._estimate_message_tokens(m) for m in compacted)
            if total <= max_tokens:
                break
    # Phase 5: give up on the "raw" tool results and summarize them too,
    # with a tighter cap than phase 2.
    if total > max_tokens:
        for idx in tool_indexes:
            if idx in keep_raw_tool_indexes:
                compacted[idx]["content"] = self._summarize_message_content(
                    compacted[idx].get("content", ""),
                    prefix="[TOOL RESULT COMPACTADO]",
                    max_chars=max(180, raw_char_limit // 5),
                )
                total = sum(self._estimate_message_tokens(m) for m in compacted)
                if total <= max_tokens:
                    break
    # Phase 6 (last resort): collapse contents to bare role placeholders.
    if total > max_tokens:
        for idx, message in enumerate(compacted):
            if idx == last_user_idx:
                continue
            role = message.get("role", "")
            content = message.get("content", "")
            if not isinstance(content, str) or not content:
                continue
            if role == "tool":
                message["content"] = "[TOOL RESULT COMPACTADO]"
            elif role == "assistant":
                message["content"] = "[ASSISTANT COMPACTADO]"
            elif role == "user":
                message["content"] = "[USER CONTEXT COMPACTADO]"
            total = sum(self._estimate_message_tokens(m) for m in compacted)
            if total <= max_tokens:
                break
    meta["output_tokens"] = total
    return compacted, meta
# ------------------------------------------------------------------
# Internals
# ------------------------------------------------------------------
@@ -186,6 +321,45 @@ class ContextCompactor:
compacted.append(line)
return "\n".join(compacted)
def _summarize_message_content(
    self,
    content: str,
    prefix: str,
    max_chars: int,
) -> str:
    """Compress a message body into a short, prefixed summary.

    Args:
        content: Original message text.
        prefix: Marker (e.g. "[TOOL RESULT COMPACTADO]") prepended when the
            text had to be summarized or truncated.
        max_chars: Soft cap for the resulting summary length.

    Returns:
        The compacted text; for multi-line content that exceeds the cap,
        a "First: ... | Last: ..." digest of the first and last lines.
    """
    stripped = content.strip()
    compacted = self._compact_text(content)
    if len(compacted) <= max_chars:
        # Only add the prefix when compaction actually changed the text,
        # so untouched content passes through verbatim.
        if compacted != stripped:
            summary = f"{prefix} {compacted}".strip()
            if len(summary) > max_chars:
                # Bug fix: the original appended an empty string here (a
                # no-op, presumably a lost ellipsis). Restore the "…"
                # truncation marker.
                summary = summary[:max_chars].rstrip() + "…"
            return summary
        return compacted
    lines = [l.strip() for l in compacted.splitlines() if l.strip()]
    if not lines:
        return prefix
    if len(lines) == 1:
        return f"{prefix} {lines[0][:max_chars]}".strip()
    # Multi-line overflow: keep the beginning and the end, which usually
    # carry the most signal (headings / conclusions).
    first = lines[0][: max_chars // 2]
    last = lines[-1][: max_chars // 3]
    summary = f"{prefix} First: {first}"
    if last and last != first:
        summary += f" | Last: {last}"
    if len(summary) > max_chars:
        summary = summary[:max_chars].rstrip() + "…"
    return summary
@staticmethod
def _estimate_message_tokens(message: dict[str, Any]) -> int:
    """Estimate the token footprint of one message (content plus tool calls)."""
    content = message.get("content", "")
    if not isinstance(content, str):
        content = str(content)
    total = estimate_tokens(content)
    tool_calls = message.get("tool_calls")
    if tool_calls:
        # Tool calls are serialized the same way they would be sent to the model.
        total += estimate_tokens(json.dumps(tool_calls, ensure_ascii=False))
    return total
def _extract_facts(self, raw_output: str) -> list[str]:
"""Extract short factual claims from tool output."""
facts: list[str] = []

View File

@@ -99,13 +99,25 @@ class ContextEngine:
if kb_section:
sections.append(kb_section)
base_user_content, resolved_followup_context, user_content, followup_mode = (
self._resolve_current_request(session)
)
session.metadata["followup_mode"] = followup_mode
# 4. Task history — compact summaries of past tasks in this session
if "task_state" in allowed and session.task_history:
sections.append(self._build_task_history(session))
# 5. Task state — current task (includes compacted previous steps)
if "task_state" in allowed and session.current_task:
sections.append(self._build_task_state(session.current_task))
sections.append(
self._build_task_state(
session.current_task,
objective_override=base_user_content,
resolved_context=resolved_followup_context,
followup_mode=followup_mode,
)
)
# 6. Artifact memory — summaries for recent/current artifacts
if include_artifact_memory:
@@ -115,14 +127,15 @@ class ContextEngine:
# Build messages with real conversation history first so sections can
# compact against the remaining budget.
messages = self._build_messages(session, conversation)
message_tokens = sum(self._estimate_message_tokens(m) for m in messages)
pre_compaction_section_tokens = sum(estimate_tokens(s.content) for s in sections)
pre_compaction_total = pre_compaction_section_tokens + message_tokens
section_budget = max(
1,
settings.effective_context_budget - message_tokens,
messages = self._build_messages(
session,
conversation,
user_content=user_content,
)
raw_message_tokens = sum(self._estimate_message_tokens(m) for m in messages)
pre_compaction_section_tokens = sum(estimate_tokens(s.content) for s in sections)
pre_compaction_total = pre_compaction_section_tokens + raw_message_tokens
section_budget = max(1, settings.effective_context_budget - raw_message_tokens)
# Compact sections only when the full prompt is approaching the target.
section_compaction = {
@@ -135,18 +148,64 @@ class ContextEngine:
"sections_compacted": 0,
"sections_removed": 0,
}
if pre_compaction_total > settings.effective_compaction_threshold:
system_prompt = self._assemble_system_prompt(sections)
system_prompt_tokens = estimate_tokens(system_prompt)
hard_message_budget = max(1, settings.effective_context_budget - system_prompt_tokens)
target_message_budget = max(1, settings.effective_compaction_threshold - system_prompt_tokens)
message_budget = min(hard_message_budget, target_message_budget)
conversation_compaction = {
"budget_tokens": message_budget,
"hard_budget_tokens": hard_message_budget,
"input_tokens": raw_message_tokens,
"output_tokens": raw_message_tokens,
"messages_input": len(messages),
"messages_output": len(messages),
"messages_compacted": 0,
"raw_tool_results_kept": 0,
}
total_tokens = system_prompt_tokens + raw_message_tokens
if total_tokens > settings.effective_compaction_threshold:
messages, conversation_compaction = self.compactor.compact_conversation(
messages,
max_tokens=message_budget,
recent_raw_limit=settings.conversation_recent_raw_limit,
raw_char_limit=settings.tool_raw_output_max_chars,
)
total_tokens = system_prompt_tokens + sum(
self._estimate_message_tokens(m) for m in messages
)
if total_tokens > settings.effective_context_budget:
section_budget = max(
1,
settings.effective_context_budget
- sum(self._estimate_message_tokens(m) for m in messages),
)
sections, section_compaction = self.compactor.compact_sections(
sections,
max_tokens=section_budget,
)
system_prompt = self._assemble_system_prompt(sections)
system_prompt_tokens = estimate_tokens(system_prompt)
total_tokens = system_prompt_tokens + sum(
self._estimate_message_tokens(m) for m in messages
)
# Assemble system prompt from sections
system_prompt = self._assemble_system_prompt(sections)
total_tokens = estimate_tokens(system_prompt) + sum(
self._estimate_message_tokens(m) for m in messages
)
if total_tokens > settings.effective_context_budget:
hard_message_budget = max(
1,
settings.effective_context_budget - system_prompt_tokens,
)
messages, conversation_compaction = self.compactor.compact_conversation(
messages,
max_tokens=hard_message_budget,
recent_raw_limit=settings.conversation_recent_raw_limit,
raw_char_limit=settings.tool_raw_output_max_chars,
)
total_tokens = system_prompt_tokens + sum(
self._estimate_message_tokens(m) for m in messages
)
package = ContextPackage(
sections=sections,
@@ -188,18 +247,22 @@ class ContextEngine:
section_compaction.get("sections_compacted")
or section_compaction.get("sections_removed")
or section_compaction.get("duplicates_removed")
or conversation_compaction.get("messages_compacted")
),
"system_prompt_tokens": estimate_tokens(system_prompt),
"user_message_preview": messages[0]["content"][:200] if messages else "",
"system_prompt_tokens": system_prompt_tokens,
"user_message_preview": user_content[:200],
"artifacts_count": len(artifacts) if artifacts else 0,
"conversation_messages": conv_len,
"budget_tokens": settings.effective_context_budget,
"threshold_tokens": settings.effective_compaction_threshold,
"message_tokens": message_tokens,
"message_tokens": conversation_compaction.get("output_tokens", raw_message_tokens),
"message_tokens_before_compaction": raw_message_tokens,
"pre_compaction_tokens": pre_compaction_total,
"post_compaction_tokens": total_tokens,
"section_budget_tokens": section_budget,
"message_budget_tokens": message_budget,
"section_compaction": section_compaction,
"conversation_compaction": conversation_compaction,
"over_budget": total_tokens > settings.effective_context_budget,
}
@@ -480,6 +543,22 @@ class ContextEngine:
review = entry.get("review", "")
if review:
lines.append(f" Review: {review[:100]}")
outcomes = entry.get("outcomes", [])
if outcomes:
lines.append(f" Outcomes: {'; '.join(outcomes[:2])}")
focus_refs = entry.get("focus_refs", [])
if focus_refs:
ref_parts = []
for ref in focus_refs[:3]:
label = ref.get("label", "")
ref_type = ref.get("type", "entity")
ref_id = ref.get("id", "")
if ref_id:
ref_parts.append(f"{ref_type} '{label}' ({ref_id})")
else:
ref_parts.append(f"{ref_type} '{label}'")
if ref_parts:
lines.append(f" Focus refs: {'; '.join(ref_parts)}")
lines.append("")
content = "\n".join(lines)
@@ -490,14 +569,52 @@ class ContextEngine:
token_estimate=estimate_tokens(content),
)
def _build_task_state(self, task: TaskState) -> ContextSection:
def _build_task_state(
self,
task: TaskState,
objective_override: str | None = None,
resolved_context: str = "",
followup_mode: str = "none",
) -> ContextSection:
lines = [
"# Current Task",
f"**Objective**: {task.objective}",
f"**Objective**: {objective_override or task.objective}",
f"**Status**: {task.status}",
f"**Step**: {task.current_step_index + 1}/{len(task.plan)}",
]
if followup_mode != "none":
lines.append(f"**Follow-up Mode**: {followup_mode}")
if resolved_context:
lines.extend(
[
"",
"## Resolved Follow-up Context",
resolved_context,
]
)
if followup_mode == "transform":
lines.extend(
[
"",
"## Follow-up Policy",
"- Reutiliza primero el trabajo y contexto ya reunidos.",
"- No llames herramientas salvo que falte un dato factual critico para responder.",
"- Prioriza transformar, refinar o reescribir lo ya analizado.",
]
)
elif followup_mode == "fetch_more":
lines.extend(
[
"",
"## Follow-up Policy",
"- El usuario esta pidiendo datos o verificacion adicional.",
"- Puedes usar herramientas si aportan informacion nueva y necesaria.",
]
)
current = task.current_step()
if current:
lines.extend(
@@ -590,28 +707,27 @@ class ContextEngine:
self,
session: SessionState,
conversation: list[dict[str, Any]] | None = None,
user_content: str | None = None,
) -> list[dict[str, Any]]:
"""Build the messages array with real conversation history.
Includes the user objective message followed by the full
assistant/tool conversation — like professional agentic tools.
"""
if session.current_task:
step = session.current_task.current_step()
if step:
user_content = (
f"Execute this step: {step.description}\n"
f"Overall objective: {session.current_task.objective}"
)
else:
user_content = session.current_task.objective
else:
user_content = "Awaiting task assignment."
if user_content is None:
_, _, user_content, _ = self._resolve_current_request(session)
messages: list[dict[str, Any]] = []
# Include previous task exchanges as compact conversation history
if session.task_history:
recent_messages = self._sanitize_recent_messages(
getattr(session, "recent_messages", []),
)
if recent_messages:
messages.extend(recent_messages)
# Include previous task exchanges as compact conversation history only
# when there is no raw recent conversation window available.
if session.task_history and not recent_messages:
history_lines = ["[HISTORIAL DE CONVERSACIÓN ANTERIOR — NO ejecutar de nuevo, solo contexto]"]
for entry in session.task_history[-10:]:
objective = entry.get("objective", "")[:200]
@@ -632,6 +748,22 @@ class ContextEngine:
kd_parts.append(f"modules: {key_data['modules'][:5]}")
if kd_parts:
history_lines.append(f" Datos clave: {'; '.join(kd_parts)}")
outcomes = entry.get("outcomes", [])
if outcomes:
history_lines.append(f" Conclusiones: {'; '.join(outcomes[:2])}")
focus_refs = entry.get("focus_refs", [])
if focus_refs:
focus_parts = []
for ref in focus_refs[:3]:
label = ref.get("label", "")
ref_type = ref.get("type", "entity")
ref_id = ref.get("id", "")
if ref_id:
focus_parts.append(f"{ref_type}:{label} ({ref_id})")
else:
focus_parts.append(f"{ref_type}:{label}")
if focus_parts:
history_lines.append(f" Referencias activas: {'; '.join(focus_parts)}")
# Extract agent response from summary
if " → Agent: " in summary:
agent_part = summary.split(" → Agent: ", 1)[1][:200]
@@ -650,6 +782,67 @@ class ContextEngine:
return messages
def _resolve_current_request(self, session: SessionState) -> tuple[str, str, str, str]:
    """Derive the current user request and any resolved follow-up context.

    Returns:
        ``(base_user_content, resolved_context, user_content, followup_mode)``
        where ``user_content`` is ``base_user_content`` optionally prefixed
        with the resolved context block from the previous turn.
    """
    task = session.current_task
    if not task:
        base_user_content = "Awaiting task assignment."
    else:
        step = task.current_step()
        if step:
            base_user_content = (
                f"Execute this step: {step.description}\n"
                f"Overall objective: {task.objective}"
            )
        else:
            base_user_content = task.objective

    followup_mode = self._classify_followup_mode(base_user_content)

    resolved_context = ""
    if followup_mode != "none":
        # Prefer the structured summary of the last task; fall back to the
        # raw recent-message window when it yields nothing.
        if session.task_history:
            resolved_context = self._build_followup_resolution(session.task_history[-1])
        if not resolved_context:
            resolved_context = self._build_recent_message_resolution(
                getattr(session, "recent_messages", []),
            )

    if not resolved_context:
        user_content = base_user_content
    else:
        user_content = (
            "[CONTEXTO RESUELTO DEL TURNO ANTERIOR]\n"
            f"{resolved_context}\n\n"
            f"{base_user_content}"
        )
    return base_user_content, resolved_context, user_content, followup_mode
@staticmethod
def _sanitize_recent_messages(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
sanitized_messages: list[dict[str, Any]] = []
for message in messages:
role = str(message.get("role", "")).strip()
if role not in {"user", "assistant", "tool"}:
continue
sanitized: dict[str, Any] = {"role": role}
content = message.get("content", "")
if isinstance(content, str) and content:
sanitized["content"] = content
if role == "assistant":
tool_calls = message.get("tool_calls")
if isinstance(tool_calls, list) and tool_calls:
sanitized["tool_calls"] = tool_calls
if role == "tool":
tool_call_id = str(message.get("tool_call_id", "")).strip()
if tool_call_id:
sanitized["tool_call_id"] = tool_call_id
if "content" not in sanitized and "tool_calls" not in sanitized:
continue
sanitized_messages.append(sanitized)
return sanitized_messages
@staticmethod
def _estimate_message_tokens(message: dict[str, Any]) -> int:
content = message.get("content", "")
@@ -657,6 +850,125 @@ class ContextEngine:
return estimate_tokens(content)
return estimate_tokens(str(content))
@staticmethod
def _looks_like_followup(text: str) -> bool:
lower = text.lower()
followup_markers = (
"ese ",
"esa ",
"eso",
"este ",
"esta ",
"anterior",
"anteriormente",
"mismo",
"hazlo",
"rehaz",
"reescribe",
"céntrate",
"centrate",
"solo en",
)
return len(lower) <= 300 and any(marker in lower for marker in followup_markers)
@classmethod
def _classify_followup_mode(cls, text: str) -> str:
    """Classify a request as "transform", "fetch_more", "ambiguous" or "none".

    Transform markers (rewrite/restyle prior work) win over fetch markers
    (gather new data); anything else is "ambiguous" only if it still looks
    like a follow-up at all.
    """
    lowered = text.lower().strip()
    transform_markers = (
        "más comercial",
        "mas comercial",
        "segunda versión",
        "segunda version",
        "otra versión",
        "otra version",
        "versión final",
        "version final",
        "copy",
        "estructura",
        "lista para aplicar",
        "resúm",
        "resum",
        "rehaz",
        "reescribe",
        "adapta",
        "cámbialo",
        "cambialo",
        "sin cambiar el foco",
        "más técnico",
        "mas tecnico",
        "más corto",
        "mas corto",
        "más directo",
        "mas directo",
    )
    fetch_markers = (
        "revisa",
        "revisa la configuración",
        "revisa la configuracion",
        "comprueba",
        "mira si",
        "abre",
        "busca",
        "localiza",
        "consulta",
        "verifica",
        "comprueba cómo",
        "comprueba como",
        "cómo está",
        "como está",
        "como esta",
        "qué ves",
        "que ves",
    )
    for marker in transform_markers:
        if marker in lowered:
            return "transform"
    for marker in fetch_markers:
        if marker in lowered:
            return "fetch_more"
    if cls._looks_like_followup(text):
        return "ambiguous"
    return "none"
@staticmethod
def _build_followup_resolution(entry: dict[str, Any]) -> str:
lines: list[str] = []
focus_refs = entry.get("focus_refs", [])
outcomes = entry.get("outcomes", [])
primary = [ref for ref in focus_refs if ref.get("role") == "primary_focus"]
refs_to_render = primary or focus_refs[:3]
if refs_to_render:
rendered = []
for ref in refs_to_render[:3]:
label = ref.get("label", "")
ref_type = ref.get("type", "entity")
ref_id = ref.get("id", "")
if ref_id:
rendered.append(f"- Active ref: {ref_type} '{label}' ({ref_id})")
else:
rendered.append(f"- Active ref: {ref_type} '{label}'")
lines.extend(rendered)
if outcomes:
for outcome in outcomes[:2]:
lines.append(f"- Prior conclusion: {outcome}")
return "\n".join(lines).strip()
@staticmethod
def _build_recent_message_resolution(messages: list[dict[str, Any]]) -> str:
for message in reversed(messages):
if message.get("role") != "assistant":
continue
content = message.get("content", "")
if not isinstance(content, str):
continue
content = " ".join(content.split()).strip()
if not content:
continue
return f"- Recent assistant conclusion: {content[:280]}"
return ""
@staticmethod
def _select_context_artifacts(
session: SessionState,

View File

@@ -88,6 +88,7 @@ class SessionState(BaseModel):
current_task: TaskState | None = None
completed_tasks: list[str] = Field(default_factory=list)
task_history: list[dict[str, Any]] = Field(default_factory=list) # Compact summaries of past tasks
recent_messages: list[dict[str, Any]] = Field(default_factory=list) # Rolling raw conversation window across tasks
turn_count: int = 0
created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
updated_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))

View File

@@ -77,7 +77,9 @@ class BaseAgent:
)
# Prepare tool definitions
tool_defs = self._get_allowed_tools()
tool_defs = self._get_allowed_tools(
followup_mode=str(session.metadata.get("followup_mode", "none")),
)
# Stream model response
config = ModelConfig(
@@ -262,6 +264,7 @@ class BaseAgent:
"content": accumulated_content,
"artifacts": artifacts,
"tool_executions": tool_executions,
"conversation": conversation,
"usage": {
"input_tokens": total_input_tokens,
"output_tokens": total_output_tokens,
@@ -341,8 +344,10 @@ class BaseAgent:
return tool_exec
def _get_allowed_tools(self) -> list[dict[str, Any]]:
def _get_allowed_tools(self, followup_mode: str = "none") -> list[dict[str, Any]]:
"""Return tool definitions filtered by this agent's allowed_tools."""
if followup_mode == "transform":
return []
if not self.mcp.is_running:
return []
all_tools = self.mcp.get_tool_definitions()

View File

@@ -8,6 +8,7 @@ from __future__ import annotations
import asyncio
import logging
import re
from typing import Any
from ..adapters.base import ModelAdapter
@@ -132,6 +133,11 @@ class OrchestratorEngine:
content = result.get("content", "")
usage = result.get("usage", {"input_tokens": 0, "output_tokens": 0})
key_data = self._extract_key_data_from_results([result])
session.recent_messages = self._append_recent_messages(
session.recent_messages,
message=message,
conversation=result.get("conversation", []),
)
session.task_history.append(
self._build_task_history_entry(
@@ -218,6 +224,52 @@ class OrchestratorEngine:
"status": "error",
}
@staticmethod
def _append_recent_messages(
    existing: list[dict[str, Any]],
    message: str,
    conversation: list[dict[str, Any]],
) -> list[dict[str, Any]]:
    """Merge the prior rolling window with this turn's user message and replies."""
    window: list[dict[str, Any]] = []
    # Re-sanitize the stored window, dropping anything that became invalid.
    for prior in existing:
        sanitized = OrchestratorEngine._sanitize_recent_message(prior)
        if sanitized:
            window.append(sanitized)
    # Append the current turn: the user message first, then the agent exchange.
    if message.strip():
        window.append({"role": "user", "content": message})
    for turn_message in conversation:
        sanitized = OrchestratorEngine._sanitize_recent_message(turn_message)
        if sanitized:
            window.append(sanitized)
    return window
@staticmethod
def _sanitize_recent_message(message: dict[str, Any]) -> dict[str, Any]:
role = str(message.get("role", "")).strip()
if role not in {"user", "assistant", "tool"}:
return {}
sanitized: dict[str, Any] = {"role": role}
content = message.get("content", "")
if isinstance(content, str) and content:
sanitized["content"] = content
if role == "assistant":
tool_calls = message.get("tool_calls")
if isinstance(tool_calls, list) and tool_calls:
sanitized["tool_calls"] = tool_calls
if role == "tool":
tool_call_id = str(message.get("tool_call_id", "")).strip()
if tool_call_id:
sanitized["tool_call_id"] = tool_call_id
if "content" not in sanitized and "tool_calls" not in sanitized:
return {}
return sanitized
@staticmethod
def _extract_key_data_from_results(results: list[dict[str, Any]]) -> dict[str, Any]:
"""Extract structured data from tool executions for task history."""
@@ -270,6 +322,13 @@ class OrchestratorEngine:
else:
summary = f"User: {message_summary}"
outcomes = OrchestratorEngine._extract_outcomes(content)
focus_refs = OrchestratorEngine._extract_focus_refs(
message=message,
content=content,
key_data=key_data,
outcomes=outcomes,
)
tools_used: list[str] = []
for tool_exec in tool_executions:
tool_name = getattr(tool_exec, "tool_name", "")
@@ -287,6 +346,8 @@ class OrchestratorEngine:
"tools_used": tools_used[:8],
"artifacts_count": artifacts_count,
"summary": summary,
"outcomes": outcomes,
"focus_refs": focus_refs,
"review": "",
}
@@ -316,5 +377,143 @@ class OrchestratorEngine:
" ".join(entry.get("facts", [])[:5]),
" ".join(entry.get("tools_used", [])[:5]),
str(entry.get("key_data", {})),
" ".join(entry.get("outcomes", [])[:3]),
str(entry.get("focus_refs", [])[:3]),
]
return estimate_tokens("\n".join(p for p in parts if p))
@staticmethod
def _extract_outcomes(content: str) -> list[str]:
if not content:
return []
normalized_lines = []
for raw_line in content.splitlines():
line = raw_line.strip()
if not line:
continue
line = re.sub(r"^[#>\-\*\d\.\)\s]+", "", line).strip()
if not line:
continue
normalized_lines.append(line)
keywords = (
"si tuviera que elegir",
"más flojo",
"mas flojo",
"más problem",
"mas problem",
"recomiendo",
"recomendación",
"recomendacion",
"prioridad",
"conclus",
"debería",
"deberia",
"peor",
"más débil",
"mas debil",
)
outcomes: list[str] = []
seen: set[str] = set()
for line in normalized_lines:
lower = line.lower()
if any(k in lower for k in keywords):
trimmed = line[:220]
if trimmed not in seen:
seen.add(trimmed)
outcomes.append(trimmed)
if len(outcomes) >= 3:
return outcomes
for line in normalized_lines:
if len(line) < 20:
continue
trimmed = line[:180]
if trimmed not in seen:
seen.add(trimmed)
outcomes.append(trimmed)
if len(outcomes) >= 2:
break
return outcomes[:3]
@staticmethod
def _extract_focus_refs(
    message: str,
    content: str,
    key_data: dict[str, Any],
    outcomes: list[str],
) -> list[dict[str, str]]:
    """Collect entity references (tables, records, sections, modules, and
    bold-marked focus entities) from a turn, for task-history follow-ups.

    Args:
        message: The user's message for this turn.
        content: The agent's response text.
        key_data: Structured data extracted from tool results (expects
            optional "tables" dict and "sections"/"modules" lists).
        outcomes: Conclusion lines previously extracted from ``content``.

    Returns:
        Up to 8 refs as dicts with "type", "label", "id" and "role"
        ("related" or "primary_focus"), deduplicated in insertion order.
    """
    refs: list[dict[str, str]] = []
    seen: set[tuple[str, str, str]] = set()

    def add_ref(ref_type: str, label: str, ref_id: str = "", role: str = "related") -> None:
        # Deduplicate on (type, label, id); first occurrence wins, so the
        # role recorded is the one from the earliest sighting.
        label = label.strip()
        ref_id = ref_id.strip()
        if not label and not ref_id:
            return
        key = (ref_type, label, ref_id)
        if key in seen:
            return
        seen.add(key)
        refs.append({
            "type": ref_type,
            "label": label or ref_id,
            "id": ref_id,
            "role": role,
        })

    # Structured refs from tool-derived key data.
    for table, nums in key_data.get("tables", {}).items():
        add_ref("table", table, table, "related")
        for num in nums[:3]:
            add_ref("record", f"{table} record {num}", f"{table}:{num}", "related")
    for section in key_data.get("sections", [])[:5]:
        add_ref("section", section, section, "related")
    for module in key_data.get("modules", [])[:5]:
        add_ref("module", module, module, "related")
    source_text = "\n".join(outcomes + [content[:1200]])
    # Bold (**...**) spans inside outcome lines are treated as the primary focus.
    for line in outcomes:
        for match in re.findall(r"\*\*([^*]{2,80})\*\*", line):
            add_ref(
                OrchestratorEngine._infer_ref_type(match, line, message),
                match,
                "",
                "primary_focus",
            )
    # Fallback: phrase patterns like "diría que **X**" / "el más flojo es **X**"
    # (Spanish), searched only when no primary focus was found above.
    if not any(ref["role"] == "primary_focus" for ref in refs):
        for pattern in (
            r"(?:elegir(?:\s+\*\*uno\*\*)?,?\s+dir[ií]a que\s+\*\*([^*]{2,80})\*\*)",
            r"(?:el [^.\n]{0,40}m[aá]s flojo(?:[^.\n]{0,40})es\s+\*\*([^*]{2,80})\*\*)",
        ):
            match = re.search(pattern, source_text, flags=re.IGNORECASE)
            if match:
                label = match.group(1).strip()
                add_ref(
                    OrchestratorEngine._infer_ref_type(label, source_text, message),
                    label,
                    "",
                    "primary_focus",
                )
                break
    return refs[:8]
@staticmethod
def _infer_ref_type(label: str, context: str, message: str) -> str:
text = f"{label} {context} {message}".lower()
if any(k in text for k in ("módulo", "modulo")):
return "module"
if any(k in text for k in ("página", "pagina", "apartado")):
return "page"
if "tabla" in text:
return "table"
if any(k in text for k in ("archivo", "file", ".tpl", ".php", ".js", ".css")):
return "file"
if any(k in text for k in ("sección", "seccion", "section")):
return "section"
return "entity"