nah

2026-04-09 20:46:03 +00:00
parent 4c73d848bb
commit 237dc00379
10 changed files with 1049 additions and 1216 deletions
--- a/src/orchestrator/engine.py
+++ b/src/orchestrator/engine.py
@@ -8,6 +8,7 @@ from __future__ import annotations

 import asyncio
 import logging
+import re
 from typing import Any

 from ..adapters.base import ModelAdapter
@@ -132,6 +133,11 @@ class OrchestratorEngine:
        content = result.get("content", "")
        usage = result.get("usage", {"input_tokens": 0, "output_tokens": 0})
        key_data = self._extract_key_data_from_results([result])
+        session.recent_messages = self._append_recent_messages(
+            session.recent_messages,
+            message=message,
+            conversation=result.get("conversation", []),
+        )

        session.task_history.append(
            self._build_task_history_entry(
@@ -218,6 +224,52 @@ class OrchestratorEngine:
            "status": "error",
        }

+    @staticmethod
+    def _append_recent_messages(
+        existing: list[dict[str, Any]],
+        message: str,
+        conversation: list[dict[str, Any]],
+    ) -> list[dict[str, Any]]:
+        """Return a new message log with the current turn appended.
+
+        Every entry of ``existing`` and ``conversation`` is passed through
+        ``_sanitize_recent_message``; entries that sanitize to an empty dict
+        are dropped. The user ``message`` is inserted between the two, but
+        only when it is not blank. ``existing`` itself is not modified.
+
+        Args:
+            existing: Previously stored messages.
+            message: The user's text for this turn.
+            conversation: Messages produced while handling this turn.
+
+        Returns:
+            A fresh list: sanitized history, then the user message, then the
+            sanitized conversation.
+        """
+        sanitize = OrchestratorEngine._sanitize_recent_message
+
+        history = [kept for kept in map(sanitize, existing) if kept]
+
+        # Whitespace-only input does not count as a user turn.
+        if message.strip():
+            history.append({"role": "user", "content": message})
+
+        history.extend(kept for kept in map(sanitize, conversation) if kept)
+        return history
+
+    @staticmethod
+    def _sanitize_recent_message(message: dict[str, Any]) -> dict[str, Any]:
+        """Reduce a chat message to the fields kept in the recent-message log.
+
+        Only the ``user``, ``assistant`` and ``tool`` roles are accepted.
+        Non-empty string ``content`` is copied; assistant messages also keep
+        a non-empty ``tool_calls`` list, and tool messages keep a non-blank
+        ``tool_call_id``.
+
+        Args:
+            message: Raw message mapping. Anything that is not a dict is
+                rejected instead of raising.
+
+        Returns:
+            The trimmed message, or ``{}`` when the role is not accepted or
+            the message carries neither content nor tool calls.
+        """
+        if not isinstance(message, dict):
+            return {}
+
+        role = str(message.get("role", "")).strip()
+        if role not in {"user", "assistant", "tool"}:
+            return {}
+
+        sanitized: dict[str, Any] = {"role": role}
+        content = message.get("content", "")
+        if isinstance(content, str) and content:
+            sanitized["content"] = content
+
+        if role == "assistant":
+            tool_calls = message.get("tool_calls")
+            if isinstance(tool_calls, list) and tool_calls:
+                sanitized["tool_calls"] = tool_calls
+
+        if role == "tool":
+            # A key that is present with value None must not be stringified
+            # into the literal id "None".
+            raw_id = message.get("tool_call_id")
+            tool_call_id = "" if raw_id is None else str(raw_id).strip()
+            if tool_call_id:
+                sanitized["tool_call_id"] = tool_call_id
+
+        if "content" not in sanitized and "tool_calls" not in sanitized:
+            return {}
+        return sanitized
+
    @staticmethod
    def _extract_key_data_from_results(results: list[dict[str, Any]]) -> dict[str, Any]:
        """Extract structured data from tool executions for task history."""
@@ -270,6 +322,13 @@ class OrchestratorEngine:
        else:
            summary = f"User: {message_summary}"

+        outcomes = OrchestratorEngine._extract_outcomes(content)
+        focus_refs = OrchestratorEngine._extract_focus_refs(
+            message=message,
+            content=content,
+            key_data=key_data,
+            outcomes=outcomes,
+        )
        tools_used: list[str] = []
        for tool_exec in tool_executions:
            tool_name = getattr(tool_exec, "tool_name", "")
@@ -287,6 +346,8 @@ class OrchestratorEngine:
            "tools_used": tools_used[:8],
            "artifacts_count": artifacts_count,
            "summary": summary,
+            "outcomes": outcomes,
+            "focus_refs": focus_refs,
            "review": "",
        }

@@ -316,5 +377,143 @@ class OrchestratorEngine:
            " ".join(entry.get("facts", [])[:5]),
            " ".join(entry.get("tools_used", [])[:5]),
            str(entry.get("key_data", {})),
+            " ".join(entry.get("outcomes", [])[:3]),
+            str(entry.get("focus_refs", [])[:3]),
        ]
        return estimate_tokens("\n".join(p for p in parts if p))
+
+    @staticmethod
+    def _extract_outcomes(content: str) -> list[str]:
+        """Pick up to three lines of *content* that read like conclusions.
+
+        Lines are stripped of leading Markdown heading, quote and list
+        markers. Lines containing one of the Spanish recommendation or
+        conclusion keywords are preferred (each cut to 220 characters). When
+        fewer than two such lines exist, the first sufficiently long lines
+        (20+ characters, cut to 180) fill the result up to two entries.
+
+        Args:
+            content: Assistant response text, possibly Markdown.
+
+        Returns:
+            At most three distinct lines, in document order within each pass.
+        """
+        if not content:
+            return []
+
+        # Strip only real structural markers ("## ", "> ", "- ", "1. ").
+        # Inline emphasis such as "**name**" and leading numbers such as
+        # "2024 ..." are part of the sentence and must survive; the bold
+        # markers are later used to locate focus references.
+        marker_prefix = re.compile(r"^(?:\s*(?:#{1,6}\s+|>|[-*+]\s+|\d+[.)]\s+))+")
+
+        normalized_lines: list[str] = []
+        for raw_line in content.splitlines():
+            line = marker_prefix.sub("", raw_line.strip()).strip()
+            # Skip blanks and pure punctuation such as "---" rules.
+            if not line or not re.search(r"\w", line):
+                continue
+            normalized_lines.append(line)
+
+        keywords = (
+            "si tuviera que elegir",
+            "más flojo",
+            "mas flojo",
+            "más problem",
+            "mas problem",
+            "recomiendo",
+            "recomendación",
+            "recomendacion",
+            "prioridad",
+            "conclus",
+            "debería",
+            "deberia",
+            "peor",
+            "más débil",
+            "mas debil",
+        )
+
+        outcomes: list[str] = []
+        # Holds both full lines and emitted (truncated) texts, so a line is
+        # never emitted twice under two different truncation lengths.
+        seen: set[str] = set()
+
+        for line in normalized_lines:
+            lower = line.lower()
+            if not any(k in lower for k in keywords):
+                continue
+            trimmed = line[:220]
+            if line in seen or trimmed in seen:
+                continue
+            seen.update((line, trimmed))
+            outcomes.append(trimmed)
+            if len(outcomes) >= 3:
+                return outcomes
+
+        # Fallback: top up to two entries with the first substantial lines.
+        for line in normalized_lines:
+            if len(outcomes) >= 2:
+                break
+            if len(line) < 20:
+                continue
+            trimmed = line[:180]
+            if line in seen or trimmed in seen:
+                continue
+            seen.update((line, trimmed))
+            outcomes.append(trimmed)
+        return outcomes
+
+    @staticmethod
+    def _extract_focus_refs(
+        message: str,
+        content: str,
+        key_data: dict[str, Any],
+        outcomes: list[str],
+    ) -> list[dict[str, str]]:
+        """Build the list of things this turn was about.
+
+        Structural references (tables, records, sections, modules) come from
+        ``key_data`` with role ``"related"``. Bold ``**spans**`` found in the
+        outcome lines are added with role ``"primary_focus"``; if none are
+        found, two Spanish phrasing patterns are tried against the outcomes
+        plus the first 1200 characters of ``content``.
+
+        Args:
+            message: The user's message, used only for type inference.
+            content: The assistant's response text.
+            key_data: Mapping with optional ``tables``, ``sections`` and
+                ``modules`` entries.
+            outcomes: Outcome lines extracted from ``content``.
+
+        Returns:
+            At most eight dicts with ``type``, ``label``, ``id`` and ``role``
+            keys, in discovery order. When more than eight were collected,
+            ``primary_focus`` entries are kept in preference to ``related``.
+        """
+        max_refs = 8
+        refs: list[dict[str, str]] = []
+        seen: set[tuple[str, str, str]] = set()
+
+        def add_ref(ref_type: str, label: str, ref_id: str = "", role: str = "related") -> None:
+            label = label.strip()
+            ref_id = ref_id.strip()
+            if not label and not ref_id:
+                return
+            key = (ref_type, label, ref_id)
+            if key in seen:
+                return
+            seen.add(key)
+            refs.append({
+                "type": ref_type,
+                "label": label or ref_id,
+                "id": ref_id,
+                "role": role,
+            })
+
+        for table, nums in key_data.get("tables", {}).items():
+            add_ref("table", table, table, "related")
+            for num in nums[:3]:
+                add_ref("record", f"{table} record {num}", f"{table}:{num}", "related")
+
+        for section in key_data.get("sections", [])[:5]:
+            add_ref("section", section, section, "related")
+
+        for module in key_data.get("modules", [])[:5]:
+            add_ref("module", module, module, "related")
+
+        for line in outcomes:
+            for match in re.findall(r"\*\*([^*]{2,80})\*\*", line):
+                add_ref(
+                    OrchestratorEngine._infer_ref_type(match, line, message),
+                    match,
+                    "",
+                    "primary_focus",
+                )
+
+        if not any(ref["role"] == "primary_focus" for ref in refs):
+            source_text = "\n".join(outcomes + [content[:1200]])
+            for pattern in (
+                r"(?:elegir(?:\s+\*\*uno\*\*)?,?\s+dir[ií]a que\s+\*\*([^*]{2,80})\*\*)",
+                r"(?:el [^.\n]{0,40}m[aá]s flojo(?:[^.\n]{0,40})es\s+\*\*([^*]{2,80})\*\*)",
+            ):
+                match = re.search(pattern, source_text, flags=re.IGNORECASE)
+                if match:
+                    label = match.group(1).strip()
+                    add_ref(
+                        OrchestratorEngine._infer_ref_type(label, source_text, message),
+                        label,
+                        "",
+                        "primary_focus",
+                    )
+                    break
+
+        if len(refs) <= max_refs:
+            return refs
+
+        # Primary-focus refs are discovered last, so a plain slice would drop
+        # them whenever key_data alone fills the budget. Reserve room for
+        # them and fill the remainder with related refs, keeping order.
+        primary_count = sum(1 for ref in refs if ref["role"] == "primary_focus")
+        related_budget = max(max_refs - primary_count, 0)
+        kept: list[dict[str, str]] = []
+        for ref in refs:
+            if ref["role"] == "primary_focus":
+                kept.append(ref)
+            elif related_budget > 0:
+                kept.append(ref)
+                related_budget -= 1
+        return kept[:max_refs]
+
+    @staticmethod
+    def _infer_ref_type(label: str, context: str, message: str) -> str:
+        """Guess a reference type from keywords in the label and its context.
+
+        The label, context and message are joined and lower-cased, then
+        checked against keyword groups in a fixed priority order: module,
+        page, table, file, section. The first group with any substring hit
+        wins; ``"entity"`` is returned when nothing matches.
+        """
+        haystack = "{} {} {}".format(label, context, message).lower()
+
+        # Ordered from highest to lowest priority.
+        rules = (
+            ("module", ("módulo", "modulo")),
+            ("page", ("página", "pagina", "apartado")),
+            ("table", ("tabla",)),
+            ("file", ("archivo", "file", ".tpl", ".php", ".js", ".css")),
+            ("section", ("sección", "seccion", "section")),
+        )
+        for ref_type, needles in rules:
+            for needle in needles:
+                if needle in haystack:
+                    return ref_type
+        return "entity"