compactor final

2026-04-09 21:41:11 +00:00
parent 237dc00379
commit 19efed84b7
1 changed files with 221 additions and 1 deletions
--- a/src/context/compactor.py
+++ b/src/context/compactor.py
@@ -327,8 +327,17 @@ class ContextCompactor:
        prefix: str,
        max_chars: int,
    ) -> str:
+        """Resume el contenido de un mensaje segun su tipo detectado.
+
+        Dispatcher que detecta JSON / tabla / stack-trace / texto plano y
+        aplica la estrategia de sumario mas apropiada. Si un handler
+        especializado no puede procesar el contenido (devuelve None o lanza),
+        cae al handler de texto plano (first + last).
+        """
        stripped = content.strip()
        compacted = self._compact_text(content)
+
+        # Caso trivial: el contenido ya cabe, solo normalizamos whitespace
        if len(compacted) <= max_chars:
            if compacted != stripped:
                summary = f"{prefix} {compacted}".strip()
@@ -337,7 +346,218 @@ class ContextCompactor:
                return summary
            return compacted

-        lines = [l.strip() for l in compacted.splitlines() if l.strip()]
+        # Detectar tipo de contenido y despachar al handler apropiado
+        ctype = self._detect_content_type(compacted)
+        try:
+            if ctype == "json":
+                result = self._summarize_json(compacted, prefix, max_chars)
+                if result is not None:
+                    return result
+            elif ctype == "table":
+                result = self._summarize_table(compacted, prefix, max_chars)
+                if result is not None:
+                    return result
+            elif ctype == "trace":
+                result = self._summarize_trace(compacted, prefix, max_chars)
+                if result is not None:
+                    return result
+        except Exception as e:
+            logger.debug("typed summarizer failed (%s): %s", ctype, e)
+
+        # Fallback: texto plano (first + last)
+        return self._summarize_plain(compacted, prefix, max_chars)
+
+    # ------------------------------------------------------------------
+    # Handlers especializados por tipo de contenido
+    # ------------------------------------------------------------------
+
+    def _detect_content_type(self, text: str) -> str:
+        """Classify message content so the caller can pick a summarizer.
+
+        Checks run in a fixed order and the first hit wins:
+
+        1. ``"json"``  - text starts with ``{`` or ``[`` AND ``json.loads``
+           accepts it.
+        2. ``"trace"`` - the lowercased text contains one of the error /
+           stack-trace marker substrings.
+        3. ``"table"`` - one of the first 20 lines is a markdown separator
+           row (``|---|---|`` style).
+        4. ``"plain"`` - anything else, including empty / blank input.
+
+        Args:
+            text: The message content to classify.
+
+        Returns:
+            One of ``"json"``, ``"table"``, ``"trace"`` or ``"plain"``.
+        """
+        body = text.strip()
+        if not body:
+            return "plain"
+
+        # JSON is only reported when the payload really parses.
+        if body.startswith(("{", "[")):
+            try:
+                json.loads(body)
+            except (json.JSONDecodeError, ValueError):
+                pass
+            else:
+                return "json"
+
+        # Stack trace / error output: substring match on typical markers.
+        lowered = body.lower()
+        for marker in ("traceback", "error:", "exception", "\n    at "):
+            if marker in lowered:
+                return "trace"
+
+        # Markdown table: a separator row is itself a pipe-delimited line, so
+        # finding one within the first 20 lines is sufficient.
+        for raw_line in body.splitlines()[:20]:
+            row = raw_line.strip()
+            is_pipe_row = (
+                row.startswith("|") and row.endswith("|") and row.count("|") >= 3
+            )
+            if is_pipe_row and "---" in row and re.match(r"^\|[\s\|:\-]+\|$", row):
+                return "table"
+
+        return "plain"
+
+    def _summarize_json(self, raw: str, prefix: str, max_chars: int) -> str | None:
+        """Summarize a JSON document by pruning long lists and deep nesting.
+
+        The payload is parsed, pruned with ``_truncate_json_value`` (at most
+        5 items per list, 4 levels deep), re-serialized compactly and
+        prefixed with a short note describing what was dropped.
+
+        Args:
+            raw: Text expected to contain a JSON document.
+            prefix: Label placed at the start of the summary.
+            max_chars: Length budget; longer summaries are cut and end in "…".
+
+        Returns:
+            The summary string, or ``None`` when ``raw`` does not parse or
+            the pruned value cannot be serialized.
+        """
+        try:
+            parsed = json.loads(raw)
+        except (json.JSONDecodeError, ValueError):
+            return None
+
+        pruned, counters = self._truncate_json_value(
+            parsed, list_limit=5, depth_limit=4
+        )
+        try:
+            encoded = json.dumps(pruned, ensure_ascii=False, separators=(",", ":"))
+        except (TypeError, ValueError):
+            return None
+
+        # Only counters that are non-zero are mentioned in the note.
+        labels = (
+            ("lists_truncated", "listas truncadas"),
+            ("items_dropped", "items omitidos"),
+        )
+        notes = [f"{counters[key]} {label}" for key, label in labels if counters.get(key)]
+        detail = ", ".join(notes) if notes else "truncado"
+
+        text = f"{prefix} JSON ({detail}): {encoded}"
+        if len(text) <= max_chars:
+            return text
+        # Hard cut, reserving one character for the ellipsis.
+        return text[: max_chars - 1].rstrip() + "…"
+
+    def _truncate_json_value(
+        self,
+        value: Any,
+        list_limit: int,
+        depth_limit: int,
+        _depth: int = 0,
+    ) -> tuple[Any, dict[str, int]]:
+        """Recursively prune a decoded JSON value.
+
+        Lists longer than ``list_limit`` keep their first ``list_limit``
+        items followed by a marker string with the number omitted. Any dict
+        or list found at ``depth_limit`` (or deeper) is replaced by the
+        placeholder ``"<…>"``. Scalars are returned unchanged.
+
+        Args:
+            value: The decoded JSON value to prune.
+            list_limit: Maximum number of original items kept per list.
+            depth_limit: Nesting level at which containers are collapsed.
+            _depth: Current nesting level; used by the recursion only.
+
+        Returns:
+            ``(pruned_value, stats)`` where ``stats`` has the keys
+            ``"lists_truncated"`` and ``"items_dropped"``, aggregated over
+            the whole subtree.
+        """
+        counters = {"lists_truncated": 0, "items_dropped": 0}
+        is_container = isinstance(value, (dict, list))
+
+        if _depth >= depth_limit:
+            return ("<…>" if is_container else value), counters
+        if not is_container:
+            return value, counters
+
+        def descend(child: Any) -> Any:
+            # Prune one child and fold its counters into this level's totals.
+            pruned, child_counters = self._truncate_json_value(
+                child, list_limit, depth_limit, _depth + 1
+            )
+            for name in counters:
+                counters[name] += child_counters.get(name, 0)
+            return pruned
+
+        if isinstance(value, list):
+            overflow = len(value) - list_limit
+            if overflow > 0:
+                counters["lists_truncated"] += 1
+                counters["items_dropped"] += overflow
+                value = value[:list_limit] + [f"<…+{overflow} más>"]
+            return [descend(item) for item in value], counters
+
+        return {key: descend(child) for key, child in value.items()}, counters
+
+    def _summarize_table(self, raw: str, prefix: str, max_chars: int) -> str | None:
+        """Summarize a markdown table as header + separator + first 5 rows.
+
+        Blank lines are ignored. The header is the first line that starts
+        with ``|`` and contains another ``|``; the separator is the first
+        later such line made only of pipes, colons, dashes and whitespace
+        and containing ``---``. Every line after the separator that starts
+        with ``|`` counts as a data row.
+
+        Args:
+            raw: Text expected to contain a markdown table.
+            prefix: Label placed at the start of the summary.
+            max_chars: Length budget; longer summaries are cut and end in "…".
+
+        Returns:
+            The summary string, or ``None`` when there are fewer than three
+            non-blank lines or no header/separator pair is found.
+        """
+        rows = [ln for ln in raw.splitlines() if ln.strip()]
+        if len(rows) < 3:
+            return None  # too few lines to be a table
+
+        header_at = None
+        divider_at = None
+        for pos, row in enumerate(rows):
+            cells = row.strip()
+            if not (cells.startswith("|") and "|" in cells[1:]):
+                continue
+            if header_at is None:
+                # The first pipe line is taken as the header.
+                header_at = pos
+            elif "---" in cells and re.match(r"^\|[\s\|:\-]+\|$", cells):
+                divider_at = pos
+                break
+        if header_at is None or divider_at is None:
+            return None
+
+        body_rows = [r for r in rows[divider_at + 1 :] if r.strip().startswith("|")]
+        max_shown = 5
+        row_count = len(body_rows)
+        shown = min(max_shown, row_count)
+
+        out = [
+            f"{prefix} Tabla ({row_count} filas, mostrando {shown}):",
+            rows[header_at],
+            rows[divider_at],
+            *body_rows[:max_shown],
+        ]
+        if row_count > max_shown:
+            out.append(f"| … {row_count - max_shown} filas más … |")
+
+        text = "\n".join(out)
+        if len(text) <= max_chars:
+            return text
+        # Hard cut, reserving one character for the ellipsis.
+        return text[: max_chars - 1].rstrip() + "…"
+
+    def _summarize_trace(self, raw: str, prefix: str, max_chars: int) -> str | None:
+        """Summarize a stack trace as its error message plus the last frames.
+
+        The headline is the first line containing ``error:`` or
+        ``exception:``. Only when no such line exists does it fall back to a
+        line containing ``traceback``, and finally to the first line. This
+        order matters for Python tracebacks, whose header line
+        ("Traceback (most recent call last):") carries no information while
+        the real message is printed last.
+
+        The headline line is removed from the tail so it is never printed
+        twice, and the hidden-line count reflects exactly the lines that are
+        not shown.
+
+        Args:
+            raw: Text expected to contain an error / stack trace.
+            prefix: Label placed at the start of the summary.
+            max_chars: Length budget; longer summaries are cut and end in "…".
+
+        Returns:
+            The summary string, or ``None`` when ``raw`` has no non-blank
+            lines.
+        """
+        lines = [ln for ln in raw.splitlines() if ln.strip()]
+        if not lines:
+            return None
+
+        lowered = [ln.lower() for ln in lines]
+        # Prefer a line that carries an actual message over a bare header.
+        error_idx = next(
+            (
+                i
+                for i, low in enumerate(lowered)
+                if "error:" in low or "exception:" in low
+            ),
+            None,
+        )
+        if error_idx is None:
+            error_idx = next(
+                (i for i, low in enumerate(lowered) if "traceback" in low), 0
+            )
+        error_line = lines[error_idx].strip()
+
+        # Everything except the headline; the tail is taken from this so the
+        # headline is not repeated and `hidden` counts only unseen lines.
+        context = lines[:error_idx] + lines[error_idx + 1 :]
+        tail_count = 5
+        tail_lines = [ln.strip() for ln in context[-tail_count:]]
+        hidden = len(context) - len(tail_lines)
+
+        # The headline is capped at 200 characters.
+        parts = [f"{prefix} Error: {error_line[:200]}"]
+        if hidden > 0:
+            parts.append(f"… {hidden} frames ocultos …")
+        parts.extend(tail_lines)
+
+        summary = "\n".join(parts)
+        if len(summary) > max_chars:
+            # Hard cut, reserving one character for the ellipsis.
+            summary = summary[: max_chars - 1].rstrip() + "…"
+        return summary
+
+    def _summarize_plain(self, raw: str, prefix: str, max_chars: int) -> str:
+        """Fallback para texto plano: primera linea + ultima linea."""
+        lines = [l.strip() for l in raw.splitlines() if l.strip()]
        if not lines:
            return prefix
        if len(lines) == 1: