P0 contexto: ventana por modelo + recuperación ante overflow + self-heal del catálogo

Que las conversaciones largas no se rompan ni gasten de más:

Ventana de contexto por modelo (antes: budget estático 120k/200k para todos):
- cost.resolve_context_window: lee context_length del catálogo OpenRouter/DeepSeek en Redis, con fallback a litellm. config.budget_for_window deriva el budget de la ventana real (window - max_output - reserve). build_context lo aplica por turno (param model_id) en vez del fijo de settings.
- Self-heal del catálogo OpenRouter: el admin panel lo cachea con TTL 1h y solo lo repuebla al abrir su ventana de IA → en runtime caducaba y se perdían ventana y precio. Ahora cost._get_catalog lo refresca solo (fetch público, mismo shape, cooldown 5min, TTL 24h). Arregla también el coste (caía al fijo).

Recuperación ante overflow:
- adapters.base.ContextOverflowError; openai_adapter traduce el error de context-length del proveedor (init e iteración del stream).
- base.py: retry proactivo que recompacta hasta caber en la ventana ANTES de llamar al LLM; si ni así cabe → error accionable (no rompe la sesión).
- engine.py: mensaje user-facing claro (modelo + ventana).

Tests: ventana/budget, self-heal (mockeado), overflow, y sesión REAL de Redis. 106 verdes.

evals/: harness para evaluar al agente acai-code (driver + README + resultados). Comparativa kimi vs deepseek vs glm (deepseek-v4-pro high = mejor calidad/precio).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-20 13:48:19 +01:00
parent 9d11a59fb8
commit 651d61b096
15 changed files with 997 additions and 36 deletions
--- a/tests/test_context_budget.py
+++ b/tests/test_context_budget.py
@@ -65,6 +65,128 @@ class TestSettingsBudget:
        assert cfg.effective_context_budget == 172_000
        assert cfg.effective_compaction_threshold == 137_600

+    def test_budget_for_window_small_and_large(self):  # budget_for_window for a small, a large and an invalid window
+        cfg = Settings(
+            context_max_tokens=0,
+            model_max_output_tokens=4_096,
+            context_reserve_ratio=0.10,
+            _env_file=None,
+        )
+        # 32k: window - max_output - 10% reserve
+        assert cfg.budget_for_window(32_000) == 32_000 - 4_096 - 3_200
+        # 1M: much larger budget (intent: avoid compacting unnecessarily)
+        assert cfg.budget_for_window(1_000_000) == 1_000_000 - 4_096 - 100_000
+        # invalid window → falls back to the static budget
+        assert cfg.budget_for_window(0) == cfg.effective_context_budget
+
+    def test_compaction_threshold_for_uses_ratio(self):  # threshold for a given budget comes from the ratio
+        cfg = Settings(
+            compaction_threshold_tokens=0,  # presumably 0 means "no fixed threshold, use the ratio" — TODO confirm
+            compaction_threshold_ratio=0.80,
+            _env_file=None,
+        )
+        assert cfg.compaction_threshold_for(100_000) == 80_000  # 0.80 * 100_000
+
+
+class TestContextWindowResolution:  # tests for cost.resolve_context_window against a fake Redis
+    def test_resolve_window_from_catalog(self, monkeypatch):  # window is read from the cached catalog entry
+        import json
+        from src.orchestrator import cost
+
+        cost._window_cache.clear()  # start from an empty module-level cache
+
+        class _FakeRedis:  # returns the same two-model catalog for any key
+            async def get(self, key):
+                return json.dumps([
+                    {"id": "kimi-k2.7-code", "context_length": 256_000},
+                    {"id": "otro", "context_length": 32_000},
+                ])
+
+        monkeypatch.setattr(cost, "_get_cfg_redis", lambda: _FakeRedis())
+        w = asyncio.run(cost.resolve_context_window("openrouter/kimi-k2.7-code"))
+        assert w == 256_000
+        # second call is meant to hit the cache; NOTE(review): the fake never changes here, so this does not prove it
+        assert asyncio.run(cost.resolve_context_window("openrouter/kimi-k2.7-code")) == 256_000
+
+    def test_resolve_window_miss_is_none_or_int(self, monkeypatch):  # unknown model with an empty catalog
+        from src.orchestrator import cost
+
+        cost._window_cache.clear()  # start from an empty module-level cache
+
+        class _FakeRedis:  # every key is missing; NOTE(review): no `set` method is defined on this fake
+            async def get(self, key):
+                return None
+
+        monkeypatch.setattr(cost, "_get_cfg_redis", lambda: _FakeRedis())
+        w = asyncio.run(cost.resolve_context_window("openrouter/modelo-inexistente-xyz"))
+        assert w is None or isinstance(w, int)  # accepts None or any int (presumably a fallback value — TODO confirm)
+
+    def test_resolve_window_ignores_non_litellm_ids(self):  # unprefixed id and None both resolve to None
+        from src.orchestrator import cost
+
+        cost._window_cache.clear()  # start from an empty module-level cache
+        assert asyncio.run(cost.resolve_context_window("sin-prefijo")) is None
+        assert asyncio.run(cost.resolve_context_window(None)) is None
+
+    def test_resolve_window_self_heals_when_catalog_missing(self, monkeypatch):
+        """If the OpenRouter catalog expired, it is repopulated at runtime (self-heal)."""
+        from src.orchestrator import cost
+
+        cost._window_cache.clear()
+        cost._or_last_refresh[0] = 0.0  # disable the cooldown for this test; NOTE(review): not restored by this test
+        store = {}  # in-memory stand-in for Redis; starts empty, so the catalog key is missing
+
+        class _FakeRedis:  # get/set backed by `store`; the `ex` expiry argument is accepted and ignored
+            async def get(self, key):
+                return store.get(key)
+
+            async def set(self, key, val, ex=None):
+                store[key] = val
+
+        monkeypatch.setattr(cost, "_get_cfg_redis", lambda: _FakeRedis())
+        monkeypatch.setattr(  # replace the catalog fetch with a canned one-model result
+            cost, "_fetch_openrouter_catalog_sync",
+            lambda: [{"id": "moonshotai/kimi-x", "context_length": 262_144,
+                      "price_in_1m": 0.6, "price_out_1m": 3.0}],
+        )
+
+        w = asyncio.run(cost.resolve_context_window("openrouter/moonshotai/kimi-x"))
+        assert w == 262_144
+        # the catalog was written back to the cache for future reads
+        assert "acai:config:ai:models_cache:openrouter" in store
+
+
+class TestModelAwareBudget:  # tests for how build_context picks the budget it reports
+    def test_build_context_uses_model_window_budget(self, monkeypatch):  # budget follows the resolved model window
+        from src.orchestrator import cost
+
+        async def _fake_window(model_id):  # reports a 40k window for any model id
+            return 40_000
+
+        monkeypatch.setattr(cost, "resolve_context_window", _fake_window)
+        session = SessionState(immutable_rules=["No romper"])
+        session.begin_task("hola")
+        agent = AgentProfile(role="acai", name="Acai", system_prompt="Haz el trabajo.")
+
+        pkg = asyncio.run(
+            ContextEngine().build_context(
+                session=session, agent=agent, model_id="openrouter/m"
+            )
+        )
+        assert pkg.budget_tokens == settings.budget_for_window(40_000)  # same value the settings derive for 40k
+
+    def test_budget_override_wins(self):  # an explicit budget_override is reported as the budget
+        session = SessionState(immutable_rules=["No romper"])
+        session.begin_task("hola")
+        agent = AgentProfile(role="acai", name="Acai", system_prompt="Haz el trabajo.")
+
+        pkg = asyncio.run(
+            ContextEngine().build_context(
+                session=session, agent=agent, budget_override=12_345
+            )
+        )
+        assert pkg.budget_tokens == 12_345  # the override is returned unchanged
+

 class TestContextEngine:
    def test_build_context_keeps_task_history_and_current_task(self):