diff --git a/mcp-server/tools/media/analyze_image.js b/mcp-server/tools/media/analyze_image.js index 9982e06..9386dcd 100644 --- a/mcp-server/tools/media/analyze_image.js +++ b/mcp-server/tools/media/analyze_image.js @@ -57,11 +57,16 @@ function resolveChatPreviewPath(imageUrl) { const fileParam = qs.get("file"); if (!fileParam) return null; - // Sanitizar: evitar traversal — solo nombre base permitido - const safeName = path.basename(fileParam); - if (!safeName || safeName === "." || safeName === "..") return null; + // Sanitizar contra traversal PRESERVANDO el subdirectorio de usuario + // (el file= es "/"; basename lo perdía → not found). + if (fileParam.includes("..") || fileParam.startsWith("/") || fileParam.includes("\\")) return null; - return path.join(CHAT_UPLOADS_DIR, safeName); + const fullPath = path.join(CHAT_UPLOADS_DIR, fileParam); + // Asegurar que queda dentro de CHAT_UPLOADS_DIR. + const base = path.resolve(CHAT_UPLOADS_DIR); + if (!path.resolve(fullPath).startsWith(base + path.sep)) return null; + + return fullPath; } /** diff --git a/src/adapters/base.py b/src/adapters/base.py index ac62f50..7062444 100644 --- a/src/adapters/base.py +++ b/src/adapters/base.py @@ -57,6 +57,10 @@ class ModelConfig: max_tokens: int = 4096 temperature: float = 0.3 stop_sequences: list[str] = field(default_factory=list) + # Nivel de razonamiento (minimal|low|medium|high). Vacío = sin razonamiento + # explícito. LiteLLM lo traduce por proveedor; modelos que no lo soportan lo + # ignoran (litellm.drop_params=True). + reasoning_effort: str = "" extra: dict[str, Any] = field(default_factory=dict) diff --git a/src/adapters/litellm_adapter.py b/src/adapters/litellm_adapter.py index 51eaa67..a985986 100644 --- a/src/adapters/litellm_adapter.py +++ b/src/adapters/litellm_adapter.py @@ -59,9 +59,25 @@ class LiteLLMAdapter(OpenAIAdapter): async def _acreate(self, kwargs: dict[str, Any]): kwargs = dict(kwargs) - kwargs["model"] = self._litellm_model - if self._api_key: - kwargs["api_key"] = self._api_key - if self._api_base: - kwargs["api_base"] = self._api_base + # Respetar el model_id por request (resuelto dinámicamente en + # send_message). Solo se honra si trae prefijo de proveedor + # ("deepseek/...", "openrouter/..."); cualquier otro valor (default + # no-litellm, vacío) cae al modelo por defecto del adapter — preserva el + # comportamiento previo para llamadas internas (planner, completions). + model = kwargs.get("model") or "" + if "/" not in model: + model = self._litellm_model + kwargs["model"] = model + + if model.startswith("openrouter/"): + # OpenRouter: LiteLLM enruta con OPENROUTER_API_KEY del entorno y su + # base propia. NO forzar api_key/api_base del proxy DeepSeek — lo + # sobreescribirían y romperían el routing. + kwargs.pop("api_key", None) + kwargs.pop("api_base", None) + else: + if self._api_key: + kwargs["api_key"] = self._api_key + if self._api_base: + kwargs["api_base"] = self._api_base return await litellm.acompletion(**kwargs) diff --git a/src/adapters/openai_adapter.py b/src/adapters/openai_adapter.py index bac7896..24fd7cf 100644 --- a/src/adapters/openai_adapter.py +++ b/src/adapters/openai_adapter.py @@ -77,6 +77,8 @@ class OpenAIAdapter(ModelAdapter): "stream": True, "stream_options": {"include_usage": True}, } + if getattr(config, "reasoning_effort", ""): + kwargs["reasoning_effort"] = config.reasoning_effort if tools: kwargs["tools"] = self._format_tools(tools) @@ -266,6 +268,8 @@ class OpenAIAdapter(ModelAdapter): "temperature": config.temperature, "messages": self._to_openai_messages(messages), } + if getattr(config, "reasoning_effort", ""): + kwargs["reasoning_effort"] = config.reasoning_effort if tools: kwargs["tools"] = self._format_tools(tools) # Fuerza al modelo a usar un tool concreto para garantizar JSON por schema @@ -428,8 +432,9 @@ class OpenAIAdapter(ModelAdapter): if tool_calls: m["tool_calls"] = tool_calls out.append(m) - else: # user (puede traer tool_result blocks) + else: # user (puede traer tool_result blocks, texto e imágenes) text_parts = [] + image_blocks: list[dict[str, Any]] = [] for b in content: if not isinstance(b, dict): continue @@ -442,7 +447,18 @@ class OpenAIAdapter(ModelAdapter): }) elif t == "text": text_parts.append(b.get("text", "")) - if text_parts: + elif t == "image_url": + # Visión nativa: preservar el bloque en formato multimodal OpenAI. + image_blocks.append({"type": "image_url", "image_url": b.get("image_url") or {}}) + if image_blocks: + # Content como lista de bloques (texto + imágenes). + parts: list[dict[str, Any]] = [] + joined = "\n".join(p for p in text_parts if p) + if joined: + parts.append({"type": "text", "text": joined}) + parts.extend(image_blocks) + out.append({"role": "user", "content": parts}) + elif text_parts: out.append({"role": "user", "content": "\n".join(text_parts)}) # Guard defensivo: el compactor ya garantiza el invariante tool_use ↔ # tool_result (`_enforce_tool_pairing`), pero si algo se escapa el diff --git a/src/api/routes.py b/src/api/routes.py index 6e759b0..064777e 100644 --- a/src/api/routes.py +++ b/src/api/routes.py @@ -46,6 +46,10 @@ class SendMessageRequest(BaseModel): message: str stream: bool = False agent_id: str | None = None + # Imágenes para visión nativa: bloques listos para el modelo + # {"type":"image_url","image_url":{"url":"data:;base64,..."}}. Solo se + # envían cuando el modelo activo es multimodal (lo decide acai-app). + attachments: list[dict[str, Any]] | None = None # 'off' (default): la tool acai_plan no se expone al modelo, ejecuta directo. # 'force': system prompt obliga a llamar acai_plan antes de ejecutar. # 'auto' (legacy): se trata como 'off'. UI: toggle en ChatPanel. @@ -335,6 +339,25 @@ async def send_message( if not agent_profile: agent_profile = agent_reg.get(agent_reg.default_agent_id) + # Resolución dinámica del modelo (Fase 2): override por-usuario (metadata de + # la sesión) → default global (Redis acai:config:ai:*). Si resuelve, se + # inyecta en una COPIA del profile para no mutar el del registry (singleton). + if agent_profile is not None: + from ..orchestrator.model_resolver import resolve_session_model + resolved = await resolve_session_model(session) + update = {} + if resolved.get("model_id"): + update["model_id"] = resolved["model_id"] + if resolved.get("reasoning_effort"): + update["reasoning_effort"] = resolved["reasoning_effort"] + if update: + agent_profile = agent_profile.model_copy(update=update) + logger.info( + "Session %s: modelo resuelto -> %s (reasoning=%s)", + session_id, update.get("model_id", "(default)"), + update.get("reasoning_effort", "off"), + ) + # Plan mode controlado por el usuario desde el toggle del ChatPanel. # 'auto' (default): heuristica del modelo trivial-vs-complex. # 'force': el agente DEBE llamar acai_plan como primera accion. @@ -359,7 +382,7 @@ async def send_message( if body.stream: task = asyncio.create_task( - _execute_and_persist(orchestrator, storage, session, body.message) + _execute_and_persist(orchestrator, storage, session, body.message, body.attachments) ) _running_executions[session_id] = task # Auto-limpieza del registro al terminar (solo si seguimos siendo la @@ -377,11 +400,11 @@ async def send_message( "stream_url": f"/sessions/{session_id}/stream", } - result = await _execute_and_persist(orchestrator, storage, session, body.message) + result = await _execute_and_persist(orchestrator, storage, session, body.message, body.attachments) return result -async def _execute_and_persist(orchestrator, storage, session, message) -> dict[str, Any]: +async def _execute_and_persist(orchestrator, storage, session, message, attachments=None) -> dict[str, Any]: # Acquire exclusive lock — prevents concurrent execution on same session async with storage.session_lock(session.session_id) as acquired: if not acquired: @@ -392,7 +415,7 @@ async def _execute_and_persist(orchestrator, storage, session, message) -> dict[ } try: - result = await orchestrator.process_message(session, message) + result = await orchestrator.process_message(session, message, attachments) return result except asyncio.CancelledError: # Ejecución abortada por el usuario (stop) o preemptada por un @@ -401,6 +424,24 @@ async def _execute_and_persist(orchestrator, storage, session, message) -> dict[ # que el `await task` de la cancelación complete. El `finally` # persiste el estado y el `session_lock` se libera al salir. logger.info("Execution cancelled for session %s", session.session_id) + # Persistir el turno del usuario aunque se cancele: si no, un + # "vuelve a intentarlo" posterior se queda sin contexto de lo pedido. + # Guardamos su mensaje (+ imagen) y un marcador de interrupción para + # mantener la alternancia user/assistant. + try: + task = session.current_task + if task and (task.objective or "").strip(): + session.recent_messages = orchestrator._append_recent_messages( + session.recent_messages, + message=task.objective, + conversation=[{ + "role": "assistant", + "content": "[Respuesta interrumpida por el usuario antes de completarse]", + }], + image_attachments=task.image_attachments, + ) + except Exception: + logger.exception("No se pudo persistir el turno cancelado") session.status = SessionStatus.ACTIVE session.current_task = None raise diff --git a/src/context/compactor.py b/src/context/compactor.py index fc5e0ee..406ea76 100644 --- a/src/context/compactor.py +++ b/src/context/compactor.py @@ -890,6 +890,10 @@ class ContextCompactor: elif btype == "tool_result": tc = block.get("content", "") tokens += estimate_tokens(tc if isinstance(tc, str) else str(tc)) + elif btype == "image_url": + # Una imagen ~1500 tokens. NO medir el base64 como texto, que + # lo contaría como ~30k y reventaría presupuestos/trim. + tokens += 1500 else: tokens += estimate_tokens(str(block)) else: diff --git a/src/context/engine.py b/src/context/engine.py index f3b9001..f8d23ca 100644 --- a/src/context/engine.py +++ b/src/context/engine.py @@ -924,8 +924,19 @@ class ContextEngine: messages.append({"role": "user", "content": "\n".join(history_lines)}) messages.append({"role": "assistant", "content": "Entendido, tengo el contexto del historial. ¿En qué puedo ayudarte ahora?"}) - # Current user message - messages.append({"role": "user", "content": user_content}) + # Current user message — con imágenes adjuntas (visión nativa) si las hay. + # En ese caso el content pasa a ser lista de bloques [texto, image_url...]. + image_attachments = [] + if session.current_task and getattr(session.current_task, "image_attachments", None): + image_attachments = [ + b for b in session.current_task.image_attachments if isinstance(b, dict) + ] + if image_attachments: + content_blocks = [{"type": "text", "text": user_content}] + content_blocks.extend(image_attachments) + messages.append({"role": "user", "content": content_blocks}) + else: + messages.append({"role": "user", "content": user_content}) # Append real conversation (assistant messages + tool results from current step) if conversation: @@ -1037,6 +1048,10 @@ class ContextEngine: elif btype == "tool_result": tc = block.get("content", "") total += estimate_tokens(tc if isinstance(tc, str) else str(tc)) + elif btype == "image_url": + # Heurística conservadora: una imagen ~1500 tokens (no se + # cuenta el base64 como texto, que infla muchísimo). + total += 1500 else: total += estimate_tokens(str(block)) return total diff --git a/src/models/agent.py b/src/models/agent.py index 1d14468..810e34b 100644 --- a/src/models/agent.py +++ b/src/models/agent.py @@ -20,6 +20,7 @@ class AgentProfile(BaseModel): allowed_tools: list[str] = Field(default_factory=list) model_id: str | None = None planner_model_id: str | None = None # override del modelo solo para el sub-loop del planner + reasoning_effort: str | None = None # nivel de razonamiento (minimal|low|medium|high) resuelto por sesión temperature: float | None = None max_tokens: int | None = None context_sections: list[str] = Field( diff --git a/src/models/session.py b/src/models/session.py index e91fc9c..61bf858 100644 --- a/src/models/session.py +++ b/src/models/session.py @@ -46,6 +46,9 @@ class TaskState(BaseModel): task_id: str = Field(default_factory=lambda: uuid.uuid4().hex[:12]) objective: str + # Imágenes adjuntas a la petición actual (visión nativa). Cada item es un + # bloque listo para el modelo: {"type":"image_url","image_url":{"url":"data:..."}}. + image_attachments: list[dict[str, Any]] = Field(default_factory=list) status: TaskStatus = TaskStatus.PENDING plan: list[TaskStep] = Field(default_factory=list) current_step_index: int = 0 @@ -94,8 +97,8 @@ class SessionState(BaseModel): updated_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc)) metadata: dict[str, Any] = Field(default_factory=dict) - def begin_task(self, objective: str) -> TaskState: - task = TaskState(objective=objective) + def begin_task(self, objective: str, image_attachments: list[dict[str, Any]] | None = None) -> TaskState: + task = TaskState(objective=objective, image_attachments=image_attachments or []) self.current_task = task self.status = SessionStatus.EXECUTING self.turn_count += 1 diff --git a/src/orchestrator/agents/base.py b/src/orchestrator/agents/base.py index 533dd7e..2e557b2 100644 --- a/src/orchestrator/agents/base.py +++ b/src/orchestrator/agents/base.py @@ -93,6 +93,7 @@ class BaseAgent: model_id=self.profile.model_id or "", max_tokens=self.profile.max_tokens or 4096, temperature=self.profile.temperature or 0.3, + reasoning_effort=self.profile.reasoning_effort or "", ) # Snapshot del numero de tool_executions ya acumulados ANTES del diff --git a/src/orchestrator/cost.py b/src/orchestrator/cost.py new file mode 100644 index 0000000..196ca23 --- /dev/null +++ b/src/orchestrator/cost.py @@ -0,0 +1,121 @@ +"""Cálculo de coste por modelo (Fase 2). + +Prioridad de fuentes de precio (para que el coste registrado en +`consumo_acaicode` coincida con lo que muestra el Forge Admin Panel): + 1. Catálogo OpenRouter cacheado por el panel en Redis db 0 + (`acai:config:ai:models_cache:openrouter` → price_in_1m / price_out_1m). + 2. Price map de LiteLLM (conoce muchos modelos deepseek/, anthropic/, etc.). + 3. Coste fijo de `settings` (comportamiento previo). +""" + +from __future__ import annotations + +import json +import logging + +import redis.asyncio as redis + +from ..config import settings + +logger = logging.getLogger(__name__) + +# Caches de catálogo que publica el Forge Admin Panel en Redis db 0, por proveedor. +# El id se guarda SIN el prefijo de proveedor de litellm (p.ej. +# "moonshotai/kimi-k2.7-code", "deepseek-v4-pro"). +_CACHE_KEYS = { + "openrouter": "acai:config:ai:models_cache:openrouter", + "deepseek": "acai:config:ai:models_cache:deepseek", +} +_CONFIG_DB = 0 +_cfg_redis: "redis.Redis | None" = None + + +def _get_cfg_redis() -> "redis.Redis": + global _cfg_redis + if _cfg_redis is None: + _cfg_redis = redis.Redis( + host=settings.redis_host, + port=settings.redis_port, + db=_CONFIG_DB, + password=settings.redis_password or None, + decode_responses=True, + ) + return _cfg_redis + + +async def _catalog_price_per_1m(model_id: str | None): + """(price_in_1m, price_out_1m) del catálogo del panel, o None. + + model_id viene en formato litellm ("/"). Separamos el prefijo + de proveedor para elegir el cache y buscar por el id catalogado. + """ + if not model_id or "/" not in model_id: + return None + provider, _, raw_id = model_id.partition("/") + cache_key = _CACHE_KEYS.get(provider) + if not cache_key: + return None + try: + cached = await _get_cfg_redis().get(cache_key) + if not cached: + return None + models = json.loads(cached) + except Exception as e: # pragma: no cover - defensivo + logger.warning("catálogo %s no disponible para coste: %s", provider, e) + return None + for m in models: + if m.get("id") == raw_id: + pin = m.get("price_in_1m") + pout = m.get("price_out_1m") + if pin is not None and pout is not None: + return (float(pin), float(pout)) + return None + + +async def compute_cost(model_id: str | None, input_tokens: int, output_tokens: int) -> dict: + """Coste de una ejecución para `model_id` y los tokens dados. + + Devuelve {"cost_usd", "input_cost_1m", "output_cost_1m"} — el coste total y + las tarifas por 1M tokens REALMENTE aplicadas (se almacenan en + `consumo_acaicode.input_cost_1M` / `output_cost_1M`). + """ + input_tokens = int(input_tokens or 0) + output_tokens = int(output_tokens or 0) + + def _result(in_1m: float, out_1m: float) -> dict: + return { + "cost_usd": (input_tokens / 1_000_000) * in_1m + (output_tokens / 1_000_000) * out_1m, + "input_cost_1m": round(in_1m, 6), + "output_cost_1m": round(out_1m, 6), + } + + # 1. Precio del catálogo OpenRouter (fuente que muestra el admin). + prices = await _catalog_price_per_1m(model_id) + if prices: + return _result(prices[0], prices[1]) + + # 2. Price map de LiteLLM (deepseek/, anthropic/, etc.). + if model_id and "/" in model_id: + try: + import litellm + + prompt_cost, completion_cost = litellm.cost_per_token( + model=model_id, + prompt_tokens=input_tokens, + completion_tokens=output_tokens, + ) + total = (prompt_cost or 0.0) + (completion_cost or 0.0) + if total > 0: + # Derivar tarifa por 1M a partir del coste por-token de litellm. + in_1m = (prompt_cost / input_tokens) * 1_000_000 if input_tokens else 0.0 + out_1m = (completion_cost / output_tokens) * 1_000_000 if output_tokens else 0.0 + return { + "cost_usd": total, + "input_cost_1m": round(in_1m, 6), + "output_cost_1m": round(out_1m, 6), + } + except Exception as e: + logger.warning("cost_per_token(%s) falló, uso coste fijo: %s", model_id, e) + + # 3. Coste fijo configurado. + return _result(settings.cost_per_1m_input, settings.cost_per_1m_output) diff --git a/src/orchestrator/engine.py b/src/orchestrator/engine.py index e073f25..0a52ff3 100644 --- a/src/orchestrator/engine.py +++ b/src/orchestrator/engine.py @@ -52,11 +52,16 @@ class OrchestratorEngine: self, session: SessionState, message: str, + image_attachments: list[dict[str, Any]] | None = None, ) -> dict[str, Any]: - """Process a user message. Single agent execution with timeout.""" + """Process a user message. Single agent execution with timeout. + + `image_attachments`: bloques image_url (visión nativa) para el turno del + usuario, cuando el modelo activo es multimodal. + """ try: return await asyncio.wait_for( - self._run(session, message), + self._run(session, message, image_attachments), timeout=settings.max_execution_timeout_seconds, ) except asyncio.TimeoutError: @@ -86,6 +91,7 @@ class OrchestratorEngine: self, session: SessionState, message: str, + image_attachments: list[dict[str, Any]] | None = None, ) -> dict[str, Any]: """Execute: message → agent → response.""" @@ -113,8 +119,8 @@ class OrchestratorEngine: f"Peticion del usuario:\n{message}" ) - # Create task - task = session.begin_task(objective=message) + # Create task (con imágenes adjuntas si las hay — visión nativa) + task = session.begin_task(objective=message, image_attachments=image_attachments) task.status = TaskStatus.EXECUTING # Reset del contador de invocaciones de `acai_plan` por turno (Fase 5). @@ -154,6 +160,9 @@ class OrchestratorEngine: session.recent_messages, message=message, conversation=result.get("conversation", []), + image_attachments=( + session.current_task.image_attachments if session.current_task else None + ), ) session.task_history.append( @@ -182,13 +191,18 @@ class OrchestratorEngine: task.status = TaskStatus.COMPLETED session.complete_task() - # Calculate cost + # Calculate cost — por modelo realmente usado (Fase 2). El model_id + # efectivo vive en el agent_profile (resuelto en send_message). total_input = usage.get("input_tokens", 0) total_output = usage.get("output_tokens", 0) - cost_usd = ( - (total_input / 1_000_000) * settings.cost_per_1m_input - + (total_output / 1_000_000) * settings.cost_per_1m_output + model_used = ( + self.agent_profile.model_id + or settings.litellm_model + or settings.default_model_id ) + from .cost import compute_cost + cost_info = await compute_cost(model_used, total_input, total_output) + cost_usd = cost_info["cost_usd"] await self.sse.emit( EventType.EXECUTION_COMPLETED, @@ -201,6 +215,19 @@ class OrchestratorEngine: "status": "completed", "usage": usage, "total_cost_usd": round(cost_usd, 6), + # Modelo + tarifas usadas → se propagan a consumo_acaicode via + # _report_usage (columnas input_cost_1M / output_cost_1M). + "model": model_used, + "modelUsage": { + model_used: { + "inputTokens": total_input, + "outputTokens": total_output, + "costUSD": round(cost_usd, 6), + "inputCost1M": cost_info["input_cost_1m"], + "outputCost1M": cost_info["output_cost_1m"], + "reasoningEffort": self.agent_profile.reasoning_effort or "", + } + }, }, session_id=session.session_id, ) @@ -246,13 +273,21 @@ class OrchestratorEngine: existing: list[dict[str, Any]], message: str, conversation: list[dict[str, Any]], + image_attachments: list[dict[str, Any]] | None = None, ) -> list[dict[str, Any]]: merged = [OrchestratorEngine._sanitize_recent_message(m) for m in existing] merged = [m for m in merged if m] current_turn: list[dict[str, Any]] = [] - if message.strip(): - current_turn.append({"role": "user", "content": message}) + if message.strip() or image_attachments: + if image_attachments: + # Guardar el turno con la imagen como bloques para que PERSISTA + # en el contexto de turnos siguientes (visión nativa multimodal). + content_blocks = [{"type": "text", "text": message}] + content_blocks.extend(image_attachments) + current_turn.append({"role": "user", "content": content_blocks}) + else: + current_turn.append({"role": "user", "content": message}) for message_obj in conversation: sanitized = OrchestratorEngine._sanitize_recent_message(message_obj) diff --git a/src/orchestrator/model_resolver.py b/src/orchestrator/model_resolver.py new file mode 100644 index 0000000..bc37fd7 --- /dev/null +++ b/src/orchestrator/model_resolver.py @@ -0,0 +1,113 @@ +"""Resolución dinámica del modelo IA por sesión (Fase 2). + +Prioridad: + 1. Override por-usuario: `session.metadata["ai_provider"|"ai_model"]`. Lo + inyecta acai-app via self-read del WS (`getAcaiCodeUserAiModel`) al crear + la sesión. + 2. Default global: Redis `acai:config:ai:provider` / `acai:config:ai:model`, + que escribe el Forge Admin Panel. OJO: esas keys NO llevan el prefijo + `agentic` (son globales del stack Acai). + 3. Sin configuración → None: no se toca el modelo y el adapter usa su default + (comportamiento previo). + +Solo aplica cuando el provider activo es `litellm` — los providers del catálogo +(openrouter, deepseek) se enrutan por LiteLLM. Para claude/openai no se toca. +""" + +from __future__ import annotations + +import logging + +import redis.asyncio as redis + +from ..config import settings + +logger = logging.getLogger(__name__) + +# Keys del Forge Admin Panel (globales, SIN prefijo agentic). +_GLOBAL_PROVIDER_KEY = "acai:config:ai:provider" +_GLOBAL_MODEL_KEY = "acai:config:ai:model" +_GLOBAL_REASONING_KEY = "acai:config:ai:reasoning_effort" + +# Niveles de razonamiento válidos (lo demás se ignora → sin razonamiento). +_VALID_EFFORTS = {"minimal", "low", "medium", "high"} + +# El Forge Admin Panel escribe la config global en Redis db 0 (REDIS_DB=0 del +# admin). El agentic usa db 1 para sus sesiones, así que para leer la config +# global necesitamos una conexión dedicada a db 0 (misma instancia Redis). +_GLOBAL_CONFIG_DB = 0 +_global_redis: "redis.Redis | None" = None + + +def _get_global_redis() -> "redis.Redis": + global _global_redis + if _global_redis is None: + _global_redis = redis.Redis( + host=settings.redis_host, + port=settings.redis_port, + db=_GLOBAL_CONFIG_DB, + password=settings.redis_password or None, + decode_responses=True, + ) + return _global_redis + + +def to_litellm_model(provider: str | None, model: str | None) -> str: + """Mapea {provider, model} del catálogo a un model string de LiteLLM.""" + provider = (provider or "").strip().lower() + model = (model or "").strip() + if not model: + return "" + if provider == "openrouter": + # Los ids de OpenRouter ya vienen como "vendor/name" → prefijo openrouter/. + return model if model.startswith("openrouter/") else f"openrouter/{model}" + if provider == "deepseek": + return model if model.startswith("deepseek/") else f"deepseek/{model}" + # Provider desconocido: respetar el id tal cual (puede traer ya su prefijo). + return model + + +def _norm_effort(value) -> str | None: + v = (value or "").strip().lower() + return v if v in _VALID_EFFORTS else None + + +async def resolve_session_model(session) -> dict: + """Resuelve modelo + razonamiento efectivos para la sesión. + + Devuelve {"model_id": str|None, "reasoning_effort": str|None}. El effort se + toma de la MISMA fuente que el modelo (override de usuario o default global), + para que sean coherentes. model_id None = sin override (adapter usa default). + """ + none = {"model_id": None, "reasoning_effort": None} + if settings.default_model_provider != "litellm": + return none + + # 1. Override por-usuario (metadata de la sesión). + meta = getattr(session, "metadata", None) or {} + provider = meta.get("ai_provider") + model = meta.get("ai_model") + if provider and model: + return { + "model_id": to_litellm_model(provider, model) or None, + "reasoning_effort": _norm_effort(meta.get("ai_reasoning_effort")), + } + + # 2. Default global (Redis db 0, keys sin prefijo agentic). + try: + gr = _get_global_redis() + provider = await gr.get(_GLOBAL_PROVIDER_KEY) + model = await gr.get(_GLOBAL_MODEL_KEY) + effort = await gr.get(_GLOBAL_REASONING_KEY) + except Exception as e: # pragma: no cover - defensivo + logger.warning("resolve_session_model: lectura Redis falló: %s", e) + return none + + if provider and model: + return { + "model_id": to_litellm_model(provider, model) or None, + "reasoning_effort": _norm_effort(effort), + } + + # 3. Sin configuración → sin override. + return none diff --git a/src/orchestrator/planner.py b/src/orchestrator/planner.py index c85b5aa..de49bf3 100644 --- a/src/orchestrator/planner.py +++ b/src/orchestrator/planner.py @@ -217,6 +217,8 @@ async def run_planner_subloop( max_tokens=settings.planner_max_tokens or 16000, # Temperatura mas baja que el agente principal — queremos JSON limpio. temperature=0.1, + # Mismo nivel de razonamiento resuelto por sesión que el agente principal. + reasoning_effort=agent_profile.reasoning_effort or "", ) tool_defs = _build_planner_tools(mcp) diff --git a/src/streaming/claude_format.py b/src/streaming/claude_format.py index 467dc41..e9314e0 100644 --- a/src/streaming/claude_format.py +++ b/src/streaming/claude_format.py @@ -363,6 +363,8 @@ class ClaudeFormatEmitter: "cache_creation_input_tokens": 0, }, "total_cost_usd": data.get("total_cost_usd", 0), + # Modelo usado → acai-app lo registra en consumo_acaicode. + "modelUsage": data.get("modelUsage", {}), }) # Done