- LiteLLMAdapter (subclasses OpenAIAdapter via _acreate hook): routes DeepSeek through LiteLLM. Opt-in AGENTIC_DEFAULT_MODEL_PROVIDER=litellm. A/B beat the hand-rolled adapter (0 DSML, 0 parse-fails). Defensive chunk.usage getattr, token-estimate usage fallback for billing, quiet litellm logs. - DSML parser: tolerate single/multi fullwidth pipes, honor string="true/false" typed args (openai_adapter fallback when DeepSeek leaks tool calls as text). - Thinking mode: capture and round-trip reasoning_content across turns. - Embeddings: dedicated AGENTIC_EMBEDDINGS_API_KEY (DeepSeek has no embeddings); disable cleanly when unset to avoid per-turn 401. - claude_format: friendly generic error messages to the chat, raw only in logs. - acai agent max_tokens 4096->16384 (whole-file writes no longer truncate); system.md size-based edit policy; strict tools opt-in (off). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
68 lines
2.5 KiB
Python
68 lines
2.5 KiB
Python
"""LiteLLM model adapter — spike para A/B contra el adapter OpenAI/DeepSeek nativo.
|
|
|
|
Reuses the ENTIRE OpenAIAdapter flow (chunk processing, message conversion,
tools, DSML fallback) and changes only the model call: instead of the OpenAI
SDK, it routes through LiteLLM, which ships provider-specific handling
(DeepSeek included) and could resolve out of the box the DSML /
reasoning_content issues that we currently patch by hand.

Enable with `AGENTIC_DEFAULT_MODEL_PROVIDER=litellm`. Set the model via
`AGENTIC_LITELLM_MODEL` (e.g. "deepseek/deepseek-v4-pro"); if empty, it is
derived from `AGENTIC_DEFAULT_MODEL_ID`. Reuses `openai_api_key` /
`openai_base_url` as credentials.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
from typing import Any
|
|
|
|
import litellm
|
|
|
|
from ..config import settings
|
|
from .openai_adapter import OpenAIAdapter
|
|
|
|
logger = logging.getLogger(__name__)

# Keep litellm quiet: raise its own "LiteLLM" logger to WARNING and switch off
# its debug banner, which otherwise spams INFO lines such as
# "LiteLLM completion() model=...".
logging.getLogger("LiteLLM").setLevel(logging.WARNING)
litellm.suppress_debug_info = True

# Ask LiteLLM to drop parameters the target provider does not support instead
# of failing the call.
litellm.drop_params = True
|
|
|
|
|
|
class LiteLLMAdapter(OpenAIAdapter):
    """Send model calls through LiteLLM while inheriting the OpenAIAdapter pipeline.

    Only the model invocation (`_acreate`) is overridden; everything else comes
    from `OpenAIAdapter`.
    """

    def __init__(
        self,
        model: str | None = None,
        api_key: str | None = None,
        base_url: str | None = None,
    ) -> None:
        """Resolve the model name and credentials, falling back to settings.

        Args:
            model: LiteLLM model identifier. When falsy, `settings.litellm_model`
                is used, and if that is falsy too the name is derived by
                `_derive_model()`.
            api_key: API key. When falsy, `settings.openai_api_key` is used;
                stored as `None` if neither is set.
            base_url: API base URL. When falsy, `settings.openai_base_url` is
                used; stored as `None` if neither is set.
        """
        # Deliberately skips OpenAIAdapter.__init__: the AsyncOpenAI client is
        # not needed here.
        # NOTE(review): confirm the inherited methods rely on no other state
        # that the base initializer would have set up.
        resolved_model = model
        if not resolved_model:
            resolved_model = settings.litellm_model or self._derive_model()
        self._litellm_model = resolved_model

        resolved_key = api_key or settings.openai_api_key
        self._api_key = resolved_key if resolved_key else None

        resolved_base = base_url or settings.openai_base_url
        self._api_base = resolved_base if resolved_base else None

        # Per the original author's note, LiteLLM does not deliver reliable
        # usage figures when streaming, so usage is estimated for billing.
        # NOTE(review): this flag is presumably read by the inherited
        # OpenAIAdapter code — confirm.
        self._estimate_usage_fallback = True

        shown_base = self._api_base or "(default)"
        logger.info(
            "LiteLLMAdapter: model=%s api_base=%s",
            self._litellm_model,
            shown_base,
        )

    @staticmethod
    def _derive_model() -> str:
        """Build a LiteLLM model name from `settings.default_model_id`.

        Returns:
            The configured id unchanged when it already contains a provider
            prefix (any "/"), otherwise the id prefixed with "deepseek/". An
            unset id defaults to "deepseek-chat".
        """
        model_id = settings.default_model_id or "deepseek-chat"
        if "/" in model_id:
            # Already provider-qualified ("deepseek/...", "openai/..."): keep it.
            return model_id
        return f"deepseek/{model_id}"

    async def _acreate(self, kwargs: dict[str, Any]):
        """Issue the completion request through `litellm.acompletion`.

        Args:
            kwargs: Request arguments. The mapping is copied, never mutated.

        Returns:
            Whatever `litellm.acompletion` returns for the assembled request.
        """
        # Work on a copy so the caller's dict is left untouched; the adapter's
        # own model always overrides any "model" entry supplied by the caller.
        request = {**kwargs, "model": self._litellm_model}

        # Credentials are forwarded only when set, so LiteLLM can fall back to
        # its own defaults otherwise.
        optional = {"api_key": self._api_key, "api_base": self._api_base}
        request.update({name: value for name, value in optional.items() if value})

        return await litellm.acompletion(**request)
|