Añadir completion + ajustes del chat

This commit is contained in:
Jordan Diaz
2026-04-14 07:12:50 +00:00
parent 15abc1eb4f
commit 469ff65052
3 changed files with 229 additions and 56 deletions

View File

@@ -2,6 +2,7 @@
from __future__ import annotations from __future__ import annotations
import asyncio
import json import json
import logging import logging
from typing import Any, AsyncIterator from typing import Any, AsyncIterator
@@ -14,6 +15,27 @@ from .base import ModelAdapter, ModelConfig, ModelResponse, StreamChunk
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Transient errors from the model proxy (MiniMax/Anthropic). We retry
# with exponential backoff: 1s, 3s, 9s. 529 is Anthropic's overloaded_error;
# 429 is rate limiting; 503 is service unavailable.
_TRANSIENT_STATUSES = {429, 503, 529}
# Seconds to sleep before each retry. The retry loops size themselves from
# this tuple (len(_RETRY_DELAYS) + 1 total attempts).
_RETRY_DELAYS = (1.0, 3.0, 9.0)
def _is_transient(exc: Exception) -> bool:
"""Return True if the error is safe to retry (overload / transient network failure)."""
# Connection failures and timeouts raised by the Anthropic client are
# always considered retryable.
if isinstance(exc, (anthropic.APIConnectionError, anthropic.APITimeoutError)):
return True
# HTTP status errors are retryable when the status code is one of
# _TRANSIENT_STATUSES. getattr() tolerates an exception object that
# carries no status_code attribute.
if isinstance(exc, anthropic.APIStatusError):
status = getattr(exc, "status_code", None)
if status in _TRANSIENT_STATUSES:
return True
# Fallback on the error text: an "overloaded" / "high load" message is
# treated as transient even when the status code did not match.
# NOTE(review): indentation is not visible in this view, so it is unclear
# whether this text check applies to every exception or only to
# APIStatusError - confirm against the actual file.
msg = str(exc).lower()
if "overloaded" in msg or "high load" in msg:
return True
return False
class ClaudeAdapter(ModelAdapter): class ClaudeAdapter(ModelAdapter):
"""Adapter for the Anthropic Claude API.""" """Adapter for the Anthropic Claude API."""
@@ -63,6 +85,14 @@ class ClaudeAdapter(ModelAdapter):
if tools: if tools:
kwargs["tools"] = self._format_tools(tools) kwargs["tools"] = self._format_tools(tools)
# Retry con backoff sobre errores transitorios al ABRIR el stream.
# Si ya hemos empezado a emitir chunks al consumidor, NO podemos
# reintentar (el orquestador ya recibió contenido parcial).
attempt = 0
max_attempts = len(_RETRY_DELAYS) + 1
while True:
yielded_any = False
try:
async with self._client.messages.stream(**kwargs) as stream: async with self._client.messages.stream(**kwargs) as stream:
current_tool_id = "" current_tool_id = ""
current_tool_name = "" current_tool_name = ""
@@ -70,6 +100,7 @@ class ClaudeAdapter(ModelAdapter):
input_tokens = 0 input_tokens = 0
async for event in stream: async for event in stream:
yielded_any = True
if event.type == "message_start" and hasattr(event, "message"): if event.type == "message_start" and hasattr(event, "message"):
usage = getattr(event.message, "usage", None) usage = getattr(event.message, "usage", None)
if usage: if usage:
@@ -122,6 +153,19 @@ class ClaudeAdapter(ModelAdapter):
"output_tokens": output_tokens, "output_tokens": output_tokens,
}, },
) )
return # consumo OK, salimos del retry loop
except Exception as e:
# Si ya emitimos algo al consumidor, no podemos reintentar
# de forma segura: el contenido parcial ya viajó.
if yielded_any or not _is_transient(e) or attempt >= max_attempts - 1:
raise
wait = _RETRY_DELAYS[attempt]
logger.warning(
"Claude stream() transient error (attempt %d/%d), retrying in %.1fs: %s",
attempt + 1, max_attempts, wait, str(e)[:200],
)
await asyncio.sleep(wait)
attempt += 1
# ------------------------------------------------------------------ # ------------------------------------------------------------------
# Non-streaming # Non-streaming
@@ -158,8 +202,31 @@ class ClaudeAdapter(ModelAdapter):
kwargs["system"] = system_content kwargs["system"] = system_content
if tools: if tools:
kwargs["tools"] = self._format_tools(tools) kwargs["tools"] = self._format_tools(tools)
# Fuerza al modelo a usar un tool concreto para garantizar JSON por schema
# (usado por /completions con json_schema). Ver OpenAIAdapter para la variante.
force_tool = (config.extra or {}).get("force_tool")
if force_tool:
kwargs["tool_choice"] = {"type": "tool", "name": force_tool}
# Retry con backoff sobre errores transitorios (429/503/529). El proxy
# MiniMax devuelve 529 overloaded_error con cierta frecuencia bajo carga.
last_exc: Exception | None = None
for attempt in range(len(_RETRY_DELAYS) + 1):
try:
response = await self._client.messages.create(**kwargs) response = await self._client.messages.create(**kwargs)
break
except Exception as e:
if not _is_transient(e) or attempt == len(_RETRY_DELAYS):
raise
wait = _RETRY_DELAYS[attempt]
logger.warning(
"Claude complete() transient error (attempt %d/%d), retrying in %.1fs: %s",
attempt + 1, len(_RETRY_DELAYS) + 1, wait, str(e)[:200],
)
last_exc = e
await asyncio.sleep(wait)
else:
raise last_exc or RuntimeError("Claude complete() retry exhausted")
content = "" content = ""
tool_calls: list[dict[str, Any]] = [] tool_calls: list[dict[str, Any]] = []

View File

@@ -152,6 +152,14 @@ class OpenAIAdapter(ModelAdapter):
} }
if tools: if tools:
kwargs["tools"] = self._format_tools(tools) kwargs["tools"] = self._format_tools(tools)
# Fuerza al modelo a usar un tool concreto para garantizar JSON por schema
# (usado por /completions con json_schema). Ver ClaudeAdapter para la variante.
force_tool = (config.extra or {}).get("force_tool")
if force_tool:
kwargs["tool_choice"] = {
"type": "function",
"function": {"name": force_tool},
}
response = await self._client.chat.completions.create(**kwargs) response = await self._client.chat.completions.create(**kwargs)
choice = response.choices[0] choice = response.choices[0]

View File

@@ -48,6 +48,28 @@ class SendMessageRequest(BaseModel):
agent_id: str | None = None agent_id: str | None = None
class CompletionRequest(BaseModel):
    """Request body for a one-shot structured completion.

    No session, no MCP and no agent are involved. Intended for callers that
    need a direct answer from the LLM, optionally conforming to a JSON
    schema. Example: `content_export.py` when it extracts brand info or
    generates text chunks for local websites.
    """

    # Prompt text; sent to the model as the "user" message.
    message: str
    # Optional system prompt; sent as a "system" message when non-empty.
    system: str | None = Field(default=None)
    # Model to call; the endpoint falls back to settings.default_model_id.
    model_id: str | None = Field(default=None)
    # Forwarded to ModelConfig.max_tokens.
    max_tokens: int = Field(default=4096)
    # Forwarded to ModelConfig.temperature.
    temperature: float = Field(default=0.3)
    # When set, used as the input schema of the tool the model is forced to call.
    json_schema: dict[str, Any] | None = Field(default=None)
class CompletionResponse(BaseModel):
    """Response body of the one-shot `/completions` endpoint."""

    # Model text, or the JSON-serialized tool arguments when a schema was used.
    content: str = Field(default="")
    # Structured result (tool arguments) when a schema was requested and honored.
    parsed: dict[str, Any] | None = Field(default=None)
    # Usage counters as reported by the adapter; empty when unavailable.
    usage: dict[str, int] = Field(default_factory=dict)
    # Identifier of the model that served the request.
    model_id: str = Field(default="")
class SessionResponse(BaseModel): class SessionResponse(BaseModel):
session_id: str session_id: str
status: str status: str
@@ -157,6 +179,70 @@ async def create_session(body: CreateSessionRequest) -> CreateSessionResponse:
) )
# ------------------------------------------------------------------
# POST /completions — one-shot structured completion
# ------------------------------------------------------------------
@router.post("/completions", response_model=CompletionResponse)
async def completion(body: CompletionRequest) -> CompletionResponse:
    """Call the LLM directly, with no session, no MCP and no agent.

    When `json_schema` is provided, the model is forced to fill in a tool
    carrying that schema (forced tool_use for Claude/Anthropic-compatible
    models, forced function calling for OpenAI). The result is returned
    parsed in `parsed`, in addition to the stringified JSON in `content`.

    Args:
        body: Prompt, optional system prompt, sampling settings and an
            optional JSON schema for structured output.

    Returns:
        The model's text, or, when a schema was supplied and the model
        produced a tool call with usable arguments, those arguments both as
        a dict (`parsed`) and as a JSON string (`content`).

    Raises:
        HTTPException: 503 if no model adapter is registered; 502 if the
            adapter call raises.
    """
    import json as _json

    from ..adapters.base import ModelConfig

    adapter = _deps.get("model_adapter")
    if adapter is None:
        raise HTTPException(status_code=503, detail="Model adapter not initialized")

    messages: list[dict[str, Any]] = []
    if body.system:
        messages.append({"role": "system", "content": body.system})
    messages.append({"role": "user", "content": body.message})

    config = ModelConfig(
        model_id=body.model_id or settings.default_model_id,
        max_tokens=body.max_tokens,
        temperature=body.temperature,
    )

    tools_param: list[dict[str, Any]] | None = None
    if body.json_schema:
        # A single tool whose input schema is the caller's schema; the
        # adapters read `force_tool` and turn it into a forced tool choice.
        tools_param = [
            {
                "name": "emit_response",
                "description": "Emite la respuesta estructurada conforme al schema.",
                "input_schema": body.json_schema,
            }
        ]
        config.extra = {"force_tool": "emit_response"}

    try:
        response = await adapter.complete(messages, tools=tools_param, config=config)
    except Exception as e:
        logger.exception("completion failed: %s", e)
        # Chain explicitly so the provider error is recorded as the cause.
        raise HTTPException(
            status_code=502, detail="Model call failed: {}".format(e)
        ) from e

    parsed: dict[str, Any] | None = None
    content_out = response.content or ""
    if body.json_schema and response.tool_calls:
        tool_args = response.tool_calls[0].get("arguments")
        if isinstance(tool_args, str):
            # OpenAI-style APIs deliver function arguments as a JSON-encoded
            # string; decode it in case the adapter passed it through
            # unparsed. Undecodable text is treated as "no structured result".
            try:
                tool_args = _json.loads(tool_args)
            except ValueError:
                tool_args = None
        if isinstance(tool_args, dict):
            parsed = tool_args
            content_out = _json.dumps(parsed, ensure_ascii=False)

    return CompletionResponse(
        content=content_out,
        parsed=parsed,
        usage=response.usage or {},
        model_id=config.model_id,
    )
# ------------------------------------------------------------------ # ------------------------------------------------------------------
# POST /sessions/{id}/messages # POST /sessions/{id}/messages
# ------------------------------------------------------------------ # ------------------------------------------------------------------
@@ -170,6 +256,18 @@ async def send_message(
if not session: if not session:
raise HTTPException(status_code=404, detail="Session not found") raise HTTPException(status_code=404, detail="Session not found")
# Resetear sesión en estado ERROR al recibir un mensaje nuevo. Sin esto,
# un fallo transitorio (p.ej. 529 overloaded del proxy del modelo) deja
# la sesión bloqueada para siempre y los siguientes mensajes del usuario
# no se procesan. Limpiamos el current_task fallido también.
if session.status == SessionStatus.ERROR:
logger.info(
"Session %s was in ERROR state, resetting to ACTIVE for new message",
session_id,
)
session.status = SessionStatus.ACTIVE
session.current_task = None
# Get or create session's MCP manager # Get or create session's MCP manager
registry = _get_mcp_registry() registry = _get_mcp_registry()
mcp_manager = registry.get_for_session(session_id) mcp_manager = registry.get_for_session(session_id)