Añadir completion + ajustes del chat
This commit is contained in:
@@ -2,6 +2,7 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
from typing import Any, AsyncIterator
|
||||
@@ -14,6 +15,27 @@ from .base import ModelAdapter, ModelConfig, ModelResponse, StreamChunk
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Transient errors from the model proxy (MiniMax/Anthropic). We retry
# with exponential backoff: 1s, 3s, 9s. 529 is Anthropic's overloaded_error;
# 429 is rate-limit; 503 is service unavailable.
_TRANSIENT_STATUSES = {429, 503, 529}
# Backoff delays in seconds between successive retry attempts
# (total attempts = len(_RETRY_DELAYS) + 1).
_RETRY_DELAYS = (1.0, 3.0, 9.0)
|
||||
|
||||
|
||||
def _is_transient(exc: Exception) -> bool:
    """Return True when *exc* is safe to retry (overload / transient network).

    Connection and timeout errors are always retryable. API status errors
    are retryable when their HTTP status is in ``_TRANSIENT_STATUSES``.
    As a last resort, the error text is scanned for overload wording.
    """
    # Network-level failures never delivered a response, so a retry is safe.
    if isinstance(exc, (anthropic.APIConnectionError, anthropic.APITimeoutError)):
        return True
    # HTTP errors: retry only the known-transient statuses (429/503/529).
    if isinstance(exc, anthropic.APIStatusError):
        if getattr(exc, "status_code", None) in _TRANSIENT_STATUSES:
            return True
    # Fallback: some proxies report overload only in the message body.
    # NOTE(review): indentation was lost in this paste — confirm this check
    # applies to all exceptions, not only APIStatusError.
    text = str(exc).lower()
    return "overloaded" in text or "high load" in text
|
||||
|
||||
|
||||
class ClaudeAdapter(ModelAdapter):
|
||||
"""Adapter for the Anthropic Claude API."""
|
||||
|
||||
@@ -63,65 +85,87 @@ class ClaudeAdapter(ModelAdapter):
|
||||
if tools:
|
||||
kwargs["tools"] = self._format_tools(tools)
|
||||
|
||||
async with self._client.messages.stream(**kwargs) as stream:
|
||||
current_tool_id = ""
|
||||
current_tool_name = ""
|
||||
accumulated_args = ""
|
||||
input_tokens = 0
|
||||
|
||||
async for event in stream:
|
||||
if event.type == "message_start" and hasattr(event, "message"):
|
||||
usage = getattr(event.message, "usage", None)
|
||||
if usage:
|
||||
input_tokens = getattr(usage, "input_tokens", 0)
|
||||
|
||||
if event.type == "content_block_start":
|
||||
block = event.content_block
|
||||
if block.type == "tool_use":
|
||||
current_tool_id = block.id
|
||||
current_tool_name = block.name
|
||||
accumulated_args = ""
|
||||
yield StreamChunk(
|
||||
tool_call_id=current_tool_id,
|
||||
tool_name=current_tool_name,
|
||||
)
|
||||
continue
|
||||
|
||||
if event.type == "content_block_delta":
|
||||
delta = event.delta
|
||||
if delta.type == "text_delta":
|
||||
yield StreamChunk(delta=delta.text)
|
||||
elif delta.type == "input_json_delta":
|
||||
accumulated_args += delta.partial_json
|
||||
yield StreamChunk(
|
||||
tool_call_id=current_tool_id,
|
||||
tool_name=current_tool_name,
|
||||
tool_arguments=delta.partial_json,
|
||||
)
|
||||
continue
|
||||
|
||||
if event.type == "content_block_stop":
|
||||
if current_tool_id and accumulated_args:
|
||||
yield StreamChunk(
|
||||
tool_call_id=current_tool_id,
|
||||
tool_name=current_tool_name,
|
||||
tool_arguments=accumulated_args,
|
||||
finish_reason="tool_use",
|
||||
)
|
||||
# Retry con backoff sobre errores transitorios al ABRIR el stream.
|
||||
# Si ya hemos empezado a emitir chunks al consumidor, NO podemos
|
||||
# reintentar (el orquestador ya recibió contenido parcial).
|
||||
attempt = 0
|
||||
max_attempts = len(_RETRY_DELAYS) + 1
|
||||
while True:
|
||||
yielded_any = False
|
||||
try:
|
||||
async with self._client.messages.stream(**kwargs) as stream:
|
||||
current_tool_id = ""
|
||||
current_tool_name = ""
|
||||
accumulated_args = ""
|
||||
continue
|
||||
input_tokens = 0
|
||||
|
||||
if event.type == "message_delta":
|
||||
output_tokens = getattr(event.usage, "output_tokens", 0) if event.usage else 0
|
||||
yield StreamChunk(
|
||||
finish_reason=event.delta.stop_reason or "",
|
||||
usage={
|
||||
"input_tokens": input_tokens,
|
||||
"output_tokens": output_tokens,
|
||||
},
|
||||
)
|
||||
async for event in stream:
|
||||
yielded_any = True
|
||||
if event.type == "message_start" and hasattr(event, "message"):
|
||||
usage = getattr(event.message, "usage", None)
|
||||
if usage:
|
||||
input_tokens = getattr(usage, "input_tokens", 0)
|
||||
|
||||
if event.type == "content_block_start":
|
||||
block = event.content_block
|
||||
if block.type == "tool_use":
|
||||
current_tool_id = block.id
|
||||
current_tool_name = block.name
|
||||
accumulated_args = ""
|
||||
yield StreamChunk(
|
||||
tool_call_id=current_tool_id,
|
||||
tool_name=current_tool_name,
|
||||
)
|
||||
continue
|
||||
|
||||
if event.type == "content_block_delta":
|
||||
delta = event.delta
|
||||
if delta.type == "text_delta":
|
||||
yield StreamChunk(delta=delta.text)
|
||||
elif delta.type == "input_json_delta":
|
||||
accumulated_args += delta.partial_json
|
||||
yield StreamChunk(
|
||||
tool_call_id=current_tool_id,
|
||||
tool_name=current_tool_name,
|
||||
tool_arguments=delta.partial_json,
|
||||
)
|
||||
continue
|
||||
|
||||
if event.type == "content_block_stop":
|
||||
if current_tool_id and accumulated_args:
|
||||
yield StreamChunk(
|
||||
tool_call_id=current_tool_id,
|
||||
tool_name=current_tool_name,
|
||||
tool_arguments=accumulated_args,
|
||||
finish_reason="tool_use",
|
||||
)
|
||||
current_tool_id = ""
|
||||
current_tool_name = ""
|
||||
accumulated_args = ""
|
||||
continue
|
||||
|
||||
if event.type == "message_delta":
|
||||
output_tokens = getattr(event.usage, "output_tokens", 0) if event.usage else 0
|
||||
yield StreamChunk(
|
||||
finish_reason=event.delta.stop_reason or "",
|
||||
usage={
|
||||
"input_tokens": input_tokens,
|
||||
"output_tokens": output_tokens,
|
||||
},
|
||||
)
|
||||
return # consumo OK, salimos del retry loop
|
||||
except Exception as e:
|
||||
# Si ya emitimos algo al consumidor, no podemos reintentar
|
||||
# de forma segura: el contenido parcial ya viajó.
|
||||
if yielded_any or not _is_transient(e) or attempt >= max_attempts - 1:
|
||||
raise
|
||||
wait = _RETRY_DELAYS[attempt]
|
||||
logger.warning(
|
||||
"Claude stream() transient error (attempt %d/%d), retrying in %.1fs: %s",
|
||||
attempt + 1, max_attempts, wait, str(e)[:200],
|
||||
)
|
||||
await asyncio.sleep(wait)
|
||||
attempt += 1
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Non-streaming
|
||||
@@ -158,8 +202,31 @@ class ClaudeAdapter(ModelAdapter):
|
||||
kwargs["system"] = system_content
|
||||
if tools:
|
||||
kwargs["tools"] = self._format_tools(tools)
|
||||
# Fuerza al modelo a usar un tool concreto para garantizar JSON por schema
|
||||
# (usado por /completions con json_schema). Ver OpenAIAdapter para la variante.
|
||||
force_tool = (config.extra or {}).get("force_tool")
|
||||
if force_tool:
|
||||
kwargs["tool_choice"] = {"type": "tool", "name": force_tool}
|
||||
|
||||
response = await self._client.messages.create(**kwargs)
|
||||
# Retry con backoff sobre errores transitorios (429/503/529). El proxy
|
||||
# MiniMax devuelve 529 overloaded_error con cierta frecuencia bajo carga.
|
||||
last_exc: Exception | None = None
|
||||
for attempt in range(len(_RETRY_DELAYS) + 1):
|
||||
try:
|
||||
response = await self._client.messages.create(**kwargs)
|
||||
break
|
||||
except Exception as e:
|
||||
if not _is_transient(e) or attempt == len(_RETRY_DELAYS):
|
||||
raise
|
||||
wait = _RETRY_DELAYS[attempt]
|
||||
logger.warning(
|
||||
"Claude complete() transient error (attempt %d/%d), retrying in %.1fs: %s",
|
||||
attempt + 1, len(_RETRY_DELAYS) + 1, wait, str(e)[:200],
|
||||
)
|
||||
last_exc = e
|
||||
await asyncio.sleep(wait)
|
||||
else:
|
||||
raise last_exc or RuntimeError("Claude complete() retry exhausted")
|
||||
|
||||
content = ""
|
||||
tool_calls: list[dict[str, Any]] = []
|
||||
|
||||
Reference in New Issue
Block a user