"""Application configuration via environment variables.""" from __future__ import annotations from pydantic_settings import BaseSettings from pydantic import Field class Settings(BaseSettings): # --- Service --- service_name: str = "agentic-microservice" service_version: str = "1.0.0" host: str = "0.0.0.0" port: int = 8000 debug: bool = False # --- Redis --- redis_host: str = "localhost" redis_port: int = 6379 redis_db: int = 0 redis_password: str = "" redis_key_prefix: str = "agentic" session_ttl_seconds: int = 86400 # 24h @property def redis_url(self) -> str: auth = f":{self.redis_password}@" if self.redis_password else "" return f"redis://{auth}{self.redis_host}:{self.redis_port}/{self.redis_db}" # --- Model providers --- anthropic_api_key: str = "" anthropic_base_url: str = "" # Custom base URL (for MiniMax Anthropic-compatible, etc.) openai_api_key: str = "" openai_base_url: str = "" # Custom base URL (for MiniMax, DeepInfra, etc.) # --- Embeddings (semantic search) --- # Credenciales DEDICADAS para embeddings. Necesarias porque el chat usa # `openai_api_key` apuntando a un endpoint compatible (p.ej. DeepSeek, que NO # tiene API de embeddings). Si vacio, cae a `openai_api_key` por compat. El # base_url vacio => OpenAI real (api.openai.com); NO hereda `openai_base_url`. embeddings_api_key: str = "" embeddings_base_url: str = "" embeddings_model: str = "text-embedding-3-small" # Spike LiteLLM: si default_model_provider=litellm, modelo a usar (formato # litellm, p.ej. "deepseek/deepseek-v4-pro"). Vacío → deriva de default_model_id. litellm_model: str = "" @property def effective_embeddings_key(self) -> str: """Key a usar para embeddings. Prioriza la dedicada; reutiliza la del chat SOLO si el chat es OpenAI real (sin `openai_base_url` custom) — si apunta a DeepSeek u otro proveedor, esa key no sirve para embeddings.""" if self.embeddings_api_key: return self.embeddings_api_key if not self.openai_base_url: return self.openai_api_key return "" @property def embeddings_enabled(self) -> bool: return bool(self.effective_embeddings_key or self.embeddings_base_url) default_model_provider: str = "claude" default_model_id: str = "claude-sonnet-4-20250514" # Modelo override SOLO para el sub-loop del planner (acai_plan). Si vacio, # usa default_model_id. Pensado para usar un modelo mas potente al planificar # (p.ej. deepseek-v4-pro) y otro mas rapido al ejecutar (p.ej. deepseek-v4-flash). planner_model_id: str = "" # Max tokens del planner. Mas alto que el agente principal porque Pro con # thinking puede gastar 2-4k tokens razonando antes de emitir el JSON del plan. planner_max_tokens: int = 16000 max_tokens: int = 4096 temperature: float = 0.3 # DeepSeek strict function calling (beta). OPT-IN (default False): exige schemas # tipo OpenAI (additionalProperties:false, todos required, etc.) que los tools MCP # actuales NO cumplen → da 400. Para activarlo: schemas compatibles + base_url # https://api.deepseek.com/beta + AGENTIC_DEEPSEEK_STRICT_TOOLS=true. deepseek_strict_tools: bool = False # --- Context engine --- model_context_window: int = 0 # 0 = use legacy fixed budget / explicit override model_max_output_tokens: int = 4096 context_max_tokens: int = 0 # 0 = auto-budget from model window, fallback legacy 120k compaction_threshold_tokens: int = 0 # 0 = derive from ratio compaction_threshold_ratio: float = 0.80 context_reserve_ratio: float = 0.10 artifact_summary_max_chars: int = 2000 # KB inyectada como system prompt. Default 4k (antes 30k) — la doc # oficial de M2.7 advierte que system prompts grandes degradan rendimiento. # Top-2 docs medianos + cheat sheet ≈ 4k tokens caben con margen. # Se sobrescribe per-agent via `agent.yaml.kb_max_tokens`. knowledge_base_max_tokens: int = 4_000 # Cap absoluto del numero de docs incluidos (filtro tras ranking). kb_top_n_docs: int = 2 # Penalty al `load_priority` de docs `load_when: [ranked]` para que # no entren "por defecto" en el branch top_n, solo si rankean muy alto. kb_ranked_penalty: int = 10 # Umbral de similitud por debajo del cual el ranking no es confiable # y se usa el `load_priority` del frontmatter como tie-break. kb_similarity_floor: float = 0.6 working_context_max_items: int = 20 tool_raw_output_max_chars: int = 16000 # Antes 2000 (calibrado MiniMax 200k). Subido para DeepSeek 1M context. conversation_recent_raw_limit: int = 2 task_history_max_entries: int = 20 task_history_max_tokens: int = 1500 # Presupuesto de tokens para la ventana de recent_messages persistida en # sesion. Sin esto crece sin limite y empuja al compactor a su paso # destructivo (colapsar bloques perdiendo tool_use ids). 0 = sin limite. recent_messages_max_tokens: int = 60_000 # --- MCP --- mcp_config_path: str = "" # Path to mcp.json; empty = legacy single-server mode mcp_server_command: str = "" # Legacy: single server command mcp_server_args: list[str] = Field(default_factory=list) mcp_timeout_seconds: float = 30.0 mcp_startup_timeout_seconds: float = 10.0 # --- Pricing (per 1M tokens) --- cost_per_1m_input: float = 2.50 cost_per_1m_output: float = 15.00 # --- Orchestrator --- max_execution_steps: int = 25 subagent_max_steps: int = 30 max_execution_timeout_seconds: float = 300.0 # 5 min global timeout # --- SSE --- sse_keepalive_seconds: float = 15.0 model_config = {"env_prefix": "AGENTIC_", "env_file": ".env", "extra": "ignore"} @property def reserve_tokens(self) -> int: if self.model_context_window <= 0: return 0 return max(0, int(self.model_context_window * self.context_reserve_ratio)) @property def effective_context_budget(self) -> int: if self.context_max_tokens > 0: return self.context_max_tokens if self.model_context_window > 0: budget = ( self.model_context_window - max(0, self.model_max_output_tokens) - self.reserve_tokens ) return max(1, budget) return 120_000 @property def effective_compaction_threshold(self) -> int: if self.compaction_threshold_tokens > 0: return min(self.compaction_threshold_tokens, self.effective_context_budget) return max(1, int(self.effective_context_budget * self.compaction_threshold_ratio)) settings = Settings()