Token tracking y cálculo de costes por mensaje
- Config: COST_PER_1M_INPUT y COST_PER_1M_OUTPUT configurables vía .env
- OpenAI adapter: stream_options={"include_usage": True} para capturar el uso real de tokens
- base.py: acumula input/output tokens de cada iteración del agente
- planner.py: devuelve usage junto con el plan
- engine.py: suma los tokens de planner + steps + review y calcula el coste en USD
- Response incluye usage{input_tokens, output_tokens} y total_cost_usd
El formato es compatible con el bridge de Claude Code CLI, para la integración
con el frontend y el reporting al webservice de Acai.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -44,6 +44,7 @@ class OpenAIAdapter(ModelAdapter):
|
|||||||
"temperature": config.temperature,
|
"temperature": config.temperature,
|
||||||
"messages": messages,
|
"messages": messages,
|
||||||
"stream": True,
|
"stream": True,
|
||||||
|
"stream_options": {"include_usage": True},
|
||||||
}
|
}
|
||||||
if tools:
|
if tools:
|
||||||
kwargs["tools"] = self._format_tools(tools)
|
kwargs["tools"] = self._format_tools(tools)
|
||||||
@@ -52,9 +53,22 @@ class OpenAIAdapter(ModelAdapter):
|
|||||||
|
|
||||||
tool_calls_acc: dict[int, dict[str, str]] = {}
|
tool_calls_acc: dict[int, dict[str, str]] = {}
|
||||||
|
|
||||||
|
final_usage: dict[str, int] = {}
|
||||||
|
|
||||||
async for chunk in stream:
|
async for chunk in stream:
|
||||||
|
# With include_usage, the last chunk has usage but no choices
|
||||||
|
if chunk.usage:
|
||||||
|
final_usage = {
|
||||||
|
"input_tokens": chunk.usage.prompt_tokens or 0,
|
||||||
|
"output_tokens": chunk.usage.completion_tokens or 0,
|
||||||
|
}
|
||||||
|
|
||||||
choice = chunk.choices[0] if chunk.choices else None
|
choice = chunk.choices[0] if chunk.choices else None
|
||||||
if not choice:
|
if not choice:
|
||||||
|
# Usage-only chunk (last one with include_usage) — emit it
|
||||||
|
if final_usage:
|
||||||
|
yield StreamChunk(usage=final_usage)
|
||||||
|
final_usage = {} # Only emit once
|
||||||
continue
|
continue
|
||||||
|
|
||||||
delta = choice.delta
|
delta = choice.delta
|
||||||
@@ -99,16 +113,15 @@ class OpenAIAdapter(ModelAdapter):
|
|||||||
tool_arguments=acc["arguments"],
|
tool_arguments=acc["arguments"],
|
||||||
finish_reason="tool_use",
|
finish_reason="tool_use",
|
||||||
)
|
)
|
||||||
|
# Emit usage after tool_use chunks
|
||||||
|
if final_usage:
|
||||||
|
yield StreamChunk(usage=final_usage)
|
||||||
else:
|
else:
|
||||||
yield StreamChunk(
|
yield StreamChunk(
|
||||||
finish_reason="end_turn"
|
finish_reason="end_turn"
|
||||||
if choice.finish_reason == "stop"
|
if choice.finish_reason == "stop"
|
||||||
else choice.finish_reason,
|
else choice.finish_reason,
|
||||||
usage={
|
usage=final_usage,
|
||||||
"output_tokens": chunk.usage.completion_tokens
|
|
||||||
if chunk.usage
|
|
||||||
else 0
|
|
||||||
},
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
|
|||||||
@@ -48,6 +48,10 @@ class Settings(BaseSettings):
|
|||||||
mcp_timeout_seconds: float = 30.0
|
mcp_timeout_seconds: float = 30.0
|
||||||
mcp_startup_timeout_seconds: float = 10.0
|
mcp_startup_timeout_seconds: float = 10.0
|
||||||
|
|
||||||
|
# --- Pricing (per 1M tokens) ---
|
||||||
|
cost_per_1m_input: float = 2.50
|
||||||
|
cost_per_1m_output: float = 15.00
|
||||||
|
|
||||||
# --- Orchestrator ---
|
# --- Orchestrator ---
|
||||||
max_execution_steps: int = 25
|
max_execution_steps: int = 25
|
||||||
subagent_max_steps: int = 10
|
subagent_max_steps: int = 10
|
||||||
|
|||||||
@@ -59,6 +59,8 @@ class BaseAgent:
|
|||||||
)
|
)
|
||||||
tool_executions: list[ToolExecution] = []
|
tool_executions: list[ToolExecution] = []
|
||||||
accumulated_content = ""
|
accumulated_content = ""
|
||||||
|
total_input_tokens = 0
|
||||||
|
total_output_tokens = 0
|
||||||
# Real conversation history: assistant messages + tool results
|
# Real conversation history: assistant messages + tool results
|
||||||
conversation: list[dict[str, Any]] = []
|
conversation: list[dict[str, Any]] = []
|
||||||
tool_fingerprints: dict[str, ToolExecution] = {}
|
tool_fingerprints: dict[str, ToolExecution] = {}
|
||||||
@@ -139,6 +141,11 @@ class BaseAgent:
|
|||||||
tool["parsed_arguments"] = args
|
tool["parsed_arguments"] = args
|
||||||
tool_calls.append(tool)
|
tool_calls.append(tool)
|
||||||
|
|
||||||
|
# Accumulate token usage from any chunk that has it
|
||||||
|
if chunk.usage:
|
||||||
|
total_input_tokens += chunk.usage.get("input_tokens", 0)
|
||||||
|
total_output_tokens += chunk.usage.get("output_tokens", 0)
|
||||||
|
|
||||||
if chunk.finish_reason == "end_turn":
|
if chunk.finish_reason == "end_turn":
|
||||||
break
|
break
|
||||||
|
|
||||||
@@ -234,6 +241,10 @@ class BaseAgent:
|
|||||||
"content": accumulated_content,
|
"content": accumulated_content,
|
||||||
"artifacts": artifacts,
|
"artifacts": artifacts,
|
||||||
"tool_executions": tool_executions,
|
"tool_executions": tool_executions,
|
||||||
|
"usage": {
|
||||||
|
"input_tokens": total_input_tokens,
|
||||||
|
"output_tokens": total_output_tokens,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
async def _execute_tool(
|
async def _execute_tool(
|
||||||
|
|||||||
@@ -55,9 +55,10 @@ def create_planner_profile() -> AgentProfile:
|
|||||||
class PlannerAgent(BaseAgent):
|
class PlannerAgent(BaseAgent):
|
||||||
"""Generates execution plans from objectives."""
|
"""Generates execution plans from objectives."""
|
||||||
|
|
||||||
async def plan(self, session: SessionState) -> list[TaskStep]:
|
async def plan(self, session: SessionState) -> tuple[list[TaskStep], dict[str, int]]:
|
||||||
"""Generate a plan and return TaskSteps."""
|
"""Generate a plan and return (TaskSteps, usage)."""
|
||||||
result = await self.execute(session, max_steps=1)
|
result = await self.execute(session, max_steps=1)
|
||||||
|
usage = result.get("usage", {"input_tokens": 0, "output_tokens": 0})
|
||||||
content = result["content"].strip()
|
content = result["content"].strip()
|
||||||
|
|
||||||
# Parse the JSON plan from the model output
|
# Parse the JSON plan from the model output
|
||||||
@@ -92,7 +93,7 @@ class PlannerAgent(BaseAgent):
|
|||||||
parsed.get("facts", [])
|
parsed.get("facts", [])
|
||||||
)
|
)
|
||||||
|
|
||||||
return steps
|
return steps, usage
|
||||||
|
|
||||||
except (json.JSONDecodeError, KeyError) as e:
|
except (json.JSONDecodeError, KeyError) as e:
|
||||||
logger.warning("Failed to parse planner output: %s", e)
|
logger.warning("Failed to parse planner output: %s", e)
|
||||||
@@ -104,4 +105,4 @@ class PlannerAgent(BaseAgent):
|
|||||||
else "Execute task",
|
else "Execute task",
|
||||||
agent_role="coder",
|
agent_role="coder",
|
||||||
)
|
)
|
||||||
]
|
], usage
|
||||||
|
|||||||
@@ -115,9 +115,10 @@ class OrchestratorEngine:
|
|||||||
|
|
||||||
# 2. Plan
|
# 2. Plan
|
||||||
task.status = TaskStatus.PLANNING
|
task.status = TaskStatus.PLANNING
|
||||||
|
planner_usage: dict[str, int] = {"input_tokens": 0, "output_tokens": 0}
|
||||||
try:
|
try:
|
||||||
planner = self._create_agent(AgentRole.PLANNER)
|
planner = self._create_agent(AgentRole.PLANNER)
|
||||||
plan_steps = await planner.plan(session)
|
plan_steps, planner_usage = await planner.plan(session)
|
||||||
task.plan = plan_steps
|
task.plan = plan_steps
|
||||||
task.status = TaskStatus.EXECUTING
|
task.status = TaskStatus.EXECUTING
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -234,6 +235,21 @@ class OrchestratorEngine:
|
|||||||
session_id=session.session_id,
|
session_id=session.session_id,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Accumulate token usage: planner + all steps + review
|
||||||
|
total_input = planner_usage.get("input_tokens", 0)
|
||||||
|
total_output = planner_usage.get("output_tokens", 0)
|
||||||
|
for r in results:
|
||||||
|
total_input += r.get("usage", {}).get("input_tokens", 0)
|
||||||
|
total_output += r.get("usage", {}).get("output_tokens", 0)
|
||||||
|
# Add review usage if any
|
||||||
|
total_input += review_result.get("usage", {}).get("input_tokens", 0)
|
||||||
|
total_output += review_result.get("usage", {}).get("output_tokens", 0)
|
||||||
|
# Calculate cost
|
||||||
|
cost_usd = (
|
||||||
|
(total_input / 1_000_000) * settings.cost_per_1m_input
|
||||||
|
+ (total_output / 1_000_000) * settings.cost_per_1m_output
|
||||||
|
)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"session_id": session.session_id,
|
"session_id": session.session_id,
|
||||||
"task_id": task.task_id,
|
"task_id": task.task_id,
|
||||||
@@ -245,6 +261,11 @@ class OrchestratorEngine:
|
|||||||
),
|
),
|
||||||
"review": review_result.get("content", ""),
|
"review": review_result.get("content", ""),
|
||||||
"status": status,
|
"status": status,
|
||||||
|
"usage": {
|
||||||
|
"input_tokens": total_input,
|
||||||
|
"output_tokens": total_output,
|
||||||
|
},
|
||||||
|
"total_cost_usd": round(cost_usd, 6),
|
||||||
}
|
}
|
||||||
|
|
||||||
def _error_result(self, session: SessionState, error: str) -> dict[str, Any]:
|
def _error_result(self, session: SessionState, error: str) -> dict[str, Any]:
|
||||||
|
|||||||
Reference in New Issue
Block a user