Fix problemas detectados en evaluación: historial, prompting, artifacts

1. Task history preserva key_data estructurado (recordNums, sectionIds,
   moduleIds, pages) extraído de las tool executions reales — el modelo
   retiene contexto entre tasks sin re-consultar.

2. Coder system prompt mejorado: instrucciones explícitas sobre qué tool
   usar para cada operación (create_module vs create_or_update_record),
   consultar knowledge base antes de actuar, y reutilizar key_data del
   historial.

3. Eliminados artifact_memory y working_context de los context_sections
   del coder — ya no son necesarios con conversación real. Reduce la
   acumulación de artifacts en el contexto.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Jordan Diaz
2026-04-03 13:29:09 +00:00
parent 3aa7a463d0
commit 7bdb943e7f
3 changed files with 97 additions and 6 deletions

View File

@@ -296,12 +296,16 @@ class OrchestratorEngine:
for step in task.plan:
tools_used.update(step.tools_used)
# Extract key structured data from tool executions
key_data = self._extract_key_data_from_results(results)
history_entry = {
"task_id": task.task_id,
"objective": task.objective,
"status": task.status.value,
"steps": len(task.plan),
"facts": task.facts_extracted[-10:],
"key_data": key_data,
"tools_used": list(tools_used)[:10],
"artifacts_count": len(task_artifacts),
"summary": "; ".join(step_summaries)[:300],
@@ -327,6 +331,73 @@ class OrchestratorEngine:
task.task_id, len(task.facts_extracted), len(tools_used), len(task_artifacts),
)
@staticmethod
def _extract_key_data_from_results(results: list[dict[str, Any]]) -> dict[str, Any]:
    """Extract structured identifiers from tool executions for task history.

    Walks every entry of ``result["tool_executions"]`` and collects, in
    first-seen order and without duplicates:

    * ``tables``   -- ``tableName`` -> list of ``recordNum`` values (as ints)
    * ``sections`` -- ``sectionId`` values
    * ``modules``  -- ``moduleId`` (or, if empty, ``moduleName``) values
    * ``pages``    -- page ``name`` (falling back to ``enlace``) -> ``num``,
      parsed from JSON-object lines found in the execution's raw output

    Args:
        results: Step results; each may carry a ``"tool_executions"`` list
            whose items expose ``arguments`` and ``raw_output`` attributes.

    Returns:
        A dict containing only the non-empty categories above. Each table
        keeps at most 10 record numbers; sections, modules and pages keep
        at most 20 entries each.
    """
    # Function-scope import keeps the method self-contained regardless of
    # what the enclosing module imports.
    import json

    seen_tables: dict[str, list[int]] = {}  # tableName -> recordNums
    seen_sections: list[str] = []
    seen_modules: list[str] = []
    seen_pages: dict[str, int] = {}  # page name/url -> recordNum

    for result in results:
        for te in result.get("tool_executions", []):
            # Tolerate executions recorded without arguments.
            args = te.arguments or {}

            # Track table + record relationships.
            table = args.get("tableName", "")
            record = args.get("recordNum")
            if table and record:
                record_text = str(record)
                # isdecimal() only accepts characters int() can parse;
                # isdigit() also accepts e.g. superscripts, which make
                # int() raise ValueError.
                record_int = int(record_text) if record_text.isdecimal() else 0
                # Zero / unparsable record numbers are ignored.
                if record_int:
                    table_records = seen_tables.setdefault(table, [])
                    if record_int not in table_records:
                        table_records.append(record_int)

            # Track section IDs.
            section = args.get("sectionId", "")
            if section and section not in seen_sections:
                seen_sections.append(section)

            # Track modules.
            module = args.get("moduleId", "") or args.get("moduleName", "")
            if module and module not in seen_modules:
                seen_modules.append(module)

            # Extract page info (enlace, name, num) from the raw output.
            raw_output = te.raw_output
            if not isinstance(raw_output, str) or "enlace" not in raw_output:
                continue
            for line in raw_output.splitlines():
                line = line.strip()
                if not line.startswith("{"):
                    continue
                # Best effort, per line: a malformed line must not discard
                # the well-formed lines that follow it.
                try:
                    data = json.loads(line)
                    if "enlace" not in data or "num" not in data:
                        continue
                    page_num = int(data["num"])
                    page_key = data.get("name", data["enlace"])
                    seen_pages[page_key] = page_num
                except (ValueError, TypeError):
                    # ValueError covers json.JSONDecodeError and a
                    # non-numeric "num"; TypeError covers a non-scalar
                    # "num" or an unhashable page key.
                    continue

    key_data: dict[str, Any] = {}
    if seen_tables:
        key_data["tables"] = {t: nums[:10] for t, nums in seen_tables.items()}
    if seen_sections:
        key_data["sections"] = seen_sections[:20]
    if seen_modules:
        key_data["modules"] = seen_modules[:20]
    if seen_pages:
        key_data["pages"] = dict(list(seen_pages.items())[:20])
    return key_data
def _maybe_compact_previous_steps(
self, task: TaskState, current_index: int
) -> None: