This commit is contained in:
Jordan Diaz
2026-04-25 10:27:51 +00:00
parent e84a36c83d
commit 6881d64a08
42 changed files with 3207 additions and 3413 deletions

View File

@@ -558,6 +558,22 @@ async def _load_knowledge_from_dir(docs_path: str = "docs") -> dict[str, Any]:
embeddings = [None] * len(docs_data)
has_embeddings = False
# Limpia entradas huérfanas: docs que ya no existen en el filesystem.
# Sin esto, los IDs antiguos (e.g. tras renombrar 'builder-fields' →
# '01-builder-fields') quedarían en Redis y aparecerían en el ranking.
current_ids = {d[0] for d in docs_data}
existing_docs = await memory.list_documents(namespace="knowledge")
removed = []
for existing in existing_docs:
if existing.memory_id not in current_ids:
await memory.delete_document(existing.memory_id, namespace="knowledge")
# Borra también el embedding asociado
embed_key = memory._key("embeddings", "knowledge", existing.memory_id)
await memory._r.delete(embed_key)
removed.append(existing.memory_id)
if removed:
logger.info("Removed %d stale knowledge docs: %s", len(removed), removed)
# Store docs + embeddings
loaded = []
for i, (doc_id, title, content, summary, tags) in enumerate(docs_data):
@@ -587,6 +603,7 @@ async def _load_knowledge_from_dir(docs_path: str = "docs") -> dict[str, Any]:
return {
"status": "loaded",
"count": len(loaded),
"removed": removed,
"embeddings": has_embeddings,
"documents": loaded,
}
@@ -641,6 +658,109 @@ async def delete_knowledge(doc_id: str) -> dict[str, str]:
return {"status": "deleted", "id": doc_id}
def _list_doc_sections(content: str) -> list[str]:
"""Lista los headings H2 (## ...) de un doc markdown."""
sections = []
for line in content.splitlines():
stripped = line.lstrip()
# Solo H2 — exactamente "## " y no "### "
if stripped.startswith("## ") and not stripped.startswith("### "):
sections.append(stripped[3:].strip())
return sections
def _extract_doc_section(content: str, section_query: str) -> str | None:
"""Extrae una sección por heading H2. Match case-insensitive, parcial.
Devuelve el bloque desde el `## heading` hasta el siguiente `## ` (o EOF).
"""
if not section_query:
return None
section_lower = section_query.lower().strip()
captured: list[str] = []
capture = False
for line in content.splitlines():
stripped = line.lstrip()
is_h2 = stripped.startswith("## ") and not stripped.startswith("### ")
if is_h2:
heading = stripped[3:].strip()
if capture:
# Llegamos al siguiente H2 — paramos
break
if section_lower in heading.lower():
capture = True
captured.append(line)
continue
if capture:
captured.append(line)
if captured:
return "\n".join(captured).rstrip()
return None
@router.get("/knowledge/{doc_id}")
async def read_knowledge(
    doc_id: str,
    section: str | None = None,
) -> dict[str, Any]:
    """Read one document from the knowledge base.

    Without ``section`` the full content is returned. With ``section`` the
    first H2 whose title contains it (case-insensitive, partial match) is
    returned, up to the next H2. When the requested section does not exist,
    the response carries ``available_sections`` so the client can retry
    with a valid heading name.
    """
    memory = _deps.get("memory_store")
    if not memory:
        raise HTTPException(status_code=501, detail="Memory store not available")

    doc = await memory.get_document(doc_id, namespace="knowledge")
    if not doc:
        raise HTTPException(
            status_code=404,
            detail=f"Document '{doc_id}' not found in knowledge base",
        )

    headings = _list_doc_sections(doc.content)

    if not section:
        # No section filter: full-document response.
        return {
            "id": doc.memory_id,
            "title": doc.title,
            "section": None,
            "section_found": True,
            "chars": len(doc.content),
            "available_sections": headings,
            "content": doc.content,
        }

    excerpt = _extract_doc_section(doc.content, section)
    if excerpt is None:
        # Unknown section: report the valid headings so the caller can retry.
        return {
            "id": doc.memory_id,
            "title": doc.title,
            "section_requested": section,
            "section_found": False,
            "available_sections": headings,
            "content": "",
            "chars": 0,
        }

    return {
        "id": doc.memory_id,
        "title": doc.title,
        "section": section,
        "section_found": True,
        "chars": len(excerpt),
        "content": excerpt,
    }
# ------------------------------------------------------------------
# MCP Management
# ------------------------------------------------------------------