ajustes
This commit is contained in:
@@ -558,6 +558,22 @@ async def _load_knowledge_from_dir(docs_path: str = "docs") -> dict[str, Any]:
|
||||
embeddings = [None] * len(docs_data)
|
||||
has_embeddings = False
|
||||
|
||||
# Limpia entradas huérfanas: docs que ya no existen en el filesystem.
|
||||
# Sin esto, los IDs antiguos (e.g. tras renombrar 'builder-fields' →
|
||||
# '01-builder-fields') quedarían en Redis y aparecerían en el ranking.
|
||||
current_ids = {d[0] for d in docs_data}
|
||||
existing_docs = await memory.list_documents(namespace="knowledge")
|
||||
removed = []
|
||||
for existing in existing_docs:
|
||||
if existing.memory_id not in current_ids:
|
||||
await memory.delete_document(existing.memory_id, namespace="knowledge")
|
||||
# Borra también el embedding asociado
|
||||
embed_key = memory._key("embeddings", "knowledge", existing.memory_id)
|
||||
await memory._r.delete(embed_key)
|
||||
removed.append(existing.memory_id)
|
||||
if removed:
|
||||
logger.info("Removed %d stale knowledge docs: %s", len(removed), removed)
|
||||
|
||||
# Store docs + embeddings
|
||||
loaded = []
|
||||
for i, (doc_id, title, content, summary, tags) in enumerate(docs_data):
|
||||
@@ -587,6 +603,7 @@ async def _load_knowledge_from_dir(docs_path: str = "docs") -> dict[str, Any]:
|
||||
return {
|
||||
"status": "loaded",
|
||||
"count": len(loaded),
|
||||
"removed": removed,
|
||||
"embeddings": has_embeddings,
|
||||
"documents": loaded,
|
||||
}
|
||||
@@ -641,6 +658,109 @@ async def delete_knowledge(doc_id: str) -> dict[str, str]:
|
||||
return {"status": "deleted", "id": doc_id}
|
||||
|
||||
|
||||
def _list_doc_sections(content: str) -> list[str]:
|
||||
"""Lista los headings H2 (## ...) de un doc markdown."""
|
||||
sections = []
|
||||
for line in content.splitlines():
|
||||
stripped = line.lstrip()
|
||||
# Solo H2 — exactamente "## " y no "### "
|
||||
if stripped.startswith("## ") and not stripped.startswith("### "):
|
||||
sections.append(stripped[3:].strip())
|
||||
return sections
|
||||
|
||||
|
||||
def _extract_doc_section(content: str, section_query: str) -> str | None:
|
||||
"""Extrae una sección por heading H2. Match case-insensitive, parcial.
|
||||
|
||||
Devuelve el bloque desde el `## heading` hasta el siguiente `## ` (o EOF).
|
||||
"""
|
||||
if not section_query:
|
||||
return None
|
||||
|
||||
section_lower = section_query.lower().strip()
|
||||
captured: list[str] = []
|
||||
capture = False
|
||||
|
||||
for line in content.splitlines():
|
||||
stripped = line.lstrip()
|
||||
is_h2 = stripped.startswith("## ") and not stripped.startswith("### ")
|
||||
|
||||
if is_h2:
|
||||
heading = stripped[3:].strip()
|
||||
if capture:
|
||||
# Llegamos al siguiente H2 — paramos
|
||||
break
|
||||
if section_lower in heading.lower():
|
||||
capture = True
|
||||
captured.append(line)
|
||||
continue
|
||||
|
||||
if capture:
|
||||
captured.append(line)
|
||||
|
||||
if captured:
|
||||
return "\n".join(captured).rstrip()
|
||||
return None
|
||||
|
||||
|
||||
@router.get("/knowledge/{doc_id}")
async def read_knowledge(
    doc_id: str,
    section: str | None = None,
) -> dict[str, Any]:
    """Read a knowledge-base document, optionally a single H2 section.

    - Without `section`: returns the full content plus the list of
      available H2 sections.
    - With `section`: returns the block produced by `_extract_doc_section`
      for that query.
    - If the section is not found: returns `section_found: False`, empty
      content and `available_sections`, so the client can retry with a
      valid name.

    Raises:
        HTTPException: 501 when no memory store is registered in `_deps`;
            404 when the document is not found in the "knowledge" namespace.
    """
    store = _deps.get("memory_store")
    if not store:
        raise HTTPException(status_code=501, detail="Memory store not available")

    document = await store.get_document(doc_id, namespace="knowledge")
    if not document:
        not_found = f"Document '{doc_id}' not found in knowledge base"
        raise HTTPException(status_code=404, detail=not_found)

    headings = _list_doc_sections(document.content)
    # Fields shared by every response shape; they always come first.
    identity = {"id": document.memory_id, "title": document.title}

    # No section requested: hand back the whole document.
    if not section:
        return {
            **identity,
            "section": None,
            "section_found": True,
            "chars": len(document.content),
            "available_sections": headings,
            "content": document.content,
        }

    excerpt = _extract_doc_section(document.content, section)
    if excerpt is not None:
        return {
            **identity,
            "section": section,
            "section_found": True,
            "chars": len(excerpt),
            "content": excerpt,
        }

    # Requested section is missing: report which sections do exist.
    return {
        **identity,
        "section_requested": section,
        "section_found": False,
        "available_sections": headings,
        "content": "",
        "chars": 0,
    }
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# MCP Management
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
Reference in New Issue
Block a user