This commit is contained in:
Jordan Diaz
2026-04-25 10:27:51 +00:00
parent e84a36c83d
commit 6881d64a08
42 changed files with 3207 additions and 3413 deletions

View File

@@ -558,6 +558,22 @@ async def _load_knowledge_from_dir(docs_path: str = "docs") -> dict[str, Any]:
embeddings = [None] * len(docs_data)
has_embeddings = False
# Limpia entradas huérfanas: docs que ya no existen en el filesystem.
# Sin esto, los IDs antiguos (e.g. tras renombrar 'builder-fields' →
# '01-builder-fields') quedarían en Redis y aparecerían en el ranking.
current_ids = {d[0] for d in docs_data}
existing_docs = await memory.list_documents(namespace="knowledge")
removed = []
for existing in existing_docs:
if existing.memory_id not in current_ids:
await memory.delete_document(existing.memory_id, namespace="knowledge")
# Borra también el embedding asociado
embed_key = memory._key("embeddings", "knowledge", existing.memory_id)
await memory._r.delete(embed_key)
removed.append(existing.memory_id)
if removed:
logger.info("Removed %d stale knowledge docs: %s", len(removed), removed)
# Store docs + embeddings
loaded = []
for i, (doc_id, title, content, summary, tags) in enumerate(docs_data):
@@ -587,6 +603,7 @@ async def _load_knowledge_from_dir(docs_path: str = "docs") -> dict[str, Any]:
return {
"status": "loaded",
"count": len(loaded),
"removed": removed,
"embeddings": has_embeddings,
"documents": loaded,
}
@@ -641,6 +658,109 @@ async def delete_knowledge(doc_id: str) -> dict[str, str]:
return {"status": "deleted", "id": doc_id}
def _list_doc_sections(content: str) -> list[str]:
"""Lista los headings H2 (## ...) de un doc markdown."""
sections = []
for line in content.splitlines():
stripped = line.lstrip()
# Solo H2 — exactamente "## " y no "### "
if stripped.startswith("## ") and not stripped.startswith("### "):
sections.append(stripped[3:].strip())
return sections
def _extract_doc_section(content: str, section_query: str) -> str | None:
"""Extrae una sección por heading H2. Match case-insensitive, parcial.
Devuelve el bloque desde el `## heading` hasta el siguiente `## ` (o EOF).
"""
if not section_query:
return None
section_lower = section_query.lower().strip()
captured: list[str] = []
capture = False
for line in content.splitlines():
stripped = line.lstrip()
is_h2 = stripped.startswith("## ") and not stripped.startswith("### ")
if is_h2:
heading = stripped[3:].strip()
if capture:
# Llegamos al siguiente H2 — paramos
break
if section_lower in heading.lower():
capture = True
captured.append(line)
continue
if capture:
captured.append(line)
if captured:
return "\n".join(captured).rstrip()
return None
@router.get("/knowledge/{doc_id}")
async def read_knowledge(
    doc_id: str,
    section: str | None = None,
) -> dict[str, Any]:
    """Read one document from the knowledge base.

    Without ``section`` the full content is returned. With ``section`` the
    first H2 whose title contains it (case-insensitive, partial match) is
    returned, up to the next H2. When the requested section does not exist,
    the response carries ``available_sections`` so the client can retry
    with a valid heading name.
    """
    memory = _deps.get("memory_store")
    if not memory:
        raise HTTPException(status_code=501, detail="Memory store not available")

    doc = await memory.get_document(doc_id, namespace="knowledge")
    if not doc:
        raise HTTPException(
            status_code=404,
            detail=f"Document '{doc_id}' not found in knowledge base",
        )

    headings = _list_doc_sections(doc.content)

    if not section:
        # No section filter: full-document response.
        return {
            "id": doc.memory_id,
            "title": doc.title,
            "section": None,
            "section_found": True,
            "chars": len(doc.content),
            "available_sections": headings,
            "content": doc.content,
        }

    excerpt = _extract_doc_section(doc.content, section)
    if excerpt is None:
        # Unknown section: report the valid headings so the caller can retry.
        return {
            "id": doc.memory_id,
            "title": doc.title,
            "section_requested": section,
            "section_found": False,
            "available_sections": headings,
            "content": "",
            "chars": 0,
        }

    return {
        "id": doc.memory_id,
        "title": doc.title,
        "section": section,
        "section_found": True,
        "chars": len(excerpt),
        "content": excerpt,
    }
# ------------------------------------------------------------------
# MCP Management
# ------------------------------------------------------------------