Auto-load knowledge base al arrancar el servicio

Se extrajo la lógica de carga a la función reutilizable _load_knowledge_from_dir().
Se llama automáticamente en el lifespan, después de set_dependencies().
Si la carga lanza una excepción, solo se registra un warning; no bloquea el arranque.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Jordan Diaz
2026-04-04 09:10:49 +00:00
parent f17be543ee
commit bc6ad3bcec
2 changed files with 26 additions and 10 deletions

View File

@@ -326,22 +326,18 @@ class LoadKnowledgeRequest(BaseModel):
docs_path: str = "docs" docs_path: str = "docs"
@router.post("/knowledge/load") async def _load_knowledge_from_dir(docs_path: str = "docs") -> dict[str, Any]:
async def load_knowledge(body: LoadKnowledgeRequest) -> dict[str, Any]: """Load knowledge docs from directory. Used by endpoint and startup."""
"""Load markdown docs from a directory into the knowledge base.
Generates embeddings for semantic search via OpenAI text-embedding-3-small.
"""
memory = _deps.get("memory_store") memory = _deps.get("memory_store")
if not memory: if not memory:
raise HTTPException(status_code=501, detail="Memory store not available") return {"status": "error", "message": "Memory store not available"}
docs_dir = pathlib.Path(body.docs_path) docs_dir = pathlib.Path(docs_path)
if not docs_dir.is_absolute(): if not docs_dir.is_absolute():
docs_dir = pathlib.Path(__file__).resolve().parent.parent.parent / body.docs_path docs_dir = pathlib.Path(__file__).resolve().parent.parent.parent / docs_path
if not docs_dir.is_dir(): if not docs_dir.is_dir():
raise HTTPException(status_code=400, detail=f"Directory not found: {docs_dir}") return {"status": "error", "message": f"Directory not found: {docs_dir}"}
# Read all docs # Read all docs
docs_data: list[tuple[str, str, str, str, list[str]]] = [] # (id, title, content, summary, tags) docs_data: list[tuple[str, str, str, str, list[str]]] = [] # (id, title, content, summary, tags)
@@ -415,6 +411,18 @@ async def load_knowledge(body: LoadKnowledgeRequest) -> dict[str, Any]:
} }
@router.post("/knowledge/load")
async def load_knowledge(body: LoadKnowledgeRequest) -> dict[str, Any]:
    """Load markdown docs from a directory into the knowledge base.

    Thin HTTP wrapper around ``_load_knowledge_from_dir``. The helper reports
    failures as ``{"status": "error", "message": ...}`` dicts instead of
    raising, so this endpoint translates them into HTTP errors.

    Per the original docstring, loading generates embeddings for semantic
    search via OpenAI text-embedding-3-small.

    Args:
        body: Request payload; ``body.docs_path`` is the directory to load.

    Returns:
        The helper's result dict on success.

    Raises:
        HTTPException: 400 when the docs directory does not exist (a client
            input error), 501 for any other reported failure such as the
            memory store not being available.
    """
    result = await _load_knowledge_from_dir(body.docs_path)
    if result.get("status") == "error":
        # Read defensively: an error dict without "message" must not turn
        # into an unhandled KeyError (HTTP 500).
        message = result.get("message", "Knowledge load failed")
        # Before the helper was extracted, a missing directory was a 400 and
        # only the missing memory store was a 501. The helper exposes just a
        # message, so the directory case is recognised by its fixed prefix.
        status_code = 400 if str(message).startswith("Directory not found") else 501
        raise HTTPException(status_code=status_code, detail=message)
    return result
@router.get("/knowledge") @router.get("/knowledge")
async def list_knowledge() -> dict[str, Any]: async def list_knowledge() -> dict[str, Any]:
"""List all documents in the knowledge base.""" """List all documents in the knowledge base."""

View File

@@ -95,6 +95,14 @@ async def lifespan(app: FastAPI):
mcp_registry=mcp_registry, mcp_registry=mcp_registry,
) )
# 7. Auto-load knowledge base
from .api.routes import _load_knowledge_from_dir
try:
kb_result = await _load_knowledge_from_dir("docs")
logger.info("Knowledge auto-loaded: %d docs, embeddings=%s", kb_result.get("count", 0), kb_result.get("embeddings", False))
except Exception as e:
logger.warning("Failed to auto-load knowledge: %s", e)
logger.info("All systems initialized. Serving on %s:%d", settings.host, settings.port) logger.info("All systems initialized. Serving on %s:%d", settings.host, settings.port)
yield yield