Initial commit

2026-04-01 23:16:45 +01:00
commit 91cfdaee72
200 changed files with 25589 additions and 0 deletions
--- a/mcp-server/tools/orchestrator/detector.js
+++ b/mcp-server/tools/orchestrator/detector.js
@@ -0,0 +1,345 @@
+/**
+ * Workflow auto-detection engine.
+ * Keyword-based pattern matching with weighted scoring + contextual adjustments.
+ * No LLM call needed — fast and deterministic.
+ */
+
+/**
+ * Detection table: one entry per workflow id.
+ *
+ * - `keywords`: phrases that identify the workflow; every phrase found in the
+ *   task adds `weight` points to that workflow's score.
+ * - `boost`: supporting terms; every term found adds a flat 3 points.
+ * - `weight`: points awarded per keyword hit.
+ *
+ * Phrases are matched as substrings of the normalized task text (lowercased,
+ * accents removed), which is why the Spanish entries are written without accents.
+ */
+const WORKFLOW_PATTERNS = {
+    create_section: {
+        keywords: [
+            "crear seccion", "create section", "nueva seccion", "new section",
+            "anadir seccion", "add section", "crear tabla", "create table",
+            "nueva pagina", "new page", "nueva seccion web", "new web section",
+            "montar seccion", "set up section", "configurar seccion",
+            // Additional English patterns
+            "build section", "build page", "make section", "make page",
+            "set up page", "create page", "new table",
+            "section for", "seccion de", "seccion para",
+            // Natural phrasing
+            "want section", "need section", "quiero seccion",
+            "necesito seccion", "hacer seccion", "hacer pagina"
+        ],
+        boost: [
+            "categoria", "category", "productos", "products", "blog", "noticias",
+            "news", "equipo", "team", "servicios", "services", "galeria", "gallery",
+            "portfolio", "testimonios", "testimonials", "faq", "preguntas",
+            "clientes", "clients", "proyectos", "projects",
+            "restaurante", "restaurant", "tienda", "store", "shop",
+            "eventos", "events", "cursos", "courses"
+        ],
+        weight: 10
+    },
+    populate_content: {
+        keywords: [
+            "anadir contenido", "add content", "crear registros", "create records",
+            "poblar", "populate", "rellenar", "fill", "bulk", "masivo",
+            "insertar datos", "insert data", "meter datos", "cargar contenido",
+            "load content", "contenido de ejemplo", "sample content",
+            "crear entradas", "create entries", "anadir registros", "add records",
+            "registros de ejemplo", "sample records", "meter registros",
+            "fill with data", "fill with content", "add sample", "add examples",
+            "anadir ejemplos", "contenido de prueba", "test content"
+        ],
+        boost: [
+            "imagenes", "images", "fotos", "photos", "stock", "ejemplo", "sample",
+            "demo", "placeholder", "varios", "multiple", "lote", "batch"
+        ],
+        weight: 10
+    },
+    create_module: {
+        keywords: [
+            "crear modulo", "create module", "nuevo modulo", "new module",
+            "disenar modulo", "design module", "hacer modulo", "make module",
+            "componente", "component", "crear componente", "create component",
+            "nuevo componente", "new component", "montar modulo",
+            "build module", "build component", "make component"
+        ],
+        boost: [
+            "hero", "slider", "card", "grid", "lista", "list", "banner",
+            "footer", "header", "navbar", "cta", "call to action",
+            "carousel", "accordion", "tabs", "pricing", "features"
+        ],
+        weight: 10
+    },
+    edit_module: {
+        keywords: [
+            "editar modulo", "edit module", "modificar modulo", "modify module",
+            "cambiar modulo", "change module", "actualizar modulo", "update module",
+            "arreglar modulo", "fix module", "mejorar modulo", "improve module",
+            "corregir modulo", "ajustar modulo", "adjust module"
+        ],
+        boost: [
+            "css", "html", "javascript", "js", "estilo", "style", "variable",
+            "campo", "field", "diseno", "design", "responsive", "movil", "mobile",
+            "color", "fuente", "font", "espaciado", "spacing",
+            "hero", "slider", "card", "grid", "banner", "footer", "header",
+            "navbar", "cta", "carousel", "accordion", "tabs", "pricing"
+        ],
+        weight: 10
+    },
+    manage_records: {
+        keywords: [
+            "editar registro", "edit record", "actualizar registro", "update record",
+            "borrar registro", "delete record", "buscar registro", "search record",
+            "listar registros", "list records", "modificar registro", "modify record",
+            "ver registros", "view records", "consultar registros", "query records",
+            "cambiar datos", "change data", "eliminar registro", "remove record",
+            // CRUD-oriented English patterns
+            "update data", "delete data", "edit data", "modify data",
+            "update field", "change field", "edit entry", "delete entry",
+            "update price", "change price", "update name", "change name",
+            "remove records", "remove entries", "crud",
+            "insert record", "insert entry", "create record", "add entry",
+            "find record", "find records", "search records", "search data"
+        ],
+        boost: [
+            "filtrar", "filter", "where", "campo", "field", "valor", "value",
+            "pagina", "page", "ordenar", "sort", "buscar", "search",
+            "precio", "price", "nombre", "name", "fecha", "date",
+            "estado", "status", "activo", "active"
+        ],
+        weight: 8
+    },
+    manage_media: {
+        // Only specific action phrases — generic words like "image/foto" are in boost, not keywords
+        keywords: [
+            "subir imagen", "upload image", "subir foto", "upload photo",
+            "buscar imagen stock", "search stock image", "buscar fotos stock",
+            "generar imagen", "generate image", "generar foto",
+            "reemplazar imagen", "replace image", "cambiar imagen", "change image",
+            "borrar imagen", "delete image", "eliminar imagen", "remove image",
+            "gestionar media", "manage media", "gestionar imagenes", "manage images",
+            "buscar stock", "search stock", "stock photos", "fotos stock",
+            "subir archivo", "upload file"
+        ],
+        boost: [
+            "stock", "pixabay", "pexels", "ai", "inteligencia artificial",
+            "resize", "thumbnail", "miniatura", "s3", "assets",
+            "comprimir", "compress", "optimizar", "optimize",
+            // Generic image words are boosts, NOT keywords
+            "imagen", "image", "foto", "photo", "galeria", "gallery", "media"
+        ],
+        weight: 5  // Reduced from 8 — media is usually a step, not a workflow
+    },
+    seo_setup: {
+        keywords: [
+            "seo", "meta tags", "meta descripcion", "meta description",
+            "enlace", "slug", "url amigable", "friendly url", "sitemap",
+            "schema markup", "posicionamiento", "ranking",
+            "meta titulo", "meta title", "configurar seo", "setup seo",
+            "set up seo", "configure seo"
+        ],
+        boost: [
+            "google", "keywords", "palabras clave", "busqueda", "search",
+            "indexar", "index", "robots", "canonical", "og:image"
+        ],
+        weight: 6
+    },
+    explore_site: {
+        keywords: [
+            "explorar", "explore", "que tiene", "what's in", "listar todo",
+            "list all", "mostrar", "show me", "overview", "resumen",
+            "que hay", "que secciones", "what sections", "ver todo",
+            "show everything", "estructura", "structure", "inventario",
+            "mapa del sitio", "site map", "what modules", "que modulos"
+        ],
+        boost: [
+            "estructura", "structure", "mapa", "map", "resumen", "summary",
+            "completo", "complete", "todas", "all"
+        ],
+        weight: 5
+    }
+};
+
+/**
+ * Normalize text for matching: lowercase, remove accents, strip common articles, trim.
+ */
+function normalizeText(text) {
+    return text
+        .toLowerCase()
+        .normalize("NFD")
+        .replace(/[\u0300-\u036f]/g, "")
+        .trim();
+}
+
+/**
+ * Prepare task text for matching: normalize + strip common filler words (articles, prepositions)
+ * that break keyword matching (e.g., "editar el módulo" should match "editar módulo").
+ */
+function prepareTaskForMatching(text) {
+    const normalized = normalizeText(text);
+    // Strip common Spanish/English articles and short prepositions that break adjacent keyword matching
+    return normalized.replace(/\b(el|la|los|las|un|una|unos|unas|del|al|the|a|an)\b/g, " ").replace(/\s+/g, " ").trim();
+}
+
+// ── Contextual adjustment patterns ──────────────────────────────────────────
+// These use regex word matching to detect intent combinations that substring
+// matching misses (e.g., "create a new products section" has words separated).
+
+const CREATION_VERBS = /\b(crear|create|nueva?o?|new|build|make|set up|montar|anadir|add|disenar|design|hacer)\b/;
+const EDIT_VERBS = /\b(editar|edit|modificar|modify|cambiar|change|actualizar|update|arreglar|fix|mejorar|improve|ajustar|adjust|corregir)\b/;
+const CRUD_VERBS = /\b(editar|edit|borrar|delete|eliminar|remove|actualizar|update|crear|create|insertar|insert|modificar|modify|buscar|search|listar|list|consultar|query|cambiar|change|find|get|ver|view)\b/;
+const SECTION_WORDS = /\b(seccion|section|pagina|page|tabla|table|web|sitio|site)\b/;
+const MODULE_WORDS = /\b(modulo|module|componente|component)\b/;
+const RECORD_WORDS = /\b(registro|registros|record|records|datos|data|entrada|entradas|entry|entries|contenido|content|precio|price|campo|field)\b/;
+const MEDIA_ONLY_WORDS = /\b(subir|upload|reemplazar|replace|descargar|download)\b/;
+const IMAGE_WORDS = /\b(imagen|imagenes|image|images|foto|fotos|photo|photos|galeria|gallery)\b/;
+// Words that indicate the task is about content/records, not creating a new section
+const CONTENT_INTENT_WORDS = /\b(contenido|content|rellenar|fill|poblar|populate|registros|records|sample|ejemplo|articulos|articles|entradas|entries|anadir contenido|add content)\b/;
+// Words that indicate the task is about SEO, not creating a new section
+const SEO_INTENT_WORDS = /\b(seo|meta tags?|meta descripcion|meta description|meta titulo|meta title|sitemap|slug|posicionamiento|ranking|canonical)\b/;
+
+/**
+ * Post-scoring contextual adjustments.
+ * Uses regex word matching (not substring) to detect intent patterns the keyword
+ * phase may miss due to non-adjacent words.
+ */
+function applyContextAdjustments(scores, normalizedTask) {
+    const hasCreationVerb = CREATION_VERBS.test(normalizedTask);
+    const hasEditVerb = EDIT_VERBS.test(normalizedTask);
+    const hasCrudVerb = CRUD_VERBS.test(normalizedTask);
+    const hasSection = SECTION_WORDS.test(normalizedTask);
+    const hasModule = MODULE_WORDS.test(normalizedTask);
+    const hasRecord = RECORD_WORDS.test(normalizedTask);
+    const hasMediaAction = MEDIA_ONLY_WORDS.test(normalizedTask);
+    const hasImageWord = IMAGE_WORDS.test(normalizedTask);
+    const hasContentIntent = CONTENT_INTENT_WORDS.test(normalizedTask);
+    const hasSeoIntent = SEO_INTENT_WORDS.test(normalizedTask);
+
+    // ── Section creation intent ──
+    // "create" + "section/page/table" = strong signal for create_section
+    // BUT NOT when the real intent is populating content or configuring SEO
+    if (hasCreationVerb && hasSection && !hasContentIntent && !hasSeoIntent) {
+        scores.create_section = scores.create_section || { score: 0, keywordHits: 0, boostHits: 0 };
+        scores.create_section.score += 20;
+    }
+
+    // ── Module creation intent ──
+    // "create/new" + "module/component" = strong signal for create_module
+    if (hasCreationVerb && hasModule) {
+        scores.create_module = scores.create_module || { score: 0, keywordHits: 0, boostHits: 0 };
+        scores.create_module.score += 20;
+    }
+
+    // ── Module edit intent ──
+    // "edit/modify/change" + "module/component" = strong signal for edit_module
+    if (hasEditVerb && hasModule) {
+        scores.edit_module = scores.edit_module || { score: 0, keywordHits: 0, boostHits: 0 };
+        scores.edit_module.score += 20;
+    }
+
+    // ── Decisive create vs edit for modules ──
+    // When both create_module and edit_module have scores, apply decisive differentiation
+    if (hasModule && scores.create_module && scores.edit_module) {
+        if (hasCreationVerb && !hasEditVerb) {
+            // Clearly creation intent → penalize edit
+            scores.edit_module.score = Math.max(0, scores.edit_module.score - 15);
+        } else if (hasEditVerb && !hasCreationVerb) {
+            // Clearly edit intent → penalize create
+            scores.create_module.score = Math.max(0, scores.create_module.score - 15);
+        }
+    }
+
+    // ── Record CRUD intent ──
+    // Any CRUD verb + "record/data/entry" = signal for manage_records
+    if (hasCrudVerb && hasRecord) {
+        scores.manage_records = scores.manage_records || { score: 0, keywordHits: 0, boostHits: 0 };
+        scores.manage_records.score += 15;
+    }
+
+    // ── Penalize manage_media when context is clearly about something else ──
+    // If the task mentions section/module/record context, media is a step not the workflow
+    if (scores.manage_media && (hasSection || hasModule || hasRecord)) {
+        // Only keep media score if there's an explicit media action verb ("upload", "replace")
+        if (!hasMediaAction) {
+            scores.manage_media.score = Math.max(0, Math.floor(scores.manage_media.score * 0.3));
+        }
+    }
+
+    // ── Boost manage_media only when it's the clear primary intent ──
+    // "upload/replace" + "image/photo" WITHOUT section/module/record context
+    if (hasMediaAction && hasImageWord && !hasSection && !hasModule && !hasRecord) {
+        scores.manage_media = scores.manage_media || { score: 0, keywordHits: 0, boostHits: 0 };
+        scores.manage_media.score += 10;
+    }
+}
+
+/**
+ * Detect the best workflow for a given task description.
+ * Returns the top match with confidence, or suggestions if ambiguous.
+ *
+ * @param {string} task - The user's task description
+ * @returns {{ workflow: string, confidence: number, alternatives: Array }}
+ */
+export function detectWorkflow(task) {
+    const normalizedTask = prepareTaskForMatching(task);
+    const scores = {};
+
+    // ── Phase 1: Keyword + boost scoring ──
+    for (const [workflowId, pattern] of Object.entries(WORKFLOW_PATTERNS)) {
+        let score = 0;
+        let keywordHits = 0;
+        let boostHits = 0;
+
+        // Check keyword matches
+        for (const keyword of pattern.keywords) {
+            if (normalizedTask.includes(normalizeText(keyword))) {
+                keywordHits++;
+            }
+        }
+
+        // Check boost matches
+        for (const boost of pattern.boost) {
+            if (normalizedTask.includes(normalizeText(boost))) {
+                boostHits++;
+            }
+        }
+
+        score = (keywordHits * pattern.weight) + (boostHits * 3);
+        scores[workflowId] = { score, keywordHits, boostHits };
+    }
+
+    // ── Phase 2: Contextual adjustments ──
+    // Uses regex word matching to catch intent patterns that substring matching misses
+    applyContextAdjustments(scores, normalizedTask);
+
+    // Sort by score descending
+    const ranked = Object.entries(scores)
+        .filter(([, data]) => data.score > 0)
+        .sort(([, a], [, b]) => b.score - a.score);
+
+    if (ranked.length === 0) {
+        return {
+            workflow: null,
+            confidence: 0,
+            alternatives: []
+        };
+    }
+
+    const [topId, topData] = ranked[0];
+    const maxPossibleScore = WORKFLOW_PATTERNS[topId].keywords.length * WORKFLOW_PATTERNS[topId].weight
+        + WORKFLOW_PATTERNS[topId].boost.length * 3;
+    const confidence = Math.min(topData.score / Math.max(maxPossibleScore * 0.15, 1), 1);
+
+    // Check if top 2 are close (ambiguous)
+    const alternatives = ranked.slice(1, 3).map(([id, data]) => ({
+        workflow: id,
+        score: data.score,
+        confidence: Math.min(data.score / Math.max(
+            WORKFLOW_PATTERNS[id].keywords.length * WORKFLOW_PATTERNS[id].weight * 0.15, 1
+        ), 1)
+    }));
+
+    const isAmbiguous = alternatives.length > 0
+        && alternatives[0].score > 0
+        && (topData.score - alternatives[0].score) < (topData.score * 0.2);
+
+    return {
+        workflow: topId,
+        confidence: Math.round(confidence * 100) / 100,
+        ambiguous: isAmbiguous,
+        alternatives
+    };
+}
+
+export { WORKFLOW_PATTERNS };