agenticSystem/mcp-server/tools/orchestrator/detector.js

/**
 * Workflow auto-detection engine.
 * Keyword-based pattern matching with weighted scoring + contextual adjustments.
 * No LLM call needed — fast and deterministic.
 */

/**
 * Pattern table for workflow detection, keyed by workflow id.
 *
 * Each entry holds:
 *  - keywords: phrases (Spanish and English, listed here without accents).
 *    detectWorkflow adds `weight` points for every keyword found in the task.
 *  - boost:    supporting terms; every one found in the task adds 3 points.
 *  - weight:   points awarded per keyword hit for this workflow.
 *
 * detectWorkflow matches entries with String.prototype.includes, i.e. as
 * substrings, so short entries also match inside longer words.
 */
const WORKFLOW_PATTERNS = {
    create_section: {
        keywords: [
            "crear seccion", "create section", "nueva seccion", "new section",
            "anadir seccion", "add section", "crear tabla", "create table",
            "nueva pagina", "new page", "nueva seccion web", "new web section",
            "montar seccion", "set up section", "configurar seccion",
            // Additional English patterns
            "build section", "build page", "make section", "make page",
            "set up page", "create page", "new table",
            "section for", "seccion de", "seccion para",
            // Natural phrasing
            "want section", "need section", "quiero seccion",
            "necesito seccion", "hacer seccion", "hacer pagina"
        ],
        boost: [
            "categoria", "category", "productos", "products", "blog", "noticias",
            "news", "equipo", "team", "servicios", "services", "galeria", "gallery",
            "portfolio", "testimonios", "testimonials", "faq", "preguntas",
            "clientes", "clients", "proyectos", "projects",
            "restaurante", "restaurant", "tienda", "store", "shop",
            "eventos", "events", "cursos", "courses"
        ],
        weight: 10
    },
    populate_content: {
        keywords: [
            "anadir contenido", "add content", "crear registros", "create records",
            "poblar", "populate", "rellenar", "fill", "bulk", "masivo",
            "insertar datos", "insert data", "meter datos", "cargar contenido",
            "load content", "contenido de ejemplo", "sample content",
            "crear entradas", "create entries", "anadir registros", "add records",
            "registros de ejemplo", "sample records", "meter registros",
            "fill with data", "fill with content", "add sample", "add examples",
            "anadir ejemplos", "contenido de prueba", "test content"
        ],
        boost: [
            "imagenes", "images", "fotos", "photos", "stock", "ejemplo", "sample",
            "demo", "placeholder", "varios", "multiple", "lote", "batch"
        ],
        weight: 10
    },
    create_module: {
        keywords: [
            "crear modulo", "create module", "nuevo modulo", "new module",
            "disenar modulo", "design module", "hacer modulo", "make module",
            "componente", "component", "crear componente", "create component",
            "nuevo componente", "new component", "montar modulo",
            "build module", "build component", "make component"
        ],
        boost: [
            "hero", "slider", "card", "grid", "lista", "list", "banner",
            "footer", "header", "navbar", "cta", "call to action",
            "carousel", "accordion", "tabs", "pricing", "features"
        ],
        weight: 10
    },
    edit_module: {
        keywords: [
            "editar modulo", "edit module", "modificar modulo", "modify module",
            "cambiar modulo", "change module", "actualizar modulo", "update module",
            "arreglar modulo", "fix module", "mejorar modulo", "improve module",
            "corregir modulo", "ajustar modulo", "adjust module"
        ],
        boost: [
            "css", "html", "javascript", "js", "estilo", "style", "variable",
            "campo", "field", "diseno", "design", "responsive", "movil", "mobile",
            "color", "fuente", "font", "espaciado", "spacing",
            "hero", "slider", "card", "grid", "banner", "footer", "header",
            "navbar", "cta", "carousel", "accordion", "tabs", "pricing"
        ],
        weight: 10
    },
    manage_records: {
        keywords: [
            "editar registro", "edit record", "actualizar registro", "update record",
            "borrar registro", "delete record", "buscar registro", "search record",
            "listar registros", "list records", "modificar registro", "modify record",
            "ver registros", "view records", "consultar registros", "query records",
            "cambiar datos", "change data", "eliminar registro", "remove record",
            // CRUD-oriented English patterns
            "update data", "delete data", "edit data", "modify data",
            "update field", "change field", "edit entry", "delete entry",
            "update price", "change price", "update name", "change name",
            "remove records", "remove entries", "crud",
            "insert record", "insert entry", "create record", "add entry",
            "find record", "find records", "search records", "search data"
        ],
        boost: [
            "filtrar", "filter", "where", "campo", "field", "valor", "value",
            "pagina", "page", "ordenar", "sort", "buscar", "search",
            "precio", "price", "nombre", "name", "fecha", "date",
            "estado", "status", "activo", "active"
        ],
        weight: 8
    },
    manage_media: {
        // Only specific action phrases — generic words like "image/foto" are in boost, not keywords
        keywords: [
            "subir imagen", "upload image", "subir foto", "upload photo",
            "buscar imagen stock", "search stock image", "buscar fotos stock",
            "generar imagen", "generate image", "generar foto",
            "reemplazar imagen", "replace image", "cambiar imagen", "change image",
            "borrar imagen", "delete image", "eliminar imagen", "remove image",
            "gestionar media", "manage media", "gestionar imagenes", "manage images",
            "buscar stock", "search stock", "stock photos", "fotos stock",
            "subir archivo", "upload file"
        ],
        boost: [
            "stock", "pixabay", "pexels", "ai", "inteligencia artificial",
            "resize", "thumbnail", "miniatura", "s3", "assets",
            "comprimir", "compress", "optimizar", "optimize",
            // Generic image words are boosts, NOT keywords
            "imagen", "image", "foto", "photo", "galeria", "gallery", "media"
        ],
        weight: 5  // Reduced from 8 — media is usually a step, not a workflow
    },
    seo_setup: {
        keywords: [
            "seo", "meta tags", "meta descripcion", "meta description",
            "enlace", "slug", "url amigable", "friendly url", "sitemap",
            "schema markup", "posicionamiento", "ranking",
            "meta titulo", "meta title", "configurar seo", "setup seo",
            "set up seo", "configure seo"
        ],
        boost: [
            "google", "keywords", "palabras clave", "busqueda", "search",
            "indexar", "index", "robots", "canonical", "og:image"
        ],
        weight: 6
    },
    explore_site: {
        keywords: [
            "explorar", "explore", "que tiene", "what's in", "listar todo",
            "list all", "mostrar", "show me", "overview", "resumen",
            "que hay", "que secciones", "what sections", "ver todo",
            "show everything", "estructura", "structure", "inventario",
            "mapa del sitio", "site map", "what modules", "que modulos"
        ],
        boost: [
            "estructura", "structure", "mapa", "map", "resumen", "summary",
            "completo", "complete", "todas", "all"
        ],
        weight: 5
    }
};

/**
 * Canonicalize text for accent- and case-insensitive comparison.
 *
 * Steps: lowercase, decompose to Unicode NFD, drop the combining diacritical
 * marks (U+0300–U+036F), then trim surrounding whitespace. Articles and other
 * filler words are left untouched here.
 *
 * @param {string} text - Raw text.
 * @returns {string} Lowercased, accent-free, trimmed text.
 */
function normalizeText(text) {
    const COMBINING_MARKS = /[\u0300-\u036f]/g;

    // NFD splits "ó" into "o" + U+0301 so the mark can be removed on its own.
    const decomposed = text.toLowerCase().normalize("NFD");
    const accentFree = decomposed.replace(COMBINING_MARKS, "");

    return accentFree.trim();
}

/**
 * Turn a task description into the form used for keyword matching.
 *
 * The text is first normalized via normalizeText, then standalone Spanish and
 * English filler words (articles and a few short prepositions) are blanked out
 * and whitespace runs are collapsed, so that e.g. "editar el módulo" becomes
 * "editar modulo" and can match the phrase "editar modulo".
 *
 * @param {string} text - Raw task text.
 * @returns {string} Normalized text without filler words, single-spaced.
 */
function prepareTaskForMatching(text) {
    const FILLER_WORDS = /\b(el|la|los|las|un|una|unos|unas|del|al|the|a|an)\b/g;

    // Replace with a space (not "") so the neighbouring words stay separated.
    const withoutFillers = normalizeText(text).replace(FILLER_WORDS, " ");

    return withoutFillers.replace(/\s+/g, " ").trim();
}

// ── Contextual adjustment patterns ──────────────────────────────────────────
// Whole-word regexes, meant to be tested against the normalized (lowercase,
// accent-free) task, that detect intent combinations substring matching misses
// (e.g., "create a new products section" has the words separated).
// None of them uses the `g` flag, so `.test()` keeps no state between calls.
// Noun patterns accept singular and plural forms so that "create new sections"
// or "editar modulos" are recognised just like their singular counterparts.

// Verbs/adjectives signalling creation ("nuevo", "nueva" and their plurals).
const CREATION_VERBS = /\b(crear|create|nuev[oa]s?|new|build|make|set up|montar|anadir|add|disenar|design|hacer)\b/;
// Verbs signalling a change to something that already exists.
const EDIT_VERBS = /\b(editar|edit|modificar|modify|cambiar|change|actualizar|update|arreglar|fix|mejorar|improve|ajustar|adjust|corregir)\b/;
// Create/read/update/delete style verbs used for record management.
const CRUD_VERBS = /\b(editar|edit|borrar|delete|eliminar|remove|actualizar|update|crear|create|insertar|insert|modificar|modify|buscar|search|listar|list|consultar|query|cambiar|change|find|get|ver|view)\b/;
// Nouns referring to a section/page/table/site.
const SECTION_WORDS = /\b(seccion(?:es)?|sections?|paginas?|pages?|tablas?|tables?|web|sitios?|sites?)\b/;
// Nouns referring to a module/component.
const MODULE_WORDS = /\b(modulos?|modules?|componentes?|components?)\b/;
// Nouns referring to records or their data.
const RECORD_WORDS = /\b(registro|registros|record|records|datos|data|entrada|entradas|entry|entries|contenido|content|precio|price|campo|field)\b/;
// Verbs that make media handling the explicit action.
const MEDIA_ONLY_WORDS = /\b(subir|upload|reemplazar|replace|descargar|download)\b/;
// Nouns referring to images.
const IMAGE_WORDS = /\b(imagen|imagenes|image|images|foto|fotos|photo|photos|galeria|gallery)\b/;
// Words that indicate the task is about content/records, not creating a new section
const CONTENT_INTENT_WORDS = /\b(contenido|content|rellenar|fill|poblar|populate|registros|records|sample|ejemplo|articulos|articles|entradas|entries|anadir contenido|add content)\b/;
// Words that indicate the task is about SEO, not creating a new section
const SEO_INTENT_WORDS = /\b(seo|meta tags?|meta descripcion|meta description|meta titulo|meta title|sitemap|slug|posicionamiento|ranking|canonical)\b/;

/**
 * Second scoring pass: adjusts per-workflow scores according to which
 * verb/noun families occur in the task as whole words (regex, not substring),
 * catching intent expressed with non-adjacent words.
 *
 * Mutates `scores` in place; missing entries are created on demand with zeroed
 * counters. Returns nothing.
 *
 * @param {Object<string, {score: number, keywordHits: number, boostHits: number}>} scores
 *        Score table keyed by workflow id.
 * @param {string} normalizedTask - Task text already prepared for matching.
 */
function applyContextAdjustments(scores, normalizedTask) {
    const mentions = (pattern) => pattern.test(normalizedTask);

    // Return the score entry of a workflow, creating a zeroed one when absent.
    const entryFor = (workflowId) => {
        if (!scores[workflowId]) {
            scores[workflowId] = { score: 0, keywordHits: 0, boostHits: 0 };
        }
        return scores[workflowId];
    };

    const creating = mentions(CREATION_VERBS);
    const editing = mentions(EDIT_VERBS);
    const crudAction = mentions(CRUD_VERBS);
    const aboutSection = mentions(SECTION_WORDS);
    const aboutModule = mentions(MODULE_WORDS);
    const aboutRecord = mentions(RECORD_WORDS);
    const mediaAction = mentions(MEDIA_ONLY_WORDS);
    const aboutImage = mentions(IMAGE_WORDS);
    const contentIntent = mentions(CONTENT_INTENT_WORDS);
    const seoIntent = mentions(SEO_INTENT_WORDS);

    // Media is only a step when the task also talks about a section, module or record.
    const hasOtherContext = aboutSection || aboutModule || aboutRecord;

    // ── Section creation ──
    // Creation verb + section noun is a strong signal, unless the task is
    // really about filling in content or configuring SEO.
    if (creating && aboutSection && !(contentIntent || seoIntent)) {
        entryFor("create_section").score += 20;
    }

    // ── Module creation / module edit ──
    if (aboutModule) {
        if (creating) {
            entryFor("create_module").score += 20;
        }
        if (editing) {
            entryFor("edit_module").score += 20;
        }

        // ── Decisive create vs edit ──
        // With both entries present and exactly one verb family in the task,
        // knock 15 points (floored at 0) off the workflow that lost.
        const createEntry = scores.create_module;
        const editEntry = scores.edit_module;
        if (createEntry && editEntry && creating !== editing) {
            const loser = creating ? editEntry : createEntry;
            loser.score = Math.max(0, loser.score - 15);
        }
    }

    // ── Record CRUD ──
    if (crudAction && aboutRecord) {
        entryFor("manage_records").score += 15;
    }

    // ── Media as a side step ──
    // Without an explicit media verb ("upload", "replace", ...) keep only 30%
    // of the media score when other context is present.
    const mediaEntry = scores.manage_media;
    if (mediaEntry && hasOtherContext && !mediaAction) {
        mediaEntry.score = Math.max(0, Math.floor(mediaEntry.score * 0.3));
    }

    // ── Media as the primary intent ──
    // Media verb + image noun with no section/module/record context.
    if (mediaAction && aboutImage && !hasOtherContext) {
        entryFor("manage_media").score += 10;
    }
}

/** Points added to a workflow's score for every boost term found in the task. */
const BOOST_POINTS = 3;

/** Share of a workflow's maximum possible score at which confidence reaches 1. */
const CONFIDENCE_SATURATION_RATIO = 0.15;

/**
 * Count how many phrases occur (as substrings) in the prepared task.
 *
 * Every phrase goes through the same preparation as the task itself; otherwise
 * a phrase containing a stripped filler word (e.g. "mapa del sitio") could
 * never match, because the task no longer contains that word.
 *
 * @param {string[]} phrases - Keyword or boost phrases.
 * @param {string} preparedTask - Task text returned by prepareTaskForMatching.
 * @returns {number} Number of phrases found.
 */
function countPhraseHits(phrases, preparedTask) {
    let hits = 0;
    for (const phrase of phrases) {
        const needle = prepareTaskForMatching(phrase);
        // A phrase made only of filler words prepares to "", which every string includes.
        if (needle !== "" && preparedTask.includes(needle)) {
            hits++;
        }
    }
    return hits;
}

/**
 * Convert a raw score into a confidence in [0, 1], rounded to two decimals.
 *
 * The score is compared against CONFIDENCE_SATURATION_RATIO of the workflow's
 * maximum possible score (all keywords and all boosts hit). The same formula is
 * used for the top match and for alternatives so the values are comparable.
 *
 * @param {string} workflowId - Key into WORKFLOW_PATTERNS.
 * @param {number} score - Final score of that workflow.
 * @returns {number} Confidence between 0 and 1.
 */
function confidenceForScore(workflowId, score) {
    const { keywords, boost, weight } = WORKFLOW_PATTERNS[workflowId];
    const maxPossibleScore = keywords.length * weight + boost.length * BOOST_POINTS;
    // Floor the threshold at 1 to avoid dividing by ~0 for tiny pattern sets.
    const threshold = Math.max(maxPossibleScore * CONFIDENCE_SATURATION_RATIO, 1);
    const confidence = Math.min(score / threshold, 1);
    return Math.round(confidence * 100) / 100;
}

/**
 * Detect the best workflow for a given task description.
 *
 * Phase 1 scores every workflow by keyword and boost hits; phase 2 lets
 * applyContextAdjustments tweak the scores. Workflows left with a score of 0
 * are discarded and the rest are ranked by score, highest first.
 *
 * @param {string} task - The user's task description. Non-string input is
 *        treated as "no match" instead of throwing.
 * @returns {{
 *   workflow: (string|null),
 *   confidence: number,
 *   ambiguous: boolean,
 *   alternatives: Array<{ workflow: string, score: number, confidence: number }>
 * }} Top match (`workflow` is null when nothing scored), its confidence in
 *    [0, 1], whether the runner-up is within 20% of the top score, and up to
 *    two runner-up workflows.
 */
export function detectWorkflow(task) {
    const noMatch = { workflow: null, confidence: 0, ambiguous: false, alternatives: [] };

    if (typeof task !== "string") {
        return noMatch;
    }

    const preparedTask = prepareTaskForMatching(task);
    const scores = {};

    // ── Phase 1: Keyword + boost scoring ──
    for (const [workflowId, pattern] of Object.entries(WORKFLOW_PATTERNS)) {
        const keywordHits = countPhraseHits(pattern.keywords, preparedTask);
        const boostHits = countPhraseHits(pattern.boost, preparedTask);
        const score = (keywordHits * pattern.weight) + (boostHits * BOOST_POINTS);
        scores[workflowId] = { score, keywordHits, boostHits };
    }

    // ── Phase 2: Contextual adjustments ──
    // Uses regex word matching to catch intent patterns that substring matching misses
    applyContextAdjustments(scores, preparedTask);

    // Keep only workflows that scored, best first (ties keep table order).
    const ranked = Object.entries(scores)
        .filter(([, data]) => data.score > 0)
        .sort(([, a], [, b]) => b.score - a.score);

    if (ranked.length === 0) {
        return noMatch;
    }

    const [topId, topData] = ranked[0];

    const alternatives = ranked.slice(1, 3).map(([id, data]) => ({
        workflow: id,
        score: data.score,
        confidence: confidenceForScore(id, data.score)
    }));

    // Ambiguous when the runner-up is within 20% of the top score.
    // (Every ranked entry already has score > 0, so no extra check is needed.)
    const isAmbiguous = alternatives.length > 0
        && (topData.score - alternatives[0].score) < (topData.score * 0.2);

    return {
        workflow: topId,
        confidence: confidenceForScore(topId, topData.score),
        ambiguous: isAmbiguous,
        alternatives
    };
}

// Named export of the pattern table, in addition to detectWorkflow above.
export { WORKFLOW_PATTERNS };