346 lines
16 KiB
JavaScript
346 lines
16 KiB
JavaScript
/**
 * Workflow auto-detection engine.
 * Keyword-based pattern matching with weighted scoring + contextual adjustments.
 * No LLM call needed — fast and deterministic.
 */
|
|
|
|
// Pattern table consumed by detectWorkflow(). Each workflow entry has:
//   keywords - phrases searched for (as substrings) in the prepared task text;
//              every hit is worth `weight` points
//   boost    - supporting words; every hit is worth a flat 3 points
//   weight   - points per keyword hit
// All phrases are written lowercase and without accents ("anadir", "diseno")
// because the task text is normalized the same way before matching.
const WORKFLOW_PATTERNS = {
  // Creating a new section / page / table.
  create_section: {
    keywords: [
      "crear seccion", "create section",
      "nueva seccion", "new section",
      "anadir seccion", "add section",
      "crear tabla", "create table",
      "nueva pagina", "new page",
      "nueva seccion web", "new web section",
      "montar seccion", "set up section", "configurar seccion",
      // Additional English patterns
      "build section", "build page", "make section", "make page",
      "set up page", "create page", "new table",
      "section for", "seccion de", "seccion para",
      // Natural phrasing
      "want section", "need section", "quiero seccion",
      "necesito seccion", "hacer seccion", "hacer pagina",
    ],
    boost: [
      "categoria", "category",
      "productos", "products",
      "blog", "noticias", "news",
      "equipo", "team",
      "servicios", "services",
      "galeria", "gallery",
      "portfolio",
      "testimonios", "testimonials",
      "faq", "preguntas",
      "clientes", "clients",
      "proyectos", "projects",
      "restaurante", "restaurant",
      "tienda", "store", "shop",
      "eventos", "events",
      "cursos", "courses",
    ],
    weight: 10,
  },

  // Filling an existing section with records / sample content.
  populate_content: {
    keywords: [
      "anadir contenido", "add content",
      "crear registros", "create records",
      "poblar", "populate", "rellenar", "fill", "bulk", "masivo",
      "insertar datos", "insert data", "meter datos",
      "cargar contenido", "load content",
      "contenido de ejemplo", "sample content",
      "crear entradas", "create entries",
      "anadir registros", "add records",
      "registros de ejemplo", "sample records", "meter registros",
      "fill with data", "fill with content", "add sample", "add examples",
      "anadir ejemplos", "contenido de prueba", "test content",
    ],
    boost: [
      "imagenes", "images", "fotos", "photos", "stock",
      "ejemplo", "sample", "demo", "placeholder",
      "varios", "multiple", "lote", "batch",
    ],
    weight: 10,
  },

  // Building a new module / component.
  create_module: {
    keywords: [
      "crear modulo", "create module",
      "nuevo modulo", "new module",
      "disenar modulo", "design module",
      "hacer modulo", "make module",
      "componente", "component",
      "crear componente", "create component",
      "nuevo componente", "new component",
      "montar modulo",
      "build module", "build component", "make component",
    ],
    boost: [
      "hero", "slider", "card", "grid", "lista", "list", "banner",
      "footer", "header", "navbar", "cta", "call to action",
      "carousel", "accordion", "tabs", "pricing", "features",
    ],
    weight: 10,
  },

  // Changing an existing module.
  edit_module: {
    keywords: [
      "editar modulo", "edit module",
      "modificar modulo", "modify module",
      "cambiar modulo", "change module",
      "actualizar modulo", "update module",
      "arreglar modulo", "fix module",
      "mejorar modulo", "improve module",
      "corregir modulo",
      "ajustar modulo", "adjust module",
    ],
    boost: [
      "css", "html", "javascript", "js", "estilo", "style", "variable",
      "campo", "field", "diseno", "design", "responsive", "movil", "mobile",
      "color", "fuente", "font", "espaciado", "spacing",
      "hero", "slider", "card", "grid", "banner", "footer", "header",
      "navbar", "cta", "carousel", "accordion", "tabs", "pricing",
    ],
    weight: 10,
  },

  // CRUD operations on individual records.
  manage_records: {
    keywords: [
      "editar registro", "edit record",
      "actualizar registro", "update record",
      "borrar registro", "delete record",
      "buscar registro", "search record",
      "listar registros", "list records",
      "modificar registro", "modify record",
      "ver registros", "view records",
      "consultar registros", "query records",
      "cambiar datos", "change data",
      "eliminar registro", "remove record",
      // CRUD-oriented English patterns
      "update data", "delete data", "edit data", "modify data",
      "update field", "change field", "edit entry", "delete entry",
      "update price", "change price", "update name", "change name",
      "remove records", "remove entries", "crud",
      "insert record", "insert entry", "create record", "add entry",
      "find record", "find records", "search records", "search data",
    ],
    boost: [
      "filtrar", "filter", "where",
      "campo", "field", "valor", "value",
      "pagina", "page", "ordenar", "sort", "buscar", "search",
      "precio", "price", "nombre", "name", "fecha", "date",
      "estado", "status", "activo", "active",
    ],
    weight: 8,
  },

  // Media handling. Only specific action phrases are keywords; generic words
  // such as "image"/"foto" live in `boost` instead.
  manage_media: {
    keywords: [
      "subir imagen", "upload image",
      "subir foto", "upload photo",
      "buscar imagen stock", "search stock image", "buscar fotos stock",
      "generar imagen", "generate image", "generar foto",
      "reemplazar imagen", "replace image",
      "cambiar imagen", "change image",
      "borrar imagen", "delete image",
      "eliminar imagen", "remove image",
      "gestionar media", "manage media",
      "gestionar imagenes", "manage images",
      "buscar stock", "search stock", "stock photos", "fotos stock",
      "subir archivo", "upload file",
    ],
    boost: [
      "stock", "pixabay", "pexels", "ai", "inteligencia artificial",
      "resize", "thumbnail", "miniatura", "s3", "assets",
      "comprimir", "compress", "optimizar", "optimize",
      // Generic image words are boosts, NOT keywords
      "imagen", "image", "foto", "photo", "galeria", "gallery", "media",
    ],
    // Reduced from 8 — media is usually a step, not a workflow
    weight: 5,
  },

  // SEO configuration.
  seo_setup: {
    keywords: [
      "seo", "meta tags",
      "meta descripcion", "meta description",
      "enlace", "slug",
      "url amigable", "friendly url",
      "sitemap", "schema markup", "posicionamiento", "ranking",
      "meta titulo", "meta title",
      "configurar seo", "setup seo", "set up seo", "configure seo",
    ],
    boost: [
      "google", "keywords", "palabras clave", "busqueda", "search",
      "indexar", "index", "robots", "canonical", "og:image",
    ],
    weight: 6,
  },

  // Read-only exploration of what the site contains.
  explore_site: {
    keywords: [
      "explorar", "explore",
      "que tiene", "what's in",
      "listar todo", "list all",
      "mostrar", "show me",
      "overview", "resumen",
      "que hay", "que secciones", "what sections",
      "ver todo", "show everything",
      "estructura", "structure", "inventario",
      "mapa del sitio", "site map",
      "what modules", "que modulos",
    ],
    boost: [
      "estructura", "structure", "mapa", "map", "resumen", "summary",
      "completo", "complete", "todas", "all",
    ],
    weight: 5,
  },
};
|
|
|
|
/**
 * Normalize text for matching: lowercase, remove accents (combining marks
 * left after NFD decomposition, so "ñ" becomes "n" and "ó" becomes "o"),
 * and trim surrounding whitespace.
 *
 * Filler words (articles, prepositions) are NOT removed here.
 *
 * @param {*} text - Text to normalize. `null`/`undefined` yield an empty
 *   string; any other non-string value is converted with `String()`.
 * @returns {string} The normalized text.
 */
function normalizeText(text) {
  // `== null` covers both null and undefined, which would otherwise throw.
  if (text == null) {
    return "";
  }

  return String(text)
    .toLowerCase()
    .normalize("NFD")
    // U+0300–U+036F is the combining diacritical marks block.
    .replace(/[\u0300-\u036f]/g, "")
    .trim();
}
|
|
|
|
/**
 * Turn a raw task description into the string used for keyword matching.
 *
 * The text is first passed through normalizeText(), then standalone filler
 * words (Spanish/English articles and short prepositions) are blanked out so
 * that e.g. "editar el módulo" lines up with the keyword "editar modulo".
 * Whitespace runs are collapsed afterwards.
 *
 * @param {string} text - Raw task description.
 * @returns {string} Normalized text without filler words.
 */
function prepareTaskForMatching(text) {
  // Whole-word match only, so "all" or "label" are left intact.
  const fillerWords = /\b(el|la|los|las|un|una|unos|unas|del|al|the|a|an)\b/g;
  const whitespaceRun = /\s+/g;

  const withoutFillers = normalizeText(text).replace(fillerWords, " ");
  const collapsed = withoutFillers.replace(whitespaceRun, " ");

  return collapsed.trim();
}
|
|
|
|
// ── Contextual adjustment patterns ──────────────────────────────────────────
// These use regex word matching to detect intent combinations that substring
// matching misses (e.g., "create a new products section" has words separated).
//
// All patterns are tested against text that is already lowercase and
// accent-free, hence "seccion"/"modulo" without accents. None of them carries
// the `g` flag, so `.test()` keeps no state between calls.
//
// Noun patterns accept both singular and plural forms: because matching is
// whole-word, "modules" or "secciones" would not match a singular-only entry.

// Verbs signalling that something new is being made.
const CREATION_VERBS = /\b(crear|create|nuev[ao]s?|new|build|make|set up|montar|anadir|add|disenar|design|hacer)\b/;

// Verbs signalling a change to something that already exists.
const EDIT_VERBS = /\b(editar|edit|modificar|modify|cambiar|change|actualizar|update|arreglar|fix|mejorar|improve|ajustar|adjust|corregir)\b/;

// Verbs covering create/read/update/delete operations on data.
const CRUD_VERBS = /\b(editar|edit|borrar|delete|eliminar|remove|actualizar|update|crear|create|insertar|insert|modificar|modify|buscar|search|listar|list|consultar|query|cambiar|change|find|get|ver|view)\b/;

// Nouns for sections / pages / tables.
const SECTION_WORDS = /\b(seccion(?:es)?|sections?|paginas?|pages?|tablas?|tables?|web|sitios?|sites?)\b/;

// Nouns for modules / components.
const MODULE_WORDS = /\b(modulos?|modules?|componentes?|components?)\b/;

// Nouns for records and their data.
const RECORD_WORDS = /\b(registros?|records?|datos|data|entradas?|entry|entries|contenido|content|precios?|prices?|campos?|fields?)\b/;

// Verbs that are specific to media handling.
const MEDIA_ONLY_WORDS = /\b(subir|upload|reemplazar|replace|descargar|download)\b/;

// Nouns for images / galleries.
const IMAGE_WORDS = /\b(imagen|imagenes|image|images|foto|fotos|photo|photos|galerias?|gallery|galleries)\b/;

// Words that indicate the task is about content/records, not creating a new section
const CONTENT_INTENT_WORDS = /\b(contenido|content|rellenar|fill|poblar|populate|registros|records|sample|ejemplo|articulos|articles|entradas|entries|anadir contenido|add content)\b/;

// Words that indicate the task is about SEO, not creating a new section
const SEO_INTENT_WORDS = /\b(seo|meta tags?|meta descripcion|meta description|meta titulo|meta title|sitemap|slug|posicionamiento|ranking|canonical)\b/;
|
|
|
|
/**
 * Second scoring pass: adjust workflow scores based on which kinds of words
 * (creation/edit/CRUD verbs, section/module/record/image nouns, content or
 * SEO intent) appear anywhere in the task.
 *
 * Unlike the keyword pass this uses whole-word regex tests, so it also fires
 * when the relevant words are not adjacent.
 *
 * Mutates `scores` in place: entries are created on demand with a zero score
 * and existing `score` values are raised or lowered. Nothing is returned.
 *
 * @param {Object<string, {score: number, keywordHits: number, boostHits: number}>} scores
 *   Per-workflow score entries, keyed by workflow id.
 * @param {string} normalizedTask - Task text already prepared for matching.
 */
function applyContextAdjustments(scores, normalizedTask) {
  const mentions = (pattern) => pattern.test(normalizedTask);

  // Returns the entry for a workflow, creating a zeroed one when missing.
  const entryFor = (workflowId) => {
    if (!scores[workflowId]) {
      scores[workflowId] = { score: 0, keywordHits: 0, boostHits: 0 };
    }
    return scores[workflowId];
  };

  // Lowers a score by `amount`, never below zero.
  const penalize = (entry, amount) => {
    entry.score = Math.max(0, entry.score - amount);
  };

  const verb = {
    creation: mentions(CREATION_VERBS),
    edit: mentions(EDIT_VERBS),
    crud: mentions(CRUD_VERBS),
    media: mentions(MEDIA_ONLY_WORDS),
  };
  const noun = {
    section: mentions(SECTION_WORDS),
    module: mentions(MODULE_WORDS),
    record: mentions(RECORD_WORDS),
    image: mentions(IMAGE_WORDS),
  };
  const aboutContent = mentions(CONTENT_INTENT_WORDS);
  const aboutSeo = mentions(SEO_INTENT_WORDS);

  // True when the task talks about a section, module or record at all.
  const hasStructuralContext = noun.section || noun.module || noun.record;

  // Section creation: creation verb + section noun, unless the task is really
  // about populating content or configuring SEO.
  if (verb.creation && noun.section && !(aboutContent || aboutSeo)) {
    entryFor("create_section").score += 20;
  }

  // Module creation: creation verb + module noun.
  if (verb.creation && noun.module) {
    entryFor("create_module").score += 20;
  }

  // Module editing: edit verb + module noun.
  if (verb.edit && noun.module) {
    entryFor("edit_module").score += 20;
  }

  // When both module workflows have an entry and only one verb family is
  // present, push the other workflow down so the result is decisive.
  const createModule = scores.create_module;
  const editModule = scores.edit_module;
  if (noun.module && createModule && editModule) {
    if (verb.creation && !verb.edit) {
      penalize(editModule, 15);
    } else if (verb.edit && !verb.creation) {
      penalize(createModule, 15);
    }
  }

  // Record CRUD: any CRUD verb + record noun.
  if (verb.crud && noun.record) {
    entryFor("manage_records").score += 15;
  }

  // Media is treated as a step, not the workflow, when the task mentions a
  // section/module/record and has no explicit media verb: keep 30% of it.
  const media = scores.manage_media;
  if (media && hasStructuralContext && !verb.media) {
    media.score = Math.max(0, Math.floor(media.score * 0.3));
  }

  // Media is boosted only when it is clearly the primary intent: media verb +
  // image noun with no section/module/record in sight.
  if (verb.media && noun.image && !hasStructuralContext) {
    entryFor("manage_media").score += 10;
  }
}
|
|
|
|
/**
 * Count how many of `phrases` occur as substrings of the prepared task.
 *
 * Phrases go through the same preparation as the task itself, so a phrase
 * containing a filler word (e.g. "mapa del sitio") can still match once that
 * filler has been stripped from the task.
 */
function countPhraseHits(preparedTask, phrases) {
  let hits = 0;
  for (const phrase of phrases) {
    const needle = prepareTaskForMatching(phrase);
    // An empty needle would match every task via includes("").
    if (needle !== "" && preparedTask.includes(needle)) {
      hits++;
    }
  }
  return hits;
}

/**
 * Convert a raw score into a 0..1 confidence for the given workflow.
 *
 * The score is divided by 15% of the workflow's maximum keyword + boost score
 * (denominator floored at 1), capped at 1 and rounded to two decimals.
 */
function scoreToConfidence(workflowId, score) {
  const { keywords, boost, weight } = WORKFLOW_PATTERNS[workflowId];
  const maxPossibleScore = keywords.length * weight + boost.length * 3;
  const ratio = Math.min(score / Math.max(maxPossibleScore * 0.15, 1), 1);
  return Math.round(ratio * 100) / 100;
}

/**
 * Detect the best workflow for a given task description.
 * Returns the top match with confidence, plus up to two runner-ups.
 *
 * Phase 1 scores every workflow by keyword hits (worth the workflow's weight
 * each) and boost hits (worth 3 each). Phase 2 applies contextual
 * adjustments. Workflows whose final score is 0 are dropped.
 *
 * @param {string} task - The user's task description. Non-string or empty
 *   input produces the "no match" result instead of throwing.
 * @returns {{
 *   workflow: (string|null),
 *   confidence: number,
 *   ambiguous: boolean,
 *   alternatives: Array<{workflow: string, score: number, confidence: number}>
 * }} `workflow` is null and `alternatives` is empty when nothing matched.
 *   `ambiguous` is true when the runner-up scores within 20% of the top score.
 */
export function detectWorkflow(task) {
  const noMatch = { workflow: null, confidence: 0, ambiguous: false, alternatives: [] };

  if (typeof task !== "string") {
    return noMatch;
  }

  const normalizedTask = prepareTaskForMatching(task);
  const scores = {};

  // ── Phase 1: Keyword + boost scoring ──
  for (const [workflowId, pattern] of Object.entries(WORKFLOW_PATTERNS)) {
    const keywordHits = countPhraseHits(normalizedTask, pattern.keywords);
    const boostHits = countPhraseHits(normalizedTask, pattern.boost);
    const score = keywordHits * pattern.weight + boostHits * 3;
    scores[workflowId] = { score, keywordHits, boostHits };
  }

  // ── Phase 2: Contextual adjustments ──
  // Uses regex word matching to catch intent patterns that substring matching misses
  applyContextAdjustments(scores, normalizedTask);

  // Keep only workflows with a positive score, best first.
  const ranked = Object.entries(scores)
    .filter(([, data]) => data.score > 0)
    .sort(([, a], [, b]) => b.score - a.score);

  if (ranked.length === 0) {
    return noMatch;
  }

  const [topId, topData] = ranked[0];

  // Runner-ups use the same confidence formula as the top match.
  const alternatives = ranked.slice(1, 3).map(([id, data]) => ({
    workflow: id,
    score: data.score,
    confidence: scoreToConfidence(id, data.score),
  }));

  // Ambiguous when the runner-up is within 20% of the top score.
  const runnerUp = alternatives[0];
  const isAmbiguous = runnerUp !== undefined
    && runnerUp.score > 0
    && (topData.score - runnerUp.score) < (topData.score * 0.2);

  return {
    workflow: topId,
    confidence: scoreToConfidence(topId, topData.score),
    ambiguous: isAmbiguous,
    alternatives,
  };
}
|
|
|
|
// Also export the raw pattern table (the same object detectWorkflow reads).
export { WORKFLOW_PATTERNS };
|