From b3ca3437984a1ae519d517ecb90bcec56bf1df73 Mon Sep 17 00:00:00 2001 From: Jordan Diaz Date: Sun, 10 May 2026 21:27:47 +0000 Subject: [PATCH] Ajustes de estructura --- mcp-server/tools/media/analyze_image.js | 201 ++++++++++++++++++++++++ mcp-server/tools/media/index.js | 2 + 2 files changed, 203 insertions(+) create mode 100644 mcp-server/tools/media/analyze_image.js diff --git a/mcp-server/tools/media/analyze_image.js b/mcp-server/tools/media/analyze_image.js new file mode 100644 index 0000000..9982e06 --- /dev/null +++ b/mcp-server/tools/media/analyze_image.js @@ -0,0 +1,201 @@ +import { z } from "zod"; +import axios from "axios"; +import fs from "fs"; +import path from "path"; +import { withAuth } from "../../auth/index.js"; +import { handleToolError } from "../helpers/errorHandler.js"; +import { withAuthParams } from "../helpers/authSchema.js"; + +const GEMINI_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent"; +const CHAT_UPLOADS_DIR = "/opt/acai/chat-uploads"; +const DEFAULT_PROMPT = "Describe esta imagen detalladamente, mencionando elementos visuales, texto, layout y proposito aparente."; + +/** + * Detecta el mime type a partir de la extension del fichero o del primer byte (magic number). 
/**
 * Lookup table from lowercase file extension to image MIME type.
 *
 * Kept in a Map rather than an object literal so that extensions which happen
 * to match inherited Object.prototype members ("constructor", "__proto__", ...)
 * are treated as unknown instead of yielding a non-string value.
 */
const MIME_BY_EXTENSION = new Map([
  ["jpg", "image/jpeg"],
  ["jpeg", "image/jpeg"],
  ["png", "image/png"],
  ["webp", "image/webp"],
  ["gif", "image/gif"],
  ["bmp", "image/bmp"],
  ["heic", "image/heic"],
  ["heif", "image/heif"],
]);

/**
 * Detects an image MIME type from the file name's extension, falling back to
 * the leading bytes of the buffer (magic numbers) when the extension is unknown.
 *
 * @param {string} [filename] - File name, path or URL path. Only the text after
 *   the last "." is considered (the whole name when there is no dot).
 * @param {Uint8Array} [buffer] - Image bytes; inspected only when the extension
 *   is not recognised and at least 4 bytes are available.
 * @returns {string} The detected MIME type, or "image/jpeg" when nothing matches.
 */
function detectMimeType(filename, buffer) {
  const name = typeof filename === "string" ? filename.toLowerCase() : "";
  const extension = name.slice(name.lastIndexOf(".") + 1);
  const mimeFromExtension = MIME_BY_EXTENSION.get(extension);
  if (mimeFromExtension) return mimeFromExtension;

  // Magic-number fallback. Bytes are compared directly so the check works for
  // any indexable byte container, not only Node Buffers.
  if (buffer && buffer.length >= 4) {
    const hasBytesAt = (offset, bytes) =>
      bytes.every((byte, index) => buffer[offset + index] === byte);

    if (hasBytesAt(0, [0xff, 0xd8, 0xff])) return "image/jpeg";
    if (hasBytesAt(0, [0x89, 0x50, 0x4e, 0x47])) return "image/png";
    if (hasBytesAt(0, [0x47, 0x49, 0x46])) return "image/gif";
    // WebP is a RIFF container: "RIFF" <4-byte size> "WEBP".
    if (
      buffer.length >= 12 &&
      hasBytesAt(0, [0x52, 0x49, 0x46, 0x46]) &&
      hasBytesAt(8, [0x57, 0x45, 0x42, 0x50])
    ) {
      return "image/webp";
    }
  }
  return "image/jpeg";
}

/**
 * Resolves a chat-preview URL to a local path inside CHAT_UPLOADS_DIR.
 *
 * Accepts `/api/chat-preview?file=xxx` either as a relative reference or as an
 * absolute http(s) URL on any host (the host is ignored). The path must be
 * exactly `/api/chat-preview` (trailing slashes tolerated); look-alike prefixes
 * such as `/api/chat-preview-old` are rejected.
 *
 * @param {string} imageUrl - Candidate URL or relative reference.
 * @returns {string|null} Path of the upload under CHAT_UPLOADS_DIR, or null
 *   when the input is not a usable chat-preview reference.
 */
function resolveChatPreviewPath(imageUrl) {
  if (typeof imageUrl !== "string") return null;

  let url;
  try {
    // The base is only used for relative references; absolute URLs override it.
    url = new URL(imageUrl, "http://placeholder.local");
  } catch {
    return null;
  }

  // Only http(s) (or relative) references may map to a local upload.
  if (url.protocol !== "http:" && url.protocol !== "https:") return null;
  if (url.pathname.replace(/\/+$/, "") !== "/api/chat-preview") return null;

  const fileParam = url.searchParams.get("file");
  if (!fileParam) return null;

  // Path-traversal guard: keep only the base name of the requested file.
  const safeName = path.basename(fileParam);
  if (!safeName || safeName === "." || safeName === ".." || safeName.includes("\0")) {
    return null;
  }

  return path.join(CHAT_UPLOADS_DIR, safeName);
}
/** Largest image, in bytes, that loadImage accepts from either source (20 MB). */
const MAX_IMAGE_BYTES = 20 * 1024 * 1024;

/**
 * Loads an image as `{ mimeType, base64 }` from a local chat-preview upload or
 * from a public http(s) URL.
 *
 * @param {string} imageUrl - `/api/chat-preview?file=...` reference or http(s) URL.
 * @returns {Promise<{ mimeType: string, base64: string }>} MIME type and
 *   base64-encoded bytes of the image.
 * @throws {Error} When the local upload is missing, is not a regular file or
 *   exceeds MAX_IMAGE_BYTES; when the download fails; or when the URL is of an
 *   unsupported kind.
 */
async function loadImage(imageUrl) {
  // Case 1: local chat-preview upload.
  const localPath = resolveChatPreviewPath(imageUrl);
  if (localPath) {
    // Only the base name is reported so server paths never reach the caller.
    const displayName = path.basename(localPath);

    let stats;
    try {
      stats = await fs.promises.stat(localPath);
    } catch (err) {
      if (err.code === "ENOENT") {
        throw new Error(`Local chat upload not found: ${displayName}`, { cause: err });
      }
      throw err;
    }
    if (!stats.isFile()) {
      throw new Error(`Local chat upload not found: ${displayName}`);
    }
    // Same ceiling as the remote branch, checked before reading into memory.
    if (stats.size > MAX_IMAGE_BYTES) {
      throw new Error(`Local chat upload exceeds ${MAX_IMAGE_BYTES} bytes: ${displayName}`);
    }

    const buffer = await fs.promises.readFile(localPath);
    return {
      mimeType: detectMimeType(localPath, buffer),
      base64: buffer.toString("base64"),
    };
  }

  // Case 2: public http(s) URL (scheme matched case-insensitively).
  // NOTE(review): the target host is not restricted, so this request can reach
  // internal/loopback addresses (SSRF surface) - confirm that is acceptable.
  if (typeof imageUrl === "string" && /^https?:\/\//i.test(imageUrl)) {
    const response = await axios.get(imageUrl, {
      responseType: "arraybuffer",
      timeout: 30000,
      maxContentLength: MAX_IMAGE_BYTES,
    });
    const buffer = Buffer.isBuffer(response.data)
      ? response.data
      : Buffer.from(response.data);

    const contentType = response.headers?.["content-type"];
    const headerMime =
      typeof contentType === "string"
        ? contentType.split(";")[0].trim().toLowerCase()
        : "";
    // Trust the server's Content-Type only when it names an image; otherwise
    // detect from the URL path (query and fragment removed) and the bytes.
    const mimeType = headerMime.startsWith("image/")
      ? headerMime
      : detectMimeType(imageUrl.split(/[?#]/)[0], buffer);

    return {
      mimeType,
      base64: buffer.toString("base64"),
    };
  }

  throw new Error("Unsupported image_url. Use http(s):// or /api/chat-preview?file=...");
}

/**
 * Builds a tool result flagged with `isError` whose text is the pretty-printed
 * JSON of `{ success: false, ...payload }`.
 *
 * @param {object} payload - Extra fields for the JSON body (e.g. `error`, `raw`).
 * @returns {{ content: Array<{ type: string, text: string }>, isError: boolean }}
 */
function jsonErrorResult(payload) {
  return {
    content: [{
      type: "text",
      text: JSON.stringify({ success: false, ...payload }, null, 2),
    }],
    isError: true,
  };
}

/**
 * Concatenates the text of every non-thought part of the first candidate in a
 * Gemini generateContent response body.
 *
 * @param {object} data - Parsed response body.
 * @returns {string} Joined text, or "" when the response carries no text.
 */
function extractGeminiText(data) {
  const parts = data?.candidates?.[0]?.content?.parts;
  if (!Array.isArray(parts)) return "";
  return parts
    .filter((part) => typeof part?.text === "string" && part.thought !== true)
    .map((part) => part.text)
    .join("");
}

/**
 * Registers the `analyze_image` tool on the given server.
 *
 * The handler loads the image via loadImage, posts it together with the prompt
 * (or DEFAULT_PROMPT when the prompt is missing/blank) to GEMINI_ENDPOINT using
 * the NANO_BANANA_API_KEY environment variable, and returns the model's text.
 * Configuration, image-loading and empty-answer failures are returned as JSON
 * error results; other failures are delegated to handleToolError.
 *
 * @param {object} server - Object exposing
 *   `tool(name, description, schema, annotations, handler)`.
 */
export function registerAnalyzeImageTool(server) {
  server.tool(
    "analyze_image",
    "Analiza una imagen usando Gemini Vision. Util cuando el usuario adjunta una imagen, despues de un screenshot de Playwright, o para describir cualquier imagen accesible via URL. Devuelve descripcion text del contenido visual.",
    withAuthParams({
      image_url: z.string().describe("URL de la imagen. Acepta URL publica http(s):// o ruta relativa /api/chat-preview?file=..."),
      prompt: z.string().optional().describe("Que quieres saber de la imagen. Default: descripcion detallada."),
    }),
    { readOnlyHint: true, destructiveHint: false },
    withAuth(async ({ image_url, prompt }) => {
      try {
        const apiKey = process.env.NANO_BANANA_API_KEY;
        if (!apiKey) {
          return jsonErrorResult({
            error: "NANO_BANANA_API_KEY no esta configurada en el entorno del MCP server.",
          });
        }

        // 1) Load the image (local or remote) -> base64 + MIME type.
        let image;
        try {
          image = await loadImage(image_url);
        } catch (loadErr) {
          return jsonErrorResult({
            error: `No se pudo cargar la imagen: ${loadErr.message}`,
          });
        }

        // 2) Call Gemini Vision. `||` is intentional: a blank prompt falls back
        //    to the default one.
        const finalPrompt = (prompt && prompt.trim()) || DEFAULT_PROMPT;
        const payload = {
          contents: [{
            parts: [
              { inline_data: { mime_type: image.mimeType, data: image.base64 } },
              { text: finalPrompt },
            ],
          }],
        };

        const geminiResp = await axios.post(GEMINI_ENDPOINT, payload, {
          headers: {
            "x-goog-api-key": apiKey,
            "Content-Type": "application/json",
          },
          timeout: 60000,
          maxBodyLength: 30 * 1024 * 1024,
        });

        // An answer may be split across several parts; use all of them.
        const description = extractGeminiText(geminiResp.data);
        if (!description) {
          return jsonErrorResult({
            error: "Gemini no devolvio descripcion.",
            raw: geminiResp.data,
          });
        }

        return {
          content: [{
            type: "text",
            text: description,
          }],
        };
      } catch (error) {
        // Image-loading errors are handled above, so an HTTP error response
        // reaching this point comes from the Gemini request.
        if (error.response?.data) {
          return handleToolError(
            new Error(`Gemini API error: ${JSON.stringify(error.response.data).slice(0, 500)}`),
            "analyze_image",
            { image_url, status: error.response.status }
          );
        }
        return handleToolError(error, "analyze_image", { image_url });
      }
    })
  );
}
// mcp-server/tools/media/index.js
import { registerUploadRecordImageTool } from './upload.js';
import { registerUploadImageToAssetsTool } from './uploadImageToAssets.js';
import { registerGenerateImageTool } from './generateImage.js';
import { registerAnalyzeImageTool } from './analyze_image.js';

/**
 * Registers every media tool on the given server, in a fixed order:
 * record-image upload, asset upload, image generation, image analysis.
 *
 * @param {object} server - Server instance handed to each tool registrar.
 */
export function registerMediaTools(server) {
  const registrars = [
    registerUploadRecordImageTool,
    registerUploadImageToAssetsTool,
    registerGenerateImageTool,
    registerAnalyzeImageTool,
  ];
  for (const register of registrars) {
    register(server);
  }
}