Compare commits

...

9 Commits

31 changed files with 1370 additions and 514 deletions

View File

@ -17,4 +17,7 @@ RESULTS_DIR=./results
UPLOADS_DIR=./uploads/temp
# Logging Configuration
LOG_LEVEL=info
LOG_LEVEL=info
# Text-to-Image Logging (optional)
# TTI_LOG=./apps/api-service/logs/tti.log

View File

@ -38,7 +38,7 @@
},
"dependencies": {
"@banatie/database": "workspace:*",
"@google/genai": "^1.17.0",
"@google/genai": "^1.22.0",
"cors": "^2.8.5",
"dotenv": "^17.2.2",
"express": "^5.1.0",

View File

@ -1,26 +1,26 @@
import express, { Application } from 'express';
import cors from 'cors';
import { config } from 'dotenv';
import { Config } from './types/api';
import { generateRouter } from './routes/generate';
import { enhanceRouter } from './routes/enhance';
import { textToImageRouter } from './routes/textToImage';
import { imagesRouter } from './routes/images';
import bootstrapRoutes from './routes/bootstrap';
import adminKeysRoutes from './routes/admin/keys';
import { errorHandler, notFoundHandler } from './middleware/errorHandler';
import express, { Application } from "express";
import cors from "cors";
import { config } from "dotenv";
import { Config } from "./types/api";
import { generateRouter } from "./routes/generate";
import { enhanceRouter } from "./routes/enhance";
import { textToImageRouter } from "./routes/textToImage";
import { imagesRouter } from "./routes/images";
import bootstrapRoutes from "./routes/bootstrap";
import adminKeysRoutes from "./routes/admin/keys";
import { errorHandler, notFoundHandler } from "./middleware/errorHandler";
// Load environment variables
config();
// Application configuration
export const appConfig: Config = {
port: parseInt(process.env['PORT'] || '3000'),
geminiApiKey: process.env['GEMINI_API_KEY'] || '',
resultsDir: './results',
uploadsDir: './uploads/temp',
port: parseInt(process.env["PORT"] || "3000"),
geminiApiKey: process.env["GEMINI_API_KEY"] || "",
resultsDir: "./results",
uploadsDir: "./uploads/temp",
maxFileSize: 5 * 1024 * 1024, // 5MB
maxFiles: 3
maxFiles: 3,
};
// Create Express application
@ -28,32 +28,34 @@ export const createApp = (): Application => {
const app = express();
// Middleware - CORS configuration (allow all origins)
app.use(cors({
origin: true, // Allow all origins
credentials: true,
methods: ['GET', 'POST', 'PUT', 'DELETE', 'OPTIONS'],
allowedHeaders: ['Content-Type', 'Authorization', 'X-API-Key'],
exposedHeaders: ['X-Request-ID']
}));
app.use(
cors({
origin: true, // Allow all origins
credentials: true,
methods: ["GET", "POST", "PUT", "DELETE", "OPTIONS"],
allowedHeaders: ["Content-Type", "Authorization", "X-API-Key"],
exposedHeaders: ["X-Request-ID"],
}),
);
app.use(express.json({ limit: '10mb' }));
app.use(express.urlencoded({ extended: true, limit: '10mb' }));
app.use(express.json({ limit: "10mb" }));
app.use(express.urlencoded({ extended: true, limit: "10mb" }));
// Request ID middleware for logging
app.use((req, res, next) => {
req.requestId = Math.random().toString(36).substr(2, 9);
res.setHeader('X-Request-ID', req.requestId);
res.setHeader("X-Request-ID", req.requestId);
next();
});
// Health check endpoint
app.get('/health', (_req, res) => {
app.get("/health", (_req, res) => {
const health = {
status: 'healthy',
status: "healthy",
timestamp: new Date().toISOString(),
uptime: process.uptime(),
environment: process.env['NODE_ENV'] || 'development',
version: process.env['npm_package_version'] || '1.0.0'
environment: process.env["NODE_ENV"] || "development",
version: process.env["npm_package_version"] || "1.0.0",
};
console.log(`[${health.timestamp}] Health check - ${health.status}`);
@ -61,30 +63,34 @@ export const createApp = (): Application => {
});
// API info endpoint
app.get('/api/info', async (req: any, res) => {
app.get("/api/info", async (req: any, res) => {
const info: any = {
name: 'Banatie - Nano Banana Image Generation API',
version: '1.0.0',
description: 'REST API service for AI-powered image generation using Gemini Flash Image model',
name: "Banatie - Nano Banana Image Generation API",
version: "1.0.0",
description:
"REST API service for AI-powered image generation using Gemini Flash Image model",
endpoints: {
'GET /health': 'Health check',
'GET /api/info': 'API information',
'POST /api/generate': 'Generate images from text prompt with optional reference images',
'POST /api/text-to-image': 'Generate images from text prompt only (JSON)',
'POST /api/enhance': 'Enhance and optimize prompts for better image generation'
"GET /health": "Health check",
"GET /api/info": "API information",
"POST /api/generate":
"Generate images from text prompt with optional reference images",
"POST /api/text-to-image":
"Generate images from text prompt only (JSON)",
"POST /api/enhance":
"Enhance and optimize prompts for better image generation",
},
limits: {
maxFileSize: `${appConfig.maxFileSize / (1024 * 1024)}MB`,
maxFiles: appConfig.maxFiles,
supportedFormats: ['PNG', 'JPEG', 'JPG', 'WebP']
}
supportedFormats: ["PNG", "JPEG", "JPG", "WebP"],
},
};
// If API key is provided, validate and return key info
const providedKey = req.headers['x-api-key'] as string;
const providedKey = req.headers["x-api-key"] as string;
if (providedKey) {
try {
const { ApiKeyService } = await import('./services/ApiKeyService');
const { ApiKeyService } = await import("./services/ApiKeyService");
const apiKeyService = new ApiKeyService();
const apiKey = await apiKeyService.validateKey(providedKey);
@ -97,7 +103,7 @@ export const createApp = (): Application => {
organizationSlug: apiKey.organizationSlug,
projectId: apiKey.projectId,
projectSlug: apiKey.projectSlug,
expiresAt: apiKey.expiresAt
expiresAt: apiKey.expiresAt,
};
}
} catch (error) {
@ -111,16 +117,16 @@ export const createApp = (): Application => {
// Public routes (no authentication)
// Bootstrap route (no auth, but works only once)
app.use('/api/bootstrap', bootstrapRoutes);
app.use("/api/bootstrap", bootstrapRoutes);
// Admin routes (require master key)
app.use('/api/admin/keys', adminKeysRoutes);
app.use("/api/admin/keys", adminKeysRoutes);
// Protected API routes (require valid API key)
app.use('/api', generateRouter);
app.use('/api', enhanceRouter);
app.use('/api', textToImageRouter);
app.use('/api', imagesRouter);
app.use("/api", generateRouter);
app.use("/api", enhanceRouter);
app.use("/api", textToImageRouter);
app.use("/api", imagesRouter);
// Error handling middleware (must be last)
app.use(notFoundHandler);
@ -136,4 +142,4 @@ declare global {
requestId: string;
}
}
}
}

View File

@ -16,6 +16,20 @@ const VALIDATION_RULES = {
},
};
/**
 * Aspect ratios supported by the Gemini SDK, with the output resolution
 * noted for each one:
 *
 *   1:1  -> 1024x1024 (square)      2:3  -> 832x1248 (portrait)
 *   3:2  -> 1248x832  (landscape)   3:4  -> 864x1184 (portrait)
 *   4:3  -> 1184x864  (landscape)   4:5  -> 896x1152 (portrait)
 *   5:4  -> 1152x896  (landscape)   9:16 -> 768x1344 (vertical)
 *   16:9 -> 1344x768  (widescreen)  21:9 -> 1536x672 (ultrawide)
 *
 * Declared `as const` so the literal values are preserved in the type.
 */
const VALID_ASPECT_RATIOS = [
  "1:1",
  "2:3", "3:2",
  "3:4", "4:3",
  "4:5", "5:4",
  "9:16", "16:9",
  "21:9",
] as const;
/**
* Validate the text-to-image JSON request
*/
@ -25,7 +39,7 @@ export const validateTextToImageRequest = (
next: NextFunction,
): void | Response => {
const timestamp = new Date().toISOString();
const { prompt, filename, autoEnhance, enhancementOptions } = req.body;
const { prompt, filename, aspectRatio, autoEnhance, enhancementOptions } = req.body;
const errors: string[] = [];
console.log(
@ -41,6 +55,20 @@ export const validateTextToImageRequest = (
});
}
// Set defaults before validation.
// autoEnhance defaults to true unless the client sent an explicit value.
if (req.body.autoEnhance === undefined) {
  req.body.autoEnhance = true;
}
// Default the enhancement template to "photorealistic".
if (req.body.enhancementOptions) {
  // Only add the default to real objects. Assigning a property on a truthy
  // primitive (for example a JSON string or number sent by mistake) throws a
  // TypeError in strict-mode code, which would abort the request here instead
  // of letting the validation below report a proper error.
  if (
    typeof req.body.enhancementOptions === "object" &&
    !req.body.enhancementOptions.template
  ) {
    req.body.enhancementOptions.template = "photorealistic";
  }
} else if (req.body.autoEnhance !== false) {
  // autoEnhance is on (explicitly or by default) and no options were supplied:
  // create them with the default template.
  req.body.enhancementOptions = { template: "photorealistic" };
}
// Validate prompt
if (!prompt) {
errors.push("Prompt is required");
@ -73,6 +101,17 @@ export const validateTextToImageRequest = (
);
}
// Validate aspectRatio (optional, defaults to "1:1")
if (aspectRatio !== undefined) {
  if (typeof aspectRatio !== "string") {
    errors.push("aspectRatio must be a string");
  } else if (
    // Widen the literal tuple to `readonly string[]` so `includes` accepts an
    // arbitrary string without resorting to an `any` cast.
    !(VALID_ASPECT_RATIOS as readonly string[]).includes(aspectRatio)
  ) {
    errors.push(
      `Invalid aspectRatio. Must be one of: ${VALID_ASPECT_RATIOS.join(", ")}`,
    );
  }
}
// Validate autoEnhance (optional boolean)
if (autoEnhance !== undefined && typeof autoEnhance !== "boolean") {
errors.push("autoEnhance must be a boolean");
@ -86,17 +125,11 @@ export const validateTextToImageRequest = (
) {
errors.push("enhancementOptions must be an object");
} else {
const {
imageStyle,
aspectRatio,
mood,
lighting,
cameraAngle,
negativePrompts,
} = enhancementOptions;
const { template } = enhancementOptions;
// Validate template parameter
if (
imageStyle !== undefined &&
template !== undefined &&
![
"photorealistic",
"illustration",
@ -104,52 +137,32 @@ export const validateTextToImageRequest = (
"sticker",
"product",
"comic",
].includes(imageStyle)
"general",
].includes(template)
) {
errors.push("Invalid imageStyle in enhancementOptions");
errors.push(
"Invalid template in enhancementOptions. Must be one of: photorealistic, illustration, minimalist, sticker, product, comic, general",
);
}
}
}
if (
aspectRatio !== undefined &&
!["square", "portrait", "landscape", "wide", "ultrawide"].includes(
aspectRatio,
)
) {
errors.push("Invalid aspectRatio in enhancementOptions");
}
if (
mood !== undefined &&
(typeof mood !== "string" || mood.length > 100)
) {
errors.push("mood must be a string with max 100 characters");
}
if (
lighting !== undefined &&
(typeof lighting !== "string" || lighting.length > 100)
) {
errors.push("lighting must be a string with max 100 characters");
}
if (
cameraAngle !== undefined &&
(typeof cameraAngle !== "string" || cameraAngle.length > 100)
) {
errors.push("cameraAngle must be a string with max 100 characters");
}
if (negativePrompts !== undefined) {
if (!Array.isArray(negativePrompts) || negativePrompts.length > 10) {
errors.push("negativePrompts must be an array with max 10 items");
} else {
for (const item of negativePrompts) {
if (typeof item !== "string" || item.length > 100) {
errors.push(
"Each negative prompt must be a string with max 100 characters",
);
break;
}
// Validate meta (optional object)
if (req.body.meta !== undefined) {
if (
typeof req.body.meta !== "object" ||
Array.isArray(req.body.meta)
) {
errors.push("meta must be an object");
} else if (req.body.meta.tags !== undefined) {
if (!Array.isArray(req.body.meta.tags)) {
errors.push("meta.tags must be an array");
} else {
// Validate each tag is a string
for (const tag of req.body.meta.tags) {
if (typeof tag !== "string") {
errors.push("Each tag in meta.tags must be a string");
break;
}
}
}

View File

@ -1,5 +1,5 @@
import { Request, Response } from "express";
import { PromptEnhancementService } from "../services/PromptEnhancementService";
import { PromptEnhancementService } from "../services/promptEnhancement";
import { EnhancedGenerateImageRequest } from "../types/api";
let promptEnhancementService: PromptEnhancementService | null = null;
@ -24,9 +24,12 @@ export const autoEnhancePrompt = async (
const requestId = req.requestId;
const { prompt, autoEnhance, enhancementOptions } = req.body;
if (!autoEnhance) {
// Default autoEnhance to true if not explicitly set to false
const shouldEnhance = autoEnhance !== false;
if (!shouldEnhance) {
console.log(
`[${timestamp}] [${requestId}] Auto-enhancement disabled, skipping`,
`[${timestamp}] [${requestId}] Auto-enhancement explicitly disabled, skipping`,
);
return next();
}
@ -47,9 +50,21 @@ export const autoEnhancePrompt = async (
promptEnhancementService = new PromptEnhancementService(apiKey);
}
// Extract orgId and projectId from validated API key
const orgId = req.apiKey?.organizationSlug || "unknown";
const projectId = req.apiKey?.projectSlug || "unknown";
const result = await promptEnhancementService.enhancePrompt(
prompt,
enhancementOptions || {},
{
...enhancementOptions,
...(req.body.meta?.tags && { tags: req.body.meta.tags }),
},
{
orgId,
projectId,
...(req.body.meta && { meta: req.body.meta }),
},
);
if (result.success && result.enhancedPrompt) {

View File

@ -1,6 +1,6 @@
import { Request, Response, Router } from "express";
import type { Router as RouterType } from "express";
import { PromptEnhancementService } from "../services/PromptEnhancementService";
import { PromptEnhancementService } from "../services/promptEnhancement";
import { asyncHandler } from "../middleware/errorHandler";
import {
PromptEnhancementRequest,
@ -133,9 +133,17 @@ enhanceRouter.post(
console.log(`[${timestamp}] [${requestId}] Starting prompt enhancement`);
try {
// Extract orgId and projectId from validated API key
const orgId = req.apiKey?.organizationSlug || "unknown";
const projectId = req.apiKey?.projectSlug || "unknown";
const result = await promptEnhancementService.enhancePrompt(
prompt,
options || {},
{
orgId,
projectId,
},
);
console.log(`[${timestamp}] [${requestId}] Enhancement completed:`, {

View File

@ -63,7 +63,7 @@ generateRouter.post(
const timestamp = new Date().toISOString();
const requestId = req.requestId;
const { prompt, filename } = req.body;
const { prompt, filename, aspectRatio, meta } = req.body;
const files = (req.files as Express.Multer.File[]) || [];
// Extract org/project slugs from validated API key
@ -108,9 +108,11 @@ generateRouter.post(
const result = await imageGenService.generateImage({
prompt,
filename,
...(aspectRatio && { aspectRatio }),
orgId,
projectId,
...(referenceImages && { referenceImages }),
...(meta && { meta }),
});
// Log the result

View File

@ -54,7 +54,7 @@ textToImageRouter.post(
const timestamp = new Date().toISOString();
const requestId = req.requestId;
const { prompt, filename } = req.body;
const { prompt, filename, aspectRatio, meta } = req.body;
// Extract org/project slugs from validated API key
const orgId = req.apiKey?.organizationSlug || undefined;
@ -73,8 +73,10 @@ textToImageRouter.post(
const result = await imageGenService.generateImage({
prompt,
filename,
...(aspectRatio && { aspectRatio }),
orgId,
projectId,
...(meta && { meta }),
});
// Log the result

View File

@ -9,10 +9,11 @@ import {
GeminiParams,
} from "../types/api";
import { StorageFactory } from "./StorageFactory";
import { TTILogger, TTILogEntry } from "./TTILogger";
export class ImageGenService {
private ai: GoogleGenAI;
private primaryModel = "gemini-2.5-flash-image-preview";
private primaryModel = "gemini-2.5-flash-image";
constructor(apiKey: string) {
if (!apiKey) {
@ -28,18 +29,26 @@ export class ImageGenService {
async generateImage(
options: ImageGenerationOptions,
): Promise<ImageGenerationResult> {
const { prompt, filename, referenceImages, orgId, projectId } = options;
const { prompt, filename, referenceImages, aspectRatio, orgId, projectId, meta } = options;
// Use default values if not provided
const finalOrgId = orgId || process.env["DEFAULT_ORG_ID"] || "default";
const finalProjectId =
projectId || process.env["DEFAULT_PROJECT_ID"] || "main";
const finalAspectRatio = aspectRatio || "1:1"; // Default to square
// Step 1: Generate image from Gemini AI
let generatedData: GeneratedImageData;
let geminiParams: GeminiParams;
try {
const aiResult = await this.generateImageWithAI(prompt, referenceImages);
const aiResult = await this.generateImageWithAI(
prompt,
referenceImages,
finalAspectRatio,
finalOrgId,
finalProjectId,
meta,
);
generatedData = aiResult.generatedData;
geminiParams = aiResult.geminiParams;
} catch (error) {
@ -114,8 +123,15 @@ export class ImageGenService {
*/
private async generateImageWithAI(
prompt: string,
referenceImages?: ReferenceImage[],
): Promise<{ generatedData: GeneratedImageData; geminiParams: GeminiParams }> {
referenceImages: ReferenceImage[] | undefined,
aspectRatio: string,
orgId: string,
projectId: string,
meta?: { tags?: string[] },
): Promise<{
generatedData: GeneratedImageData;
geminiParams: GeminiParams;
}> {
const contentParts: any[] = [];
// Add reference images if provided
@ -135,6 +151,8 @@ export class ImageGenService {
text: prompt,
});
// CRITICAL: Calculate exact values before SDK call
// These exact objects will be passed to both SDK and logger
const contents = [
{
role: "user" as const,
@ -142,7 +160,12 @@ export class ImageGenService {
},
];
const config = { responseModalities: ["IMAGE", "TEXT"] };
const config = {
responseModalities: ["IMAGE", "TEXT"],
imageConfig: {
aspectRatio,
},
};
// Capture Gemini SDK parameters for debugging
const geminiParams: GeminiParams = {
@ -155,7 +178,30 @@ export class ImageGenService {
},
};
// Log TTI request BEFORE SDK call - using exact same values
const ttiLogger = TTILogger.getInstance();
const logEntry: TTILogEntry = {
timestamp: new Date().toISOString(),
orgId,
projectId,
prompt,
model: this.primaryModel,
config,
...(meta && { meta }),
...(referenceImages &&
referenceImages.length > 0 && {
referenceImages: referenceImages.map((img) => ({
mimetype: img.mimetype,
size: img.buffer.length,
originalname: img.originalname,
})),
}),
};
ttiLogger.log(logEntry);
try {
// Use the EXACT same config and contents objects calculated above
const response = await this.ai.models.generateContent({
model: this.primaryModel,
config,

View File

@ -1,274 +0,0 @@
import { GoogleGenAI } from "@google/genai";
/**
 * Optional hints that steer how PromptEnhancementService rewrites a raw prompt.
 * Every field may be omitted.
 */
export interface PromptEnhancementOptions {
// Target visual style. When omitted, the system prompt asks the model to
// auto-detect and apply an appropriate style.
imageStyle?:
| "photorealistic"
| "illustration"
| "minimalist"
| "sticker"
| "product"
| "comic";
// Named aspect-ratio hint. When omitted, the system prompt asks for square
// format unless context suggests otherwise.
aspectRatio?: "square" | "portrait" | "landscape" | "wide" | "ultrawide";
// Free-text hints; each one is added to the system prompt's technical
// requirements only when set.
mood?: string;
lighting?: string;
cameraAngle?: string;
// NOTE(review): not read by the PromptEnhancementService code in this file.
outputFormat?: "text" | "markdown" | "detailed";
// Things to avoid; joined with ", " into an "Avoid:" line of the system prompt.
negativePrompts?: string[];
}
/**
 * Outcome of PromptEnhancementService.enhancePrompt. Failures are reported
 * through `success: false` plus `error`.
 */
export interface PromptEnhancementResult {
// True when the model returned content that was turned into an enhanced prompt.
success: boolean;
// The raw prompt exactly as passed in.
originalPrompt: string;
// The rewritten prompt (trimmed model output); set on success.
enhancedPrompt?: string;
// Language name guessed from the original prompt by a character-range heuristic.
detectedLanguage?: string;
// The requested imageStyle, or a style inferred from keywords in the enhanced
// prompt ("general" when nothing matched).
appliedTemplate?: string;
metadata?: {
// Requested style, or the inferred one when no style was requested.
style?: string;
// Echo of the requested aspectRatio option, when one was given.
aspectRatio?: string;
// Human-readable labels for enhancements detected in the output text.
enhancements: string[];
};
// Failure description; set when success is false.
error?: string;
}
/**
 * Rewrites rough user prompts into detailed image-generation prompts by
 * sending them, together with a system prompt built from the caller's
 * options, to a Gemini text model.
 */
export class PromptEnhancementService {
  private ai: GoogleGenAI;
  private model = "gemini-2.5-flash";

  /**
   * @param apiKey Gemini API key used to create the SDK client.
   * @throws Error when `apiKey` is empty.
   */
  constructor(apiKey: string) {
    if (!apiKey) {
      throw new Error("Gemini API key is required");
    }
    this.ai = new GoogleGenAI({ apiKey });
  }

  /**
   * Enhance a raw prompt.
   *
   * Errors are caught and reported as `{ success: false, error }`.
   *
   * @param rawPrompt The user's original prompt.
   * @param options Optional style/composition hints.
   * @returns The enhancement result; `enhancedPrompt` is set only on success.
   */
  async enhancePrompt(
    rawPrompt: string,
    options: PromptEnhancementOptions = {},
  ): Promise<PromptEnhancementResult> {
    const timestamp = new Date().toISOString();
    console.log(
      `[${timestamp}] Starting prompt enhancement for: "${rawPrompt.substring(0, 50)}..."`,
    );
    try {
      const systemPrompt = this.buildSystemPrompt(options);
      const userPrompt = this.buildUserPrompt(rawPrompt, options);
      console.log(
        `[${timestamp}] Making API request to Gemini 2.5 Flash for prompt enhancement...`,
      );
      const response = await this.ai.models.generateContent({
        model: this.model,
        config: { responseModalities: ["TEXT"] },
        contents: [
          {
            role: "user" as const,
            parts: [{ text: `${systemPrompt}\n\n${userPrompt}` }],
          },
        ],
      });

      const enhancedText =
        response.candidates?.[0]?.content?.parts?.[0]?.text || "";

      // A missing candidate and an empty/whitespace-only text are both
      // failures: reporting success with an empty enhanced prompt would hand
      // callers nothing usable.
      if (!enhancedText.trim()) {
        return {
          success: false,
          originalPrompt: rawPrompt,
          error: "No enhanced prompt received from API",
        };
      }

      console.log(`[${timestamp}] Enhanced prompt generated successfully`);
      const result = this.parseEnhancedResponse(
        enhancedText,
        rawPrompt,
        options,
      );
      // An explicitly requested style wins over the inferred one.
      const style = options.imageStyle || result.detectedStyle;
      const enhancementResult: PromptEnhancementResult = {
        success: true,
        originalPrompt: rawPrompt,
        enhancedPrompt: result.enhancedPrompt,
        ...(result.detectedLanguage && {
          detectedLanguage: result.detectedLanguage,
        }),
        ...(result.appliedTemplate && {
          appliedTemplate: result.appliedTemplate,
        }),
        metadata: {
          ...(style && { style }),
          ...(options.aspectRatio && { aspectRatio: options.aspectRatio }),
          enhancements: result.enhancements,
        },
      };
      return enhancementResult;
    } catch (error) {
      console.error(`[${timestamp}] Prompt enhancement failed:`, error);
      return {
        success: false,
        originalPrompt: rawPrompt,
        error: error instanceof Error ? error.message : "Enhancement failed",
      };
    }
  }

  /**
   * Build the system prompt. Options that are set become lines in the
   * TECHNICAL REQUIREMENTS section; unset optional hints leave an empty line.
   */
  private buildSystemPrompt(options: PromptEnhancementOptions): string {
    const {
      imageStyle,
      aspectRatio,
      mood,
      lighting,
      cameraAngle,
      negativePrompts,
    } = options;
    return `You are an expert AI prompt engineer specializing in transforming rough, unstructured prompts into professional, detailed prompts for the Gemini Flash Image Generation model. Your goal is to follow these principles:
CORE PRINCIPLE: Describe the scene, don't just list keywords. Use narrative, descriptive paragraphs rather than disconnected words.
ENHANCEMENT GUIDELINES:
1. Transform any language into professional English
2. Be hyper-specific with details instead of vague descriptions
3. Use photography and cinematic language for composition control
4. Provide context and intent for better understanding
5. Apply the appropriate template based on the desired style
STYLE TEMPLATES:
- Photorealistic: Use photography terms (camera angles, lens types, lighting, fine details)
- Illustration: Specify art style, line work, color palette, shading technique
- Minimalist: Focus on negative space, simple composition, subtle elements
- Sticker: Emphasize style (kawaii, bold outlines, clean design), transparent background
- Product: Studio lighting setup, commercial photography terms, surfaces, angles
- Comic: Panel style, art technique, mood, dialogue/caption integration
TECHNICAL REQUIREMENTS:
${imageStyle ? `- Target Style: ${imageStyle}` : "- Auto-detect and apply appropriate style"}
${aspectRatio ? `- Aspect Ratio: ${aspectRatio}` : "- Use square format unless context suggests otherwise"}
${mood ? `- Mood: ${mood}` : ""}
${lighting ? `- Lighting: ${lighting}` : ""}
${cameraAngle ? `- Camera Angle: ${cameraAngle}` : ""}
${negativePrompts && negativePrompts.length > 0 ? `- Avoid: ${negativePrompts.join(", ")}` : ""}
RESPONSE FORMAT:
Provide only the enhanced prompt as a single, cohesive paragraph. Do not include explanations, metadata, or multiple options. The response should be ready to use directly for image generation.
Remember: More detail equals more control. Transform vague concepts into vivid, specific descriptions that guide the model toward the exact image envisioned.`;
  }

  /**
   * Build the user-turn text: the transformation request plus the target
   * style and aspect ratio when they were given.
   */
  private buildUserPrompt(
    rawPrompt: string,
    options: PromptEnhancementOptions,
  ): string {
    let prompt = `Transform this prompt into a professional image generation prompt: "${rawPrompt}"`;
    if (options.imageStyle) {
      prompt += `\n\nTarget style: ${options.imageStyle}`;
    }
    if (options.aspectRatio) {
      prompt += `\n\nAspect ratio: ${options.aspectRatio}`;
    }
    return prompt;
  }

  /**
   * Derive result metadata from the model's text using keyword heuristics:
   * which enhancements appear to have been applied, the original prompt's
   * language, and the template/style that was used.
   */
  private parseEnhancedResponse(
    enhancedText: string,
    originalPrompt: string,
    options: PromptEnhancementOptions,
  ): {
    enhancedPrompt: string;
    detectedLanguage?: string;
    appliedTemplate?: string;
    detectedStyle?: string;
    enhancements: string[];
  } {
    const enhancements: string[] = [];
    // Clean up the enhanced text
    const enhancedPrompt = enhancedText.trim();

    // Detect applied enhancements
    if (enhancedPrompt.length > originalPrompt.length * 1.5) {
      enhancements.push("Added detailed descriptions");
    }
    if (
      enhancedPrompt.includes("photorealistic") ||
      enhancedPrompt.includes("shot") ||
      enhancedPrompt.includes("lens")
    ) {
      enhancements.push("Applied photography terminology");
    }
    if (
      enhancedPrompt.includes("lighting") ||
      enhancedPrompt.includes("illuminated")
    ) {
      enhancements.push("Enhanced lighting description");
    }
    if (
      enhancedPrompt.includes("texture") ||
      enhancedPrompt.includes("surface")
    ) {
      enhancements.push("Added texture details");
    }

    // Guess the language of the original prompt (simple heuristic)
    const detectedLanguage = this.detectLanguage(originalPrompt);

    // A requested style always wins; otherwise infer one from keywords.
    let appliedTemplate = "general";
    if (options.imageStyle) {
      appliedTemplate = options.imageStyle;
    } else if (
      enhancedPrompt.includes("photorealistic") ||
      enhancedPrompt.includes("camera")
    ) {
      appliedTemplate = "photorealistic";
    } else if (
      enhancedPrompt.includes("sticker") ||
      enhancedPrompt.includes("kawaii")
    ) {
      appliedTemplate = "sticker";
    } else if (
      enhancedPrompt.includes("minimalist") ||
      enhancedPrompt.includes("negative space")
    ) {
      appliedTemplate = "minimalist";
    }

    return {
      enhancedPrompt,
      detectedLanguage,
      appliedTemplate,
      detectedStyle: appliedTemplate,
      enhancements,
    };
  }

  /**
   * Guess the language of `text` from the character ranges it contains.
   * Falls back to "English" when no range matches.
   */
  private detectLanguage(text: string): string {
    // Kana is checked before Han ideographs: Japanese text routinely mixes
    // kanji (which live in the Han range) with kana, whereas Chinese text
    // contains no kana. Kanji-only Japanese is still reported as Chinese.
    if (/[\u3040-\u309f\u30a0-\u30ff]/.test(text)) return "Japanese";
    if (/[\u4e00-\u9fff]/.test(text)) return "Chinese";
    if (/[\uac00-\ud7af]/.test(text)) return "Korean";

    // The Latin and Cyrillic character classes list lower-case letters only,
    // so compare against a lower-cased copy to catch upper-case text too.
    const lowered = text.toLowerCase();
    if (/[àáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ]/.test(lowered))
      return "Romance Language";
    if (/[а-яё]/.test(lowered)) return "Russian";
    if (/[α-ωΑ-Ω]/.test(text)) return "Greek";
    if (/[أ-ي]/.test(text)) return "Arabic";
    if (/[א-ת]/.test(text)) return "Hebrew";
    return "English";
  }
}

View File

@ -0,0 +1,124 @@
import { writeFileSync, readFileSync, existsSync, mkdirSync } from "fs";
import { dirname } from "path";
/**
 * One text-to-image request as recorded in the TTI log.
 */
export interface TTILogEntry {
  /** ISO timestamp of the request; rendered as `YYYY-MM-DD HH:MM:SS` in the log. */
  timestamp: string;
  orgId: string;
  projectId: string;
  prompt: string;
  meta?: {
    tags?: string[];
  };
  /** Summary of each reference image; `size` is in bytes. */
  referenceImages?: Array<{
    mimetype: string;
    size: number;
    originalname: string;
  }>;
  model: string;
  /** Generation config passed to the SDK; only ever JSON-serialised here. */
  config: any;
}

/**
 * Singleton that records text-to-image requests in a Markdown file, newest
 * entry first.
 *
 * Logging is enabled only when the `TTI_LOG` environment variable holds a
 * file path at the time the instance is first created. The file is reset on
 * creation, and all file I/O is synchronous. Failures are reported on the
 * console and never thrown to the caller.
 */
export class TTILogger {
  /** Header written at the top of the log file. */
  private static readonly LOG_HEADER = "# Text-to-Image Generation Log\n\n";

  private static instance: TTILogger | null = null;
  private logFilePath: string | null = null;
  private isEnabled: boolean = false;

  private constructor() {
    const ttiLogPath = process.env["TTI_LOG"];
    if (ttiLogPath) {
      this.logFilePath = ttiLogPath;
      this.isEnabled = true;
      this.initializeLogFile();
    }
  }

  /** Return the shared logger, creating it on first use. */
  static getInstance(): TTILogger {
    if (!TTILogger.instance) {
      TTILogger.instance = new TTILogger();
    }
    return TTILogger.instance;
  }

  /**
   * Create the log directory if needed and reset the file to just the
   * header. Disables logging if the file cannot be written.
   */
  private initializeLogFile(): void {
    if (!this.logFilePath) return;
    try {
      // Ensure directory exists
      const dir = dirname(this.logFilePath);
      if (!existsSync(dir)) {
        mkdirSync(dir, { recursive: true });
      }
      // Reset/clear the log file on service start
      writeFileSync(this.logFilePath, TTILogger.LOG_HEADER, {
        encoding: "utf-8",
      });
      console.log(`[TTILogger] Log file initialized: ${this.logFilePath}`);
    } catch (error) {
      console.error(`[TTILogger] Failed to initialize log file:`, error);
      this.isEnabled = false;
    }
  }

  /**
   * Prepend `entry` to the log, directly below the header. Does nothing
   * when logging is disabled.
   */
  log(entry: TTILogEntry): void {
    if (!this.isEnabled || !this.logFilePath) {
      return;
    }
    try {
      const newLogEntry = this.formatLogEntry(entry);
      const existingContent = existsSync(this.logFilePath)
        ? readFileSync(this.logFilePath, "utf-8")
        : "";

      // Locate the header: a leading "# " title line terminated by a blank
      // line. Entries start with "## ", so they can never be mistaken for it.
      // If the file was truncated or otherwise lost its header, keep whatever
      // is there as old entries and write a fresh header. Splitting blindly
      // on the first blank line would splice the new entry into the middle of
      // an existing one.
      const separatorIndex = existingContent.startsWith("# ")
        ? existingContent.indexOf("\n\n")
        : -1;
      const hasHeader = separatorIndex !== -1;
      const header = hasHeader
        ? existingContent.slice(0, separatorIndex + 2)
        : TTILogger.LOG_HEADER;
      const oldEntries = hasHeader
        ? existingContent.slice(separatorIndex + 2)
        : existingContent;

      // Insert new entry AFTER header but BEFORE old entries
      writeFileSync(
        this.logFilePath,
        header + newLogEntry + oldEntries,
        "utf-8",
      );
    } catch (error) {
      console.error(`[TTILogger] Failed to write log entry:`, error);
    }
  }

  /** Render one entry as a Markdown section terminated by a `---` rule. */
  private formatLogEntry(entry: TTILogEntry): string {
    const {
      timestamp,
      orgId,
      projectId,
      prompt,
      meta,
      referenceImages,
      model,
      config,
    } = entry;

    // "2024-01-02T03:04:05.000Z" -> "2024-01-02 03:04:05"
    const date = new Date(timestamp);
    const formattedDate = date.toISOString().replace("T", " ").slice(0, 19);

    let logText = `## ${formattedDate}\n`;
    logText += `${orgId}/${projectId}\n\n`;
    logText += `**Prompt:** ${prompt}\n\n`;

    // Add tags if present
    if (meta?.tags && meta.tags.length > 0) {
      logText += `**Tags:** ${meta.tags.join(", ")}\n\n`;
    }

    if (referenceImages && referenceImages.length > 0) {
      logText += `**Reference Images:** ${referenceImages.length} image${referenceImages.length > 1 ? "s" : ""}\n`;
      for (const img of referenceImages) {
        const sizeMB = (img.size / (1024 * 1024)).toFixed(2);
        logText += `- ${img.originalname} (${img.mimetype}, ${sizeMB} MB)\n`;
      }
      logText += "\n";
    }

    logText += `**Model:** ${model}\n`;
    logText += `**Config:** ${JSON.stringify(config)}\n\n`;
    logText += `---\n\n`;
    return logText;
  }
}

View File

@ -0,0 +1,122 @@
import { writeFileSync, readFileSync, existsSync, mkdirSync } from "fs";
import { dirname } from "path";
import { EnhancementLogEntry } from "./types";
/**
 * Singleton that records prompt enhancements in a Markdown file, newest
 * entry first.
 *
 * Logging is enabled only when the `ENH_LOG` environment variable holds a
 * file path at the time the instance is first created. The file is reset on
 * creation, and all file I/O is synchronous. Failures are reported on the
 * console and never thrown to the caller.
 */
export class EnhancementLogger {
  /** Header written at the top of the log file. */
  private static readonly LOG_HEADER = "# Prompt Enhancement Log\n\n";

  private static instance: EnhancementLogger | null = null;
  private logFilePath: string | null = null;
  private isEnabled: boolean = false;

  private constructor() {
    const enhLogPath = process.env["ENH_LOG"];
    if (enhLogPath) {
      this.logFilePath = enhLogPath;
      this.isEnabled = true;
      this.initializeLogFile();
    }
  }

  /** Return the shared logger, creating it on first use. */
  static getInstance(): EnhancementLogger {
    if (!EnhancementLogger.instance) {
      EnhancementLogger.instance = new EnhancementLogger();
    }
    return EnhancementLogger.instance;
  }

  /**
   * Create the log directory if needed and reset the file to just the
   * header. Disables logging if the file cannot be written.
   */
  private initializeLogFile(): void {
    if (!this.logFilePath) return;
    try {
      // Ensure directory exists
      const dir = dirname(this.logFilePath);
      if (!existsSync(dir)) {
        mkdirSync(dir, { recursive: true });
      }
      // Reset/clear the log file on service start
      writeFileSync(this.logFilePath, EnhancementLogger.LOG_HEADER, {
        encoding: "utf-8",
      });
      console.log(
        `[EnhancementLogger] Log file initialized: ${this.logFilePath}`,
      );
    } catch (error) {
      console.error(
        `[EnhancementLogger] Failed to initialize log file:`,
        error,
      );
      this.isEnabled = false;
    }
  }

  /**
   * Prepend `entry` to the log, directly below the header. Does nothing
   * when logging is disabled.
   */
  log(entry: EnhancementLogEntry): void {
    if (!this.isEnabled || !this.logFilePath) {
      return;
    }
    try {
      const newLogEntry = this.formatLogEntry(entry);
      const existingContent = existsSync(this.logFilePath)
        ? readFileSync(this.logFilePath, "utf-8")
        : "";

      // Locate the header: a leading "# " title line terminated by a blank
      // line. Entries start with "## ", so they can never be mistaken for it.
      // If the file was truncated or otherwise lost its header, keep whatever
      // is there as old entries and write a fresh header. Splitting blindly
      // on the first blank line would splice the new entry into the middle of
      // an existing one.
      const separatorIndex = existingContent.startsWith("# ")
        ? existingContent.indexOf("\n\n")
        : -1;
      const hasHeader = separatorIndex !== -1;
      const header = hasHeader
        ? existingContent.slice(0, separatorIndex + 2)
        : EnhancementLogger.LOG_HEADER;
      const oldEntries = hasHeader
        ? existingContent.slice(separatorIndex + 2)
        : existingContent;

      // Insert new entry AFTER header but BEFORE old entries
      writeFileSync(
        this.logFilePath,
        header + newLogEntry + oldEntries,
        "utf-8",
      );
    } catch (error) {
      console.error(`[EnhancementLogger] Failed to write log entry:`, error);
    }
  }

  /** Render one entry as a Markdown section terminated by a `---` rule. */
  private formatLogEntry(entry: EnhancementLogEntry): string {
    const {
      timestamp,
      orgId,
      projectId,
      originalPrompt,
      enhancedPrompt,
      meta,
      template,
      detectedLanguage,
      enhancements,
      model,
    } = entry;

    // "2024-01-02T03:04:05.000Z" -> "2024-01-02 03:04:05"
    const date = new Date(timestamp);
    const formattedDate = date.toISOString().replace("T", " ").slice(0, 19);

    let logText = `## ${formattedDate}\n`;
    logText += `${orgId}/${projectId}\n\n`;
    logText += `**Original Prompt:** ${originalPrompt}\n\n`;
    logText += `**Enhanced Prompt:** ${enhancedPrompt}\n\n`;

    // Add tags if present
    if (meta?.tags && meta.tags.length > 0) {
      logText += `**Tags:** ${meta.tags.join(", ")}\n\n`;
    }

    logText += `**Template:** ${template}\n`;
    if (detectedLanguage) {
      logText += `**Language:** ${detectedLanguage}\n`;
    }
    if (enhancements.length > 0) {
      logText += `**Enhancements:** ${enhancements.join(", ")}\n`;
    }
    logText += `**Model:** ${model}\n\n`;
    logText += `---\n\n`;
    return logText;
  }
}

View File

@ -0,0 +1,123 @@
import {
PromptEnhancementOptions,
PromptEnhancementContext,
PromptEnhancementResult,
} from "./types";
import { getAgent } from "./agents";
import { validatePromptLength } from "./validators";
import { EnhancementLogger } from "./EnhancementLogger";
/**
 * Coordinates prompt enhancement: validates the raw prompt, delegates to the
 * agent selected for the requested template, caps the enhanced prompt at
 * 2000 characters and, when a context is supplied, records the outcome via
 * {@link EnhancementLogger}.
 */
export class PromptEnhancementService {
  private apiKey: string;
  private model = "gemini-2.5-flash";

  /**
   * @param apiKey - Gemini API key handed to the agents.
   * @throws Error when `apiKey` is empty.
   */
  constructor(apiKey: string) {
    if (!apiKey) {
      throw new Error("Gemini API key is required");
    }
    this.apiKey = apiKey;
  }

  /**
   * Enhances `rawPrompt` with the agent matching `options.template`.
   *
   * Never rejects: validation problems, agent failures and thrown errors are
   * all reported as a result with `success: false` and an `error` message.
   *
   * @param rawPrompt - The user's prompt (must be non-empty, at most 5000 chars).
   * @param options - Template selection and optional tags (tags are only logged).
   * @param context - When provided, the successful enhancement is logged.
   * @returns The enhancement outcome.
   */
  async enhancePrompt(
    rawPrompt: string,
    options: PromptEnhancementOptions = {},
    context?: PromptEnhancementContext,
  ): Promise<PromptEnhancementResult> {
    const timestamp = new Date().toISOString();

    // Single place that shapes every failure result.
    const failure = (message: string): PromptEnhancementResult => ({
      success: false,
      originalPrompt: rawPrompt,
      error: message,
    });

    console.log(
      `[${timestamp}] Starting prompt enhancement for: "${rawPrompt.substring(0, 50)}..."`,
    );
    console.log(
      `[${timestamp}] Template: ${options.template || "general (auto-select)"}`,
    );
    if (options.tags && options.tags.length > 0) {
      console.log(`[${timestamp}] Tags: ${options.tags.join(", ")}`);
    }

    // Reject empty or oversized input before touching the API.
    const inputCheck = validatePromptLength(rawPrompt, 5000);
    if (!inputCheck.valid) {
      return failure(inputCheck.error || "Validation failed");
    }

    try {
      const agent = getAgent(this.apiKey, options.template);
      const agentResult = await agent.enhance(rawPrompt, options);

      if (!agentResult.success || !agentResult.enhancedPrompt) {
        return failure(agentResult.error || "Enhancement failed");
      }

      // The agent is instructed to stay under 2000 chars; enforce it anyway.
      const outputCheck = validatePromptLength(
        agentResult.enhancedPrompt,
        2000,
      );
      if (!outputCheck.valid) {
        console.warn(
          `[${timestamp}] Enhanced prompt exceeds 2000 characters (${agentResult.enhancedPrompt.length}), truncating...`,
        );
        agentResult.enhancedPrompt = agentResult.enhancedPrompt.substring(
          0,
          2000,
        );
      }

      // Agent-reported template wins, then the requested one, then "general".
      const templateName =
        agentResult.appliedTemplate || options.template || "general";
      const languagePart = agentResult.detectedLanguage
        ? { detectedLanguage: agentResult.detectedLanguage }
        : {};

      const result: PromptEnhancementResult = {
        success: true,
        originalPrompt: rawPrompt,
        enhancedPrompt: agentResult.enhancedPrompt,
        ...languagePart,
        appliedTemplate: templateName,
        metadata: {
          style: templateName,
          enhancements: agentResult.enhancements,
        },
      };

      // Logging is opt-in: it only happens when the caller supplies a context.
      if (context) {
        const metaPart = context.meta ? { meta: context.meta } : {};
        EnhancementLogger.getInstance().log({
          timestamp,
          orgId: context.orgId,
          projectId: context.projectId,
          originalPrompt: rawPrompt,
          enhancedPrompt: agentResult.enhancedPrompt,
          ...metaPart,
          template: templateName,
          ...languagePart,
          enhancements: agentResult.enhancements,
          model: this.model,
        });
      }

      console.log(`[${timestamp}] Enhancement completed successfully`);
      return result;
    } catch (error) {
      console.error(`[${timestamp}] Prompt enhancement failed:`, error);
      return failure(
        error instanceof Error ? error.message : "Enhancement failed",
      );
    }
  }
}

View File

@ -0,0 +1,94 @@
import { GoogleGenAI } from "@google/genai";
import {
IPromptAgent,
PromptEnhancementOptions,
AgentResult,
} from "../types";
import { detectLanguage, detectEnhancements } from "../utils";
/**
 * Shared implementation for all prompt-enhancement agents.
 *
 * Subclasses supply a template type, a system prompt and a template; this
 * class sends `system prompt + user prompt` to the Gemini model as a single
 * user turn and converts the response into an {@link AgentResult}.
 */
export abstract class BaseAgent implements IPromptAgent {
  protected ai: GoogleGenAI;
  protected model = "gemini-2.5-flash";
  abstract readonly templateType: string;

  /**
   * @param apiKey - Gemini API key used to construct the client.
   * @throws Error when `apiKey` is empty.
   */
  constructor(apiKey: string) {
    if (!apiKey) {
      throw new Error("Gemini API key is required");
    }
    this.ai = new GoogleGenAI({ apiKey });
  }

  /** Full instruction text sent ahead of the user's prompt. */
  protected abstract getSystemPrompt(): string;

  /** Template text for this agent's style. */
  protected abstract getTemplate(): string;

  /**
   * Asks the model to rewrite `rawPrompt` according to this agent's template.
   *
   * Never rejects: API errors and empty responses are returned as
   * `success: false` with an `error` message.
   *
   * @param rawPrompt - The user's original prompt.
   * @param _options - Currently unused by the base implementation.
   * @returns The enhanced prompt plus detected language and enhancement tags,
   *   or a failure result.
   */
  async enhance(
    rawPrompt: string,
    _options: PromptEnhancementOptions,
  ): Promise<AgentResult> {
    const timestamp = new Date().toISOString();
    console.log(
      `[${timestamp}] [${this.templateType}Agent] Enhancing prompt: "${rawPrompt.substring(0, 50)}..."`,
    );

    try {
      const systemPrompt = this.getSystemPrompt();
      const userPrompt = this.buildUserPrompt(rawPrompt);

      const response = await this.ai.models.generateContent({
        model: this.model,
        config: { responseModalities: ["TEXT"] },
        contents: [
          {
            role: "user" as const,
            parts: [{ text: `${systemPrompt}\n\n${userPrompt}` }],
          },
        ],
      });

      // A reply may be split across several parts; concatenate every text
      // part (ignoring any "thought" parts) rather than reading only the
      // first one, which may be empty or hold just a fragment.
      const parts = response.candidates?.[0]?.content?.parts ?? [];
      const enhancedPrompt = parts
        .filter((part) => !part.thought)
        .map((part) => part.text ?? "")
        .join("")
        .trim();

      // An empty or whitespace-only reply is a failure, not a successful
      // enhancement with an empty prompt.
      if (!enhancedPrompt) {
        return {
          success: false,
          error: "No enhanced prompt received from API",
          enhancements: [],
        };
      }

      console.log(
        `[${timestamp}] [${this.templateType}Agent] Enhancement successful`,
      );

      return {
        success: true,
        enhancedPrompt,
        detectedLanguage: detectLanguage(rawPrompt),
        appliedTemplate: this.templateType,
        enhancements: detectEnhancements(rawPrompt, enhancedPrompt),
      };
    } catch (error) {
      console.error(
        `[${timestamp}] [${this.templateType}Agent] Enhancement failed:`,
        error,
      );
      return {
        success: false,
        error: error instanceof Error ? error.message : "Enhancement failed",
        enhancements: [],
      };
    }
  }

  /**
   * Wraps the raw prompt in the instruction appended after the system prompt.
   *
   * @param rawPrompt - The user's original prompt.
   */
  protected buildUserPrompt(rawPrompt: string): string {
    return `Transform this prompt into a professional image generation prompt: "${rawPrompt}"
Target template/style: ${this.templateType}`;
  }
}

View File

@ -0,0 +1,48 @@
import { BaseAgent } from "./BaseAgent";
/**
 * Prompt skeleton for comic-panel images: a sentence pattern with
 * `[bracketed]` placeholders followed by a worked example. Returned by
 * `ComicAgent.getTemplate()` and exported so other agents can embed it.
 */
export const COMIC_TEMPLATE = `A single comic book panel in a [art style] style. In the foreground, [character description and action]. In the background, [setting details]. The panel has a [dialogue/caption box] with the text "[Text]". The lighting creates a [mood] mood. [Aspect ratio].
Example:
A single comic book panel in a gritty, noir art style with high-contrast black and white inks. In the foreground, a detective in a trench coat stands under a flickering streetlamp, rain soaking his shoulders. In the background, the neon sign of a desolate bar reflects in a puddle. A caption box at the top reads "The city was a tough place to keep secrets." The lighting is harsh, creating a dramatic, somber mood. Landscape.`;
/**
 * Agent for the "comic" template. It only contributes text: the comic
 * template and a comic-specific system prompt; everything else is inherited
 * from `BaseAgent`.
 */
export class ComicAgent extends BaseAgent {
readonly templateType = "comic";
/** Returns the comic panel template (skeleton plus example). */
protected getTemplate(): string {
return COMIC_TEMPLATE;
}
/**
 * Builds the system prompt: role, comic art guidelines, the template from
 * `getTemplate()`, the enhancement strategy and the required response format
 * (single paragraph, under 2000 characters).
 */
protected getSystemPrompt(): string {
return `You are an expert AI prompt engineer specializing in comic book and sequential art using the Gemini Flash Image Generation model.
CORE PRINCIPLE: Create dynamic, story-driven panels with clear visual narrative and emotional impact.
COMIC ART GUIDELINES:
- Specify art style (noir, manga, superhero, indie, webcomic, etc.)
- Detail inking technique (bold lines, cross-hatching, clean digital, etc.)
- Describe foreground action and character poses
- Include background setting details
- Add mood and atmosphere (dramatic, tense, lighthearted, etc.)
- Specify panel composition and framing
- Include dialogue/caption placement if needed
- Use comic-specific lighting (dramatic shadows, rim lighting, etc.)
- Dynamic poses and expressions
TEMPLATE STRUCTURE:
${this.getTemplate()}
ENHANCEMENT STRATEGY:
1. Transform any language into professional English
2. Add comic art terminology
3. Create dynamic, story-driven compositions
4. Specify art style and technique clearly
5. Include mood and narrative elements
RESPONSE FORMAT:
Provide only the enhanced prompt as a single, cohesive paragraph. Do not include explanations, metadata, or multiple options. The response should be ready to use directly for image generation.
CRITICAL: The prompt length MUST be under 2000 characters. Contract the prompt if it's longer.
Remember: Comics are about storytelling. Create panels that convey action, emotion, and narrative progression.`;
}
}

View File

@ -0,0 +1,68 @@
import { BaseAgent } from "./BaseAgent";
import { PHOTOREALISTIC_TEMPLATE } from "./PhotorealisticAgent";
import { ILLUSTRATION_TEMPLATE } from "./IllustrationAgent";
import { MINIMALIST_TEMPLATE } from "./MinimalistAgent";
import { STICKER_TEMPLATE } from "./StickerAgent";
import { PRODUCT_TEMPLATE } from "./ProductAgent";
import { COMIC_TEMPLATE } from "./ComicAgent";
/**
 * Agent for the "general" template. Instead of one fixed style, its system
 * prompt lists all six style templates and instructs the model to pick the
 * one that best matches the user's prompt.
 */
export class GeneralAgent extends BaseAgent {
readonly templateType = "general";
/**
 * Returns a numbered catalogue of every style template (photorealistic,
 * illustration, minimalist, sticker, product, comic).
 */
protected getTemplate(): string {
return `AVAILABLE TEMPLATES:
1. PHOTOREALISTIC:
${PHOTOREALISTIC_TEMPLATE}
2. ILLUSTRATION:
${ILLUSTRATION_TEMPLATE}
3. MINIMALIST:
${MINIMALIST_TEMPLATE}
4. STICKER:
${STICKER_TEMPLATE}
5. PRODUCT:
${PRODUCT_TEMPLATE}
6. COMIC:
${COMIC_TEMPLATE}`;
}
/**
 * Builds the system prompt: role, the template-selection task, the template
 * catalogue from `getTemplate()`, enhancement guidelines and the required
 * response format (single paragraph, under 2000 characters).
 */
protected getSystemPrompt(): string {
return `You are an expert AI prompt engineer specializing in transforming rough, unstructured prompts into professional, detailed prompts for the Gemini Flash Image Generation model.
CORE PRINCIPLE: Describe the scene, don't just list keywords. Use narrative, descriptive paragraphs rather than disconnected words.
YOUR TASK:
1. Analyze the user's prompt to understand their intent
2. Determine which template/style best matches their vision:
- PHOTOREALISTIC: Real-world scenes, portraits, landscapes, realistic subjects
- ILLUSTRATION: Artistic renderings, stylized art, drawings, paintings
- MINIMALIST: Simple compositions, negative space, clean designs, backgrounds
- STICKER: Icons, logos, die-cut designs, kawaii, bold graphics
- PRODUCT: Commercial photography, e-commerce shots, studio lighting
- COMIC: Sequential art, panels, story-driven scenes, narrative art
3. Apply the most appropriate template's guidelines
4. Enhance the prompt with professional terminology and detailed descriptions
${this.getTemplate()}
ENHANCEMENT GUIDELINES:
- Transform any language into professional English
- Be hyper-specific with details instead of vague descriptions
- Use appropriate technical terminology (photography, art, design terms)
- Provide context and intent for better understanding
- Apply the template that best matches the user's vision
- If unclear, default to a balanced photorealistic approach
RESPONSE FORMAT:
Provide only the enhanced prompt as a single, cohesive paragraph. Do not include explanations, metadata, template names, or multiple options. The response should be ready to use directly for image generation.
CRITICAL: The prompt length MUST be under 2000 characters. Contract the prompt if it's longer.
Remember: Your job is to intelligently select the best approach and create a vivid, specific description that guides the model toward the exact image envisioned.`;
}
}

View File

@ -0,0 +1,46 @@
import { BaseAgent } from "./BaseAgent";
/**
 * Prompt skeleton for illustrated artwork: a sentence pattern with
 * `[bracketed]` placeholders followed by a worked example. Returned by
 * `IllustrationAgent.getTemplate()` and exported so other agents can embed it.
 */
export const ILLUSTRATION_TEMPLATE = `A [style] illustration of [subject], featuring [key characteristics] with [color palette]. The art style is [art technique description], with [line work style] and [shading technique]. The composition includes [composition details].
Example:
A watercolor illustration of a magical forest clearing at twilight, featuring glowing fireflies and an ancient stone archway covered in luminescent moss. The art style is whimsical and dreamlike, with soft, flowing brushstrokes and gentle color bleeding. The color palette consists of deep purples, soft blues, and warm golden yellows. The illustration uses delicate line work for fine details and subtle wet-on-wet shading to create atmospheric depth.`;
/**
 * Agent for the "illustration" template. It only contributes text: the
 * illustration template and an illustration-specific system prompt;
 * everything else is inherited from `BaseAgent`.
 */
export class IllustrationAgent extends BaseAgent {
readonly templateType = "illustration";
/** Returns the illustration template (skeleton plus example). */
protected getTemplate(): string {
return ILLUSTRATION_TEMPLATE;
}
/**
 * Builds the system prompt: role, illustration guidelines, the template from
 * `getTemplate()`, the enhancement strategy and the required response format
 * (single paragraph, under 2000 characters).
 */
protected getSystemPrompt(): string {
return `You are an expert AI prompt engineer specializing in illustrated artwork using the Gemini Flash Image Generation model.
CORE PRINCIPLE: Describe the scene, don't just list keywords. Use narrative, descriptive paragraphs rather than disconnected words.
ILLUSTRATION GUIDELINES:
- Specify art style (watercolor, digital painting, sketch, anime, manga, etc.)
- Describe line work quality (bold outlines, delicate lines, clean vectors, etc.)
- Detail color palette and color theory (complementary, monochromatic, vibrant, muted, etc.)
- Mention shading technique (cel-shading, soft shading, cross-hatching, etc.)
- Include artistic influences or style references when appropriate
- Describe texture and brush work
- Add composition and framing details
TEMPLATE STRUCTURE:
${this.getTemplate()}
ENHANCEMENT STRATEGY:
1. Transform any language into professional English
2. Add art-specific terminology
3. Expand vague descriptions with artistic details
4. Specify style, technique, and medium
5. Maintain natural, flowing narrative style
RESPONSE FORMAT:
Provide only the enhanced prompt as a single, cohesive paragraph. Do not include explanations, metadata, or multiple options. The response should be ready to use directly for image generation.
CRITICAL: The prompt length MUST be under 2000 characters. Contract the prompt if it's longer.
Remember: Artistic detail creates better results. Transform basic ideas into rich, stylistically-defined illustrations.`;
}
}

View File

@ -0,0 +1,47 @@
import { BaseAgent } from "./BaseAgent";
/**
 * Prompt skeleton for minimalist compositions: a sentence pattern with
 * `[bracketed]` placeholders followed by a worked example. Returned by
 * `MinimalistAgent.getTemplate()` and exported so other agents can embed it.
 */
export const MINIMALIST_TEMPLATE = `A minimalist composition featuring a single [subject] positioned in the [position in frame] of the frame. The background is a vast, empty [color/description] canvas, creating significant negative space. Soft, subtle lighting. [Aspect ratio].
Example:
A minimalist composition featuring a single, delicate red maple leaf positioned in the bottom-right of the frame. The background is a vast, empty off-white canvas, creating significant negative space for text. Soft, diffused lighting from the top left. Square image.`;
/**
 * Agent for the "minimalist" template. It only contributes text: the
 * minimalist template and a minimalism-specific system prompt; everything
 * else is inherited from `BaseAgent`.
 */
export class MinimalistAgent extends BaseAgent {
readonly templateType = "minimalist";
/** Returns the minimalist template (skeleton plus example). */
protected getTemplate(): string {
return MINIMALIST_TEMPLATE;
}
/**
 * Builds the system prompt: role, minimalist guidelines, the template from
 * `getTemplate()`, the enhancement strategy and the required response format
 * (single paragraph, under 2000 characters).
 */
protected getSystemPrompt(): string {
return `You are an expert AI prompt engineer specializing in minimalist design using the Gemini Flash Image Generation model.
CORE PRINCIPLE: Less is more. Emphasize negative space, simple composition, and subtle elements.
MINIMALIST GUIDELINES:
- Focus on negative space and emptiness
- Simple, clean composition with few elements
- Describe precise positioning in the frame (bottom-right, top-left, centered, etc.)
- Use muted or solid color backgrounds
- Subtle, soft lighting
- Avoid clutter and excessive detail
- Emphasize balance and proportion
- Consider use cases (backgrounds for text, presentations, marketing)
TEMPLATE STRUCTURE:
${this.getTemplate()}
ENHANCEMENT STRATEGY:
1. Transform any language into professional English
2. Simplify and clarify the composition
3. Emphasize negative space and positioning
4. Add subtle lighting and color descriptions
5. Maintain clean, precise language
RESPONSE FORMAT:
Provide only the enhanced prompt as a single, cohesive paragraph. Do not include explanations, metadata, or multiple options. The response should be ready to use directly for image generation.
CRITICAL: The prompt length MUST be under 2000 characters. Contract the prompt if it's longer.
Remember: Minimalism is about restraint and intentional placement. Create breathing room and focus.`;
}
}

View File

@ -0,0 +1,46 @@
import { BaseAgent } from "./BaseAgent";
/**
 * Prompt skeleton for photorealistic images: a sentence pattern with
 * `[bracketed]` placeholders followed by a worked example. Returned by
 * `PhotorealisticAgent.getTemplate()` and exported so other agents can embed it.
 */
export const PHOTOREALISTIC_TEMPLATE = `A photorealistic [shot type] of [subject], [action or expression], set in [environment]. The scene is illuminated by [lighting description], creating a [mood] atmosphere. Captured with a [camera/lens details], emphasizing [key textures and details]. The image should be in a [aspect ratio] format.
Example:
A photorealistic close-up portrait of an elderly Japanese ceramicist with deep, sun-etched wrinkles and a warm, knowing smile. He is carefully inspecting a freshly glazed tea bowl. The setting is his rustic, sun-drenched workshop. The scene is illuminated by soft, golden hour light streaming through a window, highlighting the fine texture of the clay. Captured with an 85mm portrait lens, resulting in a soft, blurred background (bokeh). The overall mood is serene and masterful. Vertical portrait orientation.`;
/**
 * Agent for the "photorealistic" template. It only contributes text: the
 * photorealistic template and a photography-specific system prompt;
 * everything else is inherited from `BaseAgent`.
 */
export class PhotorealisticAgent extends BaseAgent {
readonly templateType = "photorealistic";
/** Returns the photorealistic template (skeleton plus example). */
protected getTemplate(): string {
return PHOTOREALISTIC_TEMPLATE;
}
/**
 * Builds the system prompt: role, photorealistic guidelines, the template
 * from `getTemplate()`, the enhancement strategy and the required response
 * format (single paragraph, under 2000 characters).
 */
protected getSystemPrompt(): string {
return `You are an expert AI prompt engineer specializing in photorealistic image generation using the Gemini Flash Image Generation model.
CORE PRINCIPLE: Describe the scene, don't just list keywords. Use narrative, descriptive paragraphs rather than disconnected words.
PHOTOREALISTIC GUIDELINES:
- Use photography terms: camera angles, lens types, lighting, fine details
- Mention specific camera equipment (e.g., 85mm portrait lens, wide-angle shot)
- Describe lighting in detail (golden hour, soft diffused light, studio lighting, etc.)
- Include mood and atmosphere
- Specify textures and materials
- Add composition details (depth of field, bokeh, focus points)
- Be hyper-specific with visual details
TEMPLATE STRUCTURE:
${this.getTemplate()}
ENHANCEMENT STRATEGY:
1. Transform any language into professional English
2. Add photography-specific terminology
3. Expand vague descriptions with specific visual details
4. Include lighting, camera, and composition details
5. Maintain natural, flowing narrative style
RESPONSE FORMAT:
Provide only the enhanced prompt as a single, cohesive paragraph. Do not include explanations, metadata, or multiple options. The response should be ready to use directly for image generation.
CRITICAL: The prompt length MUST be under 2000 characters. Contract the prompt if it's longer.
Remember: More detail equals more control. Transform vague concepts into vivid, photographic descriptions.`;
}
}

View File

@ -0,0 +1,48 @@
import { BaseAgent } from "./BaseAgent";
/**
 * Prompt skeleton for studio product photography: a sentence pattern with
 * `[bracketed]` placeholders followed by a worked example. Returned by
 * `ProductAgent.getTemplate()` and exported so other agents can embed it.
 */
export const PRODUCT_TEMPLATE = `A high-resolution, studio-lit product photograph of a [product description] on a [background surface/description]. The lighting is a [lighting setup, e.g., three-point softbox setup] to [lighting purpose]. The camera angle is a [angle type] to showcase [specific feature]. Ultra-realistic, with sharp focus on [key detail]. [Aspect ratio].
Example:
A high-resolution, studio-lit product photograph of a minimalist ceramic coffee mug in matte black, presented on a polished concrete surface. The lighting is a three-point softbox setup designed to create soft, diffused highlights and eliminate harsh shadows. The camera angle is a slightly elevated 45-degree shot to showcase its clean lines. Ultra-realistic, with sharp focus on the steam rising from the coffee. Square image.`;
/**
 * Agent for the "product" template. It only contributes text: the product
 * photography template and a matching system prompt; everything else is
 * inherited from `BaseAgent`.
 */
export class ProductAgent extends BaseAgent {
readonly templateType = "product";
/** Returns the product photography template (skeleton plus example). */
protected getTemplate(): string {
return PRODUCT_TEMPLATE;
}
/**
 * Builds the system prompt: role, product photography guidelines, the
 * template from `getTemplate()`, the enhancement strategy and the required
 * response format (single paragraph, under 2000 characters).
 */
protected getSystemPrompt(): string {
return `You are an expert AI prompt engineer specializing in commercial product photography using the Gemini Flash Image Generation model.
CORE PRINCIPLE: Create professional, e-commerce ready product shots with studio-quality lighting and composition.
PRODUCT PHOTOGRAPHY GUIDELINES:
- Studio lighting setups (three-point, softbox, rim lighting, etc.)
- Professional camera angles (45-degree, overhead flat-lay, eye-level, etc.)
- Clean, appropriate backgrounds (white seamless, concrete, wood, gradient, etc.)
- High-resolution, sharp focus on product details
- Describe surface materials and textures
- Lighting purpose (eliminate shadows, create highlights, show texture, etc.)
- Commercial photography terms
- Ultra-realistic rendering
- Appropriate aspect ratios for e-commerce (square, portrait)
TEMPLATE STRUCTURE:
${this.getTemplate()}
ENHANCEMENT STRATEGY:
1. Transform any language into professional English
2. Add commercial photography terminology
3. Specify professional lighting setups
4. Detail surface materials and backgrounds
5. Include sharp focus and detail specifications
RESPONSE FORMAT:
Provide only the enhanced prompt as a single, cohesive paragraph. Do not include explanations, metadata, or multiple options. The response should be ready to use directly for image generation.
CRITICAL: The prompt length MUST be under 2000 characters. Contract the prompt if it's longer.
Remember: Product photography is about showcasing the product clearly with professional studio quality. Think e-commerce and advertising.`;
}
}

View File

@ -0,0 +1,48 @@
import { BaseAgent } from "./BaseAgent";
/**
 * Prompt skeleton for sticker designs: a sentence pattern with `[bracketed]`
 * placeholders followed by a worked example. Returned by
 * `StickerAgent.getTemplate()` and exported so other agents can embed it.
 *
 * NOTE(review): the skeleton asks for a transparent background while the
 * example asks for a white one — confirm which is intended.
 */
export const STICKER_TEMPLATE = `A [style] sticker of a [subject], featuring [key characteristics] and a [color palette]. The design should have [line style] and [shading style]. The background must be transparent.
Example:
A kawaii-style sticker of a happy red panda wearing a tiny bamboo hat. It's munching on a green bamboo leaf. The design features bold, clean outlines, simple cel-shading, and a vibrant color palette. The background must be white.`;
/**
 * Agent for the "sticker" template. It only contributes text: the sticker
 * template and a sticker-specific system prompt; everything else is
 * inherited from `BaseAgent`.
 */
export class StickerAgent extends BaseAgent {
readonly templateType = "sticker";
/** Returns the sticker template (skeleton plus example). */
protected getTemplate(): string {
return STICKER_TEMPLATE;
}
/**
 * Builds the system prompt: role, sticker guidelines, the template from
 * `getTemplate()`, the enhancement strategy and the required response format
 * (single paragraph, under 2000 characters).
 */
protected getSystemPrompt(): string {
return `You are an expert AI prompt engineer specializing in sticker and icon design using the Gemini Flash Image Generation model.
CORE PRINCIPLE: Create clean, recognizable designs optimized for small formats and die-cut production.
STICKER GUIDELINES:
- Specify sticker style (kawaii, bold, minimalist, vintage, cartoon, etc.)
- Use bold, clean outlines for definition
- Simple, clear shapes that work at small sizes
- Vibrant, well-defined color palettes
- Simple shading (cel-shading, flat colors, minimal gradients)
- ALWAYS specify transparent or white background
- Avoid fine details that won't scale well
- Consider die-cut friendly designs
- Make subjects iconic and recognizable
TEMPLATE STRUCTURE:
${this.getTemplate()}
ENHANCEMENT STRATEGY:
1. Transform any language into professional English
2. Simplify complex ideas into iconic, sticker-friendly designs
3. Add style-specific terminology (kawaii, chibi, retro, etc.)
4. Specify clear outlines and simple shading
5. Always include background specification
RESPONSE FORMAT:
Provide only the enhanced prompt as a single, cohesive paragraph. Do not include explanations, metadata, or multiple options. The response should be ready to use directly for image generation.
CRITICAL: The prompt length MUST be under 2000 characters. Contract the prompt if it's longer.
Remember: Stickers need to be simple, bold, and instantly recognizable. Think iconic, not detailed.`;
}
}

View File

@ -0,0 +1,46 @@
import { IPromptAgent } from "../types";
import { PhotorealisticAgent } from "./PhotorealisticAgent";
import { IllustrationAgent } from "./IllustrationAgent";
import { MinimalistAgent } from "./MinimalistAgent";
import { StickerAgent } from "./StickerAgent";
import { ProductAgent } from "./ProductAgent";
import { ComicAgent } from "./ComicAgent";
import { GeneralAgent } from "./GeneralAgent";
/** Constructor signature shared by every agent: takes the API key only. */
type AgentConstructor = new (apiKey: string) => IPromptAgent;
/**
 * Maps a template name to the agent class that handles it. Keys are the
 * template names accepted by `getAgent`.
 */
const AGENT_REGISTRY: Record<string, AgentConstructor> = {
photorealistic: PhotorealisticAgent,
illustration: IllustrationAgent,
minimalist: MinimalistAgent,
sticker: StickerAgent,
product: ProductAgent,
comic: ComicAgent,
general: GeneralAgent,
};
/**
 * Creates the agent responsible for the given template.
 *
 * @param apiKey - Gemini API key passed to the agent constructor.
 * @param template - Template name; when omitted or not registered, a
 *   `GeneralAgent` is returned (unknown names also log a warning).
 * @returns A new agent instance.
 */
export function getAgent(apiKey: string, template?: string): IPromptAgent {
  if (!template) {
    return new GeneralAgent(apiKey);
  }

  // Only accept keys defined on the registry itself. A bare
  // `AGENT_REGISTRY[template]` also resolves inherited members such as
  // "constructor" or "toString", which would either build a non-agent object
  // or throw "is not a constructor" for a caller-supplied template name.
  const isRegistered = Object.prototype.hasOwnProperty.call(
    AGENT_REGISTRY,
    template,
  );
  const AgentClass = isRegistered ? AGENT_REGISTRY[template] : undefined;

  if (!AgentClass) {
    console.warn(
      `Unknown template "${template}", falling back to GeneralAgent`,
    );
    return new GeneralAgent(apiKey);
  }

  return new AgentClass(apiKey);
}
export {
PhotorealisticAgent,
IllustrationAgent,
MinimalistAgent,
StickerAgent,
ProductAgent,
ComicAgent,
GeneralAgent,
};

View File

@ -0,0 +1,12 @@
export { PromptEnhancementService } from "./PromptEnhancementService";
export { EnhancementLogger } from "./EnhancementLogger";
export type {
PromptEnhancementOptions,
PromptEnhancementContext,
PromptEnhancementResult,
EnhancementLogEntry,
IPromptAgent,
AgentResult,
ValidationResult,
} from "./types";

View File

@ -0,0 +1,70 @@
/** Caller-supplied options for a prompt enhancement request. */
export interface PromptEnhancementOptions {
/** Template (style) to apply; selects which agent handles the prompt. */
template?:
| "photorealistic"
| "illustration"
| "minimalist"
| "sticker"
| "product"
| "comic"
| "general";
/** Free-form tags associated with the request. */
tags?: string[];
}
/**
 * Identifies who an enhancement is performed for. Its fields are copied into
 * the enhancement log entry.
 */
export interface PromptEnhancementContext {
/** Organization identifier written to the log entry. */
orgId: string;
/** Project identifier written to the log entry. */
projectId: string;
/** Optional request metadata forwarded to the log entry. */
meta?: {
tags?: string[];
};
}
/** Outcome of a prompt enhancement attempt. */
export interface PromptEnhancementResult {
/** `true` when an enhanced prompt was produced; otherwise see `error`. */
success: boolean;
/** The prompt exactly as submitted by the caller. */
originalPrompt: string;
/** The rewritten prompt; present on success. */
enhancedPrompt?: string;
/** Language detected in the original prompt, when available. */
detectedLanguage?: string;
/** Name of the template that was applied. */
appliedTemplate?: string;
/** Additional details about the enhancement. */
metadata?: {
style?: string;
aspectRatio?: string;
/** Human-readable labels describing what the enhancement added. */
enhancements: string[];
};
/** Failure description; present when `success` is `false`. */
error?: string;
}
/** Result returned by an individual agent's `enhance` call. */
export interface AgentResult {
/** `true` when the agent produced an enhanced prompt. */
success: boolean;
/** The rewritten prompt; present on success. */
enhancedPrompt?: string;
/** Language detected in the original prompt. */
detectedLanguage?: string;
/** Template type of the agent that produced the result. */
appliedTemplate?: string;
/** Human-readable labels describing what the enhancement added. */
enhancements: string[];
/** Failure description; present when `success` is `false`. */
error?: string;
}
/** Contract implemented by every prompt-enhancement agent. */
export interface IPromptAgent {
/** Name of the template this agent handles. */
readonly templateType: string;
/**
 * Rewrites `prompt` according to the agent's template.
 *
 * @param prompt - The user's original prompt.
 * @param options - Enhancement options for the request.
 * @returns The agent's result (success or failure).
 */
enhance(
prompt: string,
options: PromptEnhancementOptions,
): Promise<AgentResult>;
}
/** Result of a validation check. */
export interface ValidationResult {
/** `true` when the value passed validation. */
valid: boolean;
/** Reason for the failure; present when `valid` is `false`. */
error?: string;
}
/** One record of a completed enhancement, as passed to the enhancement logger. */
export interface EnhancementLogEntry {
/** ISO 8601 timestamp string of the enhancement. */
timestamp: string;
/** Organization the request belongs to. */
orgId: string;
/** Project the request belongs to. */
projectId: string;
/** The prompt as submitted by the caller. */
originalPrompt: string;
/** The rewritten prompt. */
enhancedPrompt: string;
/** Optional request metadata (e.g. tags). */
meta?: {
tags?: string[];
};
/** Name of the template that was applied. */
template: string;
/** Language detected in the original prompt, when available. */
detectedLanguage?: string;
/** Human-readable labels describing what the enhancement added. */
enhancements: string[];
/** Identifier of the model used for the enhancement. */
model: string;
}

View File

@ -0,0 +1,47 @@
/**
 * Guesses the language of `text` from the scripts / characters it contains.
 *
 * This is a coarse heuristic: the first matching script wins and anything
 * without a recognised script (including the empty string) is reported as
 * "English".
 *
 * @param text - Text to inspect.
 * @returns One of "Japanese", "Chinese", "Korean", "Romance Language",
 *   "Russian", "Greek", "Arabic", "Hebrew" or "English".
 */
export function detectLanguage(text: string): string {
  // Kana must be tested before CJK ideographs: Japanese text normally mixes
  // kana with kanji, and kanji fall in the same range used to detect Chinese.
  if (/[\u3040-\u309f\u30a0-\u30ff]/.test(text)) return "Japanese";
  if (/[\u4e00-\u9fff]/.test(text)) return "Chinese";
  if (/[\uac00-\ud7af]/.test(text)) return "Korean";

  // The Latin and Cyrillic classes below list lowercase letters only, so
  // match against a lowercased copy to catch capitals as well.
  const lowered = text.toLowerCase();
  if (/[àáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ]/.test(lowered))
    return "Romance Language";
  if (/[а-яё]/.test(lowered)) return "Russian";

  if (/[α-ωΑ-Ω]/.test(text)) return "Greek";
  if (/[أ-ي]/.test(text)) return "Arabic";
  if (/[א-ת]/.test(text)) return "Hebrew";
  return "English";
}
/**
 * Derives human-readable labels describing how `enhancedPrompt` differs from
 * `originalPrompt`, based on length growth and keyword presence.
 *
 * Keyword checks are case-insensitive so that capitalised occurrences (for
 * example a sentence starting with "Lighting") are recognised too.
 *
 * @param originalPrompt - The prompt as submitted by the user.
 * @param enhancedPrompt - The rewritten prompt.
 * @returns Labels in a fixed order; empty when nothing was detected.
 */
export function detectEnhancements(
  originalPrompt: string,
  enhancedPrompt: string,
): string[] {
  const haystack = enhancedPrompt.toLowerCase();
  const mentionsAny = (...keywords: string[]): boolean =>
    keywords.some((keyword) => haystack.includes(keyword));

  const enhancements: string[] = [];

  // "Detailed" means the prompt grew by more than half its original length.
  if (enhancedPrompt.length > originalPrompt.length * 1.5) {
    enhancements.push("Added detailed descriptions");
  }
  if (mentionsAny("photorealistic", "shot", "lens")) {
    enhancements.push("Applied photography terminology");
  }
  if (mentionsAny("lighting", "illuminated")) {
    enhancements.push("Enhanced lighting description");
  }
  if (mentionsAny("texture", "surface")) {
    enhancements.push("Added texture details");
  }

  return enhancements;
}

View File

@ -0,0 +1,22 @@
import { ValidationResult } from "./types";
/**
 * Checks that a prompt is non-blank and no longer than `maxLength`.
 *
 * Blankness is judged after trimming whitespace; the length limit is applied
 * to the prompt as given (untrimmed).
 *
 * @param prompt - Prompt text to validate.
 * @param maxLength - Maximum allowed number of characters (default 2000).
 * @returns `{ valid: true }`, or `{ valid: false, error }` describing the problem.
 */
export function validatePromptLength(
  prompt: string,
  maxLength: number = 2000,
): ValidationResult {
  const isBlank = !prompt || prompt.trim().length === 0;
  if (isBlank) {
    return { valid: false, error: "Prompt cannot be empty" };
  }

  const tooLong = prompt.length > maxLength;
  return tooLong
    ? {
        valid: false,
        error: `Prompt exceeds maximum length of ${maxLength} characters (current: ${prompt.length})`,
      }
    : { valid: true };
}

View File

@ -9,20 +9,20 @@ export interface GenerateImageRequest {
export interface TextToImageRequest {
prompt: string;
filename: string;
autoEnhance?: boolean;
aspectRatio?: string; // Gemini aspect ratio format (e.g., "1:1", "16:9", "3:2")
autoEnhance?: boolean; // Defaults to true
enhancementOptions?: {
imageStyle?:
template?:
| "photorealistic"
| "illustration"
| "minimalist"
| "sticker"
| "product"
| "comic";
aspectRatio?: "square" | "portrait" | "landscape" | "wide" | "ultrawide";
mood?: string;
lighting?: string;
cameraAngle?: string;
negativePrompts?: string[];
| "comic"
| "general"; // Defaults to "photorealistic"
};
meta?: {
tags?: string[]; // Optional array of tags for tracking/grouping (not stored, only logged)
};
}
@ -59,9 +59,13 @@ export interface ImageGenerationOptions {
prompt: string;
filename: string;
referenceImages?: ReferenceImage[];
aspectRatio?: string;
orgId?: string;
projectId?: string;
userId?: string;
meta?: {
tags?: string[];
};
}
export interface ReferenceImage {
@ -74,6 +78,9 @@ export interface GeminiParams {
model: string;
config: {
responseModalities: string[];
imageConfig?: {
aspectRatio?: string;
};
};
contentsStructure: {
role: string;
@ -115,19 +122,14 @@ export interface LogContext {
export interface PromptEnhancementRequest {
prompt: string;
options?: {
imageStyle?:
template?:
| "photorealistic"
| "illustration"
| "minimalist"
| "sticker"
| "product"
| "comic";
aspectRatio?: "square" | "portrait" | "landscape" | "wide" | "ultrawide";
mood?: string;
lighting?: string;
cameraAngle?: string;
outputFormat?: "text" | "markdown" | "detailed";
negativePrompts?: string[];
| "comic"
| "general"; // Defaults to "photorealistic"
};
}
@ -147,20 +149,20 @@ export interface PromptEnhancementResponse {
// Enhanced Generate Request (with auto-enhancement option)
export interface EnhancedGenerateImageRequest extends GenerateImageRequest {
autoEnhance?: boolean;
aspectRatio?: string; // Gemini aspect ratio format (e.g., "1:1", "16:9", "3:2")
autoEnhance?: boolean; // Defaults to true
enhancementOptions?: {
imageStyle?:
template?:
| "photorealistic"
| "illustration"
| "minimalist"
| "sticker"
| "product"
| "comic";
aspectRatio?: "square" | "portrait" | "landscape" | "wide" | "ultrawide";
mood?: string;
lighting?: string;
cameraAngle?: string;
negativePrompts?: string[];
| "comic"
| "general"; // Defaults to "photorealistic"
};
meta?: {
tags?: string[];
};
}

View File

@ -9,6 +9,11 @@ import { AdvancedOptionsModal, AdvancedOptionsData } from '@/components/demo/Adv
const API_BASE_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:3000';
const API_KEY_STORAGE_KEY = 'banatie_demo_api_key';
// Generate a random 6-character uppercase alphanumeric ID for pairing images.
// Characters are drawn one at a time so the ID is always exactly 6 long;
// slicing Math.random().toString(36) can yield fewer characters
// (e.g. 0.5 -> "0.i" -> "I").
function generatePairId(): string {
  const alphabet = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ';
  let id = '';
  for (let i = 0; i < 6; i++) {
    id += alphabet.charAt(Math.floor(Math.random() * alphabet.length));
  }
  return id;
}
interface GenerationResult {
id: string;
timestamp: Date;
@ -38,8 +43,10 @@ interface GenerationResult {
geminiParams: object;
};
enhancementOptions?: {
imageStyle?: string;
aspectRatio?: string;
template?: string;
meta?: {
tags?: string[];
};
} & AdvancedOptionsData;
}
@ -64,8 +71,8 @@ export default function DemoTTIPage() {
const [generationError, setGenerationError] = useState('');
// Enhancement Options State
const [aspectRatio, setAspectRatio] = useState('');
const [imageStyle, setImageStyle] = useState('');
const [aspectRatio, setAspectRatio] = useState('1:1');
const [template, setTemplate] = useState('photorealistic');
const [advancedOptions, setAdvancedOptions] = useState<AdvancedOptionsData>({});
const [showAdvancedModal, setShowAdvancedModal] = useState(false);
@ -202,35 +209,13 @@ export default function DemoTTIPage() {
setGenerationStartTime(startTime);
const resultId = Date.now().toString();
const pairId = generatePairId(); // NEW: Generate unique pair ID
const timestamp = new Date();
try {
// Build enhancement options for right image (only non-empty values)
const rightEnhancementOptions: any = {};
if (imageStyle) {
rightEnhancementOptions.imageStyle = imageStyle;
}
if (aspectRatio) {
rightEnhancementOptions.aspectRatio = aspectRatio;
}
if (advancedOptions.mood) {
rightEnhancementOptions.mood = advancedOptions.mood;
}
if (advancedOptions.lighting) {
rightEnhancementOptions.lighting = advancedOptions.lighting;
}
if (advancedOptions.cameraAngle) {
rightEnhancementOptions.cameraAngle = advancedOptions.cameraAngle;
}
if (advancedOptions.negativePrompts && advancedOptions.negativePrompts.length > 0) {
rightEnhancementOptions.negativePrompts = advancedOptions.negativePrompts;
}
const hasEnhancementOptions = Object.keys(rightEnhancementOptions).length > 0;
// Call API twice in parallel
// Left: original prompt with no enhancement options
// Right: original prompt WITH enhancement options
// Left: original prompt WITHOUT enhancement (autoEnhance: false)
// Right: original prompt WITH enhancement (autoEnhance: true + template)
const [leftResult, rightResult] = await Promise.all([
fetch(`${API_BASE_URL}/api/text-to-image`, {
method: 'POST',
@ -241,6 +226,11 @@ export default function DemoTTIPage() {
body: JSON.stringify({
prompt: prompt.trim(),
filename: `demo_${resultId}_left`,
aspectRatio,
autoEnhance: false, // Explicitly disable enhancement for left image
meta: {
tags: [pairId, 'simple'] // NEW: Pair ID + "simple" tag
}
}),
}),
fetch(`${API_BASE_URL}/api/text-to-image`, {
@ -252,10 +242,14 @@ export default function DemoTTIPage() {
body: JSON.stringify({
prompt: prompt.trim(),
filename: `demo_${resultId}_right`,
autoEnhance: true,
...(hasEnhancementOptions && {
enhancementOptions: rightEnhancementOptions
}),
aspectRatio,
autoEnhance: true, // Enable enhancement for right image
enhancementOptions: {
template: template || 'photorealistic', // Only template parameter
},
meta: {
tags: [pairId, 'enhanced'] // NEW: Pair ID + "enhanced" tag
}
}),
}),
]);
@ -292,6 +286,11 @@ export default function DemoTTIPage() {
request: {
prompt: prompt.trim(),
filename: `demo_${resultId}_left`,
aspectRatio,
autoEnhance: false,
meta: {
tags: [pairId, 'simple']
}
},
response: leftData,
geminiParams: leftData.data?.geminiParams || {},
@ -300,20 +299,25 @@ export default function DemoTTIPage() {
request: {
prompt: prompt.trim(),
filename: `demo_${resultId}_right`,
aspectRatio,
autoEnhance: true,
...(hasEnhancementOptions && {
enhancementOptions: rightEnhancementOptions
}),
enhancementOptions: {
template: template || 'photorealistic',
},
meta: {
tags: [pairId, 'enhanced']
}
},
response: rightData,
geminiParams: rightData.data?.geminiParams || {},
},
// Store enhancement options for display in inspect mode
enhancementOptions: hasEnhancementOptions ? {
imageStyle,
aspectRatio,
...advancedOptions,
} : undefined,
enhancementOptions: {
template,
meta: {
tags: [pairId, 'enhanced']
}
},
};
if (!leftData.success) {
@ -488,61 +492,56 @@ export default function DemoTTIPage() {
disabled={!apiKeyValidated || generating}
className="w-full px-3 py-2 text-sm bg-slate-800 border border-slate-700 rounded-lg text-white focus:outline-none focus:ring-2 focus:ring-amber-500 focus:border-transparent disabled:opacity-50 disabled:cursor-not-allowed"
>
<option value="">Auto</option>
<option value="square">Square (1:1)</option>
<option value="portrait">Portrait (3:4)</option>
<option value="landscape">Landscape (4:3)</option>
<option value="wide">Wide (16:9)</option>
<option value="ultrawide">Ultrawide (21:9)</option>
<option value="1:1">Square (1:1)</option>
<option value="3:4">Portrait (3:4)</option>
<option value="4:3">Landscape (4:3)</option>
<option value="9:16">Vertical (9:16)</option>
<option value="16:9">Widescreen (16:9)</option>
<option value="21:9">Ultrawide (21:9)</option>
</select>
</div>
{/* Image Style */}
{/* Template */}
<div className="flex-1 min-w-[150px]">
<label htmlFor="image-style" className="block text-xs font-medium text-gray-400 mb-1.5">
Image Style
<label htmlFor="template" className="block text-xs font-medium text-gray-400 mb-1.5">
Template
</label>
<select
id="image-style"
value={imageStyle}
onChange={(e) => setImageStyle(e.target.value)}
id="template"
value={template}
onChange={(e) => setTemplate(e.target.value)}
disabled={!apiKeyValidated || generating}
className="w-full px-3 py-2 text-sm bg-slate-800 border border-slate-700 rounded-lg text-white focus:outline-none focus:ring-2 focus:ring-amber-500 focus:border-transparent disabled:opacity-50 disabled:cursor-not-allowed"
>
<option value="">Auto</option>
<option value="photorealistic">Photorealistic</option>
<option value="illustration">Illustration</option>
<option value="minimalist">Minimalist</option>
<option value="sticker">Sticker</option>
<option value="product">Product</option>
<option value="comic">Comic</option>
<option value="general">General</option>
</select>
</div>
{/* Advanced Options Button */}
{/* Advanced Options Button - Disabled (Coming Soon) */}
<div className="flex-1 min-w-[150px]">
<label className="block text-xs font-medium text-gray-400 mb-1.5 md:invisible">
Advanced
</label>
<button
onClick={() => setShowAdvancedModal(true)}
disabled={!apiKeyValidated || generating}
className="w-full px-3 py-2 text-sm bg-slate-800 border border-slate-700 rounded-lg text-white hover:bg-slate-750 transition-colors disabled:opacity-50 disabled:cursor-not-allowed focus:outline-none focus:ring-2 focus:ring-amber-500 flex items-center justify-center gap-2"
aria-label="Open advanced options"
>
<span></span>
<span>Advanced</span>
{(advancedOptions.mood || advancedOptions.lighting || advancedOptions.cameraAngle || (advancedOptions.negativePrompts && advancedOptions.negativePrompts.length > 0)) && (
<span className="ml-1 px-1.5 py-0.5 text-xs bg-amber-600/20 text-amber-400 rounded-full">
{[
advancedOptions.mood,
advancedOptions.lighting,
advancedOptions.cameraAngle,
advancedOptions.negativePrompts && advancedOptions.negativePrompts.length > 0 ? 'prompts' : null
].filter(Boolean).length}
<div className="relative">
<button
disabled={true}
className="w-full px-3 py-2 text-sm bg-slate-800 border border-slate-700 rounded-lg text-gray-500 opacity-50 cursor-not-allowed flex items-center justify-center gap-2"
aria-label="Advanced options (coming soon)"
title="Advanced options coming soon"
>
<span></span>
<span>Advanced</span>
<span className="ml-1 px-1.5 py-0.5 text-xs bg-slate-700 text-gray-400 rounded-full">
🔒
</span>
)}
</button>
</button>
</div>
</div>
</div>

View File

@ -42,6 +42,7 @@ services:
- LOG_LEVEL=${LOG_LEVEL}
- PORT=${PORT}
- CORS_ORIGIN=${CORS_ORIGIN}
- TTI_LOG=${TTI_LOG}
restart: unless-stopped
postgres:

View File

@ -273,14 +273,9 @@ Generate images from text prompts with optional reference images.
**Enhancement Options:**
| Field | Type | Options | Description |
|-------|------|---------|-------------|
| `imageStyle` | string | `photorealistic`, `illustration`, `minimalist`, `sticker`, `product`, `comic` | Visual style |
| `aspectRatio` | string | `square`, `portrait`, `landscape`, `wide`, `ultrawide` | Image proportions |
| `mood` | string | - | Mood description (max 100 chars) |
| `lighting` | string | - | Lighting description (max 100 chars) |
| `cameraAngle` | string | - | Camera angle description (max 100 chars) |
| `negativePrompts` | string[] | - | What to avoid (max 10 items, 100 chars each) |
| Field | Type | Options | Default | Description |
|-------|------|---------|---------|-------------|
| `template` | string | `photorealistic`, `illustration`, `minimalist`, `sticker`, `product`, `comic`, `general` | `photorealistic` | Prompt engineering template to apply |
**Example Request:**
```bash
@ -342,10 +337,10 @@ Generate images from text prompts only using JSON payload. Simplified endpoint f
{
"prompt": "A beautiful sunset over mountains",
"filename": "sunset_image",
"aspectRatio": "16:9",
"autoEnhance": true,
"enhancementOptions": {
"imageStyle": "photorealistic",
"aspectRatio": "landscape",
"template": "photorealistic",
"mood": "peaceful",
"lighting": "golden hour"
}
@ -354,12 +349,26 @@ Generate images from text prompts only using JSON payload. Simplified endpoint f
**Parameters:**
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `prompt` | string | Yes | - | Text description of the image to generate (3-2000 chars) |
| `filename` | string | Yes | - | Desired filename for the generated image (alphanumeric, underscore, hyphen only) |
| `aspectRatio` | string | No | `"1:1"` | Image aspect ratio (`"1:1"`, `"2:3"`, `"3:2"`, `"3:4"`, `"4:3"`, `"4:5"`, `"5:4"`, `"9:16"`, `"16:9"`, `"21:9"`) |
| `autoEnhance` | boolean | No | `true` | Enable automatic prompt enhancement (set to `false` to use prompt as-is) |
| `enhancementOptions` | object | No | - | Enhancement configuration options |
| `meta` | object | No | - | Metadata for request tracking |
**Enhancement Options:**
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `template` | string | No | `"photorealistic"` | Prompt engineering template: `"photorealistic"`, `"illustration"`, `"minimalist"`, `"sticker"`, `"product"`, `"comic"`, `"general"` |
**Meta Object:**
| Field | Type | Required | Description |
|-------|------|----------|-------------|
| `prompt` | string | Yes | Text description of the image to generate (3-2000 chars) |
| `filename` | string | Yes | Desired filename for the generated image (alphanumeric, underscore, hyphen only) |
| `autoEnhance` | boolean | No | Enable automatic prompt enhancement |
| `enhancementOptions` | object | No | Enhancement configuration options (same as /api/generate) |
| `tags` | string[] | No | Array of string tags for tracking/grouping requests (not stored, only logged) |
**Example Request:**
```bash
@ -369,10 +378,13 @@ curl -X POST http://localhost:3000/api/text-to-image \
-d '{
"prompt": "A beautiful sunset over mountains with golden clouds",
"filename": "test_sunset",
"aspectRatio": "16:9",
"autoEnhance": true,
"enhancementOptions": {
"imageStyle": "photorealistic",
"aspectRatio": "landscape"
"template": "photorealistic"
},
"meta": {
"tags": ["demo", "sunset"]
}
}'
```
@ -413,6 +425,16 @@ curl -X POST http://localhost:3000/api/text-to-image \
- **Faster**: No multipart parsing overhead
- **Simpler testing**: Easy to use with curl or API clients
- **Same features**: Supports all enhancement options
- **Auto-enhance by default**: `autoEnhance` defaults to `true`; set it explicitly to `false` to use the prompt as-is
**Template Descriptions:**
- `photorealistic`: Photography-focused with camera angles, lens types, lighting, and fine details
- `illustration`: Art style specifications with line work, color palette, and shading techniques
- `minimalist`: Emphasis on negative space, simple composition, and subtle elements
- `sticker`: Bold outlines, kawaii style, clean design, and a transparent-background look
- `product`: Studio lighting setups, commercial photography terms, surfaces, and angles
- `comic`: Panel style, art technique, mood, and dialogue/caption integration
- `general`: Balanced approach with clear descriptions and artistic detail
---

View File

@ -73,8 +73,8 @@ importers:
specifier: workspace:*
version: link:../../packages/database
'@google/genai':
specifier: ^1.17.0
version: 1.20.0
specifier: ^1.22.0
version: 1.22.0
cors:
specifier: ^2.8.5
version: 2.8.5
@ -970,8 +970,8 @@ packages:
'@floating-ui/utils@0.2.10':
resolution: {integrity: sha512-aGTxbpbg8/b5JfU1HXSrbH3wXZuLPJcNEcZQFMxLs3oSzgtVu6nFPkbbGGUvBcUjKV2YyB9Wxxabo+HEH9tcRQ==}
'@google/genai@1.20.0':
resolution: {integrity: sha512-QdShxO9LX35jFogy3iKprQNqgKKveux4H2QjOnyIvyHRuGi6PHiz3fjNf8Y0VPY8o5V2fHqR2XqiSVoz7yZs0w==}
'@google/genai@1.22.0':
resolution: {integrity: sha512-siETS3zTm3EGpTT4+BFc1z20xXBYfueD3gCYfxkOjuAKRk8lt8TJevDHi3zepn1oSI6NhG/LZvy0i+Q3qheObg==}
engines: {node: '>=20.0.0'}
peerDependencies:
'@modelcontextprotocol/sdk': ^1.11.4
@ -5430,7 +5430,7 @@ snapshots:
'@floating-ui/utils@0.2.10': {}
'@google/genai@1.20.0':
'@google/genai@1.22.0':
dependencies:
google-auth-library: 9.15.1
ws: 8.18.3
@ -7279,7 +7279,7 @@ snapshots:
eslint: 8.57.1
eslint-import-resolver-node: 0.3.9
eslint-import-resolver-typescript: 3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.44.0(eslint@8.57.1)(typescript@5.9.2))(eslint@8.57.1))(eslint@8.57.1)
eslint-plugin-import: 2.32.0(@typescript-eslint/parser@8.44.0(eslint@8.57.1)(typescript@5.9.2))(eslint-import-resolver-typescript@3.10.1)(eslint@8.57.1)
eslint-plugin-import: 2.32.0(@typescript-eslint/parser@8.44.0(eslint@8.57.1)(typescript@5.9.2))(eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.44.0(eslint@8.57.1)(typescript@5.9.2))(eslint@8.57.1))(eslint@8.57.1))(eslint@8.57.1)
eslint-plugin-jsx-a11y: 6.10.2(eslint@8.57.1)
eslint-plugin-react: 7.37.5(eslint@8.57.1)
eslint-plugin-react-hooks: 5.0.0-canary-7118f5dd7-20230705(eslint@8.57.1)
@ -7313,7 +7313,7 @@ snapshots:
tinyglobby: 0.2.15
unrs-resolver: 1.11.1
optionalDependencies:
eslint-plugin-import: 2.32.0(@typescript-eslint/parser@8.44.0(eslint@8.57.1)(typescript@5.9.2))(eslint-import-resolver-typescript@3.10.1)(eslint@8.57.1)
eslint-plugin-import: 2.32.0(@typescript-eslint/parser@8.44.0(eslint@8.57.1)(typescript@5.9.2))(eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.44.0(eslint@8.57.1)(typescript@5.9.2))(eslint@8.57.1))(eslint@8.57.1))(eslint@8.57.1)
transitivePeerDependencies:
- supports-color
@ -7328,7 +7328,7 @@ snapshots:
transitivePeerDependencies:
- supports-color
eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.44.0(eslint@8.57.1)(typescript@5.9.2))(eslint-import-resolver-typescript@3.10.1)(eslint@8.57.1):
eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.44.0(eslint@8.57.1)(typescript@5.9.2))(eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.44.0(eslint@8.57.1)(typescript@5.9.2))(eslint@8.57.1))(eslint@8.57.1))(eslint@8.57.1):
dependencies:
'@rtsao/scc': 1.1.0
array-includes: 3.1.9