From 847145c385fac076764ca96bc04f0d76d9712dbd Mon Sep 17 00:00:00 2001 From: Oleg Proskurin Date: Tue, 7 Oct 2025 22:28:27 +0700 Subject: [PATCH] feat: update API --- .../src/middleware/jsonValidation.ts | 74 +++++----------- .../src/middleware/promptEnhancement.ts | 7 +- .../src/services/PromptEnhancementService.ts | 84 ++++++------------- apps/api-service/src/types/api.ts | 37 +++----- docs/api/README.md | 48 +++++++---- 5 files changed, 93 insertions(+), 157 deletions(-) diff --git a/apps/api-service/src/middleware/jsonValidation.ts b/apps/api-service/src/middleware/jsonValidation.ts index 5085f87..4afc3af 100644 --- a/apps/api-service/src/middleware/jsonValidation.ts +++ b/apps/api-service/src/middleware/jsonValidation.ts @@ -55,6 +55,20 @@ export const validateTextToImageRequest = ( }); } + // Set defaults before validation + // Default autoEnhance to true if not explicitly set + if (req.body.autoEnhance === undefined) { + req.body.autoEnhance = true; + } + + // Default template to "photorealistic" in enhancementOptions + if (req.body.enhancementOptions && !req.body.enhancementOptions.template) { + req.body.enhancementOptions.template = "photorealistic"; + } else if (!req.body.enhancementOptions && req.body.autoEnhance !== false) { + // If autoEnhance is true (default) and no enhancementOptions, create it with default template + req.body.enhancementOptions = { template: "photorealistic" }; + } + // Validate prompt if (!prompt) { errors.push("Prompt is required"); @@ -111,17 +125,11 @@ export const validateTextToImageRequest = ( ) { errors.push("enhancementOptions must be an object"); } else { - const { - imageStyle, - aspectRatio, - mood, - lighting, - cameraAngle, - negativePrompts, - } = enhancementOptions; + const { template } = enhancementOptions; + // Validate template parameter if ( - imageStyle !== undefined && + template !== undefined && ![ "photorealistic", "illustration", @@ -129,55 +137,13 @@ export const validateTextToImageRequest = ( "sticker", 
"product", "comic", - ].includes(imageStyle) - ) { - errors.push("Invalid imageStyle in enhancementOptions"); - } - - if ( - aspectRatio !== undefined && - !VALID_ASPECT_RATIOS.includes(aspectRatio as any) + "general", + ].includes(template) ) { errors.push( - `Invalid aspectRatio. Must be one of: ${VALID_ASPECT_RATIOS.join(", ")}` + "Invalid template in enhancementOptions. Must be one of: photorealistic, illustration, minimalist, sticker, product, comic, general", ); } - - if ( - mood !== undefined && - (typeof mood !== "string" || mood.length > 100) - ) { - errors.push("mood must be a string with max 100 characters"); - } - - if ( - lighting !== undefined && - (typeof lighting !== "string" || lighting.length > 100) - ) { - errors.push("lighting must be a string with max 100 characters"); - } - - if ( - cameraAngle !== undefined && - (typeof cameraAngle !== "string" || cameraAngle.length > 100) - ) { - errors.push("cameraAngle must be a string with max 100 characters"); - } - - if (negativePrompts !== undefined) { - if (!Array.isArray(negativePrompts) || negativePrompts.length > 10) { - errors.push("negativePrompts must be an array with max 10 items"); - } else { - for (const item of negativePrompts) { - if (typeof item !== "string" || item.length > 100) { - errors.push( - "Each negative prompt must be a string with max 100 characters", - ); - break; - } - } - } - } } } diff --git a/apps/api-service/src/middleware/promptEnhancement.ts b/apps/api-service/src/middleware/promptEnhancement.ts index 8791014..5d8884a 100644 --- a/apps/api-service/src/middleware/promptEnhancement.ts +++ b/apps/api-service/src/middleware/promptEnhancement.ts @@ -24,9 +24,12 @@ export const autoEnhancePrompt = async ( const requestId = req.requestId; const { prompt, autoEnhance, enhancementOptions } = req.body; - if (!autoEnhance) { + // Default autoEnhance to true if not explicitly set to false + const shouldEnhance = autoEnhance !== false; + + if (!shouldEnhance) { console.log( - 
`[${timestamp}] [${requestId}] Auto-enhancement disabled, skipping`, + `[${timestamp}] [${requestId}] Auto-enhancement explicitly disabled, skipping`, ); return next(); } diff --git a/apps/api-service/src/services/PromptEnhancementService.ts b/apps/api-service/src/services/PromptEnhancementService.ts index f307ad7..4948c12 100644 --- a/apps/api-service/src/services/PromptEnhancementService.ts +++ b/apps/api-service/src/services/PromptEnhancementService.ts @@ -1,19 +1,14 @@ import { GoogleGenAI } from "@google/genai"; export interface PromptEnhancementOptions { - imageStyle?: + template?: | "photorealistic" | "illustration" | "minimalist" | "sticker" | "product" - | "comic"; - aspectRatio?: "square" | "portrait" | "landscape" | "wide" | "ultrawide"; - mood?: string; - lighting?: string; - cameraAngle?: string; - outputFormat?: "text" | "markdown" | "detailed"; - negativePrompts?: string[]; + | "comic" + | "general"; } export interface PromptEnhancementResult { @@ -47,13 +42,20 @@ export class PromptEnhancementService { ): Promise { const timestamp = new Date().toISOString(); + // Default template to "photorealistic" if not specified + const finalOptions = { + ...options, + template: options.template || "photorealistic", + }; + console.log( `[${timestamp}] Starting prompt enhancement for: "${rawPrompt.substring(0, 50)}..."`, ); + console.log(`[${timestamp}] Using template: ${finalOptions.template}`); try { - const systemPrompt = this.buildSystemPrompt(options); - const userPrompt = this.buildUserPrompt(rawPrompt, options); + const systemPrompt = this.buildSystemPrompt(finalOptions); + const userPrompt = this.buildUserPrompt(rawPrompt, finalOptions); console.log( `[${timestamp}] Making API request to Gemini 2.5 Flash for prompt enhancement...`, @@ -83,7 +85,7 @@ export class PromptEnhancementService { const result = this.parseEnhancedResponse( enhancedText, rawPrompt, - options, + finalOptions, ); const enhancementResult: PromptEnhancementResult = { @@ -93,14 +95,9 @@ 
export class PromptEnhancementService { ...(result.detectedLanguage && { detectedLanguage: result.detectedLanguage, }), - ...(result.appliedTemplate && { - appliedTemplate: result.appliedTemplate, - }), + appliedTemplate: finalOptions.template, metadata: { - ...(options.imageStyle && { style: options.imageStyle }), - ...(!options.imageStyle && - result.detectedStyle && { style: result.detectedStyle }), - ...(options.aspectRatio && { aspectRatio: options.aspectRatio }), + style: finalOptions.template, enhancements: result.enhancements, }, }; @@ -123,14 +120,10 @@ export class PromptEnhancementService { } private buildSystemPrompt(options: PromptEnhancementOptions): string { - const { - imageStyle, - aspectRatio, - mood, - lighting, - cameraAngle, - negativePrompts, - } = options; + const { template } = options; + + // Default to photorealistic + const selectedTemplate = template || "photorealistic"; return `You are an expert AI prompt engineer specializing in transforming rough, unstructured prompts into professional, detailed prompts for the Gemini Flash Image Generation model. Your goal is to follow these principles: @@ -150,14 +143,10 @@ STYLE TEMPLATES: - Sticker: Emphasize style (kawaii, bold outlines, clean design), transparent background - Product: Studio lighting setup, commercial photography terms, surfaces, angles - Comic: Panel style, art technique, mood, dialogue/caption integration +- General: Balanced approach with clear descriptions and artistic detail TECHNICAL REQUIREMENTS: -${imageStyle ? `- Target Style: ${imageStyle}` : "- Auto-detect and apply appropriate style"} -${aspectRatio ? `- Aspect Ratio: ${aspectRatio}` : "- Use square format unless context suggests otherwise"} -${mood ? `- Mood: ${mood}` : ""} -${lighting ? `- Lighting: ${lighting}` : ""} -${cameraAngle ? `- Camera Angle: ${cameraAngle}` : ""} -${negativePrompts && negativePrompts.length > 0 ? 
`- Avoid: ${negativePrompts.join(", ")}` : ""} +- Target Template: ${selectedTemplate} RESPONSE FORMAT: Provide only the enhanced prompt as a single, cohesive paragraph. Do not include explanations, metadata, or multiple options. The response should be ready to use directly for image generation. @@ -171,13 +160,8 @@ Remember: More detail equals more control. Transform vague concepts into vivid, ): string { let prompt = `Transform this prompt into a professional image generation prompt: "${rawPrompt}"`; - if (options.imageStyle) { - prompt += `\n\nTarget style: ${options.imageStyle}`; - } - - if (options.aspectRatio) { - prompt += `\n\nAspect ratio: ${options.aspectRatio}`; - } + const selectedTemplate = options.template || "photorealistic"; + prompt += `\n\nTarget template/style: ${selectedTemplate}`; return prompt; } @@ -228,26 +212,8 @@ Remember: More detail equals more control. Transform vague concepts into vivid, // Try to detect the language of original prompt (simple heuristic) const detectedLanguage = this.detectLanguage(originalPrompt); - // Detect applied template based on content - let appliedTemplate = "general"; - if (options.imageStyle) { - appliedTemplate = options.imageStyle; - } else if ( - enhancedPrompt.includes("photorealistic") || - enhancedPrompt.includes("camera") - ) { - appliedTemplate = "photorealistic"; - } else if ( - enhancedPrompt.includes("sticker") || - enhancedPrompt.includes("kawaii") - ) { - appliedTemplate = "sticker"; - } else if ( - enhancedPrompt.includes("minimalist") || - enhancedPrompt.includes("negative space") - ) { - appliedTemplate = "minimalist"; - } + // Use the explicit template if provided + const appliedTemplate = options.template || "photorealistic"; return { enhancedPrompt, diff --git a/apps/api-service/src/types/api.ts b/apps/api-service/src/types/api.ts index a047c0d..472806d 100644 --- a/apps/api-service/src/types/api.ts +++ b/apps/api-service/src/types/api.ts @@ -9,20 +9,17 @@ export interface 
GenerateImageRequest { export interface TextToImageRequest { prompt: string; filename: string; - autoEnhance?: boolean; + aspectRatio?: string; // Gemini aspect ratio format (e.g., "1:1", "16:9", "3:2") + autoEnhance?: boolean; // Defaults to true enhancementOptions?: { - imageStyle?: + template?: | "photorealistic" | "illustration" | "minimalist" | "sticker" | "product" - | "comic"; - aspectRatio?: "square" | "portrait" | "landscape" | "wide" | "ultrawide"; - mood?: string; - lighting?: string; - cameraAngle?: string; - negativePrompts?: string[]; + | "comic" + | "general"; // Defaults to "photorealistic" }; } @@ -119,19 +116,14 @@ export interface LogContext { export interface PromptEnhancementRequest { prompt: string; options?: { - imageStyle?: + template?: | "photorealistic" | "illustration" | "minimalist" | "sticker" | "product" - | "comic"; - aspectRatio?: "square" | "portrait" | "landscape" | "wide" | "ultrawide"; - mood?: string; - lighting?: string; - cameraAngle?: string; - outputFormat?: "text" | "markdown" | "detailed"; - negativePrompts?: string[]; + | "comic" + | "general"; // Defaults to "photorealistic" }; } @@ -151,20 +143,17 @@ export interface PromptEnhancementResponse { // Enhanced Generate Request (with auto-enhancement option) export interface EnhancedGenerateImageRequest extends GenerateImageRequest { - autoEnhance?: boolean; + aspectRatio?: string; // Gemini aspect ratio format (e.g., "1:1", "16:9", "3:2") + autoEnhance?: boolean; // Defaults to true enhancementOptions?: { - imageStyle?: + template?: | "photorealistic" | "illustration" | "minimalist" | "sticker" | "product" - | "comic"; - aspectRatio?: "square" | "portrait" | "landscape" | "wide" | "ultrawide"; - mood?: string; - lighting?: string; - cameraAngle?: string; - negativePrompts?: string[]; + | "comic" + | "general"; // Defaults to "photorealistic" }; } diff --git a/docs/api/README.md b/docs/api/README.md index 7895d98..42b7ae0 100644 --- a/docs/api/README.md +++ 
b/docs/api/README.md @@ -273,14 +273,9 @@ Generate images from text prompts with optional reference images. **Enhancement Options:** -| Field | Type | Options | Description | -|-------|------|---------|-------------| -| `imageStyle` | string | `photorealistic`, `illustration`, `minimalist`, `sticker`, `product`, `comic` | Visual style | -| `aspectRatio` | string | `square`, `portrait`, `landscape`, `wide`, `ultrawide` | Image proportions | -| `mood` | string | - | Mood description (max 100 chars) | -| `lighting` | string | - | Lighting description (max 100 chars) | -| `cameraAngle` | string | - | Camera angle description (max 100 chars) | -| `negativePrompts` | string[] | - | What to avoid (max 10 items, 100 chars each) | +| Field | Type | Options | Default | Description | +|-------|------|---------|---------|-------------| +| `template` | string | `photorealistic`, `illustration`, `minimalist`, `sticker`, `product`, `comic`, `general` | `photorealistic` | Prompt engineering template to apply | **Example Request:** ```bash @@ -342,10 +337,10 @@ Generate images from text prompts only using JSON payload. Simplified endpoint f { "prompt": "A beautiful sunset over mountains", "filename": "sunset_image", + "aspectRatio": "16:9", "autoEnhance": true, "enhancementOptions": { - "imageStyle": "photorealistic", - "aspectRatio": "landscape", + "template": "photorealistic", "mood": "peaceful", "lighting": "golden hour" } @@ -354,12 +349,19 @@ Generate images from text prompts only using JSON payload. 
Simplified endpoint f **Parameters:** -| Field | Type | Required | Description | -|-------|------|----------|-------------| -| `prompt` | string | Yes | Text description of the image to generate (3-2000 chars) | -| `filename` | string | Yes | Desired filename for the generated image (alphanumeric, underscore, hyphen only) | -| `autoEnhance` | boolean | No | Enable automatic prompt enhancement | -| `enhancementOptions` | object | No | Enhancement configuration options (same as /api/generate) | +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `prompt` | string | Yes | - | Text description of the image to generate (3-2000 chars) | +| `filename` | string | Yes | - | Desired filename for the generated image (alphanumeric, underscore, hyphen only) | +| `aspectRatio` | string | No | `"1:1"` | Image aspect ratio (`"1:1"`, `"2:3"`, `"3:2"`, `"3:4"`, `"4:3"`, `"4:5"`, `"5:4"`, `"9:16"`, `"16:9"`, `"21:9"`) | +| `autoEnhance` | boolean | No | `true` | Enable automatic prompt enhancement (set to `false` to use prompt as-is) | +| `enhancementOptions` | object | No | - | Enhancement configuration options | + +**Enhancement Options:** + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `template` | string | No | `"photorealistic"` | Prompt engineering template: `"photorealistic"`, `"illustration"`, `"minimalist"`, `"sticker"`, `"product"`, `"comic"`, `"general"` | **Example Request:** ```bash @@ -369,10 +371,10 @@ curl -X POST http://localhost:3000/api/text-to-image \ -d '{ "prompt": "A beautiful sunset over mountains with golden clouds", "filename": "test_sunset", + "aspectRatio": "16:9", "autoEnhance": true, "enhancementOptions": { - "imageStyle": "photorealistic", - "aspectRatio": "landscape" + "template": "photorealistic" } }' ``` @@ -413,6 +415,16 @@ curl -X POST http://localhost:3000/api/text-to-image \ - **Faster**: No multipart parsing overhead - 
**Simpler testing**: Easy to use with curl or API clients - **Same features**: Supports all enhancement options +- **Auto-enhance by default**: `autoEnhance` defaults to `true`; set it explicitly to `false` to use the prompt as-is + +**Template Descriptions:** +- `photorealistic`: Photography-focused with camera angles, lens types, lighting, and fine details +- `illustration`: Art style specifications with line work, color palette, and shading techniques +- `minimalist`: Emphasis on negative space, simple composition, and subtle elements +- `sticker`: Bold outlines, kawaii style, clean design, transparent background +- `product`: Studio lighting setups, commercial photography terms, surfaces, and angles +- `comic`: Panel style, art technique, mood, and dialogue/caption integration +- `general`: Balanced approach with clear descriptions and artistic detail ---