feat: update API

This commit is contained in:
Oleg Proskurin 2025-10-07 22:28:27 +07:00
parent 63aa812f5e
commit 847145c385
5 changed files with 93 additions and 157 deletions

View File

@ -55,6 +55,20 @@ export const validateTextToImageRequest = (
});
}
// Set defaults before validation
// Default autoEnhance to true if not explicitly set
if (req.body.autoEnhance === undefined) {
req.body.autoEnhance = true;
}
// Default template to "photorealistic" in enhancementOptions
if (req.body.enhancementOptions && !req.body.enhancementOptions.template) {
req.body.enhancementOptions.template = "photorealistic";
} else if (!req.body.enhancementOptions && req.body.autoEnhance !== false) {
// If autoEnhance is true (default) and no enhancementOptions, create it with default template
req.body.enhancementOptions = { template: "photorealistic" };
}
// Validate prompt
if (!prompt) {
errors.push("Prompt is required");
@ -111,17 +125,11 @@ export const validateTextToImageRequest = (
) {
errors.push("enhancementOptions must be an object");
} else {
const {
imageStyle,
aspectRatio,
mood,
lighting,
cameraAngle,
negativePrompts,
} = enhancementOptions;
const { template } = enhancementOptions;
// Validate template parameter
if (
imageStyle !== undefined &&
template !== undefined &&
![
"photorealistic",
"illustration",
@ -129,55 +137,13 @@ export const validateTextToImageRequest = (
"sticker",
"product",
"comic",
].includes(imageStyle)
) {
errors.push("Invalid imageStyle in enhancementOptions");
}
if (
aspectRatio !== undefined &&
!VALID_ASPECT_RATIOS.includes(aspectRatio as any)
"general",
].includes(template)
) {
errors.push(
`Invalid aspectRatio. Must be one of: ${VALID_ASPECT_RATIOS.join(", ")}`
"Invalid template in enhancementOptions. Must be one of: photorealistic, illustration, minimalist, sticker, product, comic, general",
);
}
if (
mood !== undefined &&
(typeof mood !== "string" || mood.length > 100)
) {
errors.push("mood must be a string with max 100 characters");
}
if (
lighting !== undefined &&
(typeof lighting !== "string" || lighting.length > 100)
) {
errors.push("lighting must be a string with max 100 characters");
}
if (
cameraAngle !== undefined &&
(typeof cameraAngle !== "string" || cameraAngle.length > 100)
) {
errors.push("cameraAngle must be a string with max 100 characters");
}
if (negativePrompts !== undefined) {
if (!Array.isArray(negativePrompts) || negativePrompts.length > 10) {
errors.push("negativePrompts must be an array with max 10 items");
} else {
for (const item of negativePrompts) {
if (typeof item !== "string" || item.length > 100) {
errors.push(
"Each negative prompt must be a string with max 100 characters",
);
break;
}
}
}
}
}
}

View File

@ -24,9 +24,12 @@ export const autoEnhancePrompt = async (
const requestId = req.requestId;
const { prompt, autoEnhance, enhancementOptions } = req.body;
if (!autoEnhance) {
// Default autoEnhance to true if not explicitly set to false
const shouldEnhance = autoEnhance !== false;
if (!shouldEnhance) {
console.log(
`[${timestamp}] [${requestId}] Auto-enhancement disabled, skipping`,
`[${timestamp}] [${requestId}] Auto-enhancement explicitly disabled, skipping`,
);
return next();
}

View File

@ -1,19 +1,14 @@
import { GoogleGenAI } from "@google/genai";
export interface PromptEnhancementOptions {
imageStyle?:
template?:
| "photorealistic"
| "illustration"
| "minimalist"
| "sticker"
| "product"
| "comic";
aspectRatio?: "square" | "portrait" | "landscape" | "wide" | "ultrawide";
mood?: string;
lighting?: string;
cameraAngle?: string;
outputFormat?: "text" | "markdown" | "detailed";
negativePrompts?: string[];
| "comic"
| "general";
}
export interface PromptEnhancementResult {
@ -47,13 +42,20 @@ export class PromptEnhancementService {
): Promise<PromptEnhancementResult> {
const timestamp = new Date().toISOString();
// Default template to "photorealistic" if not specified
const finalOptions = {
...options,
template: options.template || "photorealistic",
};
console.log(
`[${timestamp}] Starting prompt enhancement for: "${rawPrompt.substring(0, 50)}..."`,
);
console.log(`[${timestamp}] Using template: ${finalOptions.template}`);
try {
const systemPrompt = this.buildSystemPrompt(options);
const userPrompt = this.buildUserPrompt(rawPrompt, options);
const systemPrompt = this.buildSystemPrompt(finalOptions);
const userPrompt = this.buildUserPrompt(rawPrompt, finalOptions);
console.log(
`[${timestamp}] Making API request to Gemini 2.5 Flash for prompt enhancement...`,
@ -83,7 +85,7 @@ export class PromptEnhancementService {
const result = this.parseEnhancedResponse(
enhancedText,
rawPrompt,
options,
finalOptions,
);
const enhancementResult: PromptEnhancementResult = {
@ -93,14 +95,9 @@ export class PromptEnhancementService {
...(result.detectedLanguage && {
detectedLanguage: result.detectedLanguage,
}),
...(result.appliedTemplate && {
appliedTemplate: result.appliedTemplate,
}),
appliedTemplate: finalOptions.template,
metadata: {
...(options.imageStyle && { style: options.imageStyle }),
...(!options.imageStyle &&
result.detectedStyle && { style: result.detectedStyle }),
...(options.aspectRatio && { aspectRatio: options.aspectRatio }),
style: finalOptions.template,
enhancements: result.enhancements,
},
};
@ -123,14 +120,10 @@ export class PromptEnhancementService {
}
private buildSystemPrompt(options: PromptEnhancementOptions): string {
const {
imageStyle,
aspectRatio,
mood,
lighting,
cameraAngle,
negativePrompts,
} = options;
const { template } = options;
// Default to photorealistic
const selectedTemplate = template || "photorealistic";
return `You are an expert AI prompt engineer specializing in transforming rough, unstructured prompts into professional, detailed prompts for the Gemini Flash Image Generation model. Your goal is to follow these principles:
@ -150,14 +143,10 @@ STYLE TEMPLATES:
- Sticker: Emphasize style (kawaii, bold outlines, clean design), transparent background
- Product: Studio lighting setup, commercial photography terms, surfaces, angles
- Comic: Panel style, art technique, mood, dialogue/caption integration
- General: Balanced approach with clear descriptions and artistic detail
TECHNICAL REQUIREMENTS:
${imageStyle ? `- Target Style: ${imageStyle}` : "- Auto-detect and apply appropriate style"}
${aspectRatio ? `- Aspect Ratio: ${aspectRatio}` : "- Use square format unless context suggests otherwise"}
${mood ? `- Mood: ${mood}` : ""}
${lighting ? `- Lighting: ${lighting}` : ""}
${cameraAngle ? `- Camera Angle: ${cameraAngle}` : ""}
${negativePrompts && negativePrompts.length > 0 ? `- Avoid: ${negativePrompts.join(", ")}` : ""}
- Target Template: ${selectedTemplate}
RESPONSE FORMAT:
Provide only the enhanced prompt as a single, cohesive paragraph. Do not include explanations, metadata, or multiple options. The response should be ready to use directly for image generation.
@ -171,13 +160,8 @@ Remember: More detail equals more control. Transform vague concepts into vivid,
): string {
let prompt = `Transform this prompt into a professional image generation prompt: "${rawPrompt}"`;
if (options.imageStyle) {
prompt += `\n\nTarget style: ${options.imageStyle}`;
}
if (options.aspectRatio) {
prompt += `\n\nAspect ratio: ${options.aspectRatio}`;
}
const selectedTemplate = options.template || "photorealistic";
prompt += `\n\nTarget template/style: ${selectedTemplate}`;
return prompt;
}
@ -228,26 +212,8 @@ Remember: More detail equals more control. Transform vague concepts into vivid,
// Try to detect the language of original prompt (simple heuristic)
const detectedLanguage = this.detectLanguage(originalPrompt);
// Detect applied template based on content
let appliedTemplate = "general";
if (options.imageStyle) {
appliedTemplate = options.imageStyle;
} else if (
enhancedPrompt.includes("photorealistic") ||
enhancedPrompt.includes("camera")
) {
appliedTemplate = "photorealistic";
} else if (
enhancedPrompt.includes("sticker") ||
enhancedPrompt.includes("kawaii")
) {
appliedTemplate = "sticker";
} else if (
enhancedPrompt.includes("minimalist") ||
enhancedPrompt.includes("negative space")
) {
appliedTemplate = "minimalist";
}
// Use the explicit template if provided; otherwise fall back to "photorealistic"
const appliedTemplate = options.template || "photorealistic";
return {
enhancedPrompt,

View File

@ -9,20 +9,17 @@ export interface GenerateImageRequest {
export interface TextToImageRequest {
prompt: string;
filename: string;
autoEnhance?: boolean;
aspectRatio?: string; // Gemini aspect ratio format (e.g., "1:1", "16:9", "3:2")
autoEnhance?: boolean; // Defaults to true
enhancementOptions?: {
imageStyle?:
template?:
| "photorealistic"
| "illustration"
| "minimalist"
| "sticker"
| "product"
| "comic";
aspectRatio?: "square" | "portrait" | "landscape" | "wide" | "ultrawide";
mood?: string;
lighting?: string;
cameraAngle?: string;
negativePrompts?: string[];
| "comic"
| "general"; // Defaults to "photorealistic"
};
}
@ -119,19 +116,14 @@ export interface LogContext {
export interface PromptEnhancementRequest {
prompt: string;
options?: {
imageStyle?:
template?:
| "photorealistic"
| "illustration"
| "minimalist"
| "sticker"
| "product"
| "comic";
aspectRatio?: "square" | "portrait" | "landscape" | "wide" | "ultrawide";
mood?: string;
lighting?: string;
cameraAngle?: string;
outputFormat?: "text" | "markdown" | "detailed";
negativePrompts?: string[];
| "comic"
| "general"; // Defaults to "photorealistic"
};
}
@ -151,20 +143,17 @@ export interface PromptEnhancementResponse {
// Enhanced Generate Request (with auto-enhancement option)
export interface EnhancedGenerateImageRequest extends GenerateImageRequest {
autoEnhance?: boolean;
aspectRatio?: string; // Gemini aspect ratio format (e.g., "1:1", "16:9", "3:2")
autoEnhance?: boolean; // Defaults to true
enhancementOptions?: {
imageStyle?:
template?:
| "photorealistic"
| "illustration"
| "minimalist"
| "sticker"
| "product"
| "comic";
aspectRatio?: "square" | "portrait" | "landscape" | "wide" | "ultrawide";
mood?: string;
lighting?: string;
cameraAngle?: string;
negativePrompts?: string[];
| "comic"
| "general"; // Defaults to "photorealistic"
};
}

View File

@ -273,14 +273,9 @@ Generate images from text prompts with optional reference images.
**Enhancement Options:**
| Field | Type | Options | Description |
|-------|------|---------|-------------|
| `imageStyle` | string | `photorealistic`, `illustration`, `minimalist`, `sticker`, `product`, `comic` | Visual style |
| `aspectRatio` | string | `square`, `portrait`, `landscape`, `wide`, `ultrawide` | Image proportions |
| `mood` | string | - | Mood description (max 100 chars) |
| `lighting` | string | - | Lighting description (max 100 chars) |
| `cameraAngle` | string | - | Camera angle description (max 100 chars) |
| `negativePrompts` | string[] | - | What to avoid (max 10 items, 100 chars each) |
| Field | Type | Options | Default | Description |
|-------|------|---------|---------|-------------|
| `template` | string | `photorealistic`, `illustration`, `minimalist`, `sticker`, `product`, `comic`, `general` | `photorealistic` | Prompt engineering template to apply |
**Example Request:**
```bash
@ -342,10 +337,10 @@ Generate images from text prompts only using JSON payload. Simplified endpoint f
{
"prompt": "A beautiful sunset over mountains",
"filename": "sunset_image",
"aspectRatio": "16:9",
"autoEnhance": true,
"enhancementOptions": {
"imageStyle": "photorealistic",
"aspectRatio": "landscape",
"template": "photorealistic"
}
@ -354,12 +349,19 @@ Generate images from text prompts only using JSON payload. Simplified endpoint f
**Parameters:**
| Field | Type | Required | Description |
|-------|------|----------|-------------|
| `prompt` | string | Yes | Text description of the image to generate (3-2000 chars) |
| `filename` | string | Yes | Desired filename for the generated image (alphanumeric, underscore, hyphen only) |
| `autoEnhance` | boolean | No | Enable automatic prompt enhancement |
| `enhancementOptions` | object | No | Enhancement configuration options (same as /api/generate) |
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `prompt` | string | Yes | - | Text description of the image to generate (3-2000 chars) |
| `filename` | string | Yes | - | Desired filename for the generated image (alphanumeric, underscore, hyphen only) |
| `aspectRatio` | string | No | `"1:1"` | Image aspect ratio (`"1:1"`, `"2:3"`, `"3:2"`, `"3:4"`, `"4:3"`, `"4:5"`, `"5:4"`, `"9:16"`, `"16:9"`, `"21:9"`) |
| `autoEnhance` | boolean | No | `true` | Enable automatic prompt enhancement (set to `false` to use prompt as-is) |
| `enhancementOptions` | object | No | - | Enhancement configuration options |
**Enhancement Options:**
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `template` | string | No | `"photorealistic"` | Prompt engineering template: `"photorealistic"`, `"illustration"`, `"minimalist"`, `"sticker"`, `"product"`, `"comic"`, `"general"` |
**Example Request:**
```bash
@ -369,10 +371,10 @@ curl -X POST http://localhost:3000/api/text-to-image \
-d '{
"prompt": "A beautiful sunset over mountains with golden clouds",
"filename": "test_sunset",
"aspectRatio": "16:9",
"autoEnhance": true,
"enhancementOptions": {
"imageStyle": "photorealistic",
"aspectRatio": "landscape"
"template": "photorealistic"
}
}'
```
@ -413,6 +415,16 @@ curl -X POST http://localhost:3000/api/text-to-image \
- **Faster**: No multipart parsing overhead
- **Simpler testing**: Easy to use with curl or API clients
- **Same features**: Supports all enhancement options
- **Auto-enhance by default**: `autoEnhance` defaults to `true`, set explicitly to `false` to use prompt as-is
**Template Descriptions:**
- `photorealistic`: Photography-focused with camera angles, lens types, lighting, and fine details
- `illustration`: Art style specifications with line work, color palette, and shading techniques
- `minimalist`: Emphasis on negative space, simple composition, and subtle elements
- `sticker`: Bold outlines, kawaii style, clean design, and a transparent background
- `product`: Studio lighting setups, commercial photography terms, surfaces, and angles
- `comic`: Panel style, art technique, mood, and dialogue/caption integration
- `general`: Balanced approach with clear descriptions and artistic detail
---