refactor: expand enhancement service

This commit is contained in:
Oleg Proskurin 2025-10-08 23:41:02 +07:00
parent a97029a2b4
commit 1ea8492e21
18 changed files with 768 additions and 298 deletions

View File

@ -1,5 +1,5 @@
import { Request, Response } from "express"; import { Request, Response } from "express";
import { PromptEnhancementService } from "../services/PromptEnhancementService"; import { PromptEnhancementService } from "../services/promptEnhancement";
import { EnhancedGenerateImageRequest } from "../types/api"; import { EnhancedGenerateImageRequest } from "../types/api";
let promptEnhancementService: PromptEnhancementService | null = null; let promptEnhancementService: PromptEnhancementService | null = null;

View File

@ -1,6 +1,6 @@
import { Request, Response, Router } from "express"; import { Request, Response, Router } from "express";
import type { Router as RouterType } from "express"; import type { Router as RouterType } from "express";
import { PromptEnhancementService } from "../services/PromptEnhancementService"; import { PromptEnhancementService } from "../services/promptEnhancement";
import { asyncHandler } from "../middleware/errorHandler"; import { asyncHandler } from "../middleware/errorHandler";
import { import {
PromptEnhancementRequest, PromptEnhancementRequest,

View File

@ -1,281 +0,0 @@
import { GoogleGenAI } from "@google/genai";
import { EnhancementLogger, EnhancementLogEntry } from "./EnhancementLogger";
/**
 * Caller-supplied options for a single prompt-enhancement request.
 */
export interface PromptEnhancementOptions {
// Style template to apply; enhancePrompt falls back to "photorealistic" when this is omitted.
template?:
| "photorealistic"
| "illustration"
| "minimalist"
| "sticker"
| "product"
| "comic"
| "general";
// The service only writes these to the console log; they do not influence the generated prompt.
tags?: string[]; // Optional tags - accepted but not used yet
}
/**
 * Identifies who requested an enhancement. When a context is passed to
 * enhancePrompt, a log entry carrying these fields is written through
 * EnhancementLogger; without it nothing is logged there.
 */
export interface PromptEnhancementContext {
// Organisation the request belongs to (copied into the log entry).
orgId: string;
// Project the request belongs to (copied into the log entry).
projectId: string;
// Optional free-form metadata, copied into the log entry when present.
meta?: {
tags?: string[];
};
}
/**
 * Outcome of enhancePrompt. On failure only `success`, `originalPrompt` and
 * `error` are populated; on success the remaining fields describe the rewrite.
 */
export interface PromptEnhancementResult {
// true when an enhanced prompt was produced.
success: boolean;
// The prompt exactly as the caller supplied it.
originalPrompt: string;
// The rewritten prompt returned by the model (success only).
enhancedPrompt?: string;
// Heuristic guess at the language of the original prompt.
detectedLanguage?: string;
// Template that was applied to the request.
appliedTemplate?: string;
metadata?: {
// The service sets this to the applied template name.
style?: string;
// Not populated by the service code visible in this file.
aspectRatio?: string;
// Human-readable labels for the improvements detected in the rewrite.
enhancements: string[];
};
// Failure description (failure only).
error?: string;
}
/**
 * Rewrites a rough user prompt into a detailed image-generation prompt by
 * sending it, together with a style-specific system prompt, to the Gemini
 * text model.
 */
export class PromptEnhancementService {
  private ai: GoogleGenAI;
  private model = "gemini-2.5-flash";

  /**
   * @param apiKey Gemini API key used to create the client.
   * @throws Error when `apiKey` is empty.
   */
  constructor(apiKey: string) {
    if (!apiKey) {
      throw new Error("Gemini API key is required");
    }
    this.ai = new GoogleGenAI({ apiKey });
  }

  /**
   * Enhances `rawPrompt` using the requested template.
   *
   * Never throws: API errors and empty model replies are reported through a
   * result with `success: false` and an `error` message.
   *
   * @param rawPrompt Prompt as typed by the user.
   * @param options Template selection and optional tags; the template
   *   defaults to "photorealistic".
   * @param context When provided, a successful enhancement is recorded
   *   through `EnhancementLogger`.
   * @returns The enhancement result (success or failure).
   */
  async enhancePrompt(
    rawPrompt: string,
    options: PromptEnhancementOptions = {},
    context?: PromptEnhancementContext,
  ): Promise<PromptEnhancementResult> {
    const timestamp = new Date().toISOString();
    // Default template to "photorealistic" if not specified
    const finalOptions = {
      ...options,
      template: options.template || "photorealistic",
    };
    console.log(
      `[${timestamp}] Starting prompt enhancement for: "${rawPrompt.substring(0, 50)}..."`,
    );
    console.log(`[${timestamp}] Using template: ${finalOptions.template}`);
    if (finalOptions.tags && finalOptions.tags.length > 0) {
      console.log(`[${timestamp}] Tags: ${finalOptions.tags.join(", ")}`);
    }
    try {
      const systemPrompt = this.buildSystemPrompt(finalOptions);
      const userPrompt = this.buildUserPrompt(rawPrompt, finalOptions);
      console.log(
        `[${timestamp}] Making API request to Gemini 2.5 Flash for prompt enhancement...`,
      );
      const response = await this.ai.models.generateContent({
        model: this.model,
        config: { responseModalities: ["TEXT"] },
        contents: [
          {
            role: "user" as const,
            parts: [{ text: `${systemPrompt}\n\n${userPrompt}` }],
          },
        ],
      });

      // A reply may be split across several text parts; concatenate them all
      // instead of keeping only the first one.
      const parts = response.candidates?.[0]?.content?.parts ?? [];
      const enhancedText = parts
        .map((part) => part.text ?? "")
        .join("")
        .trim();

      // A missing or blank reply is a failure: reporting success with an
      // empty prompt would hand callers nothing usable.
      if (!enhancedText) {
        return {
          success: false,
          originalPrompt: rawPrompt,
          error: "No enhanced prompt received from API",
        };
      }

      console.log(`[${timestamp}] Enhanced prompt generated successfully`);
      const result = this.parseEnhancedResponse(
        enhancedText,
        rawPrompt,
        finalOptions,
      );
      const enhancementResult: PromptEnhancementResult = {
        success: true,
        originalPrompt: rawPrompt,
        enhancedPrompt: result.enhancedPrompt,
        ...(result.detectedLanguage && {
          detectedLanguage: result.detectedLanguage,
        }),
        appliedTemplate: finalOptions.template,
        metadata: {
          style: finalOptions.template,
          enhancements: result.enhancements,
        },
      };
      // Log the enhancement if context is provided
      if (context) {
        const logEntry: EnhancementLogEntry = {
          timestamp,
          orgId: context.orgId,
          projectId: context.projectId,
          originalPrompt: rawPrompt,
          enhancedPrompt: result.enhancedPrompt,
          ...(context.meta && { meta: context.meta }),
          template: finalOptions.template,
          ...(result.detectedLanguage && {
            detectedLanguage: result.detectedLanguage,
          }),
          enhancements: result.enhancements,
          model: this.model,
        };
        EnhancementLogger.getInstance().log(logEntry);
      }
      return enhancementResult;
    } catch (error) {
      console.error(`[${timestamp}] Prompt enhancement failed:`, error);
      return {
        success: false,
        originalPrompt: rawPrompt,
        error: error instanceof Error ? error.message : "Enhancement failed",
      };
    }
  }

  /**
   * Builds the instruction text that tells the model how to rewrite prompts
   * and which template to target.
   */
  private buildSystemPrompt(options: PromptEnhancementOptions): string {
    const { template } = options;
    // Default to photorealistic
    const selectedTemplate = template || "photorealistic";
    return `You are an expert AI prompt engineer specializing in transforming rough, unstructured prompts into professional, detailed prompts for the Gemini Flash Image Generation model. Your goal is to follow these principles:
CORE PRINCIPLE: Describe the scene, don't just list keywords. Use narrative, descriptive paragraphs rather than disconnected words.
ENHANCEMENT GUIDELINES:
1. Transform any language into professional English
2. Be hyper-specific with details instead of vague descriptions
3. Use photography and cinematic language for composition control
4. Provide context and intent for better understanding
5. Apply the appropriate template based on the desired style
STYLE TEMPLATES:
- Photorealistic: Use photography terms (camera angles, lens types, lighting, fine details)
- Illustration: Specify art style, line work, color palette, shading technique
- Minimalist: Focus on negative space, simple composition, subtle elements
- Sticker: Emphasize style (kawaii, bold outlines, clean design), transparent background
- Product: Studio lighting setup, commercial photography terms, surfaces, angles
- Comic: Panel style, art technique, mood, dialogue/caption integration
- General: Balanced approach with clear descriptions and artistic detail
TECHNICAL REQUIREMENTS:
- Target Template: ${selectedTemplate}
RESPONSE FORMAT:
Provide only the enhanced prompt as a single, cohesive paragraph. Do not include explanations, metadata, or multiple options. The response should be ready to use directly for image generation.
CRITICAL: the prompt length MUST be under 2000 characters length. Contract the prompts if they're longer
Remember: More detail equals more control. Transform vague concepts into vivid, specific descriptions that guide the model toward the exact image envisioned.`;
  }

  /**
   * Wraps the raw prompt in the request sentence and appends the target
   * template. The prompt is not echoed to the console here: the previous
   * debug output printed the complete user prompt on every request.
   */
  private buildUserPrompt(
    rawPrompt: string,
    options: PromptEnhancementOptions,
  ): string {
    const selectedTemplate = options.template || "photorealistic";
    return (
      `Transform this prompt into a professional image generation prompt: "${rawPrompt}"` +
      `\n\nTarget template/style: ${selectedTemplate}`
    );
  }

  /**
   * Derives descriptive metadata from the model reply: which kinds of
   * enhancement it appears to contain and the language of the original.
   */
  private parseEnhancedResponse(
    enhancedText: string,
    originalPrompt: string,
    options: PromptEnhancementOptions,
  ): {
    enhancedPrompt: string;
    detectedLanguage?: string;
    appliedTemplate?: string;
    detectedStyle?: string;
    enhancements: string[];
  } {
    const enhancements: string[] = [];
    // Clean up the enhanced text
    const enhancedPrompt = enhancedText.trim();
    // Keyword checks run on a lower-cased copy so that sentence-initial words
    // such as "Lighting" or "Texture" are recognised too.
    const lowered = enhancedPrompt.toLowerCase();
    const mentions = (...keywords: string[]): boolean =>
      keywords.some((keyword) => lowered.includes(keyword));

    if (enhancedPrompt.length > originalPrompt.length * 1.5) {
      enhancements.push("Added detailed descriptions");
    }
    if (mentions("photorealistic", "shot", "lens")) {
      enhancements.push("Applied photography terminology");
    }
    if (mentions("lighting", "illuminated")) {
      enhancements.push("Enhanced lighting description");
    }
    if (mentions("texture", "surface")) {
      enhancements.push("Added texture details");
    }
    // Try to detect the language of original prompt (simple heuristic)
    const detectedLanguage = this.detectLanguage(originalPrompt);
    // Use the explicit template if provided
    const appliedTemplate = options.template || "photorealistic";
    return {
      enhancedPrompt,
      detectedLanguage,
      appliedTemplate,
      detectedStyle: appliedTemplate,
      enhancements,
    };
  }

  /**
   * Guesses the language of `text` from the scripts it contains. This is a
   * coarse heuristic, not real language identification; text with none of
   * the recognised characters is reported as "English".
   */
  private detectLanguage(text: string): string {
    // Kana must be tested before Han ideographs: Japanese text normally mixes
    // kana with kanji, and kanji share the CJK Unified Ideographs block, so
    // testing Han first would label almost all Japanese as Chinese.
    if (/[\u3040-\u309f\u30a0-\u30ff]/.test(text)) return "Japanese";
    if (/[\u4e00-\u9fff]/.test(text)) return "Chinese";
    if (/[\uac00-\ud7af]/.test(text)) return "Korean";
    if (/[àáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ]/.test(text))
      return "Romance Language";
    if (/[а-яё]/.test(text.toLowerCase())) return "Russian";
    if (/[α-ωΑ-Ω]/.test(text)) return "Greek";
    if (/[أ-ي]/.test(text)) return "Arabic";
    if (/[א-ת]/.test(text)) return "Hebrew";
    return "English";
  }
}

View File

@ -1,20 +1,6 @@
import { writeFileSync, readFileSync, existsSync, mkdirSync } from "fs"; import { writeFileSync, readFileSync, existsSync, mkdirSync } from "fs";
import { dirname } from "path"; import { dirname } from "path";
import { EnhancementLogEntry } from "./types";
/**
 * One record handed to EnhancementLogger.log, describing a completed
 * prompt enhancement.
 */
export interface EnhancementLogEntry {
// ISO-8601 time at which the enhancement request started.
timestamp: string;
// Organisation and project taken from the request context.
orgId: string;
projectId: string;
// Prompt before and after enhancement.
originalPrompt: string;
enhancedPrompt: string;
// Optional caller metadata copied from the request context.
meta?: {
tags?: string[];
};
// Name of the template that was applied.
template: string;
// Heuristic language guess for the original prompt, when available.
detectedLanguage?: string;
// Labels for the kinds of enhancement detected in the rewrite.
enhancements: string[];
// Identifier of the model that produced the enhanced prompt.
model: string;
}
export class EnhancementLogger { export class EnhancementLogger {
private static instance: EnhancementLogger | null = null; private static instance: EnhancementLogger | null = null;

View File

@ -0,0 +1,123 @@
import {
PromptEnhancementOptions,
PromptEnhancementContext,
PromptEnhancementResult,
} from "./types";
import { getAgent } from "./agents";
import { validatePromptLength } from "./validators";
import { EnhancementLogger } from "./EnhancementLogger";
/**
 * Front door for prompt enhancement. Validates the raw prompt, hands the
 * rewrite to the agent chosen for the requested template, caps the length of
 * the result and, when a context is supplied, records the outcome.
 */
export class PromptEnhancementService {
  private apiKey: string;
  private model = "gemini-2.5-flash";

  /**
   * @param apiKey Gemini API key forwarded to the agents.
   * @throws Error when `apiKey` is empty.
   */
  constructor(apiKey: string) {
    if (!apiKey) {
      throw new Error("Gemini API key is required");
    }
    this.apiKey = apiKey;
  }

  /**
   * Enhances `rawPrompt` and reports the outcome without throwing.
   *
   * @param rawPrompt Prompt as supplied by the caller (validated against a
   *   5000 limit via `validatePromptLength`).
   * @param options Template selection and optional tags.
   * @param context When present, a log entry is written for a successful run.
   * @returns A result whose `success` flag tells whether `enhancedPrompt` is set.
   */
  async enhancePrompt(
    rawPrompt: string,
    options: PromptEnhancementOptions = {},
    context?: PromptEnhancementContext,
  ): Promise<PromptEnhancementResult> {
    const timestamp = new Date().toISOString();
    const logPrefix = `[${timestamp}]`;

    // Every failure path returns the same three-field shape.
    const failure = (message: string): PromptEnhancementResult => ({
      success: false,
      originalPrompt: rawPrompt,
      error: message,
    });

    console.log(
      `${logPrefix} Starting prompt enhancement for: "${rawPrompt.substring(0, 50)}..."`,
    );
    console.log(
      `${logPrefix} Template: ${options.template || "general (auto-select)"}`,
    );
    const tags = options.tags;
    if (tags && tags.length > 0) {
      console.log(`${logPrefix} Tags: ${tags.join(", ")}`);
    }

    // Reject over-long input before spending an API call on it.
    const inputCheck = validatePromptLength(rawPrompt, 5000);
    if (!inputCheck.valid) {
      return failure(inputCheck.error || "Validation failed");
    }

    try {
      const agentResult = await getAgent(this.apiKey, options.template).enhance(
        rawPrompt,
        options,
      );
      if (!(agentResult.success && agentResult.enhancedPrompt)) {
        return failure(agentResult.error || "Enhancement failed");
      }

      // The agents are asked to stay within 2000 characters; enforce it here
      // by cutting the text when they do not.
      const outputCheck = validatePromptLength(agentResult.enhancedPrompt, 2000);
      if (!outputCheck.valid) {
        console.warn(
          `${logPrefix} Enhanced prompt exceeds 2000 characters (${agentResult.enhancedPrompt.length}), truncating...`,
        );
        agentResult.enhancedPrompt = agentResult.enhancedPrompt.slice(0, 2000);
      }

      // Agent-reported template wins, then the requested one, then "general".
      const templateName =
        agentResult.appliedTemplate || options.template || "general";

      const result: PromptEnhancementResult = {
        success: true,
        originalPrompt: rawPrompt,
        enhancedPrompt: agentResult.enhancedPrompt,
        ...(agentResult.detectedLanguage && {
          detectedLanguage: agentResult.detectedLanguage,
        }),
        appliedTemplate: templateName,
        metadata: {
          style: templateName,
          enhancements: agentResult.enhancements,
        },
      };

      // Only requests that carry a context are recorded.
      if (context) {
        EnhancementLogger.getInstance().log({
          timestamp,
          orgId: context.orgId,
          projectId: context.projectId,
          originalPrompt: rawPrompt,
          enhancedPrompt: agentResult.enhancedPrompt,
          ...(context.meta && { meta: context.meta }),
          template: templateName,
          ...(agentResult.detectedLanguage && {
            detectedLanguage: agentResult.detectedLanguage,
          }),
          enhancements: agentResult.enhancements,
          model: this.model,
        });
      }

      console.log(`${logPrefix} Enhancement completed successfully`);
      return result;
    } catch (error) {
      console.error(`${logPrefix} Prompt enhancement failed:`, error);
      return failure(
        error instanceof Error ? error.message : "Enhancement failed",
      );
    }
  }
}

View File

@ -0,0 +1,94 @@
import { GoogleGenAI } from "@google/genai";
import {
IPromptAgent,
PromptEnhancementOptions,
AgentResult,
} from "../types";
import { detectLanguage, detectEnhancements } from "../utils";
/**
 * Shared implementation for template-specific prompt agents.
 *
 * Subclasses provide the template identifier, the template text and the
 * system prompt; this class performs the Gemini call and turns the reply
 * into an `AgentResult`.
 */
export abstract class BaseAgent implements IPromptAgent {
  protected ai: GoogleGenAI;
  protected model = "gemini-2.5-flash";

  /** Template identifier; used in log lines, the user prompt and `appliedTemplate`. */
  abstract readonly templateType: string;

  /**
   * @param apiKey Gemini API key used to create the client.
   * @throws Error when `apiKey` is empty.
   */
  constructor(apiKey: string) {
    if (!apiKey) {
      throw new Error("Gemini API key is required");
    }
    this.ai = new GoogleGenAI({ apiKey });
  }

  /** Returns the full instruction text sent ahead of the user prompt. */
  protected abstract getSystemPrompt(): string;

  /** Returns the template text for this agent's style. */
  protected abstract getTemplate(): string;

  /**
   * Sends the system prompt plus the wrapped user prompt to the model and
   * returns the rewritten prompt.
   *
   * Never throws: API errors and empty replies are reported as
   * `success: false` with an `error` message and no enhancements.
   *
   * @param rawPrompt Prompt as supplied by the caller.
   * @param _options Accepted to satisfy the agent interface; not used here.
   */
  async enhance(
    rawPrompt: string,
    _options: PromptEnhancementOptions,
  ): Promise<AgentResult> {
    const timestamp = new Date().toISOString();
    console.log(
      `[${timestamp}] [${this.templateType}Agent] Enhancing prompt: "${rawPrompt.substring(0, 50)}..."`,
    );
    try {
      const systemPrompt = this.getSystemPrompt();
      const userPrompt = this.buildUserPrompt(rawPrompt);
      const response = await this.ai.models.generateContent({
        model: this.model,
        config: { responseModalities: ["TEXT"] },
        contents: [
          {
            role: "user" as const,
            parts: [{ text: `${systemPrompt}\n\n${userPrompt}` }],
          },
        ],
      });

      // A reply may be split across several text parts; concatenate them all
      // instead of keeping only the first one.
      const parts = response.candidates?.[0]?.content?.parts ?? [];
      const enhancedPrompt = parts
        .map((part) => part.text ?? "")
        .join("")
        .trim();

      // No candidate, no content, or only blank text: report a failure rather
      // than a "successful" result that carries an empty prompt.
      if (!enhancedPrompt) {
        return {
          success: false,
          error: "No enhanced prompt received from API",
          enhancements: [],
        };
      }

      console.log(
        `[${timestamp}] [${this.templateType}Agent] Enhancement successful`,
      );
      return {
        success: true,
        enhancedPrompt,
        detectedLanguage: detectLanguage(rawPrompt),
        appliedTemplate: this.templateType,
        enhancements: detectEnhancements(rawPrompt, enhancedPrompt),
      };
    } catch (error) {
      console.error(
        `[${timestamp}] [${this.templateType}Agent] Enhancement failed:`,
        error,
      );
      return {
        success: false,
        error: error instanceof Error ? error.message : "Enhancement failed",
        enhancements: [],
      };
    }
  }

  /** Wraps the raw prompt in the request sentence and names the target template. */
  protected buildUserPrompt(rawPrompt: string): string {
    return `Transform this prompt into a professional image generation prompt: "${rawPrompt}"
Target template/style: ${this.templateType}`;
  }
}

View File

@ -0,0 +1,48 @@
import { BaseAgent } from "./BaseAgent";
/**
 * Fill-in-the-blanks prompt skeleton for a single comic panel, followed by
 * one worked example. Embedded verbatim into agent system prompts.
 */
export const COMIC_TEMPLATE = `A single comic book panel in a [art style] style. In the foreground, [character description and action]. In the background, [setting details]. The panel has a [dialogue/caption box] with the text "[Text]". The lighting creates a [mood] mood. [Aspect ratio].
Example:
A single comic book panel in a gritty, noir art style with high-contrast black and white inks. In the foreground, a detective in a trench coat stands under a flickering streetlamp, rain soaking his shoulders. In the background, the neon sign of a desolate bar reflects in a puddle. A caption box at the top reads "The city was a tough place to keep secrets." The lighting is harsh, creating a dramatic, somber mood. Landscape.`;
/**
 * Prompt agent for the "comic" template: supplies the comic panel template
 * and a comic-specific system prompt on top of BaseAgent.
 */
export class ComicAgent extends BaseAgent {
// Template identifier reported for this agent.
readonly templateType = "comic";
/** Returns the comic panel skeleton and example (COMIC_TEMPLATE). */
protected getTemplate(): string {
return COMIC_TEMPLATE;
}
/**
 * Builds the system prompt: comic art guidelines, the template from
 * getTemplate() under "TEMPLATE STRUCTURE", the enhancement strategy and
 * the response-format rules (single paragraph, under 2000 characters).
 */
protected getSystemPrompt(): string {
return `You are an expert AI prompt engineer specializing in comic book and sequential art using the Gemini Flash Image Generation model.
CORE PRINCIPLE: Create dynamic, story-driven panels with clear visual narrative and emotional impact.
COMIC ART GUIDELINES:
- Specify art style (noir, manga, superhero, indie, webcomic, etc.)
- Detail inking technique (bold lines, cross-hatching, clean digital, etc.)
- Describe foreground action and character poses
- Include background setting details
- Add mood and atmosphere (dramatic, tense, lighthearted, etc.)
- Specify panel composition and framing
- Include dialogue/caption placement if needed
- Use comic-specific lighting (dramatic shadows, rim lighting, etc.)
- Dynamic poses and expressions
TEMPLATE STRUCTURE:
${this.getTemplate()}
ENHANCEMENT STRATEGY:
1. Transform any language into professional English
2. Add comic art terminology
3. Create dynamic, story-driven compositions
4. Specify art style and technique clearly
5. Include mood and narrative elements
RESPONSE FORMAT:
Provide only the enhanced prompt as a single, cohesive paragraph. Do not include explanations, metadata, or multiple options. The response should be ready to use directly for image generation.
CRITICAL: The prompt length MUST be under 2000 characters. Contract the prompt if it's longer.
Remember: Comics are about storytelling. Create panels that convey action, emotion, and narrative progression.`;
}
}

View File

@ -0,0 +1,68 @@
import { BaseAgent } from "./BaseAgent";
import { PHOTOREALISTIC_TEMPLATE } from "./PhotorealisticAgent";
import { ILLUSTRATION_TEMPLATE } from "./IllustrationAgent";
import { MINIMALIST_TEMPLATE } from "./MinimalistAgent";
import { STICKER_TEMPLATE } from "./StickerAgent";
import { PRODUCT_TEMPLATE } from "./ProductAgent";
import { COMIC_TEMPLATE } from "./ComicAgent";
/**
 * Catch-all prompt agent for the "general" template. Instead of one fixed
 * style it shows the model all six style templates and instructs it to pick
 * the one that best fits the user's prompt.
 */
export class GeneralAgent extends BaseAgent {
// Template identifier reported for this agent.
readonly templateType = "general";
/**
 * Returns a numbered catalogue containing every style-specific template
 * (photorealistic, illustration, minimalist, sticker, product, comic).
 */
protected getTemplate(): string {
return `AVAILABLE TEMPLATES:
1. PHOTOREALISTIC:
${PHOTOREALISTIC_TEMPLATE}
2. ILLUSTRATION:
${ILLUSTRATION_TEMPLATE}
3. MINIMALIST:
${MINIMALIST_TEMPLATE}
4. STICKER:
${STICKER_TEMPLATE}
5. PRODUCT:
${PRODUCT_TEMPLATE}
6. COMIC:
${COMIC_TEMPLATE}`;
}
/**
 * Builds the system prompt: the style-selection task, the template
 * catalogue from getTemplate(), general enhancement guidelines and the
 * response-format rules (single paragraph, under 2000 characters).
 */
protected getSystemPrompt(): string {
return `You are an expert AI prompt engineer specializing in transforming rough, unstructured prompts into professional, detailed prompts for the Gemini Flash Image Generation model.
CORE PRINCIPLE: Describe the scene, don't just list keywords. Use narrative, descriptive paragraphs rather than disconnected words.
YOUR TASK:
1. Analyze the user's prompt to understand their intent
2. Determine which template/style best matches their vision:
- PHOTOREALISTIC: Real-world scenes, portraits, landscapes, realistic subjects
- ILLUSTRATION: Artistic renderings, stylized art, drawings, paintings
- MINIMALIST: Simple compositions, negative space, clean designs, backgrounds
- STICKER: Icons, logos, die-cut designs, kawaii, bold graphics
- PRODUCT: Commercial photography, e-commerce shots, studio lighting
- COMIC: Sequential art, panels, story-driven scenes, narrative art
3. Apply the most appropriate template's guidelines
4. Enhance the prompt with professional terminology and detailed descriptions
${this.getTemplate()}
ENHANCEMENT GUIDELINES:
- Transform any language into professional English
- Be hyper-specific with details instead of vague descriptions
- Use appropriate technical terminology (photography, art, design terms)
- Provide context and intent for better understanding
- Apply the template that best matches the user's vision
- If unclear, default to a balanced photorealistic approach
RESPONSE FORMAT:
Provide only the enhanced prompt as a single, cohesive paragraph. Do not include explanations, metadata, template names, or multiple options. The response should be ready to use directly for image generation.
CRITICAL: The prompt length MUST be under 2000 characters. Contract the prompt if it's longer.
Remember: Your job is to intelligently select the best approach and create a vivid, specific description that guides the model toward the exact image envisioned.`;
}
}

View File

@ -0,0 +1,46 @@
import { BaseAgent } from "./BaseAgent";
/**
 * Fill-in-the-blanks prompt skeleton for an illustration, followed by one
 * worked example. Embedded verbatim into agent system prompts.
 */
export const ILLUSTRATION_TEMPLATE = `A [style] illustration of [subject], featuring [key characteristics] with [color palette]. The art style is [art technique description], with [line work style] and [shading technique]. The composition includes [composition details].
Example:
A watercolor illustration of a magical forest clearing at twilight, featuring glowing fireflies and an ancient stone archway covered in luminescent moss. The art style is whimsical and dreamlike, with soft, flowing brushstrokes and gentle color bleeding. The color palette consists of deep purples, soft blues, and warm golden yellows. The illustration uses delicate line work for fine details and subtle wet-on-wet shading to create atmospheric depth.`;
/**
 * Prompt agent for the "illustration" template: supplies the illustration
 * template and an illustration-specific system prompt on top of BaseAgent.
 */
export class IllustrationAgent extends BaseAgent {
// Template identifier reported for this agent.
readonly templateType = "illustration";
/** Returns the illustration skeleton and example (ILLUSTRATION_TEMPLATE). */
protected getTemplate(): string {
return ILLUSTRATION_TEMPLATE;
}
/**
 * Builds the system prompt: illustration guidelines, the template from
 * getTemplate() under "TEMPLATE STRUCTURE", the enhancement strategy and
 * the response-format rules (single paragraph, under 2000 characters).
 */
protected getSystemPrompt(): string {
return `You are an expert AI prompt engineer specializing in illustrated artwork using the Gemini Flash Image Generation model.
CORE PRINCIPLE: Describe the scene, don't just list keywords. Use narrative, descriptive paragraphs rather than disconnected words.
ILLUSTRATION GUIDELINES:
- Specify art style (watercolor, digital painting, sketch, anime, manga, etc.)
- Describe line work quality (bold outlines, delicate lines, clean vectors, etc.)
- Detail color palette and color theory (complementary, monochromatic, vibrant, muted, etc.)
- Mention shading technique (cel-shading, soft shading, cross-hatching, etc.)
- Include artistic influences or style references when appropriate
- Describe texture and brush work
- Add composition and framing details
TEMPLATE STRUCTURE:
${this.getTemplate()}
ENHANCEMENT STRATEGY:
1. Transform any language into professional English
2. Add art-specific terminology
3. Expand vague descriptions with artistic details
4. Specify style, technique, and medium
5. Maintain natural, flowing narrative style
RESPONSE FORMAT:
Provide only the enhanced prompt as a single, cohesive paragraph. Do not include explanations, metadata, or multiple options. The response should be ready to use directly for image generation.
CRITICAL: The prompt length MUST be under 2000 characters. Contract the prompt if it's longer.
Remember: Artistic detail creates better results. Transform basic ideas into rich, stylistically-defined illustrations.`;
}
}

View File

@ -0,0 +1,47 @@
import { BaseAgent } from "./BaseAgent";
/**
 * Fill-in-the-blanks prompt skeleton for a minimalist composition, followed
 * by one worked example. Embedded verbatim into agent system prompts.
 */
export const MINIMALIST_TEMPLATE = `A minimalist composition featuring a single [subject] positioned in the [position in frame] of the frame. The background is a vast, empty [color/description] canvas, creating significant negative space. Soft, subtle lighting. [Aspect ratio].
Example:
A minimalist composition featuring a single, delicate red maple leaf positioned in the bottom-right of the frame. The background is a vast, empty off-white canvas, creating significant negative space for text. Soft, diffused lighting from the top left. Square image.`;
/**
 * Prompt agent for the "minimalist" template: supplies the minimalist
 * template and a minimalism-specific system prompt on top of BaseAgent.
 */
export class MinimalistAgent extends BaseAgent {
// Template identifier reported for this agent.
readonly templateType = "minimalist";
/** Returns the minimalist skeleton and example (MINIMALIST_TEMPLATE). */
protected getTemplate(): string {
return MINIMALIST_TEMPLATE;
}
/**
 * Builds the system prompt: minimalist guidelines, the template from
 * getTemplate() under "TEMPLATE STRUCTURE", the enhancement strategy and
 * the response-format rules (single paragraph, under 2000 characters).
 */
protected getSystemPrompt(): string {
return `You are an expert AI prompt engineer specializing in minimalist design using the Gemini Flash Image Generation model.
CORE PRINCIPLE: Less is more. Emphasize negative space, simple composition, and subtle elements.
MINIMALIST GUIDELINES:
- Focus on negative space and emptiness
- Simple, clean composition with few elements
- Describe precise positioning in the frame (bottom-right, top-left, centered, etc.)
- Use muted or solid color backgrounds
- Subtle, soft lighting
- Avoid clutter and excessive detail
- Emphasize balance and proportion
- Consider use cases (backgrounds for text, presentations, marketing)
TEMPLATE STRUCTURE:
${this.getTemplate()}
ENHANCEMENT STRATEGY:
1. Transform any language into professional English
2. Simplify and clarify the composition
3. Emphasize negative space and positioning
4. Add subtle lighting and color descriptions
5. Maintain clean, precise language
RESPONSE FORMAT:
Provide only the enhanced prompt as a single, cohesive paragraph. Do not include explanations, metadata, or multiple options. The response should be ready to use directly for image generation.
CRITICAL: The prompt length MUST be under 2000 characters. Contract the prompt if it's longer.
Remember: Minimalism is about restraint and intentional placement. Create breathing room and focus.`;
}
}

View File

@ -0,0 +1,46 @@
import { BaseAgent } from "./BaseAgent";
/**
 * Fill-in-the-blanks prompt skeleton for a photorealistic image, followed by
 * one worked example. Embedded verbatim into agent system prompts.
 */
export const PHOTOREALISTIC_TEMPLATE = `A photorealistic [shot type] of [subject], [action or expression], set in [environment]. The scene is illuminated by [lighting description], creating a [mood] atmosphere. Captured with a [camera/lens details], emphasizing [key textures and details]. The image should be in a [aspect ratio] format.
Example:
A photorealistic close-up portrait of an elderly Japanese ceramicist with deep, sun-etched wrinkles and a warm, knowing smile. He is carefully inspecting a freshly glazed tea bowl. The setting is his rustic, sun-drenched workshop. The scene is illuminated by soft, golden hour light streaming through a window, highlighting the fine texture of the clay. Captured with an 85mm portrait lens, resulting in a soft, blurred background (bokeh). The overall mood is serene and masterful. Vertical portrait orientation.`;
/**
 * Prompt agent for the "photorealistic" template: supplies the photographic
 * template and a photography-specific system prompt on top of BaseAgent.
 */
export class PhotorealisticAgent extends BaseAgent {
// Template identifier reported for this agent.
readonly templateType = "photorealistic";
/** Returns the photorealistic skeleton and example (PHOTOREALISTIC_TEMPLATE). */
protected getTemplate(): string {
return PHOTOREALISTIC_TEMPLATE;
}
/**
 * Builds the system prompt: photography guidelines, the template from
 * getTemplate() under "TEMPLATE STRUCTURE", the enhancement strategy and
 * the response-format rules (single paragraph, under 2000 characters).
 */
protected getSystemPrompt(): string {
return `You are an expert AI prompt engineer specializing in photorealistic image generation using the Gemini Flash Image Generation model.
CORE PRINCIPLE: Describe the scene, don't just list keywords. Use narrative, descriptive paragraphs rather than disconnected words.
PHOTOREALISTIC GUIDELINES:
- Use photography terms: camera angles, lens types, lighting, fine details
- Mention specific camera equipment (e.g., 85mm portrait lens, wide-angle shot)
- Describe lighting in detail (golden hour, soft diffused light, studio lighting, etc.)
- Include mood and atmosphere
- Specify textures and materials
- Add composition details (depth of field, bokeh, focus points)
- Be hyper-specific with visual details
TEMPLATE STRUCTURE:
${this.getTemplate()}
ENHANCEMENT STRATEGY:
1. Transform any language into professional English
2. Add photography-specific terminology
3. Expand vague descriptions with specific visual details
4. Include lighting, camera, and composition details
5. Maintain natural, flowing narrative style
RESPONSE FORMAT:
Provide only the enhanced prompt as a single, cohesive paragraph. Do not include explanations, metadata, or multiple options. The response should be ready to use directly for image generation.
CRITICAL: The prompt length MUST be under 2000 characters. Contract the prompt if it's longer.
Remember: More detail equals more control. Transform vague concepts into vivid, photographic descriptions.`;
}
}

View File

@ -0,0 +1,48 @@
import { BaseAgent } from "./BaseAgent";
/**
 * Fill-in-the-blanks prompt skeleton for a studio product photograph,
 * followed by one worked example. Embedded verbatim into agent system prompts.
 */
export const PRODUCT_TEMPLATE = `A high-resolution, studio-lit product photograph of a [product description] on a [background surface/description]. The lighting is a [lighting setup, e.g., three-point softbox setup] to [lighting purpose]. The camera angle is a [angle type] to showcase [specific feature]. Ultra-realistic, with sharp focus on [key detail]. [Aspect ratio].
Example:
A high-resolution, studio-lit product photograph of a minimalist ceramic coffee mug in matte black, presented on a polished concrete surface. The lighting is a three-point softbox setup designed to create soft, diffused highlights and eliminate harsh shadows. The camera angle is a slightly elevated 45-degree shot to showcase its clean lines. Ultra-realistic, with sharp focus on the steam rising from the coffee. Square image.`;
/**
 * Prompt agent for the "product" template: supplies the product-photography
 * template and a commercial-photography system prompt on top of BaseAgent.
 */
export class ProductAgent extends BaseAgent {
// Template identifier reported for this agent.
readonly templateType = "product";
/** Returns the product photograph skeleton and example (PRODUCT_TEMPLATE). */
protected getTemplate(): string {
return PRODUCT_TEMPLATE;
}
/**
 * Builds the system prompt: product photography guidelines, the template
 * from getTemplate() under "TEMPLATE STRUCTURE", the enhancement strategy
 * and the response-format rules (single paragraph, under 2000 characters).
 */
protected getSystemPrompt(): string {
return `You are an expert AI prompt engineer specializing in commercial product photography using the Gemini Flash Image Generation model.
CORE PRINCIPLE: Create professional, e-commerce ready product shots with studio-quality lighting and composition.
PRODUCT PHOTOGRAPHY GUIDELINES:
- Studio lighting setups (three-point, softbox, rim lighting, etc.)
- Professional camera angles (45-degree, overhead flat-lay, eye-level, etc.)
- Clean, appropriate backgrounds (white seamless, concrete, wood, gradient, etc.)
- High-resolution, sharp focus on product details
- Describe surface materials and textures
- Lighting purpose (eliminate shadows, create highlights, show texture, etc.)
- Commercial photography terms
- Ultra-realistic rendering
- Appropriate aspect ratios for e-commerce (square, portrait)
TEMPLATE STRUCTURE:
${this.getTemplate()}
ENHANCEMENT STRATEGY:
1. Transform any language into professional English
2. Add commercial photography terminology
3. Specify professional lighting setups
4. Detail surface materials and backgrounds
5. Include sharp focus and detail specifications
RESPONSE FORMAT:
Provide only the enhanced prompt as a single, cohesive paragraph. Do not include explanations, metadata, or multiple options. The response should be ready to use directly for image generation.
CRITICAL: The prompt length MUST be under 2000 characters. Contract the prompt if it's longer.
Remember: Product photography is about showcasing the product clearly with professional studio quality. Think e-commerce and advertising.`;
}
}

View File

@ -0,0 +1,48 @@
import { BaseAgent } from "./BaseAgent";
/**
 * Fill-in-the-blanks prompt skeleton for a sticker design, followed by one
 * worked example. Embedded verbatim into agent system prompts.
 * NOTE(review): the skeleton asks for a transparent background while the
 * example asks for a white one — confirm that both are intended.
 */
export const STICKER_TEMPLATE = `A [style] sticker of a [subject], featuring [key characteristics] and a [color palette]. The design should have [line style] and [shading style]. The background must be transparent.
Example:
A kawaii-style sticker of a happy red panda wearing a tiny bamboo hat. It's munching on a green bamboo leaf. The design features bold, clean outlines, simple cel-shading, and a vibrant color palette. The background must be white.`;
/**
 * Prompt agent for the "sticker" template: supplies the sticker template
 * and a sticker/icon-specific system prompt on top of BaseAgent.
 */
export class StickerAgent extends BaseAgent {
// Template identifier reported for this agent.
readonly templateType = "sticker";
/** Returns the sticker skeleton and example (STICKER_TEMPLATE). */
protected getTemplate(): string {
return STICKER_TEMPLATE;
}
/**
 * Builds the system prompt: sticker guidelines, the template from
 * getTemplate() under "TEMPLATE STRUCTURE", the enhancement strategy and
 * the response-format rules (single paragraph, under 2000 characters).
 */
protected getSystemPrompt(): string {
return `You are an expert AI prompt engineer specializing in sticker and icon design using the Gemini Flash Image Generation model.
CORE PRINCIPLE: Create clean, recognizable designs optimized for small formats and die-cut production.
STICKER GUIDELINES:
- Specify sticker style (kawaii, bold, minimalist, vintage, cartoon, etc.)
- Use bold, clean outlines for definition
- Simple, clear shapes that work at small sizes
- Vibrant, well-defined color palettes
- Simple shading (cel-shading, flat colors, minimal gradients)
- ALWAYS specify transparent or white background
- Avoid fine details that won't scale well
- Consider die-cut friendly designs
- Make subjects iconic and recognizable
TEMPLATE STRUCTURE:
${this.getTemplate()}
ENHANCEMENT STRATEGY:
1. Transform any language into professional English
2. Simplify complex ideas into iconic, sticker-friendly designs
3. Add style-specific terminology (kawaii, chibi, retro, etc.)
4. Specify clear outlines and simple shading
5. Always include background specification
RESPONSE FORMAT:
Provide only the enhanced prompt as a single, cohesive paragraph. Do not include explanations, metadata, or multiple options. The response should be ready to use directly for image generation.
CRITICAL: The prompt length MUST be under 2000 characters. Contract the prompt if it's longer.
Remember: Stickers need to be simple, bold, and instantly recognizable. Think iconic, not detailed.`;
}
}

View File

@ -0,0 +1,46 @@
import { IPromptAgent } from "../types";
import { PhotorealisticAgent } from "./PhotorealisticAgent";
import { IllustrationAgent } from "./IllustrationAgent";
import { MinimalistAgent } from "./MinimalistAgent";
import { StickerAgent } from "./StickerAgent";
import { ProductAgent } from "./ProductAgent";
import { ComicAgent } from "./ComicAgent";
import { GeneralAgent } from "./GeneralAgent";
/** Constructor shape shared by every registered agent: takes an API key, yields an IPromptAgent. */
type AgentConstructor = new (apiKey: string) => IPromptAgent;
/**
 * Maps template names to the agent class that handles them.
 *
 * NOTE(review): this is a plain object literal, so indexing it with an arbitrary
 * string also reaches inherited Object.prototype members (e.g. "constructor").
 * Code that looks up untrusted names should restrict itself to own keys.
 */
const AGENT_REGISTRY: Record<string, AgentConstructor> = {
photorealistic: PhotorealisticAgent,
illustration: IllustrationAgent,
minimalist: MinimalistAgent,
sticker: StickerAgent,
product: ProductAgent,
comic: ComicAgent,
general: GeneralAgent,
};
/**
 * Creates the prompt agent registered for a template name.
 *
 * @param apiKey - API key handed to the agent's constructor.
 * @param template - Template name to look up in AGENT_REGISTRY. When omitted or
 *   empty, a GeneralAgent is returned.
 * @returns A new agent instance. Unknown template names log a warning and fall
 *   back to GeneralAgent instead of throwing.
 */
export function getAgent(apiKey: string, template?: string): IPromptAgent {
  if (!template) {
    return new GeneralAgent(apiKey);
  }

  // Only own keys count as registered templates. A bare `AGENT_REGISTRY[template]`
  // would also resolve inherited Object.prototype members: "constructor" yields
  // `Object` (so `new` returns a boxed string, not an agent) and "toString" or
  // "__proto__" yield values that throw "is not a constructor".
  const isRegistered = Object.prototype.hasOwnProperty.call(
    AGENT_REGISTRY,
    template,
  );
  const AgentClass = isRegistered ? AGENT_REGISTRY[template] : undefined;

  if (!AgentClass) {
    console.warn(
      `Unknown template "${template}", falling back to GeneralAgent`,
    );
    return new GeneralAgent(apiKey);
  }

  return new AgentClass(apiKey);
}
export {
PhotorealisticAgent,
IllustrationAgent,
MinimalistAgent,
StickerAgent,
ProductAgent,
ComicAgent,
GeneralAgent,
};

View File

@ -0,0 +1,12 @@
export { PromptEnhancementService } from "./PromptEnhancementService";
export { EnhancementLogger } from "./EnhancementLogger";
export type {
PromptEnhancementOptions,
PromptEnhancementContext,
PromptEnhancementResult,
EnhancementLogEntry,
IPromptAgent,
AgentResult,
ValidationResult,
} from "./types";

View File

@ -0,0 +1,70 @@
/** Caller-supplied options for a single prompt enhancement request. */
export interface PromptEnhancementOptions {
/** Name of the enhancement template to apply; all members are optional, so it may be omitted. */
template?:
| "photorealistic"
| "illustration"
| "minimalist"
| "sticker"
| "product"
| "comic"
| "general";
/**
 * Optional free-form tags.
 * NOTE(review): the previous single-file implementation accepted these without
 * using them — confirm whether that is still the case.
 */
tags?: string[];
}
/** Identifies who an enhancement request belongs to, plus optional metadata. */
export interface PromptEnhancementContext {
/** Organization identifier. */
orgId: string;
/** Project identifier. */
projectId: string;
/** Optional metadata; EnhancementLogEntry declares a field of the same shape. */
meta?: {
tags?: string[];
};
}
/** Outcome of a prompt enhancement as exposed to service callers. */
export interface PromptEnhancementResult {
/** Whether the enhancement succeeded. */
success: boolean;
/** The prompt exactly as submitted. */
originalPrompt: string;
/** The rewritten prompt; optional — presumably absent on failure (confirm in the service). */
enhancedPrompt?: string;
/** Language label detected for the original prompt, when available. */
detectedLanguage?: string;
/** Name of the template that was applied, when available. */
appliedTemplate?: string;
/** Optional descriptive details about the enhancement. */
metadata?: {
style?: string;
aspectRatio?: string;
/** Human-readable labels describing what was enhanced. */
enhancements: string[];
};
/** Error description; optional — presumably set only when `success` is false (confirm). */
error?: string;
}
/** Result returned by an individual prompt agent's `enhance` call. */
export interface AgentResult {
/** Whether the agent produced an enhanced prompt. */
success: boolean;
/** The rewritten prompt; optional — presumably absent on failure (confirm in the agents). */
enhancedPrompt?: string;
/** Language label detected for the input prompt, when available. */
detectedLanguage?: string;
/** Name of the template the agent applied, when available. */
appliedTemplate?: string;
/** Human-readable labels describing what was enhanced; always present, unlike the optional fields. */
enhancements: string[];
/** Error description; optional — presumably set only when `success` is false (confirm). */
error?: string;
}
/** Contract implemented by every template-specific prompt agent. */
export interface IPromptAgent {
/** Name of the template this agent handles. */
readonly templateType: string;
/**
 * Enhances a prompt.
 *
 * @param prompt - The prompt text to enhance.
 * @param options - Caller-supplied enhancement options.
 * @returns A promise resolving to the agent's result.
 */
enhance(
prompt: string,
options: PromptEnhancementOptions,
): Promise<AgentResult>;
}
/** Outcome of a validation check. */
export interface ValidationResult {
/** True when the checked value passed validation. */
valid: boolean;
/** Explanation of the failure; optional, so it may be absent when `valid` is true. */
error?: string;
}
/** One record describing a completed prompt enhancement, as handled by the enhancement logger. */
export interface EnhancementLogEntry {
/** When the enhancement happened. NOTE(review): string format not visible here — presumably ISO 8601; confirm. */
timestamp: string;
/** Organization identifier. */
orgId: string;
/** Project identifier. */
projectId: string;
/** The prompt as submitted. */
originalPrompt: string;
/** The rewritten prompt (required here, unlike in the result types). */
enhancedPrompt: string;
/** Optional metadata, same shape as PromptEnhancementContext.meta. */
meta?: {
tags?: string[];
};
/** Name of the template that was applied. */
template: string;
/** Language label detected for the original prompt, when available. */
detectedLanguage?: string;
/** Human-readable labels describing what was enhanced. */
enhancements: string[];
/** Model identifier. NOTE(review): presumably the generative model used for the enhancement — confirm. */
model: string;
}

View File

@ -0,0 +1,47 @@
/**
 * Guesses the language of `text` from the scripts and characters it contains.
 *
 * This is a cheap character-class heuristic, not real language identification:
 * the first matching rule wins, and text with no distinguishing characters
 * (including the empty string) is reported as "English".
 *
 * @param text - Text to inspect.
 * @returns One of "Japanese", "Chinese", "Korean", "Romance Language",
 *   "Russian", "Greek", "Arabic", "Hebrew" or "English".
 */
export function detectLanguage(text: string): string {
  // Kana must be tested before Han ideographs: Japanese text normally mixes
  // kana with kanji (which sit in the same CJK block Chinese uses), so testing
  // Han first would label almost all Japanese text as Chinese.
  if (/[\u3040-\u309f\u30a0-\u30ff]/.test(text)) return "Japanese";
  if (/[\u4e00-\u9fff]/.test(text)) return "Chinese";
  if (/[\uac00-\ud7af]/.test(text)) return "Korean";

  // The Latin and Cyrillic classes below list lowercase letters only, so match
  // against a lowercased copy to recognise capitalised text such as "CAFÉ".
  const lowered = text.toLowerCase();
  if (/[àáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ]/.test(lowered))
    return "Romance Language";
  if (/[а-яё]/.test(lowered)) return "Russian";

  if (/[α-ωΑ-Ω]/.test(text)) return "Greek";
  if (/[أ-ي]/.test(text)) return "Arabic";
  if (/[א-ת]/.test(text)) return "Hebrew";
  return "English";
}
/**
 * Derives human-readable labels describing how a prompt was enhanced.
 *
 * The labels come from simple heuristics: a length comparison plus substring
 * keyword checks on the enhanced prompt. Keyword checks ignore case, so
 * sentence-initial words such as "Photorealistic" or "Lighting" are recognised.
 *
 * @param originalPrompt - The prompt before enhancement.
 * @param enhancedPrompt - The prompt after enhancement.
 * @returns Labels in a fixed order (details, photography, lighting, texture);
 *   empty when no heuristic matches.
 */
export function detectEnhancements(
  originalPrompt: string,
  enhancedPrompt: string,
): string[] {
  const enhancements: string[] = [];

  // "Detailed" means the text grew by more than half of its original length.
  if (enhancedPrompt.length > originalPrompt.length * 1.5) {
    enhancements.push("Added detailed descriptions");
  }

  // Lowercase once so every keyword check below is case-insensitive.
  const haystack = enhancedPrompt.toLowerCase();
  const mentionsAny = (keywords: readonly string[]): boolean =>
    keywords.some((keyword) => haystack.includes(keyword));

  if (mentionsAny(["photorealistic", "shot", "lens"])) {
    enhancements.push("Applied photography terminology");
  }
  if (mentionsAny(["lighting", "illuminated"])) {
    enhancements.push("Enhanced lighting description");
  }
  if (mentionsAny(["texture", "surface"])) {
    enhancements.push("Added texture details");
  }

  return enhancements;
}

View File

@ -0,0 +1,22 @@
import { ValidationResult } from "./types";
/**
 * Checks that a prompt is non-blank and no longer than `maxLength` characters.
 *
 * Blankness is judged on the trimmed text; the length limit is applied to the
 * prompt as given (untrimmed).
 *
 * @param prompt - Prompt text to validate.
 * @param maxLength - Maximum allowed length in characters (default 2000).
 * @returns `{ valid: true }` on success, otherwise `{ valid: false, error }`
 *   with a message describing the failure.
 */
export function validatePromptLength(
  prompt: string,
  maxLength: number = 2000,
): ValidationResult {
  const reject = (error: string): ValidationResult => ({ valid: false, error });

  const isBlank = !prompt || prompt.trim().length === 0;
  if (isBlank) {
    return reject("Prompt cannot be empty");
  }

  const isTooLong = prompt.length > maxLength;
  return isTooLong
    ? reject(
        `Prompt exceeds maximum length of ${maxLength} characters (current: ${prompt.length})`,
      )
    : { valid: true };
}