From f942480fc8abaa37f89d9ea537bc873746c83805 Mon Sep 17 00:00:00 2001
From: Oleg Proskurin
Date: Sat, 11 Oct 2025 18:11:56 +0700
Subject: [PATCH] feat: filename sanitization

Reduce uploaded filenames to ASCII-only object names so that S3 request
signatures match on MinIO, and keep the original name in Base64-encoded
object metadata.

The sanitizer never returns a name that contains "..", starts with a dot,
or ends in a bare dot, and it truncates the base name rather than the
extension when enforcing the 255-character limit.

---
 .../src/services/MinioStorageService.ts       | 65 ++++++++++++--
 test-filename-sanitization.js                 | 95 ++++++++++++++++++++
 2 files changed, 154 insertions(+), 6 deletions(-)
 create mode 100644 test-filename-sanitization.js

diff --git a/apps/api-service/src/services/MinioStorageService.ts b/apps/api-service/src/services/MinioStorageService.ts
index 9d93567..04d0bde 100644
--- a/apps/api-service/src/services/MinioStorageService.ts
+++ b/apps/api-service/src/services/MinioStorageService.ts
@@ -59,13 +59,62 @@ export class MinioStorageService implements StorageService {
   }
 
+  /**
+   * Convert a user-supplied filename into a safe, ASCII-only object name.
+   *
+   * The result contains only `[A-Za-z0-9_.-]`, never starts with a dot, never
+   * contains `..`, and is at most 255 characters long. A name with no usable
+   * characters falls back to `file`.
+   *
+   * @param filename - Original filename as supplied by the caller.
+   * @returns Sanitized base name plus an optional lowercase extension.
+   */
   private sanitizeFilename(filename: string): string {
-    // Remove dangerous characters and path traversal attempts
-    return filename
+    const maxLength = 255;
+    const maxExtensionLength = 10; // Includes the leading dot
+
+    // Remove path traversal attempts FIRST from entire filename
+    const cleaned = filename.replace(/\.\./g, '').trim();
+
+    // Split filename and extension (a leading dot is not an extension separator)
+    const lastDotIndex = cleaned.lastIndexOf('.');
+    const hasExtension = lastDotIndex > 0;
+    const rawBaseName = hasExtension ? cleaned.substring(0, lastDotIndex) : cleaned;
+    const rawExtension = hasExtension ? cleaned.substring(lastDotIndex + 1) : '';
+
+    // Keep only ASCII word chars and dashes in the extension; an extension that
+    // ends up empty is dropped entirely so the name never ends in a bare dot
+    const extensionBody = rawExtension
+      .replace(/[^\w-]/g, '')
+      .toLowerCase()
+      .substring(0, maxExtensionLength - 1);
+    const sanitizedExt = extensionBody.length > 0 ? `.${extensionBody}` : '';
+
+    // Replace non-ASCII characters with ASCII equivalents or remove them
+    // This prevents S3 signature mismatches with MinIO
+    let baseName = rawBaseName
       .replace(/[<>:"/\\|?*\x00-\x1f]/g, '') // Remove dangerous chars
-      .replace(/\.\./g, '') // Remove path traversal
-      .replace(/^\.+/, '') // Remove leading dots
       .trim()
-      .substring(0, 255); // Limit length
+      .normalize('NFD') // Decompose combined characters (é -> e + ´)
+      .replace(/[\u0300-\u036f]/g, '') // Remove diacritical marks
+      .replace(/[^\x20-\x7E]/g, '_') // Replace any remaining non-ASCII with underscore
+      .replace(/[^\w\s\-_.]/g, '_') // Replace special chars (except word chars, space, dash, underscore, dot) with underscore
+      .replace(/\s+/g, '_') // Replace spaces with underscores
+      .replace(/\.{2,}/g, '.') // Collapse dot runs re-formed by the removals above
+      .replace(/_{2,}/g, '_') // Collapse multiple underscores
+      .replace(/^[._]+/, ''); // Remove leading dots/underscores
+
+    // Truncate the base name (never the extension) to fit the length limit, then
+    // drop trailing dots/underscores so base + extension cannot contain `..`
+    baseName = baseName
+      .substring(0, maxLength - sanitizedExt.length)
+      .replace(/[._]+$/, '');
+
+    // Ensure we still have a valid base name
+    if (baseName.length === 0) {
+      baseName = 'file';
+    }
+
+    return baseName + sanitizedExt;
   }
 
   private validateFilePath(
@@ -150,9 +199,13 @@ export class MinioStorageService implements StorageService {
     const uniqueFilename = this.generateUniqueFilename(filename);
     const filePath = this.getFilePath(orgId, projectId, category, uniqueFilename);
 
+    // Encode original filename to Base64 to safely store non-ASCII characters in metadata
+    const originalNameEncoded = Buffer.from(filename, 'utf-8').toString('base64');
+
     const metadata = {
       'Content-Type': contentType,
-      'X-Amz-Meta-Original-Name': filename,
+      'X-Amz-Meta-Original-Name': originalNameEncoded,
+      'X-Amz-Meta-Original-Name-Encoding': 'base64',
       'X-Amz-Meta-Category': category,
       'X-Amz-Meta-Project': projectId,
       'X-Amz-Meta-Organization': orgId,
diff --git a/test-filename-sanitization.js b/test-filename-sanitization.js
new file mode 100644
index 0000000..2002dc5
--- /dev/null
+++ b/test-filename-sanitization.js
@@ -0,0 +1,95 @@
+// Test filename sanitization for Unicode characters and path traversal
+//
+// Run with: node test-filename-sanitization.js
+// Exits with a non-zero status when any case does not match its expected output.
+//
+// NOTE: sanitizeFilename below is a copy of the private
+// MinioStorageService.sanitizeFilename method. Keep the two in sync.
+
+function sanitizeFilename(filename) {
+  const maxLength = 255;
+  const maxExtensionLength = 10; // Includes the leading dot
+
+  // Remove path traversal attempts FIRST from entire filename
+  const cleaned = filename.replace(/\.\./g, '').trim();
+
+  // Split filename and extension (a leading dot is not an extension separator)
+  const lastDotIndex = cleaned.lastIndexOf('.');
+  const hasExtension = lastDotIndex > 0;
+  const rawBaseName = hasExtension ? cleaned.substring(0, lastDotIndex) : cleaned;
+  const rawExtension = hasExtension ? cleaned.substring(lastDotIndex + 1) : '';
+
+  // Keep only ASCII word chars and dashes in the extension; an extension that
+  // ends up empty is dropped entirely so the name never ends in a bare dot
+  const extensionBody = rawExtension
+    .replace(/[^\w-]/g, '')
+    .toLowerCase()
+    .substring(0, maxExtensionLength - 1);
+  const sanitizedExt = extensionBody.length > 0 ? `.${extensionBody}` : '';
+
+  // Replace non-ASCII characters with ASCII equivalents or remove them
+  // This prevents S3 signature mismatches with MinIO
+  let baseName = rawBaseName
+    .replace(/[<>:"/\\|?*\x00-\x1f]/g, '') // Remove dangerous chars
+    .trim()
+    .normalize('NFD') // Decompose combined characters (é -> e + ´)
+    .replace(/[\u0300-\u036f]/g, '') // Remove diacritical marks
+    .replace(/[^\x20-\x7E]/g, '_') // Replace any remaining non-ASCII with underscore
+    .replace(/[^\w\s\-_.]/g, '_') // Replace special chars (except word chars, space, dash, underscore, dot) with underscore
+    .replace(/\s+/g, '_') // Replace spaces with underscores
+    .replace(/\.{2,}/g, '.') // Collapse dot runs re-formed by the removals above
+    .replace(/_{2,}/g, '_') // Collapse multiple underscores
+    .replace(/^[._]+/, ''); // Remove leading dots/underscores
+
+  // Truncate the base name (never the extension) to fit the length limit, then
+  // drop trailing dots/underscores so base + extension cannot contain `..`
+  baseName = baseName
+    .substring(0, maxLength - sanitizedExt.length)
+    .replace(/[._]+$/, '');
+
+  // Ensure we still have a valid base name
+  if (baseName.length === 0) {
+    baseName = 'file';
+  }
+
+  return baseName + sanitizedExt;
+}
+
+// Test cases: [input, expected sanitized output]
+const testCases = [
+  ['Ущелье.png', 'file.png'], // Cyrillic (Russian)
+  ['测试文件.jpg', 'file.jpg'], // Chinese
+  ['test-file.png', 'test-file.png'], // ASCII
+  ['café-français.jpg', 'cafe-francais.jpg'], // French with accents
+  ['🎉party🎊.gif', 'party.gif'], // Emoji
+  ['test_مرحبا.webp', 'test.webp'], // Arabic
+  ['file@#$%.png', 'file.png'], // Special chars
+  ['../../../etc/passwd', 'etcpasswd'], // Path traversal
+  ['...hidden.txt', 'hidden.txt'], // Leading dots
+  ['.<.', 'file'], // Dots that become adjacent after char removal
+  ['a. .txt', 'a.txt'], // Trailing dot in base name
+  ['file.пнг', 'file'], // Extension with no ASCII characters
+  ['a'.repeat(300) + '.png', 'a'.repeat(251) + '.png'], // Over-long name keeps extension
+];
+
+console.log('Filename Sanitization Test Results:\n');
+console.log('='.repeat(80));
+
+let failures = 0;
+
+testCases.forEach(([filename, expected]) => {
+  const sanitized = sanitizeFilename(filename);
+  const passed = sanitized === expected;
+  if (!passed) {
+    failures += 1;
+  }
+  console.log(`Original: ${filename}`);
+  console.log(`Sanitized: ${sanitized}`);
+  console.log(`Expected: ${expected} [${passed ? 'PASS' : 'FAIL'}]`);
+  console.log('-'.repeat(80));
+});
+
+if (failures > 0) {
+  console.error(`${failures} of ${testCases.length} cases failed`);
+  process.exitCode = 1;
+}