// Test filename sanitization for Unicode characters

/** Upper bound on the length of the returned file name. */
const MAX_FILENAME_LENGTH = 255;

/** Upper bound on the length of the extension, including its leading dot. */
const MAX_EXTENSION_LENGTH = 10;

/** Characters that are reserved on common file systems, plus ASCII control characters. */
const UNSAFE_CHARS = /[<>:"/\\|?*\x00-\x1f]/g;

/** Anything outside the printable ASCII range (space through tilde). */
const NON_PRINTABLE_ASCII = /[^\x20-\x7E]/g;

/**
 * Produce an ASCII-only file name from arbitrary user input.
 *
 * The base name and the extension (everything from the last dot onward) are
 * cleaned separately:
 *   - ".." sequences and reserved/control characters are removed,
 *   - accented Latin letters are folded to their unaccented form,
 *   - every other non-ASCII or special character in the base name becomes "_",
 *   - the extension is lower-cased and capped at MAX_EXTENSION_LENGTH.
 *
 * The original author's note says the ASCII restriction exists to prevent
 * S3 signature mismatches with MinIO.
 *
 * @param {string} filename - Raw file name, possibly containing path segments.
 * @returns {string} Sanitized name, at most MAX_FILENAME_LENGTH characters;
 *   "file" is used as the base name when nothing usable is left.
 * @throws {TypeError} If `filename` is not a string.
 */
function sanitizeFilename(filename) {
  if (typeof filename !== 'string') {
    throw new TypeError(
      `sanitizeFilename expects a string, received ${typeof filename}`,
    );
  }

  // Remove path traversal attempts FIRST, from the entire file name.
  const cleaned = filename.replace(/\.\./g, '').trim();

  // Split on the last dot. A dot at index 0 (e.g. ".bashrc") is treated as
  // part of the base name rather than as an extension separator.
  const lastDotIndex = cleaned.lastIndexOf('.');
  const hasExtension = lastDotIndex > 0;
  const rawBase = hasExtension ? cleaned.slice(0, lastDotIndex) : cleaned;
  const rawExtension = hasExtension ? cleaned.slice(lastDotIndex) : '';

  let baseName = rawBase
    .replace(UNSAFE_CHARS, '')
    .trim()
    .normalize('NFD') // Decompose combined characters (é -> e + ´)
    .replace(/[\u0300-\u036f]/g, '') // Remove diacritical marks
    .replace(NON_PRINTABLE_ASCII, '_') // Remaining non-ASCII -> underscore
    .replace(/[^\w\s\-_.]/g, '_') // Keep only word chars, whitespace, dash, underscore, dot
    .replace(/\s+/g, '_') // Whitespace runs -> single underscore
    // Stripping characters above can bring two dots together again
    // ("a./.b" -> "a..b"), so collapse any dot run that re-formed.
    .replace(/\.{2,}/g, '.')
    .replace(/_{2,}/g, '_') // Collapse multiple underscores
    .replace(/^_+|_+$/g, ''); // Remove leading/trailing underscores

  // Ensure we still have a valid base name.
  if (baseName.length === 0) {
    baseName = 'file';
  }

  // The extension only has unsafe and non-ASCII characters removed; its
  // leading dot always survives because neither pattern matches ".".
  let sanitizedExt = rawExtension
    .replace(UNSAFE_CHARS, '')
    .replace(NON_PRINTABLE_ASCII, '')
    .toLowerCase()
    .slice(0, MAX_EXTENSION_LENGTH);

  // An extension that was entirely non-ASCII is reduced to a bare ".";
  // drop it instead of emitting a name with a trailing dot.
  if (sanitizedExt === '.') {
    sanitizedExt = '';
  }

  // Shorten the base name, not the combined string, so that an over-long
  // name keeps its extension.
  const maxBaseLength = MAX_FILENAME_LENGTH - sanitizedExt.length;
  return baseName.slice(0, maxBaseLength) + sanitizedExt;
}

// Test cases
const testCases = [
  'Ущелье.png', // Cyrillic (Russian)
  '测试文件.jpg', // Chinese
  'test-file.png', // ASCII
  'café-français.jpg', // French with accents
  '🎉party🎊.gif', // Emoji
  'test_مرحبا.webp', // Arabic
  'file@#$%.png', // Special chars
  '../../../etc/passwd', // Path traversal
  '...hidden.txt', // Leading dots
];

console.log('Filename Sanitization Test Results:\n');
console.log('='.repeat(80));

for (const filename of testCases) {
  const sanitized = sanitizeFilename(filename);
  console.log(`Original: ${filename}`);
  console.log(`Sanitized: ${sanitized}`);
  console.log('-'.repeat(80));
}