banatie-service/test-filename-sanitization.js

73 lines
2.5 KiB
JavaScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Test filename sanitization for Unicode characters
/**
 * Turn an arbitrary (possibly Unicode) filename into a plain-ASCII name.
 *
 * The base name is stripped of path/reserved characters, accents are folded
 * to their base letters, every remaining non-ASCII or special character
 * becomes an underscore, and whitespace becomes underscores. The extension
 * (everything from the last dot) is lower-cased and capped at 10 characters.
 * ASCII-only names are used to prevent S3 signature mismatches with MinIO.
 *
 * Guarantees on the returned value:
 *  - never empty (falls back to "file"),
 *  - never "." or "..", and never contains ".." anywhere,
 *  - at most 255 characters, with the extension preserved when the base
 *    name has to be shortened.
 *
 * @param {string} filename - Untrusted original filename.
 * @returns {string} Sanitized filename.
 * @throws {TypeError} If `filename` is not a string.
 */
function sanitizeFilename(filename) {
  if (typeof filename !== 'string') {
    throw new TypeError('filename must be a string');
  }

  const maxLength = 255;
  const maxExtensionLength = 10;

  // Remove path traversal attempts FIRST from the entire filename.
  const cleaned = filename.replace(/\.\./g, '').trim();

  // Split at the last dot; a dot at index 0 marks a hidden file, not an extension.
  const lastDotIndex = cleaned.lastIndexOf('.');
  const hasExtension = lastDotIndex > 0;
  const rawBase = hasExtension ? cleaned.substring(0, lastDotIndex) : cleaned;
  const rawExtension = hasExtension ? cleaned.substring(lastDotIndex) : '';

  // The extension always starts with its dot here: it is sliced from the last
  // dot and none of the replacements below remove dots.
  let extension = rawExtension
    .replace(/[<>:"/\\|?*\x00-\x1f]/g, '') // Remove dangerous chars
    .replace(/[^\x20-\x7E]/g, '') // Drop anything outside printable ASCII
    .toLowerCase()
    .substring(0, maxExtensionLength);
  if (extension === '.') {
    // Nothing survived after the dot; keeping it would only leave a trailing dot.
    extension = '';
  }

  let baseName = rawBase
    .replace(/[<>:"/\\|?*\x00-\x1f]/g, '') // Remove dangerous chars
    .trim()
    .normalize('NFD') // Decompose combined characters (é -> e + ´)
    .replace(/[\u0300-\u036f]/g, '') // Remove diacritical marks
    .replace(/[^\x20-\x7E]/g, '_') // Replace any remaining non-ASCII with underscore
    .replace(/[^\w\s\-_.]/g, '_') // Keep only word chars, whitespace, dash, underscore, dot
    .replace(/\s+/g, '_') // Replace whitespace runs with underscores
    .replace(/_{2,}/g, '_') // Collapse multiple underscores
    // Removing characters above can make dots adjacent again (e.g. "./."),
    // so collapse them after all removals are done.
    .replace(/\.{2,}/g, '.')
    .replace(/^_+/, '') // Remove leading underscores
    // Shorten the base name, not the final result, so the extension survives.
    .substring(0, maxLength - extension.length)
    // Trailing dots would yield "." / ".." or a ".." at the extension boundary.
    .replace(/[_.]+$/, '');

  // Ensure we still have a valid base name.
  if (baseName.length === 0) {
    baseName = 'file';
  }

  return baseName + extension;
}
// Sample inputs: one per script or hazard the sanitizer has to cope with
const testCases = [
  // Non-Latin scripts
  'Ущелье.png', // Cyrillic (Russian)
  '测试文件.jpg', // Chinese
  // Plain and accented Latin
  'test-file.png', // ASCII
  'café-français.jpg', // French with accents
  // Symbols and mixed content
  '🎉party🎊.gif', // Emoji
  'test_مرحبا.webp', // Arabic
  'file@#$%.png', // Special chars
  // Hostile or unusual dot usage
  '../../../etc/passwd', // Path traversal
  '...hidden.txt', // Leading dots
];

// Print a report header, then one original/sanitized pair per sample.
console.log('Filename Sanitization Test Results:\n');
console.log('='.repeat(80));

for (const original of testCases) {
  // Sanitize before printing anything so a failure emits no partial entry.
  const cleanedName = sanitizeFilename(original);
  console.log(`Original: ${original}`);
  console.log(`Sanitized: ${cleanedName}`);
  console.log('-'.repeat(80));
}