feat: filename sanitization

This commit is contained in:
Oleg Proskurin 2025-10-11 18:11:56 +07:00
parent 15f9dc3526
commit f942480fc8
2 changed files with 121 additions and 7 deletions

View File

@ -59,13 +59,51 @@ export class MinioStorageService implements StorageService {
}
/**
 * Turn an arbitrary (possibly hostile, possibly non-ASCII) filename into a
 * safe ASCII name.
 *
 * Guarantees on the returned value:
 * - contains only `A-Z a-z 0-9 _ - .`;
 * - never starts or ends with a dot and never contains `..`;
 * - is at most 255 characters long, and the extension survives truncation;
 * - is never empty (falls back to `file`).
 *
 * Non-ASCII input is transliterated where NFD decomposition allows it
 * (`é` -> `e`) and replaced with `_` otherwise, which avoids S3 signature
 * mismatches with MinIO.
 *
 * @param filename - Original name as supplied by the client.
 * @returns The sanitized name, e.g. `café-français.JPG` -> `cafe-francais.jpg`.
 */
private sanitizeFilename(filename: string): string {
  const maxLength = 255;
  const maxExtensionLength = 10; // includes the leading dot

  // Remove path traversal attempts FIRST from the entire filename.
  const cleaned = filename.replace(/\.\./g, '').trim();

  // Split on the last dot. A dot at index 0 marks a dotfile, not an extension.
  const lastDotIndex = cleaned.lastIndexOf('.');
  const hasExtension = lastDotIndex > 0;
  const rawBase = hasExtension ? cleaned.substring(0, lastDotIndex) : cleaned;
  const rawExtension = hasExtension ? cleaned.substring(lastDotIndex + 1) : '';

  // The extension is held to the same safe character set as the base name
  // (\w is ASCII-only without the `u` flag). An extension that ends up empty
  // is dropped entirely so the result never ends with a bare dot.
  const extensionBody = rawExtension
    .replace(/[^\w-]/g, '')
    .toLowerCase()
    .substring(0, maxExtensionLength - 1);
  const sanitizedExt = extensionBody.length > 0 ? '.' + extensionBody : '';

  let baseName = rawBase
    .replace(/[<>:"/\\|?*\x00-\x1f]/g, '') // Remove dangerous chars
    .trim()
    .normalize('NFD') // Decompose combined characters (é -> e + ´)
    .replace(/[\u0300-\u036f]/g, '') // Remove diacritical marks
    .replace(/[^\x20-\x7E]/g, '_') // Replace any remaining non-ASCII
    .replace(/[^\w\s\-_.]/g, '_') // Replace remaining special chars
    .replace(/\s+/g, '_') // Replace whitespace runs
    .replace(/_{2,}/g, '_') // Collapse multiple underscores
    // Deleting characters above can glue dots back together ('.<.' -> '..'),
    // so dot runs are collapsed and edge dots stripped AFTER all removals.
    .replace(/\.{2,}/g, '.')
    .replace(/^[._]+|[._]+$/g, '');

  // Ensure we still have a valid base name.
  if (baseName.length === 0) {
    baseName = 'file';
  }

  // Truncate the base name only, so the extension is always preserved, then
  // re-strip separators the cut may have exposed at the end.
  baseName = baseName
    .substring(0, maxLength - sanitizedExt.length)
    .replace(/[._]+$/, '');

  return baseName + sanitizedExt;
}
private validateFilePath(
@ -150,9 +188,13 @@ export class MinioStorageService implements StorageService {
const uniqueFilename = this.generateUniqueFilename(filename);
const filePath = this.getFilePath(orgId, projectId, category, uniqueFilename);
// Encode original filename to Base64 to safely store non-ASCII characters in metadata
const originalNameEncoded = Buffer.from(filename, 'utf-8').toString('base64');
const metadata = {
'Content-Type': contentType,
'X-Amz-Meta-Original-Name': filename,
'X-Amz-Meta-Original-Name': originalNameEncoded,
'X-Amz-Meta-Original-Name-Encoding': 'base64',
'X-Amz-Meta-Category': category,
'X-Amz-Meta-Project': projectId,
'X-Amz-Meta-Organization': orgId,

View File

@ -0,0 +1,72 @@
// Test filename sanitization for Unicode characters
/**
 * Turn an arbitrary (possibly hostile, possibly non-ASCII) filename into a
 * safe ASCII name.
 *
 * Guarantees on the returned value:
 * - contains only `A-Z a-z 0-9 _ - .`;
 * - never starts or ends with a dot and never contains `..`;
 * - is at most 255 characters long, and the extension survives truncation;
 * - is never empty (falls back to `file`).
 *
 * @param filename Original name as supplied by the client.
 * @returns The sanitized name, e.g. `café-français.JPG` -> `cafe-francais.jpg`.
 */
function sanitizeFilename(filename) {
  const maxLength = 255;
  const maxExtensionLength = 10; // includes the leading dot

  // Remove path traversal attempts FIRST from the entire filename.
  const cleaned = filename.replace(/\.\./g, '').trim();

  // Split on the last dot. A dot at index 0 marks a dotfile, not an extension.
  const lastDotIndex = cleaned.lastIndexOf('.');
  const hasExtension = lastDotIndex > 0;
  const rawBase = hasExtension ? cleaned.substring(0, lastDotIndex) : cleaned;
  const rawExtension = hasExtension ? cleaned.substring(lastDotIndex + 1) : '';

  // The extension is held to the same safe character set as the base name
  // (\w is ASCII-only without the `u` flag). An extension that ends up empty
  // is dropped entirely so the result never ends with a bare dot.
  const extensionBody = rawExtension
    .replace(/[^\w-]/g, '')
    .toLowerCase()
    .substring(0, maxExtensionLength - 1);
  const sanitizedExt = extensionBody.length > 0 ? '.' + extensionBody : '';

  let baseName = rawBase
    .replace(/[<>:"/\\|?*\x00-\x1f]/g, '') // Remove dangerous chars
    .trim()
    .normalize('NFD') // Decompose combined characters (é -> e + ´)
    .replace(/[\u0300-\u036f]/g, '') // Remove diacritical marks
    .replace(/[^\x20-\x7E]/g, '_') // Replace any remaining non-ASCII
    .replace(/[^\w\s\-_.]/g, '_') // Replace remaining special chars
    .replace(/\s+/g, '_') // Replace whitespace runs
    .replace(/_{2,}/g, '_') // Collapse multiple underscores
    // Deleting characters above can glue dots back together ('.<.' -> '..'),
    // so dot runs are collapsed and edge dots stripped AFTER all removals.
    .replace(/\.{2,}/g, '.')
    .replace(/^[._]+|[._]+$/g, '');

  // Ensure we still have a valid base name.
  if (baseName.length === 0) {
    baseName = 'file';
  }

  // Truncate the base name only, so the extension is always preserved, then
  // re-strip separators the cut may have exposed at the end.
  baseName = baseName
    .substring(0, maxLength - sanitizedExt.length)
    .replace(/[._]+$/, '');

  return baseName + sanitizedExt;
}
// Sample inputs: one per script / hazard class the sanitizer must handle.
const testCases = [
  'Ущелье.png', // Russian (Cyrillic script)
  '测试文件.jpg', // Chinese
  'test-file.png', // plain ASCII, should pass through unchanged
  'café-français.jpg', // French, accented Latin letters
  '🎉party🎊.gif', // emoji around an ASCII word
  'test_مرحبا.webp', // Arabic after an ASCII prefix
  'file@#$%.png', // ASCII punctuation
  '../../../etc/passwd', // path traversal attempt
  '...hidden.txt', // leading dots
];

const heavyRule = '='.repeat(80);
const lightRule = '-'.repeat(80);

// Print every input next to its sanitized form, separated by a rule.
console.log('Filename Sanitization Test Results:\n');
console.log(heavyRule);
for (const original of testCases) {
  console.log(`Original: ${original}`);
  console.log(`Sanitized: ${sanitizeFilename(original)}`);
  console.log(lightRule);
}