openserve 2.0.3

A modern, high-performance, AI-enhanced file server built in Rust
Documentation
//! File Utilities
//!
//! This module provides advanced file handling utilities that enhance
//! the basic file operations with better error handling, validation,
//! and safety features.

use std::path::{Path, PathBuf};
use anyhow::Result;
use tokio::fs;
use std::os::unix::fs::PermissionsExt;

/// Namespace for the file-handling helpers defined in this module.
///
/// `FileUtils` is a unit struct with no fields and is never instantiated by
/// the code in this file: every item in its `impl` block is an associated
/// function without a `self` receiver, invoked as `FileUtils::name(...)`.
/// The helpers cover traversal-safe path joining, metadata lookup, file
/// copying with progress reporting, directory sizing, and duplicate detection.
pub struct FileUtils;

impl FileUtils {
    /// Lexically joins `path` onto `base`, rejecting any result that leaves `base`.
    ///
    /// A leading root (`/`) and a leading `./` are dropped from `path` first, so
    /// an absolute request such as `/etc/passwd` is re-rooted underneath `base`
    /// instead of replacing it. The remaining components are then applied one
    /// at a time: ordinary names are appended, `.` is skipped, and `..` removes
    /// the last component as long as the result is still located under `base`.
    ///
    /// The check works on path components only. No filesystem access happens
    /// here, so symbolic links are not resolved by this function.
    ///
    /// # Arguments
    ///
    /// * `base` - Directory the result must stay inside.
    /// * `path` - Requested path, interpreted relative to `base`.
    ///
    /// # Errors
    ///
    /// * `"Path traversal attempt detected"` - a `..` component would step
    ///   above `base`.
    /// * `"Invalid path component"` - a root or prefix component remains after
    ///   the leading ones were stripped.
    /// * `"Path outside of allowed directory"` - the finished path does not
    ///   start with `base`.
    pub fn safe_join<P: AsRef<Path>>(base: P, path: P) -> Result<PathBuf> {
        use std::path::Component;

        let root = base.as_ref();
        let requested = path.as_ref();

        // Treat absolute and `./`-prefixed requests as relative to `root`.
        let relative = requested.strip_prefix("/").unwrap_or(requested);
        let relative = relative.strip_prefix("./").unwrap_or(relative);

        let mut joined = root.to_path_buf();
        for part in relative.components() {
            match part {
                Component::CurDir => {
                    // `.` never changes the location.
                }
                Component::Normal(segment) => joined.push(segment),
                Component::ParentDir => {
                    // Stepping up is fine only while we remain under `root`.
                    let still_inside = joined.pop() && joined.starts_with(root);
                    if !still_inside {
                        return Err(anyhow::anyhow!("Path traversal attempt detected"));
                    }
                }
                Component::Prefix(_) | Component::RootDir => {
                    return Err(anyhow::anyhow!("Invalid path component"));
                }
            }
        }

        // Final containment check on the fully assembled path.
        if joined.starts_with(root) {
            Ok(joined)
        } else {
            Err(anyhow::anyhow!("Path outside of allowed directory"))
        }
    }
    
    /// Collects filesystem metadata for `path` plus a few derived attributes.
    ///
    /// On top of what `tokio::fs::metadata` reports (size, timestamps, file or
    /// directory), the result carries a MIME type guessed from the path
    /// (falling back to `application/octet-stream`), the verdict of
    /// `crate::utils::is_text_file`, and the permission bits as an octal string.
    ///
    /// # Arguments
    ///
    /// * `path` - The file or directory to inspect.
    ///
    /// # Returns
    ///
    /// An `EnhancedFileMetadata` describing `path`. `created` is `None` when
    /// the creation time cannot be obtained. `permissions` contains only the
    /// permission bits (for example `"644"` or `"755"`), never the file-type
    /// bits of the raw mode.
    ///
    /// # Errors
    ///
    /// Fails if the metadata cannot be read (missing path, insufficient
    /// rights, ...) or if the modification time is unavailable.
    pub async fn get_enhanced_metadata(path: &Path) -> Result<EnhancedFileMetadata> {
        // Keep only the permission, setuid/setgid and sticky bits of the mode.
        const PERMISSION_MASK: u32 = 0o7777;

        let metadata = fs::metadata(path).await?;
        let is_text = crate::utils::is_text_file(path);
        let mime_type = mime_guess::from_path(path).first_or_octet_stream().to_string();

        // `mode()` on metadata-derived permissions is the raw `st_mode`, whose
        // upper bits encode the file type (a regular file shows up as 0o100644).
        // Mask them off so the string really is "permissions in octal".
        let permission_bits = metadata.permissions().mode() & PERMISSION_MASK;

        Ok(EnhancedFileMetadata {
            size: metadata.len(),
            modified: metadata.modified()?.into(),
            created: metadata.created().ok().map(Into::into),
            is_file: metadata.is_file(),
            is_dir: metadata.is_dir(),
            is_text,
            mime_type,
            permissions: format!("{:o}", permission_bits),
        })
    }
    
    /// Streams the contents of `from` into `to`, reporting progress per chunk.
    ///
    /// The source is opened first, then the destination is created with
    /// `File::create`. Data moves through a single 64 KiB buffer; after every
    /// chunk has been written, `progress_callback` receives the cumulative
    /// number of bytes written so far and the source length as measured
    /// before copying began. For an empty source the callback is never
    /// invoked. The destination is flushed before the function returns.
    ///
    /// # Arguments
    ///
    /// * `from` - Path of the file to read.
    /// * `to` - Path of the file to write.
    /// * `progress_callback` - Called as `(copied_bytes, total_bytes)`.
    ///
    /// # Errors
    ///
    /// Propagates any I/O error from opening, creating, reading, writing or
    /// flushing. A partially written destination is left in place.
    pub async fn copy_with_progress<F>(
        from: &Path,
        to: &Path,
        progress_callback: F,
    ) -> Result<()>
    where
        F: Fn(u64, u64) + Send + 'static,
    {
        use tokio::io::{AsyncReadExt, AsyncWriteExt};

        const CHUNK_SIZE: usize = 64 * 1024;

        let mut reader = fs::File::open(from).await?;
        let mut writer = fs::File::create(to).await?;
        let expected_len = reader.metadata().await?.len();

        let mut chunk = vec![0u8; CHUNK_SIZE];
        let mut written = 0u64;

        loop {
            match reader.read(&mut chunk).await? {
                // A zero-length read signals end of file.
                0 => break,
                filled => {
                    writer.write_all(&chunk[..filled]).await?;
                    written += filled as u64;
                    progress_callback(written, expected_len);
                }
            }
        }

        writer.flush().await?;
        Ok(())
    }
    
    /// Calculates the total size of a directory recursively.
    ///
    /// This method traverses a directory tree and sums up the sizes of all
    /// files contained within it, including subdirectories.
    ///
    /// # Arguments
    ///
    /// * `path` - The directory path to calculate the size for.
    ///
    /// # Returns
    ///
    /// A `Future` that resolves to a `Result` containing the total size in bytes.
    pub fn calculate_dir_size(path: &Path) -> std::pin::Pin<Box<dyn std::future::Future<Output = Result<u64>> + Send + '_>> {
        Box::pin(async move {
            let mut total_size = 0u64;
            let mut dir = fs::read_dir(path).await?;
            
            while let Some(entry) = dir.next_entry().await? {
                let metadata = entry.metadata().await?;
                if metadata.is_file() {
                    total_size += metadata.len();
                } else if metadata.is_dir() {
                    total_size += Self::calculate_dir_size(&entry.path()).await?;
                }
            }
            
            Ok(total_size)
        })
    }
    
    /// Finds duplicate files by comparing their content hashes.
    ///
    /// This method analyzes a list of file paths and groups files that have
    /// identical content based on SHA-256 hash comparison.
    ///
    /// # Arguments
    ///
    /// * `paths` - A slice of file paths to analyze for duplicates.
    ///
    /// # Returns
    ///
    /// A `Result` containing a vector of groups, where each group contains
    /// paths to files with identical content.
    pub async fn find_duplicates(paths: &[PathBuf]) -> Result<Vec<Vec<PathBuf>>> {
        use std::collections::HashMap;
        use sha2::{Sha256, Digest};
        
        let mut hash_map: HashMap<String, Vec<PathBuf>> = HashMap::new();
        
        for path in paths {
            if path.is_file() {
                let content = fs::read(path).await?;
                let mut hasher = Sha256::new();
                hasher.update(&content);
                let hash = format!("{:x}", hasher.finalize());
                
                hash_map.entry(hash).or_default().push(path.clone());
            }
        }
        
        Ok(hash_map.into_values().filter(|v| v.len() > 1).collect())
    }
}

/// Filesystem metadata for one path, plus derived attributes.
///
/// Values of this type are produced by `FileUtils::get_enhanced_metadata`,
/// which combines the result of `tokio::fs::metadata` with a MIME guess and
/// a text/binary classification.
#[derive(Debug, Clone)]
pub struct EnhancedFileMetadata {
    /// Size in bytes, as reported by the filesystem metadata.
    pub size: u64,
    /// Last modification time, converted to UTC.
    pub modified: chrono::DateTime<chrono::Utc>,
    /// Creation time converted to UTC, or `None` when it could not be obtained.
    pub created: Option<chrono::DateTime<chrono::Utc>>,
    /// `true` when the metadata describes a regular file.
    pub is_file: bool,
    /// `true` when the metadata describes a directory.
    pub is_dir: bool,
    /// Result of `crate::utils::is_text_file` for the path; the exact
    /// heuristic lives in that helper.
    pub is_text: bool,
    /// MIME type guessed from the path; `application/octet-stream` when no
    /// guess is available.
    pub mime_type: String,
    /// Unix mode rendered as octal digits, without a `0o` or `0` prefix.
    pub permissions: String,
}

#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// `safe_join` keeps every accepted path under the base directory and
    /// rejects requests that would climb out of it.
    #[test]
    fn test_safe_join() {
        let temp_dir = TempDir::new().unwrap();
        let base = temp_dir.path();

        // A plain relative name lands directly under the base.
        let joined = FileUtils::safe_join(base, Path::new("test.txt")).unwrap();
        assert!(joined.starts_with(base));
        assert_eq!(joined, base.join("test.txt"));

        // A leading `./` is ignored.
        let joined = FileUtils::safe_join(base, Path::new("./test.txt")).unwrap();
        assert_eq!(joined, base.join("test.txt"));

        // An absolute request is re-rooted under the base instead of escaping.
        let joined = FileUtils::safe_join(base, Path::new("/etc/passwd")).unwrap();
        assert_eq!(joined, base.join("etc/passwd"));

        // `..` that stays inside the base is resolved lexically.
        let joined = FileUtils::safe_join(base, Path::new("docs/../test.txt")).unwrap();
        assert_eq!(joined, base.join("test.txt"));

        // Anything that would climb above the base is rejected.
        assert!(FileUtils::safe_join(base, Path::new("../../../etc/passwd")).is_err());
        assert!(FileUtils::safe_join(base, Path::new("docs/../../secret")).is_err());
        assert!(FileUtils::safe_join(base, Path::new("..")).is_err());
    }

    /// Metadata of a freshly written text file reports the expected type,
    /// size and classification.
    #[tokio::test]
    async fn test_enhanced_metadata() {
        let temp_dir = TempDir::new().unwrap();
        let file_path = temp_dir.path().join("test.txt");
        fs::write(&file_path, "test content").await.unwrap();

        let metadata = FileUtils::get_enhanced_metadata(&file_path).await.unwrap();
        assert!(metadata.is_file);
        assert!(!metadata.is_dir);
        assert!(metadata.is_text);
        assert_eq!(metadata.size, 12);
    }
}