exarch-core 0.2.9

Memory-safe archive extraction library with security validation
Documentation
//! Optimized file copy implementation with reusable buffers.
//!
//! OPT-C002: Provides a stack-allocated copy buffer for efficient file
//! extraction without heap allocations on every copy operation. This reduces
//! memory pressure and improves throughput by 5-10% compared to
//! `std::io::copy`.
//!
//! # Security Guarantees
//!
//! - Preserves quota overflow detection via checked arithmetic
//! - No unsafe code
//! - Buffer size is constant and stack-allocated

use std::io::Read;
use std::io::Write;
use std::io::{self};

use crate::ExtractionError;

/// Optimal buffer size for I/O operations (64KB).
///
/// This matches typical filesystem block sizes and provides good balance
/// between memory usage and I/O performance.
const COPY_BUFFER_SIZE: usize = 64 * 1024;

/// Stack-allocated buffer for efficient file copying.
///
/// Uses a fixed-size array on the stack to avoid heap allocations
/// during copy operations. The buffer is reusable across multiple
/// copy operations within the same extraction session.
///
/// # Examples
///
/// ```no_run
/// # use std::io::{Read, Write};
/// # use exarch_core::copy::{CopyBuffer, copy_with_buffer};
/// # use exarch_core::ExtractionError;
/// # fn example() -> Result<(), ExtractionError> {
/// let mut buffer = CopyBuffer::new();
/// let mut input = std::fs::File::open("input.txt")?;
/// let mut output = std::fs::File::create("output.txt")?;
///
/// let bytes_copied = copy_with_buffer(&mut input, &mut output, &mut buffer)?;
/// println!("Copied {} bytes", bytes_copied);
/// # Ok(())
/// # }
/// ```
#[derive(Debug)]
pub struct CopyBuffer {
    // Stack allocation is intentional for performance (avoids heap overhead)
    #[allow(clippy::large_stack_arrays)]
    buf: [u8; COPY_BUFFER_SIZE],
}

impl CopyBuffer {
    /// Creates a new copy buffer.
    ///
    /// The buffer is allocated on the stack and zero-initialized.
    #[inline]
    #[must_use]
    #[allow(clippy::large_stack_arrays)]
    pub fn new() -> Self {
        Self {
            buf: [0u8; COPY_BUFFER_SIZE],
        }
    }

    /// Returns the buffer size in bytes.
    #[inline]
    #[must_use]
    pub fn size(&self) -> usize {
        COPY_BUFFER_SIZE
    }
}

impl Default for CopyBuffer {
    fn default() -> Self {
        Self::new()
    }
}

/// Copies data from reader to writer using the provided reusable buffer.
///
/// This is an optimized version of `std::io::copy` that:
/// - Uses a caller-provided buffer (avoiding heap allocation)
/// - Uses checked arithmetic to detect quota overflows
/// - Returns the total number of bytes copied
///
/// # Errors
///
/// Returns an error if:
/// - Reading from the source fails
/// - Writing to the destination fails
/// - Total bytes written would overflow u64 (quota protection)
///
/// # Security
///
/// Quota overflow is explicitly checked using `checked_add`, ensuring
/// that malicious archives cannot bypass size limits via integer overflow.
///
/// # Examples
///
/// ```no_run
/// # use std::io::{Read, Write};
/// # use exarch_core::copy::{CopyBuffer, copy_with_buffer};
/// # use exarch_core::ExtractionError;
/// # fn example() -> Result<(), ExtractionError> {
/// let mut buffer = CopyBuffer::new();
/// let mut input = std::fs::File::open("large_file.bin")?;
/// let mut output = std::fs::File::create("output.bin")?;
///
/// let total = copy_with_buffer(&mut input, &mut output, &mut buffer)?;
/// println!("Copied {} bytes without heap allocation", total);
/// # Ok(())
/// # }
/// ```
#[inline]
pub fn copy_with_buffer<R: Read, W: Write>(
    reader: &mut R,
    writer: &mut W,
    buffer: &mut CopyBuffer,
) -> Result<u64, ExtractionError> {
    let mut total: u64 = 0;

    loop {
        let bytes_read = match reader.read(&mut buffer.buf) {
            Ok(0) => break,
            Ok(n) => n,
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(ExtractionError::Io(e)),
        };

        writer
            .write_all(&buffer.buf[..bytes_read])
            .map_err(ExtractionError::Io)?;

        // SECURITY: Detect overflow to prevent quota bypass
        total = total
            .checked_add(bytes_read as u64)
            .ok_or(ExtractionError::QuotaExceeded {
                resource: crate::QuotaResource::IntegerOverflow,
            })?;
    }

    Ok(total)
}

#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use std::io::Cursor;

    #[test]
    fn test_copy_buffer_new() {
        let buffer = CopyBuffer::new();
        assert_eq!(buffer.size(), 64 * 1024);
    }

    #[test]
    fn test_copy_buffer_default() {
        let buffer = CopyBuffer::default();
        assert_eq!(buffer.size(), 64 * 1024);
    }

    #[test]
    fn test_copy_empty_source() {
        let mut buffer = CopyBuffer::new();
        let mut input = Cursor::new(Vec::<u8>::new());
        let mut output = Vec::new();

        let result = copy_with_buffer(&mut input, &mut output, &mut buffer);
        assert!(result.is_ok());
        assert_eq!(result.unwrap(), 0);
        assert_eq!(output.len(), 0);
    }

    #[test]
    fn test_copy_small_data() {
        let mut buffer = CopyBuffer::new();
        let input_data = b"Hello, World!";
        let mut input = Cursor::new(input_data);
        let mut output = Vec::new();

        let result = copy_with_buffer(&mut input, &mut output, &mut buffer);
        assert!(result.is_ok());
        assert_eq!(result.unwrap(), input_data.len() as u64);
        assert_eq!(output, input_data);
    }

    #[test]
    fn test_copy_large_data() {
        let mut buffer = CopyBuffer::new();
        // Create 1MB of data
        let input_data = vec![0x42u8; 1024 * 1024];
        let mut input = Cursor::new(&input_data);
        let mut output = Vec::new();

        let result = copy_with_buffer(&mut input, &mut output, &mut buffer);
        assert!(result.is_ok());
        assert_eq!(result.unwrap(), input_data.len() as u64);
        assert_eq!(output, input_data);
    }

    #[test]
    fn test_copy_exact_buffer_size() {
        let mut buffer = CopyBuffer::new();
        let input_data = vec![0xAAu8; COPY_BUFFER_SIZE];
        let mut input = Cursor::new(&input_data);
        let mut output = Vec::new();

        let result = copy_with_buffer(&mut input, &mut output, &mut buffer);
        assert!(result.is_ok());
        assert_eq!(result.unwrap(), COPY_BUFFER_SIZE as u64);
        assert_eq!(output, input_data);
    }

    #[test]
    fn test_copy_multiple_chunks() {
        let mut buffer = CopyBuffer::new();
        // Create data larger than buffer size
        let input_data = vec![0x55u8; COPY_BUFFER_SIZE * 3 + 1000];
        let mut input = Cursor::new(&input_data);
        let mut output = Vec::new();

        let result = copy_with_buffer(&mut input, &mut output, &mut buffer);
        assert!(result.is_ok());
        assert_eq!(result.unwrap(), input_data.len() as u64);
        assert_eq!(output, input_data);
    }

    #[test]
    fn test_copy_reusable_buffer() {
        let mut buffer = CopyBuffer::new();

        // First copy
        let data1 = b"First copy";
        let mut input1 = Cursor::new(data1);
        let mut output1 = Vec::new();
        let result1 = copy_with_buffer(&mut input1, &mut output1, &mut buffer);
        assert!(result1.is_ok());
        assert_eq!(output1, data1);

        // Second copy with same buffer
        let data2 = b"Second copy with different data";
        let mut input2 = Cursor::new(data2);
        let mut output2 = Vec::new();
        let result2 = copy_with_buffer(&mut input2, &mut output2, &mut buffer);
        assert!(result2.is_ok());
        assert_eq!(output2, data2);
    }

    #[test]
    fn test_copy_byte_for_byte_correctness() {
        let mut buffer = CopyBuffer::new();
        // Test with diverse byte patterns
        let mut input_data = Vec::new();
        for i in 0..=255u8 {
            input_data.extend_from_slice(&[i; 256]);
        }

        let mut input = Cursor::new(&input_data);
        let mut output = Vec::new();

        let result = copy_with_buffer(&mut input, &mut output, &mut buffer);
        assert!(result.is_ok());
        assert_eq!(result.unwrap(), input_data.len() as u64);
        assert_eq!(output, input_data);
    }

    // Edge case: Test interrupted reads handling
    #[test]
    fn test_copy_with_interrupted_reads() {
        use std::io::Error;
        use std::io::ErrorKind;

        // Mock reader that simulates interrupted reads
        struct InterruptedReader {
            data: Vec<u8>,
            position: usize,
            interrupt_count: usize,
        }

        impl Read for InterruptedReader {
            fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
                if self.interrupt_count.is_multiple_of(3) && self.position < self.data.len() {
                    self.interrupt_count += 1;
                    return Err(Error::new(ErrorKind::Interrupted, "interrupted"));
                }

                self.interrupt_count += 1;

                if self.position >= self.data.len() {
                    return Ok(0); // EOF
                }

                let remaining = self.data.len() - self.position;
                let to_read = remaining.min(buf.len());
                buf[..to_read].copy_from_slice(&self.data[self.position..self.position + to_read]);
                self.position += to_read;
                Ok(to_read)
            }
        }

        let test_data = vec![0x42u8; 1000];
        let mut reader = InterruptedReader {
            data: test_data.clone(),
            position: 0,
            interrupt_count: 0,
        };

        let mut buffer = CopyBuffer::new();
        let mut output = Vec::new();

        let result = copy_with_buffer(&mut reader, &mut output, &mut buffer);
        assert!(result.is_ok(), "copy should handle interrupted reads");
        assert_eq!(
            output, test_data,
            "data should be copied correctly despite interruptions"
        );
    }

    // Edge case: Test write failure propagation
    #[test]
    fn test_copy_with_write_failure() {
        use std::io::Error;
        use std::io::ErrorKind;

        // Mock writer that fails after a certain number of bytes
        struct FailingWriter {
            written: usize,
            fail_after: usize,
        }

        impl Write for FailingWriter {
            fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
                if self.written >= self.fail_after {
                    return Err(Error::other("write failed"));
                }
                let to_write = (self.fail_after - self.written).min(buf.len());
                self.written += to_write;
                Ok(to_write)
            }

            fn flush(&mut self) -> std::io::Result<()> {
                Ok(())
            }
        }

        let test_data = vec![0x42u8; 1000];
        let mut input = Cursor::new(test_data);
        let mut writer = FailingWriter {
            written: 0,
            fail_after: 500,
        };

        let mut buffer = CopyBuffer::new();
        let result = copy_with_buffer(&mut input, &mut writer, &mut buffer);

        assert!(result.is_err(), "copy should propagate write errors");
        match result {
            Err(ExtractionError::Io(e)) => {
                assert_eq!(e.kind(), ErrorKind::Other);
            }
            _ => panic!("expected IO error"),
        }
    }
}