kelora 0.4.0

A command-line log analysis tool with embedded Rhai scripting
use anyhow::{anyhow, Result};
use flate2::read::MultiGzDecoder;
use std::fs::File;
use std::io::{BufRead, BufReader, Chain, Cursor, Read};
use std::path::Path;

/// Streaming decompression wrapper that implements BufRead
/// Detects gzip compression using magic bytes (1F 8B 08) for all inputs
#[derive(Debug)]
pub enum DecompressionReader {
    /// Gzip decompression
    Gzip(BufReader<MultiGzDecoder<Chain<Cursor<Vec<u8>>, File>>>),
    /// Passthrough for non-compressed files
    Plain(BufReader<Chain<Cursor<Vec<u8>>, File>>),
}

// Manually implement Send since all variants contain Send types
unsafe impl Send for DecompressionReader {}

impl BufRead for DecompressionReader {
    fn fill_buf(&mut self) -> std::io::Result<&[u8]> {
        match self {
            DecompressionReader::Gzip(reader) => reader.fill_buf(),
            DecompressionReader::Plain(reader) => reader.fill_buf(),
        }
    }

    fn consume(&mut self, amt: usize) {
        match self {
            DecompressionReader::Gzip(reader) => reader.consume(amt),
            DecompressionReader::Plain(reader) => reader.consume(amt),
        }
    }
}

impl Read for DecompressionReader {
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        match self {
            DecompressionReader::Gzip(reader) => reader.read(buf),
            DecompressionReader::Plain(reader) => reader.read(buf),
        }
    }
}

/// Detect gzip by magic bytes and return appropriate reader
/// Reads first 3 bytes to check for gzip magic signature: 1F 8B 08
fn maybe_gzip_file(mut file: File) -> std::io::Result<DecompressionReader> {
    let mut head = [0u8; 3];
    let n = file.read(&mut head)?;

    // Put the read bytes back in front using a cursor chain
    let prefix = Cursor::new(head[..n].to_vec());
    let chained = prefix.chain(file);

    let is_gzip = n >= 3 && head[0] == 0x1F && head[1] == 0x8B && head[2] == 0x08;

    if is_gzip {
        let decoder = MultiGzDecoder::new(chained);
        Ok(DecompressionReader::Gzip(BufReader::new(decoder)))
    } else {
        // For non-gzip files, use the chain directly as the source
        Ok(DecompressionReader::Plain(BufReader::new(chained)))
    }
}

/// Generic magic bytes detection for any Read type
/// Returns Box<dyn Read + Send> that's either MultiGzDecoder or passthrough
pub fn maybe_gzip<R: Read + Send + 'static>(
    mut reader: R,
) -> std::io::Result<Box<dyn Read + Send>> {
    let mut head = [0u8; 3];
    let n = reader.read(&mut head)?;

    // Put the read bytes back in front using a cursor chain
    let prefix = Cursor::new(head[..n].to_vec());
    let chained: Chain<Cursor<Vec<u8>>, R> = prefix.chain(reader);

    let is_gzip = n >= 3 && head[0] == 0x1F && head[1] == 0x8B && head[2] == 0x08;

    if is_gzip {
        Ok(Box::new(MultiGzDecoder::new(chained)))
    } else {
        Ok(Box::new(chained))
    }
}

impl DecompressionReader {
    /// Create a new decompression reader with auto-detection based on magic bytes
    pub fn new<P: AsRef<Path>>(path: P) -> Result<Self> {
        let path_ref = path.as_ref();
        let file = File::open(path_ref)?;

        // Check file extension for known unsupported formats
        if let Some(extension) = path_ref.extension().and_then(|ext| ext.to_str()) {
            if extension.to_lowercase() == "zip" {
                return Err(anyhow!("ZIP file decompression is not supported. Only gzip files are supported for streaming decompression. Extract the ZIP file first: unzip {}", path_ref.display()));
            }
        }

        // Use magic bytes detection for all files
        maybe_gzip_file(file).map_err(|e| anyhow!("Failed to detect compression format: {}", e))
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::io::{Read, Write};
    use tempfile::NamedTempFile;

    #[test]
    fn test_plain_file_passthrough() -> Result<()> {
        let mut temp_file = NamedTempFile::new()?;
        writeln!(temp_file, "test line 1")?;
        writeln!(temp_file, "test line 2")?;
        temp_file.flush()?;

        let mut reader = DecompressionReader::new(temp_file.path())?;
        let mut content = String::new();
        reader.read_to_string(&mut content)?;

        assert!(content.contains("test line 1"));
        assert!(content.contains("test line 2"));
        Ok(())
    }

    #[test]
    fn test_zip_file_rejection() {
        // Create a temporary file with .zip extension
        let temp_file = NamedTempFile::new().unwrap();
        let temp_path = temp_file.path();

        // Create a new path with .zip extension
        let zip_path = temp_path.with_extension("zip");

        // Create an empty file at the zip path for testing
        std::fs::write(&zip_path, b"fake zip content").unwrap();

        let result = DecompressionReader::new(&zip_path);
        assert!(result.is_err());
        let error_msg = result.unwrap_err().to_string();
        assert!(error_msg.contains("ZIP file decompression is not supported"));
        assert!(error_msg.contains("Only gzip files are supported"));

        // Clean up
        let _ = std::fs::remove_file(&zip_path);
    }

    #[test]
    fn test_magic_bytes_detection() -> Result<()> {
        // Test non-gzip file (should be treated as plain)
        let mut temp_file = NamedTempFile::new()?;
        writeln!(temp_file, "plain text file")?;
        temp_file.flush()?;

        let mut reader = DecompressionReader::new(temp_file.path())?;
        let mut content = String::new();
        reader.read_to_string(&mut content)?;
        assert!(content.contains("plain text file"));

        // Test file with gzip magic bytes but wrong extension
        let mut gzip_temp = NamedTempFile::new()?;

        // Write gzip magic bytes followed by some fake data
        // This won't be a valid gzip stream, but tests magic byte detection
        gzip_temp.write_all(&[0x1F, 0x8B, 0x08])?;
        gzip_temp.write_all(b"fake gzip data")?;
        gzip_temp.flush()?;

        // This should detect it as gzip and try to decompress
        // It will likely fail during decompression, but that's expected
        // The important thing is that magic bytes are detected
        let result = DecompressionReader::new(gzip_temp.path());

        // We don't test the exact error since decompressing invalid gzip
        // data may produce various error types, but it should at least
        // attempt gzip decompression based on magic bytes
        match result {
            Ok(_reader) => {
                // If it succeeds in creating the reader, magic bytes worked
                // Reading from it might fail, but detection worked
            }
            Err(_e) => {
                // If it fails, it could be due to decompression error
                // The key is that it attempted gzip decompression
            }
        }

        Ok(())
    }
}