// wallswitch 0.58.0

use crate::{ConcurrencyExt, Dimension, FileInfo, WallSwitchResult, exec_cmd};
use blake3::Hasher;
use image::image_dimensions;
use std::{
    fs::File,
    io::{BufReader, Read},
    path::PathBuf,
    process::Command,
    thread,
};

/// Size, in bytes, of the read buffer used while hashing files (64 KiB).
///
/// The same value serves as the `BufReader` capacity in
/// `compute_hashes_parallel` and as the length of the stack buffer in `get_hash`.
/// It is intended as a balance between memory usage and disk read throughput.
const BUFFER_SIZE: usize = 64 * 1024;

/// Determines the pixel dimensions of the image at `path`, cheapest method first.
///
/// 1. Fast path: the `image` crate's `image_dimensions` is asked for the
///    `(width, height)` pair in-process; on success both values are widened
///    to `u64` and returned immediately.
/// 2. Fallback path: if the in-process attempt returns any error, that error
///    is discarded and the lookup is delegated to
///    [`probe_image_dimension_fallback`], which runs ImageMagick's `identify`.
///
/// `verbose` is only forwarded to the fallback.
///
/// # Errors
///
/// Returns whatever error the fallback produces; errors from the fast path
/// are never surfaced to the caller.
pub fn probe_image_dimension(path: &PathBuf, verbose: bool) -> WallSwitchResult<Dimension> {
    // In-process attempt: no child process is spawned when this succeeds.
    if let Ok((width, height)) = image_dimensions(path) {
        return Ok(Dimension {
            width: u64::from(width),
            height: u64::from(height),
        });
    }

    // The in-process reader could not handle the file; let ImageMagick try.
    probe_image_dimension_fallback(path, verbose)
}

/// Runs ImageMagick's `identify` to obtain the dimensions of the image at `path`.
///
/// The command is executed through the shared `exec_cmd` helper (with
/// `verbose` forwarded to it), its stdout is decoded as UTF-8, and the first
/// line of that output is parsed with `Dimension::new`.
///
/// `identify` prints one formatted record per frame/page. Without a separator,
/// a multi-frame file (animated GIF, multi-page TIFF, ...) would produce
/// `WxHWxH...`, which is not a valid single dimension. The format string
/// therefore ends each record with a newline and only the first record is used.
///
/// Keeping the process spawning here keeps OS-level dependencies out of the
/// domain entities (such as `FileInfo`).
///
/// # Errors
///
/// Propagates errors from `exec_cmd`, from the UTF-8 conversion of stdout,
/// and from `Dimension::new`.
pub fn probe_image_dimension_fallback(
    path: &PathBuf,
    verbose: bool,
) -> WallSwitchResult<Dimension> {
    let mut cmd = Command::new("identify");
    let identify_cmd = cmd
        .arg("-format")
        // "%wx%h" is "<width>x<height>"; the literal `\n` escape is expanded by
        // ImageMagick so each frame's record ends up on its own line.
        .arg("%wx%h\\n")
        .arg(path);

    // Execute the command using the centralized utility function
    let identify_out = exec_cmd(identify_cmd, verbose, "identify")?;

    // Convert the stdout bytes to a UTF-8 string safely
    let std_output = String::from_utf8(identify_out.stdout)?;

    // Only the first frame's record is relevant; an empty stdout is passed on
    // as an empty string so `Dimension::new` reports the parse failure.
    let first_frame = std_output.lines().next().unwrap_or_default();

    // Parse the string into the pure Dimension domain entity
    Dimension::new(first_frame)
}

/// Computes the BLAKE3 hash of every entry in `files`, in parallel, in place.
///
/// The slice is split into non-overlapping chunks whose size comes from
/// `ConcurrencyExt::get_chunk_size`; one scoped thread is spawned per chunk and
/// each thread hashes its files one after another. The function returns once
/// all threads have finished.
///
/// This function is best-effort: if a file cannot be opened or read, its
/// `hash` field is left untouched and no error is reported.
///
/// An empty slice is a no-op.
pub fn compute_hashes_parallel(files: &mut [FileInfo]) {
    // Nothing to hash, and nothing to split into chunks.
    if files.is_empty() {
        return;
    }

    // Utilize the ConcurrencyExt trait to determine the batch size per thread.
    // `chunks_mut` panics on a chunk size of 0, so clamp defensively.
    // NOTE(review): `get_chunk_size` is defined elsewhere; confirm whether it can return 0.
    let chunk_size = files.get_chunk_size(files.len()).max(1);

    // Scoped threads may borrow the mutable 'files' slice without `'static` bounds
    thread::scope(|scope| {
        // Divide the mutable slice into non-overlapping mutable chunks
        for chunk in files.chunks_mut(chunk_size) {
            // Spawn a thread for each chunk
            scope.spawn(move || {
                // Files within a chunk are processed sequentially, so each thread
                // holds at most one open file at a time. This avoids running into
                // the OS limit reported as "Too many open files" (Error 24).
                for file_info in chunk {
                    // Unreadable files are skipped; their hash stays as it was
                    if let Ok(file) = File::open(&file_info.path) {
                        let reader = BufReader::with_capacity(BUFFER_SIZE, file);

                        // Only overwrite the stored hash when hashing succeeded
                        if let Ok(hash) = get_hash(reader) {
                            file_info.hash = hash;
                        }
                    }
                }
            });
        }
    });
}

/// Calculates the BLAKE3 hash of everything readable from `reader`.
///
/// The stream is consumed in `BUFFER_SIZE`-byte chunks until end of stream,
/// and the digest is returned as a lowercase hexadecimal string. Because the
/// digest covers the raw bytes, equal hashes indicate byte-identical content.
///
/// # Errors
///
/// Returns the I/O error converted into the crate's error type if a read
/// fails. Reads that fail with [`std::io::ErrorKind::Interrupted`] are retried
/// rather than reported, as that error kind is non-fatal.
pub fn get_hash(mut reader: impl Read) -> WallSwitchResult<String> {
    let mut hasher = Hasher::new();
    let mut buffer = [0_u8; BUFFER_SIZE];

    loop {
        // Read a chunk of bytes into the buffer
        let count = match reader.read(&mut buffer) {
            // A read of 0 bytes means End of File (EOF)
            Ok(0) => break,
            Ok(count) => count,
            // An interrupted read transferred nothing; simply try again
            Err(err) if err.kind() == std::io::ErrorKind::Interrupted => continue,
            Err(err) => return Err(err.into()),
        };

        // Update the hasher state with the newly read bytes
        hasher.update(&buffer[..count]);
    }

    // Finalize the hash and return it as a hexadecimal string
    Ok(hasher.finalize().to_hex().to_string())
}

//----------------------------------------------------------------------------//
//                                   Tests                                    //
//----------------------------------------------------------------------------//
//
// cargo test -- --help
// cargo test -- --nocapture
// cargo test -- --show-output

/// Run tests with:
/// cargo test -- --show-output tests_metadata
#[cfg(test)]
mod tests_metadata {
    use super::*;
    use std::fs;

    /// Builds a path inside the system temp directory whose file name embeds
    /// the current process id, so that concurrent test runs (e.g. several
    /// `cargo test` invocations on one machine) do not overwrite each other's files.
    fn unique_temp_path(stem: &str, extension: &str) -> PathBuf {
        std::env::temp_dir().join(format!("{stem}_{}.{extension}", std::process::id()))
    }

    /// Tests the `get_hash` function using an in-memory byte slice.
    /// This is a pure unit test with no disk I/O.
    ///
    /// A known string is hashed and compared against a reference BLAKE3 digest.
    #[test]
    fn test_get_hash_in_memory() {
        // The test data
        let data = b"hello wallswitch";

        // Reference BLAKE3 digest for the bytes above.
        // To reproduce: `echo -n "hello wallswitch" | b3sum`
        let expected_hash = "5cde4798fe09c816b40570b6a00f62a7149de218a3f2ab37e69f761d01d038e5";

        // Since &[u8] implements the `Read` trait, we can pass it directly
        let result = get_hash(&data[..]);

        assert!(
            result.is_ok(),
            "Hashing should not fail on valid memory reads"
        );
        assert_eq!(result.unwrap(), expected_hash);
    }

    /// Tests the parallel hashing function.
    ///
    /// Strategy:
    /// 1. Create two temporary text files with different contents.
    /// 2. Construct mock `FileInfo` structs pointing to these files.
    /// 3. Run `compute_hashes_parallel`.
    /// 4. Remove the temporary files.
    /// 5. Assert that the hashes were populated, differ, and match reference values.
    #[test]
    fn test_compute_hashes_parallel() {
        let file1_path = unique_temp_path("wallswitch_test_hash_1", "txt");
        let file2_path = unique_temp_path("wallswitch_test_hash_2", "txt");

        // Create temporary files
        fs::write(&file1_path, b"content A").expect("Failed to write temp file 1");
        fs::write(&file2_path, b"content B").expect("Failed to write temp file 2");

        // Create a mutable slice of FileInfo (default hash is empty string)
        let mut files = vec![
            FileInfo {
                path: file1_path.clone(),
                ..Default::default()
            },
            FileInfo {
                path: file2_path.clone(),
                ..Default::default()
            },
        ];

        // Ensure hashes are initially empty
        assert!(files[0].hash.is_empty());

        // Execute the parallel hashing
        compute_hashes_parallel(&mut files);

        // Cleanup happens before the assertions so that a failing assertion
        // does not leave the temporary files behind.
        let _ = fs::remove_file(&file1_path);
        let _ = fs::remove_file(&file2_path);

        // Reference digests for the two file contents, obtainable with:
        //   printf 'content A' | b3sum
        //   printf 'content B' | b3sum
        let expected_hash_1 = "4f081fea7fb11c55156d6d6e7f44fa43c956a987c1e1d43d2f8cdeb2162dd5e2";
        let expected_hash_2 = "0ececda959d7107819dfa5c37d52db957aa075149cdebd4622d96e0160813ff3";

        // Assertions
        assert!(
            !files[0].hash.is_empty(),
            "Hash for file 1 should be populated"
        );
        assert!(
            !files[1].hash.is_empty(),
            "Hash for file 2 should be populated"
        );
        assert_ne!(
            files[0].hash, files[1].hash,
            "Different contents must produce different hashes"
        );
        assert_eq!(files[0].hash, expected_hash_1);
        assert_eq!(files[1].hash, expected_hash_2);
    }

    /// Tests image dimension probing, both through the hybrid entry point and
    /// directly through the ImageMagick fallback.
    ///
    /// Strategy:
    /// 1. Verify if `identify` is installed. If not, skip the test gracefully (useful for CI/CD).
    /// 2. Create a valid, minimal image format using plain text (PPM format).
    /// 3. Run both probers, remove the temporary image, and assert that each
    ///    one reports the expected dimensions.
    ///
    /// The fallback is called explicitly because the hybrid prober only reaches
    /// `identify` when its in-process attempt fails.
    #[test]
    fn test_probe_image_dimension() {
        // 1. Pre-flight check: Skip if ImageMagick is missing
        if Command::new("identify").arg("-version").output().is_err() {
            println!("Skipping test: ImageMagick 'identify' command not found.");
            return;
        }

        // 2. Create a minimal valid image (PPM - Portable Pixmap format)
        // This is a 2x2 pixel image.
        let ppm_data = "P3\n2 2\n255\n255 0 0  0 255 0\n0 0 255  255 255 255\n";

        let img_path = unique_temp_path("wallswitch_test_image", "ppm");
        fs::write(&img_path, ppm_data).expect("Failed to write temp image file");

        // 3. Execute the functions being tested
        let hybrid_result = probe_image_dimension(&img_path, false);
        let fallback_result = probe_image_dimension_fallback(&img_path, false);

        // 4. Cleanup before asserting so a failure does not leak the file
        let _ = fs::remove_file(&img_path);

        // 5. Assertions: hybrid entry point
        assert!(
            hybrid_result.is_ok(),
            "Hybrid prober failed on the test image. Result: {:?}",
            hybrid_result
        );
        let dim = hybrid_result.unwrap();
        assert_eq!(dim.width, 2, "Expected width to be 2");
        assert_eq!(dim.height, 2, "Expected height to be 2");

        // 6. Assertions: ImageMagick fallback
        assert!(
            fallback_result.is_ok(),
            "ImageMagick failed to probe the test image. Result: {:?}",
            fallback_result
        );
        let dim = fallback_result.unwrap();
        assert_eq!(dim.width, 2, "Expected width to be 2");
        assert_eq!(dim.height, 2, "Expected height to be 2");
    }
}