// vyctor 0.1.0
//
// A fast CLI tool for semantic file search using vector embeddings.
// Documentation
//! Content hashing for change detection

use blake3::Hasher;

/// Hash `content` with BLAKE3 and return the digest as a hex string.
///
/// The UTF-8 bytes of `content` are fed to the hasher unchanged, so any
/// difference in the text (case, whitespace, line endings) changes the result.
///
/// # Returns
///
/// The hex encoding of the finalized BLAKE3 digest, as an owned `String`.
pub fn content_hash(content: &str) -> String {
    // `update` hands back the hasher, so the whole computation fits in one chain.
    let digest = Hasher::new().update(content.as_bytes()).finalize();
    digest.to_hex().to_string()
}

/// Read the file at `path` as text and return the [`content_hash`] of it.
///
/// # Errors
///
/// Returns the underlying [`std::io::Error`] when the file cannot be read
/// into a `String` (for example, the path does not exist).
// NOTE(review): presumably not called outside of tests yet, hence the lint
// suppression — confirm and drop the attribute once a caller exists.
#[allow(dead_code)]
pub fn file_hash(path: &std::path::Path) -> std::io::Result<String> {
    std::fs::read_to_string(path).map(|text| content_hash(&text))
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Number of hex characters every digest is expected to have.
    const HEX_LEN: usize = 64;

    /// Equal inputs hash equally; a one-character change alters the hash.
    #[test]
    fn test_content_hash() {
        let baseline = content_hash("hello world");

        assert_eq!(baseline, content_hash("hello world"));
        assert_ne!(baseline, content_hash("hello world!"));
    }

    /// The digest is exactly `HEX_LEN` characters, all of them hex digits.
    #[test]
    fn test_hash_format() {
        let digest = content_hash("test");

        assert_eq!(digest.len(), HEX_LEN);
        assert!(!digest.chars().any(|ch| !ch.is_ascii_hexdigit()));
    }

    /// The empty string still produces a full-length digest.
    #[test]
    fn test_empty_content() {
        assert_eq!(content_hash("").len(), HEX_LEN);
    }

    /// Leading/trailing whitespace is part of the hashed content.
    #[test]
    fn test_whitespace_matters() {
        let plain = content_hash("hello");
        let trailing_space = content_hash("hello ");
        let leading_space = content_hash(" hello");
        let trailing_newline = content_hash("hello\n");

        // Every padded variant must differ from the unpadded text...
        assert_ne!(plain, trailing_space);
        assert_ne!(plain, leading_space);
        assert_ne!(plain, trailing_newline);
        // ...and the position of the padding matters too.
        assert_ne!(trailing_space, leading_space);
    }

    /// Hashing does not fold case.
    #[test]
    fn test_case_sensitive() {
        let [title, lower, upper] = ["Hello", "hello", "HELLO"].map(content_hash);

        assert_ne!(title, lower);
        assert_ne!(title, upper);
        assert_ne!(lower, upper);
    }

    /// Multi-byte (CJK) text hashes deterministically and to full length.
    #[test]
    fn test_unicode_content() {
        let japanese = content_hash("日本語");

        assert_eq!(japanese, content_hash("日本語"));
        assert_ne!(japanese, content_hash("中文"));
        assert_eq!(japanese.len(), HEX_LEN);
    }

    /// Emoji input hashes deterministically and is sensitive to its length.
    #[test]
    fn test_emoji_content() {
        let pair = content_hash("🎉🚀");

        assert_eq!(pair, content_hash("🎉🚀"));
        assert_ne!(pair, content_hash("🎉"));
    }

    /// A million-character input hashes to a full-length, repeatable digest.
    #[test]
    fn test_large_content() {
        let payload = "x".repeat(1_000_000);
        let first = content_hash(&payload);

        assert_eq!(first.len(), HEX_LEN);
        // A second pass over the same payload must agree with the first.
        assert_eq!(first, content_hash(&payload));
    }

    /// LF, CRLF and CR line endings are all distinguished.
    #[test]
    fn test_newline_variations() {
        let lf = content_hash("line1\nline2");
        let crlf = content_hash("line1\r\nline2");
        let cr = content_hash("line1\rline2");

        assert_ne!(lf, crlf);
        assert_ne!(lf, cr);
        assert_ne!(crlf, cr);
    }

    /// `file_hash` of a temp file equals `content_hash` of what was written.
    #[test]
    fn test_file_hash() {
        let mut tmp = NamedTempFile::new().unwrap();
        writeln!(tmp, "test content").unwrap();
        tmp.flush().unwrap();

        let from_disk = file_hash(tmp.path()).unwrap();
        assert_eq!(from_disk.len(), HEX_LEN);

        // `writeln!` appended a newline, so the expected text includes it.
        assert_eq!(from_disk, content_hash("test content\n"));
    }

    /// A path that does not exist surfaces as an error, not a panic.
    #[test]
    fn test_file_hash_nonexistent() {
        let missing = std::path::Path::new("/nonexistent/path/file.txt");
        assert!(file_hash(missing).is_err());
    }

    /// Multi-line source text round-trips through a file unchanged.
    #[test]
    fn test_file_hash_matches_content_hash() {
        let source = "fn main() {\n    println!(\"Hello\");\n}";

        let mut tmp = NamedTempFile::new().unwrap();
        write!(tmp, "{}", source).unwrap();
        tmp.flush().unwrap();

        assert_eq!(file_hash(tmp.path()).unwrap(), content_hash(source));
    }

    /// The hex encoding never contains uppercase characters.
    #[test]
    fn test_hash_is_lowercase_hex() {
        let digest = content_hash("test");

        // Fail if any character is either not a hex digit or is uppercase.
        let has_bad_char = digest
            .chars()
            .any(|ch| !ch.is_ascii_hexdigit() || ch.is_uppercase());
        assert!(!has_bad_char);
    }

    /// Ten hashes of the same input are all identical.
    #[test]
    fn test_deterministic_across_calls() {
        let input = "reproducible hash test";

        // One reference digest plus nine more that must each equal it.
        let reference = content_hash(input);
        assert!((1..10).all(|_| content_hash(input) == reference));
    }
}