// vyctor 0.1.0
//
// A fast CLI tool for semantic file search using vector embeddings.
// Documentation
//! Integration tests for the file indexer

mod common;

use common::TempProject;
use vyctor::embeddings::MockEmbedder;
use vyctor::indexer::{content_hash, Chunker, FileWalker};
use vyctor::EmbeddingProvider;

/// Chunks a realistic Rust source snippet and checks the structural
/// invariants of every chunk that comes back: the first chunk starts at
/// line 1, line ranges are well-formed, and no chunk is empty.
#[test]
fn test_chunker_with_real_code() {
    let rust_code = r#"
use std::collections::HashMap;

/// A simple key-value store
pub struct Store {
    data: HashMap<String, String>,
}

impl Store {
    /// Create a new empty store
    pub fn new() -> Self {
        Self {
            data: HashMap::new(),
        }
    }

    /// Insert a key-value pair
    pub fn insert(&mut self, key: String, value: String) {
        self.data.insert(key, value);
    }

    /// Get a value by key
    pub fn get(&self, key: &str) -> Option<&String> {
        self.data.get(key)
    }

    /// Remove a key-value pair
    pub fn remove(&mut self, key: &str) -> Option<String> {
        self.data.remove(key)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_insert_and_get() {
        let mut store = Store::new();
        store.insert("key".to_string(), "value".to_string());
        assert_eq!(store.get("key"), Some(&"value".to_string()));
    }
}
"#;

    let chunker = Chunker::new(500, 100);
    let chunks = chunker.chunk(rust_code);

    // Non-empty input must produce at least one chunk. The exact number is
    // deliberately not pinned here, only that there is something to inspect.
    assert!(
        !chunks.is_empty(),
        "chunker produced no chunks for non-empty input"
    );

    // First chunk should start at line 1
    assert_eq!(chunks[0].start_line, 1);

    // All chunks should have valid line numbers and carry some content
    for chunk in &chunks {
        assert!(chunk.start_line >= 1, "line numbers are 1-based");
        assert!(
            chunk.end_line >= chunk.start_line,
            "chunk must not end before it starts"
        );
        assert!(!chunk.content.is_empty(), "chunk content must not be empty");
    }
}

/// Walks the sample project with Rust, TypeScript, Python and Markdown globs
/// and checks that nothing under `node_modules` is reported.
#[test]
fn test_file_walker_with_project() {
    let project = TempProject::with_sample_files();

    let wanted_globs: Vec<String> = ["**/*.rs", "**/*.ts", "**/*.py", "**/*.md"]
        .iter()
        .map(|glob| glob.to_string())
        .collect();
    let skipped_globs = vec![String::from("**/node_modules/**")];

    let walker = FileWalker::new(project.path_buf(), wanted_globs, skipped_globs);
    let found: Vec<_> = walker.walk().collect();

    // Expect at least main.rs, utils.ts, helper.py and README.md
    assert!(found.len() >= 4);

    // No reported path may mention node_modules
    assert!(found
        .iter()
        .all(|path| !path.to_string_lossy().contains("node_modules")));
}

/// Hashing the same content twice must yield the same 64-character digest.
#[test]
fn test_content_hash_consistency() {
    let project = TempProject::with_sample_files();
    let source = project.read_file("src/main.rs");

    // Two independent hash computations over identical input
    let first_digest = content_hash(&source);
    let second_digest = content_hash(&source);

    assert_eq!(first_digest, second_digest);
    assert_eq!(first_digest.len(), 64);
}

/// Rewriting a file with different content must change its content hash.
#[test]
fn test_hash_detects_changes() {
    let project = TempProject::new();
    let file_name = "test.txt";

    // Writes `body` to the file, then hashes whatever is read back from it
    let write_then_hash = |body: &str| {
        project.create_file(file_name, body);
        content_hash(&project.read_file(file_name))
    };

    let before = write_then_hash("original content");
    let after = write_then_hash("modified content");

    assert_ne!(before, after);
}

/// Embeds two unrelated snippets and checks both the vector length and that
/// the two vectors differ.
#[tokio::test]
async fn test_mock_embedder_integration() {
    let embedder = MockEmbedder::new(64);

    let rust_snippet = "fn main() { println!(\"Hello\"); }";
    let python_snippet = "def main(): print('Hello')";

    let rust_vector = embedder.embed(rust_snippet).await.unwrap().embedding;
    let python_vector = embedder.embed(python_snippet).await.unwrap().embedding;

    // Both vectors must have the dimension the embedder was built with
    assert_eq!(rust_vector.len(), 64);
    assert_eq!(python_vector.len(), 64);

    // Distinct inputs must not collapse to the same vector
    assert_ne!(rust_vector, python_vector);
}

/// Batch embedding must return one result per input, each with the
/// configured dimension.
#[tokio::test]
async fn test_mock_embedder_batch() {
    let embedder = MockEmbedder::new(128);

    let texts: Vec<String> = ["fn test1() {}", "fn test2() {}", "fn test3() {}"]
        .iter()
        .map(|snippet| snippet.to_string())
        .collect();

    let results = embedder.embed_batch(&texts).await.unwrap();

    // One result per input text
    assert_eq!(results.len(), 3);

    // Every embedding has the requested dimension
    for embedded in &results {
        assert_eq!(embedded.embedding.len(), 128);
    }
}

/// Checks that line information survives chunking: the first chunk starts at
/// line 1 and each subsequent chunk starts on a later line than the one
/// before it.
#[test]
fn test_chunker_preserves_line_info() {
    let content = "line 1\nline 2\nline 3\nline 4\nline 5\nline 6\nline 7\nline 8\nline 9\nline 10";
    let chunker = Chunker::new(30, 10);

    let chunks = chunker.chunk(content);

    // First chunk should start at line 1
    let first = chunks
        .first()
        .expect("chunker produced no chunks for non-empty input");
    assert_eq!(first.start_line, 1);

    // Line numbers should increase across chunks. Comparing neighbours is
    // what actually verifies ordering; a bare `> 0` check would pass even if
    // the chunks came back with scrambled line numbers.
    for pair in chunks.windows(2) {
        assert!(
            pair[1].start_line > pair[0].start_line,
            "chunk start lines must increase, got {} followed by {}",
            pair[0].start_line,
            pair[1].start_line
        );
    }
}

/// Builds a project containing one source file plus files under `target`,
/// `.git` and `node_modules`, then checks that only the source file is
/// reported when those directories are listed in the walker's third argument.
#[test]
fn test_file_walker_respects_gitignore_patterns() {
    let project = TempProject::new();

    // (relative path, file body) pairs; all but the first should be filtered out
    let fixtures = [
        ("src/main.rs", "fn main() {}"),
        ("target/debug/main", "binary"),
        (".git/config", "git config"),
        ("node_modules/package/index.js", "module"),
    ];
    for &(relative_path, body) in fixtures.iter() {
        project.create_file(relative_path, body);
    }

    let match_everything = vec![String::from("**/*")];
    let ignored: Vec<String> = ["**/target/**", "**/.git/**", "**/node_modules/**"]
        .iter()
        .map(|glob| glob.to_string())
        .collect();

    let walker = FileWalker::new(project.path_buf(), match_everything, ignored);
    let found: Vec<_> = walker.walk().collect();

    // Only src/main.rs should be left
    assert_eq!(found.len(), 1);
    assert!(found[0].to_string_lossy().contains("main.rs"));
}

/// Chunks source text containing Japanese, Chinese and emoji characters and
/// checks that those characters are still present in the chunk contents.
#[test]
fn test_chunker_handles_unicode() {
    let unicode_content = r#"
// Japanese comment: 日本語コメント
fn greet(name: &str) -> String {
    format!("こんにちは、{}さん!", name)
}

// Chinese comment: 中文注释
fn farewell(name: &str) -> String {
    format!("再见,{}!", name)
}

// Emoji test 🎉
const CELEBRATION: &str = "🎊🎉🎈";
"#;

    let chunker = Chunker::new(200, 50);
    let chunks = chunker.chunk(unicode_content);

    assert!(!chunks.is_empty());

    // Glue every chunk's text together so the search below spans all chunks
    let mut joined = String::new();
    for chunk in &chunks {
        joined.push_str(chunk.content.as_str());
    }

    // Each multi-byte marker must have survived chunking intact
    for marker in ["日本語", "中文", "🎉"].iter() {
        assert!(joined.contains(*marker));
    }
}

/// Runs the walk -> read -> hash -> chunk pipeline over every `.rs` file of
/// the sample project and validates the hash length and chunk output.
#[test]
fn test_end_to_end_chunking_and_hashing() {
    let project = TempProject::with_sample_files();

    let rust_only = vec![String::from("**/*.rs")];
    let walker = FileWalker::new(project.path_buf(), rust_only, vec![]);
    let chunker = Chunker::new(500, 100);

    for file_path in walker.walk() {
        let text = std::fs::read_to_string(&file_path).unwrap();
        let digest = content_hash(&text);
        let pieces = chunker.chunk(&text);

        // Every file must hash to a 64-character digest
        assert_eq!(digest.len(), 64);

        // Whitespace-only files are exempt; anything else must yield chunks
        let is_blank = text.trim().is_empty();
        assert!(
            is_blank || !pieces.is_empty(),
            "File should produce chunks: {:?}",
            file_path
        );
    }
}

/// Embedding the same text three times must produce three identical vectors.
#[tokio::test]
async fn test_mock_embedder_determinism() {
    let embedder = MockEmbedder::new(64);
    let text = "fn deterministic() {}";

    // Collect the embedding from three consecutive runs over the same input
    let mut runs = Vec::new();
    for _ in 0..3 {
        runs.push(embedder.embed(text).await.unwrap().embedding);
    }

    // Pairwise equality of neighbours implies all three agree
    assert_eq!(runs[0], runs[1]);
    assert_eq!(runs[1], runs[2]);
}

/// Near-identical inputs (trailing space, changed case) must still map to
/// pairwise distinct embeddings.
#[tokio::test]
async fn test_mock_embedder_similar_text_different_embeddings() {
    let embedder = MockEmbedder::new(64);

    let baseline = embedder.embed("fn test()").await.unwrap().embedding;
    // Same text with one trailing space
    let trailing_space = embedder.embed("fn test() ").await.unwrap().embedding;
    // Same text with a capitalised identifier
    let capitalised = embedder.embed("fn Test()").await.unwrap().embedding;

    // The test expects no two of the three vectors to coincide
    assert_ne!(baseline, trailing_space);
    assert_ne!(baseline, capitalised);
    assert_ne!(trailing_space, capitalised);
}

/// Inputs far smaller than the chunker's configured size must come back as
/// exactly one chunk.
#[test]
fn test_chunker_very_small_files() {
    let chunker = Chunker::new(1000, 200);

    // A single word is returned verbatim as the only chunk
    let word_chunks = chunker.chunk("hello");
    assert_eq!(word_chunks.len(), 1);
    assert_eq!(word_chunks[0].content, "hello");

    // A single line of code also fits in one chunk
    let line_chunks = chunker.chunk("fn main() {}");
    assert_eq!(line_chunks.len(), 1);
}

/// A 1000-line input must be split into many chunks whose line numbers span
/// from the first line to (nearly) the last.
#[test]
fn test_chunker_very_large_files() {
    let chunker = Chunker::new(100, 20);

    // Build 1000 newline-terminated comment lines
    let mut large_content = String::new();
    for i in 0..1000 {
        large_content.push_str(&format!("// Line {}: some content here\n", i));
    }

    let chunks = chunker.chunk(&large_content);

    // Should produce many chunks
    assert!(chunks.len() > 10);

    // The output must begin at line 1 and reach at least line 990
    let head = chunks.first().unwrap();
    let tail = chunks.last().unwrap();
    assert_eq!(head.start_line, 1);
    assert!(tail.end_line >= 990);
}