mod common;
use common::TempProject;
use vyctor::embeddings::MockEmbedder;
use vyctor::indexer::{content_hash, Chunker, FileWalker};
use vyctor::EmbeddingProvider;
/// Chunks a realistic Rust source snippet and checks basic chunk invariants.
///
/// Verifies that at least one chunk is produced, that the first chunk starts
/// on line 1, and that every chunk has a sane line range and non-empty
/// content.
#[test]
fn test_chunker_with_real_code() {
    let rust_code = r#"
use std::collections::HashMap;
/// A simple key-value store
pub struct Store {
data: HashMap<String, String>,
}
impl Store {
/// Create a new empty store
pub fn new() -> Self {
Self {
data: HashMap::new(),
}
}
/// Insert a key-value pair
pub fn insert(&mut self, key: String, value: String) {
self.data.insert(key, value);
}
/// Get a value by key
pub fn get(&self, key: &str) -> Option<&String> {
self.data.get(key)
}
/// Remove a key-value pair
pub fn remove(&mut self, key: &str) -> Option<String> {
self.data.remove(key)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_insert_and_get() {
let mut store = Store::new();
store.insert("key".to_string(), "value".to_string());
assert_eq!(store.get("key"), Some(&"value".to_string()));
}
}
"#;
    // NOTE(review): the two arguments are presumably (chunk size, overlap) —
    // confirm against `Chunker::new`.
    let chunker = Chunker::new(500, 100);
    let chunks = chunker.chunk(rust_code);

    // Checked before indexing so an empty result fails with a clear message
    // rather than an index-out-of-bounds panic.
    assert!(
        !chunks.is_empty(),
        "chunker produced no chunks for non-empty input"
    );
    assert_eq!(chunks[0].start_line, 1);

    for chunk in &chunks {
        assert!(chunk.start_line >= 1);
        assert!(chunk.end_line >= chunk.start_line);
        assert!(!chunk.content.is_empty());
    }
}
/// Walks the sample project with several include globs and one exclude glob.
///
/// Expects at least four matching files and none located under
/// `node_modules`.
#[test]
fn test_file_walker_with_project() {
    let project = TempProject::with_sample_files();

    let include_globs = vec![
        String::from("**/*.rs"),
        String::from("**/*.ts"),
        String::from("**/*.py"),
        String::from("**/*.md"),
    ];
    let exclude_globs = vec![String::from("**/node_modules/**")];
    let walker = FileWalker::new(project.path_buf(), include_globs, exclude_globs);

    let found: Vec<_> = walker.walk().collect();
    assert!(found.len() >= 4);

    // No discovered path may point into the excluded directory.
    assert!(found
        .iter()
        .all(|path| !path.to_string_lossy().contains("node_modules")));
}
/// Hashing the same content twice must give the same 64-character digest.
#[test]
fn test_content_hash_consistency() {
    let project = TempProject::with_sample_files();
    let source = project.read_file("src/main.rs");

    let first = content_hash(&source);
    let second = content_hash(&source);

    assert_eq!(first, second);
    // NOTE(review): a length of 64 presumably corresponds to a hex-encoded
    // 256-bit digest — confirm against `content_hash`.
    assert_eq!(first.len(), 64);
}
/// Rewriting a file with different content must change its content hash.
#[test]
fn test_hash_detects_changes() {
    let project = TempProject::new();
    let file_name = "test.txt";

    project.create_file(file_name, "original content");
    let before = content_hash(&project.read_file(file_name));

    // Write the same path again with different text.
    project.create_file(file_name, "modified content");
    let after = content_hash(&project.read_file(file_name));

    assert_ne!(before, after);
}
/// Embeds two different snippets with the mock embedder.
///
/// Both embeddings must have the configured length (64) and must differ from
/// each other.
#[tokio::test]
async fn test_mock_embedder_integration() {
    let embedder = MockEmbedder::new(64);

    let rust_vector = embedder
        .embed("fn main() { println!(\"Hello\"); }")
        .await
        .unwrap()
        .embedding;
    let python_vector = embedder
        .embed("def main(): print('Hello')")
        .await
        .unwrap()
        .embedding;

    assert_eq!(rust_vector.len(), 64);
    assert_eq!(python_vector.len(), 64);
    assert_ne!(rust_vector, python_vector);
}
/// Batch-embeds three snippets.
///
/// Expects one result per input, each with an embedding of length 128.
#[tokio::test]
async fn test_mock_embedder_batch() {
    let embedder = MockEmbedder::new(128);

    let texts: Vec<String> = ["fn test1() {}", "fn test2() {}", "fn test3() {}"]
        .iter()
        .map(|snippet| snippet.to_string())
        .collect();

    let results = embedder.embed_batch(&texts).await.unwrap();
    assert_eq!(results.len(), 3);

    results
        .into_iter()
        .for_each(|item| assert_eq!(item.embedding.len(), 128));
}
/// Chunks a ten-line input with a small chunker configuration.
///
/// The first chunk must start on line 1, and every later chunk must report a
/// positive start line.
#[test]
fn test_chunker_preserves_line_info() {
    let content = "line 1\nline 2\nline 3\nline 4\nline 5\nline 6\nline 7\nline 8\nline 9\nline 10";
    // NOTE(review): the arguments are presumably (chunk size, overlap), chosen
    // small so the input splits into several chunks — confirm against
    // `Chunker::new`.
    let chunker = Chunker::new(30, 10);
    let chunks = chunker.chunk(content);

    // Checked before indexing so an empty result fails with a clear message
    // rather than an index-out-of-bounds panic.
    assert!(
        !chunks.is_empty(),
        "chunker produced no chunks for non-empty input"
    );
    assert_eq!(chunks[0].start_line, 1);

    // The first chunk is covered by the assertion above; check the rest.
    for chunk in chunks.iter().skip(1) {
        assert!(chunk.start_line > 0);
    }
}
/// Excluded directories must be skipped even with a catch-all include glob.
///
/// Creates one source file plus files under `target`, `.git` and
/// `node_modules`, then expects the walker to yield only `main.rs`.
#[test]
fn test_file_walker_respects_gitignore_patterns() {
    let project = TempProject::new();

    let fixtures = [
        ("src/main.rs", "fn main() {}"),
        ("target/debug/main", "binary"),
        (".git/config", "git config"),
        ("node_modules/package/index.js", "module"),
    ];
    for &(relative_path, body) in fixtures.iter() {
        project.create_file(relative_path, body);
    }

    let include_globs = vec![String::from("**/*")];
    let exclude_globs = vec![
        String::from("**/target/**"),
        String::from("**/.git/**"),
        String::from("**/node_modules/**"),
    ];
    let walker = FileWalker::new(project.path_buf(), include_globs, exclude_globs);

    let found: Vec<_> = walker.walk().collect();
    assert_eq!(found.len(), 1);

    let only_file = &found[0];
    assert!(only_file.to_string_lossy().contains("main.rs"));
}
/// Multi-byte text (Japanese, Chinese, emoji) must survive chunking.
///
/// Concatenates the content of all chunks and checks that each multi-byte
/// marker is still present.
#[test]
fn test_chunker_handles_unicode() {
    let unicode_content = r#"
// Japanese comment: 日本語コメント
fn greet(name: &str) -> String {
format!("こんにちは、{}さん!", name)
}
// Chinese comment: 中文注释
fn farewell(name: &str) -> String {
format!("再见,{}!", name)
}
// Emoji test 🎉
const CELEBRATION: &str = "🎊🎉🎈";
"#;
    let chunker = Chunker::new(200, 50);
    let chunks = chunker.chunk(unicode_content);
    assert!(!chunks.is_empty());

    // Join every chunk's text in order.
    let mut joined = String::new();
    for chunk in chunks.iter() {
        joined.push_str(chunk.content.as_str());
    }

    for &marker in ["日本語", "中文", "🎉"].iter() {
        assert!(joined.contains(marker));
    }
}
/// Walks the sample project's Rust files, hashing and chunking each one.
///
/// For every file found, the content hash must be 64 characters long and any
/// file with non-whitespace content must produce at least one chunk. The test
/// also requires that the walker yields at least one file, so it cannot pass
/// without checking anything.
#[test]
fn test_end_to_end_chunking_and_hashing() {
    let project = TempProject::with_sample_files();
    let walker = FileWalker::new(project.path_buf(), vec!["**/*.rs".to_string()], vec![]);
    let chunker = Chunker::new(500, 100);

    let mut files_checked = 0usize;
    for file_path in walker.walk() {
        // Name the offending path if a discovered file cannot be read.
        let content = std::fs::read_to_string(&file_path)
            .unwrap_or_else(|err| panic!("failed to read {:?}: {}", file_path, err));
        let hash = content_hash(&content);
        let chunks = chunker.chunk(&content);

        assert_eq!(hash.len(), 64);
        if !content.trim().is_empty() {
            assert!(
                !chunks.is_empty(),
                "File should produce chunks: {:?}",
                file_path
            );
        }
        files_checked += 1;
    }

    // Without this, an empty walk would let the test pass without running a
    // single assertion.
    assert!(
        files_checked > 0,
        "walker found no .rs files in the sample project"
    );
}
/// Embedding the same text repeatedly must give identical vectors.
#[tokio::test]
async fn test_mock_embedder_determinism() {
    let embedder = MockEmbedder::new(64);
    let text = "fn deterministic() {}";

    // Embed the same input three times, in sequence.
    let mut runs = Vec::with_capacity(3);
    for _ in 0..3 {
        runs.push(embedder.embed(text).await.unwrap().embedding);
    }

    assert_eq!(runs[0], runs[1]);
    assert_eq!(runs[1], runs[2]);
}
/// Near-identical inputs must still map to distinct embeddings.
///
/// The three inputs differ only by a trailing space or by the case of one
/// letter.
#[tokio::test]
async fn test_mock_embedder_similar_text_different_embeddings() {
    let embedder = MockEmbedder::new(64);

    let base = "fn test()";
    let trailing_space = "fn test() ";
    let capitalised = "fn Test()";

    let base_vec = embedder.embed(base).await.unwrap().embedding;
    let space_vec = embedder.embed(trailing_space).await.unwrap().embedding;
    let caps_vec = embedder.embed(capitalised).await.unwrap().embedding;

    // Every pair must differ.
    assert_ne!(base_vec, space_vec);
    assert_ne!(base_vec, caps_vec);
    assert_ne!(space_vec, caps_vec);
}
/// Very short inputs must yield exactly one chunk.
///
/// For the input `"hello"` the single chunk's content must equal the input.
#[test]
fn test_chunker_very_small_files() {
    let chunker = Chunker::new(1000, 200);

    let single_word = chunker.chunk("hello");
    assert_eq!(single_word.len(), 1);
    assert_eq!(single_word[0].content, "hello");

    let one_liner = chunker.chunk("fn main() {}");
    assert_eq!(one_liner.len(), 1);
}
/// A 1000-line input must split into many chunks covering the whole file.
///
/// Expects more than ten chunks, the first starting at line 1 and the last
/// ending at line 990 or later.
#[test]
fn test_chunker_very_large_files() {
    let chunker = Chunker::new(100, 20);

    // Build 1000 newline-terminated comment lines.
    let mut large_content = String::new();
    for line_no in 0..1000 {
        large_content.push_str(&format!("// Line {}: some content here\n", line_no));
    }

    let chunks = chunker.chunk(&large_content);
    assert!(chunks.len() > 10);

    // The length assertion above guarantees both indices are in range.
    let first_chunk = &chunks[0];
    let last_chunk = &chunks[chunks.len() - 1];
    assert_eq!(first_chunk.start_line, 1);
    assert!(last_chunk.end_line >= 990);
}