#[cfg(test)]
#[allow(clippy::module_inception)]
mod tests {
use std::path::PathBuf;
use crate::search::tokenizer::{split_camel_case, tokenize_path, tokenize_query};
use super::super::query::{QueryIntent, classify_query, needs_code_search};
use super::super::{Bm25Index, IndexUpdate, SearchResult};
use std::fs;
/// Test-only adapter around `crate::search::tokenizer::tokenize_code`.
///
/// The crate tokenizer returns a keyed collection; only its keys (the token
/// strings) are kept here, gathered into a `Vec<String>` so the tests can use
/// `contains` / `is_empty` directly.
fn tokenize_code(content: &str) -> Vec<String> {
    let token_map = crate::search::tokenizer::tokenize_code(content);
    token_map.into_keys().collect()
}
/// A snake_case identifier must produce its individual parts as well as the
/// full identifier.
#[test]
fn test_tokenize_code_snake_case() {
    let tokens = tokenize_code("let my_variable = 42;");
    for expected in ["my", "variable", "my_variable"] {
        assert!(
            tokens.contains(&expected.to_string()),
            "missing token `{}`",
            expected
        );
    }
}
/// A camelCase identifier must produce its lowercased words plus the
/// lowercased whole identifier.
#[test]
fn test_tokenize_code_camel_case() {
    let tokens = tokenize_code("myFunctionName()");
    for expected in ["my", "function", "name", "myfunctionname"] {
        assert!(
            tokens.contains(&expected.to_string()),
            "missing token `{}`",
            expected
        );
    }
}
/// A PascalCase type name must produce its lowercased words plus the
/// lowercased whole name.
#[test]
fn test_tokenize_code_pascal_case() {
    let tokens = tokenize_code("struct ConversationContext");
    for expected in ["conversation", "context", "conversationcontext"] {
        assert!(
            tokens.contains(&expected.to_string()),
            "missing token `{}`",
            expected
        );
    }
}
/// Input made up solely of the words `pub fn self let mut` must yield no
/// tokens at all.
#[test]
fn test_tokenize_code_filters_noise() {
    let noise_only = "pub fn self let mut";
    let remaining = tokenize_code(noise_only);
    assert!(remaining.is_empty());
}
/// Words that appear only inside a string literal must not be tokenized.
#[test]
fn test_tokenize_code_skips_string_literals() {
    let source = r#"let x = "hello world";"#;
    let tokens = tokenize_code(source);
    for literal_word in ["hello", "world"] {
        assert!(
            !tokens.contains(&literal_word.to_string()),
            "unexpected token `{}`",
            literal_word
        );
    }
}
/// Every directory name, the file stem, and the extension of a path must each
/// show up as a token.
#[test]
fn test_tokenize_path() {
    let tokens = tokenize_path("src/agent/context.rs");
    for component in ["src", "agent", "context", "rs"] {
        assert!(
            tokens.contains(&component.to_string()),
            "missing path token `{}`",
            component
        );
    }
}
/// Query tokenization keeps the content words and drops the filler words
/// `the` and `is`.
#[test]
fn test_tokenize_query() {
    let tokens = tokenize_query("where is the error handling?");

    // Content words survive.
    for kept in ["error", "handling"] {
        assert!(
            tokens.contains(&kept.to_string()),
            "missing query token `{}`",
            kept
        );
    }

    // Filler words are removed.
    for dropped in ["the", "is"] {
        assert!(
            !tokens.contains(&dropped.to_string()),
            "unexpected query token `{}`",
            dropped
        );
    }
}
/// Table-driven check of `split_camel_case`: original casing is preserved in
/// the parts, an acronym run stays together, and a single word is returned
/// unchanged.
#[test]
fn test_split_camel_case() {
    let cases: [(&str, Vec<&str>); 4] = [
        ("camelCase", vec!["camel", "Case"]),
        ("PascalCase", vec!["Pascal", "Case"]),
        ("HTMLParser", vec!["HTML", "Parser"]),
        ("simple", vec!["simple"]),
    ];
    for (input, expected) in cases {
        assert_eq!(split_camel_case(input), expected);
    }
}
/// Builds an index over three small files in a scratch directory and checks
/// that the query "error handling" ranks `error.rs` first.
#[test]
fn test_bm25_basic_search() {
    // Start from a clean scratch directory; a leftover from an earlier run is
    // removed on a best-effort basis.
    let root = std::env::temp_dir().join("collet_bm25_test");
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(&root).unwrap();

    let fixtures = [
        (
            "error.rs",
            "fn handle_error() { eprintln!(\"error occurred\"); }",
        ),
        ("main.rs", "fn main() { println!(\"hello\"); }"),
        ("config.rs", "fn load_config() { /* read config file */ }"),
    ];
    let mut files: Vec<PathBuf> = Vec::new();
    for (name, body) in fixtures {
        let path = root.join(name);
        fs::write(&path, body).unwrap();
        files.push(path);
    }

    let mut index = Bm25Index::new();
    index.build(&files, &root);
    assert_eq!(index.doc_count(), 3);

    let hits = index.search("error handling", 10);
    assert!(!hits.is_empty());
    let best = &hits[0];
    assert_eq!(best.rel_path, "error.rs");

    let _ = fs::remove_dir_all(&root);
}
/// Three files share the `config` term; only `c.rs` mentions
/// `encryption_handler`. Searching "encryption" must rank `c.rs` first.
#[test]
fn test_bm25_idf_ranking() {
    let root = std::env::temp_dir().join("collet_bm25_idf");
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(&root).unwrap();

    let fixtures = [
        ("a.rs", "fn config_a() { config(); }"),
        ("b.rs", "fn config_b() { config(); }"),
        ("c.rs", "fn config_c() { encryption_handler(); }"),
    ];
    // Write each fixture, then remember its path in the same order.
    let files: Vec<PathBuf> = fixtures
        .iter()
        .map(|&(name, body)| {
            let path = root.join(name);
            fs::write(&path, body).unwrap();
            path
        })
        .collect();

    let mut index = Bm25Index::new();
    index.build(&files, &root);

    let hits = index.search("encryption", 10);
    assert!(!hits.is_empty());
    assert_eq!(hits[0].rel_path, "c.rs");

    let _ = fs::remove_dir_all(&root);
}
/// Searching a freshly created index with an empty query returns nothing.
#[test]
fn test_bm25_empty_query() {
    let empty_index = Bm25Index::new();
    assert!(empty_index.search("", 10).is_empty());
}
/// `relevant_files` for "authentication token" must list `auth.rs` first when
/// the index holds one auth-related file and one database-related file.
#[test]
fn test_bm25_relevant_files() {
    let root = std::env::temp_dir().join("collet_bm25_rel");
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(&root).unwrap();

    let auth_path = root.join("auth.rs");
    let db_path = root.join("db.rs");
    fs::write(&auth_path, "fn authenticate_user() { verify_token(); }").unwrap();
    fs::write(&db_path, "fn query_database() { execute_sql(); }").unwrap();

    let files: Vec<PathBuf> = vec![auth_path, db_path];
    let mut index = Bm25Index::new();
    index.build(&files, &root);

    let ranked = index.relevant_files("authentication token", 5);
    assert!(!ranked.is_empty());
    // The first tuple field is compared against the relative file path.
    assert_eq!(ranked[0].0, "auth.rs");

    let _ = fs::remove_dir_all(&root);
}
/// Neither file body contains the word "context"; the query must still find
/// the file whose path contains it.
#[test]
fn test_bm25_path_search() {
    let root = std::env::temp_dir().join("collet_bm25_path");
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src/agent")).unwrap();

    let context_file = root.join("src/agent/context.rs");
    let loop_file = root.join("src/agent/loop.rs");
    fs::write(&context_file, "fn compact() {}").unwrap();
    fs::write(&loop_file, "fn run() {}").unwrap();

    let files: Vec<PathBuf> = vec![context_file, loop_file];
    let mut index = Bm25Index::new();
    index.build(&files, &root);

    let hits = index.search("context", 5);
    assert!(!hits.is_empty());
    let top_path = &hits[0].rel_path;
    assert!(top_path.contains("context"));

    let _ = fs::remove_dir_all(&root);
}
/// Formatting a single hand-built result must mention its relative path, the
/// score text `2.72`, and the snippet body.
#[test]
fn test_format_results() {
    let hit = SearchResult {
        rel_path: String::from("src/main.rs"),
        abs_path: PathBuf::from("/tmp/src/main.rs"),
        score: 2.72,
        matching_lines: vec![10],
        snippets: vec![String::from("L10: fn main()")],
    };
    let results = vec![hit];

    let rendered = Bm25Index::format_results(&results, true);
    for needle in ["src/main.rs", "2.72", "fn main()"] {
        assert!(
            rendered.contains(needle),
            "formatted output lacks `{}`",
            needle
        );
    }
}
/// Formatting an empty result list produces the "No relevant results" notice.
#[test]
fn test_format_results_empty() {
    let rendered = Bm25Index::format_results(&[], true);
    let has_notice = rendered.contains("No relevant results");
    assert!(has_notice);
}
/// After building with one file, an `Upsert` of a second file must raise the
/// document count to two and make the new file searchable.
#[test]
fn test_incremental_add() {
    let root = std::env::temp_dir().join("collet_bm25_inc_add");
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(&root).unwrap();

    // Initial build: a single document.
    let first = root.join("a.rs");
    fs::write(&first, "fn alpha() {}").unwrap();
    let mut index = Bm25Index::new();
    index.build(&[first], &root);
    assert_eq!(index.doc_count(), 1);

    // Incremental step: the second file is created on disk, then upserted.
    let second = root.join("b.rs");
    fs::write(&second, "fn beta_search() {}").unwrap();
    index.update(&[IndexUpdate::Upsert(second)], &root);
    assert_eq!(index.doc_count(), 2);

    let hits = index.search("beta", 5);
    assert!(!hits.is_empty());
    assert_eq!(hits[0].rel_path, "b.rs");

    let _ = fs::remove_dir_all(&root);
}
/// Re-upserting a file whose content changed must drop the old terms, index
/// the new ones, and keep the document count at one.
#[test]
fn test_incremental_modify() {
    let root = std::env::temp_dir().join("collet_bm25_inc_mod");
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(&root).unwrap();

    let target = root.join("a.rs");
    fs::write(&target, "fn old_function() {}").unwrap();
    let mut index = Bm25Index::new();
    index.build(&[target.clone()], &root);
    let found_before = !index.search("old_function", 5).is_empty();
    assert!(found_before);

    // Overwrite the same file and push the change through `update`.
    fs::write(&target, "fn new_replacement() {}").unwrap();
    index.update(&[IndexUpdate::Upsert(target)], &root);

    let stale_hits = index.search("old_function", 5);
    assert!(stale_hits.is_empty());
    let fresh_hits = index.search("new_replacement", 5);
    assert!(!fresh_hits.is_empty());
    assert_eq!(index.doc_count(), 1);

    let _ = fs::remove_dir_all(&root);
}
/// A `Remove` update must delete exactly the named document: its terms stop
/// matching while the other file stays searchable.
#[test]
fn test_incremental_remove() {
    let root = std::env::temp_dir().join("collet_bm25_inc_rm");
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(&root).unwrap();

    let alpha_file = root.join("a.rs");
    let beta_file = root.join("b.rs");
    fs::write(&alpha_file, "fn alpha() {}").unwrap();
    fs::write(&beta_file, "fn beta() {}").unwrap();

    let mut index = Bm25Index::new();
    index.build(&[alpha_file.clone(), beta_file], &root);
    assert_eq!(index.doc_count(), 2);

    index.update(&[IndexUpdate::Remove(alpha_file)], &root);
    assert_eq!(index.doc_count(), 1);

    let removed_hits = index.search("alpha", 5);
    assert!(removed_hits.is_empty());
    let kept_hits = index.search("beta", 5);
    assert!(!kept_hits.is_empty());

    let _ = fs::remove_dir_all(&root);
}
/// `retain_files` must drop every indexed document whose path is not in the
/// supplied set: after retaining only `b.rs` and `c.rs`, the count falls from
/// three to two and the term unique to `a.rs` no longer matches.
#[test]
fn test_retain_files() {
    // Start from a clean scratch directory; a leftover from an earlier run is
    // removed on a best-effort basis.
    let tmp = std::env::temp_dir().join("collet_bm25_retain");
    let _ = fs::remove_dir_all(&tmp);
    fs::create_dir_all(&tmp).unwrap();

    fs::write(tmp.join("a.rs"), "fn alpha() {}").unwrap();
    fs::write(tmp.join("b.rs"), "fn beta() {}").unwrap();
    fs::write(tmp.join("c.rs"), "fn gamma() {}").unwrap();

    let files: Vec<PathBuf> = vec![tmp.join("a.rs"), tmp.join("b.rs"), tmp.join("c.rs")];
    let mut index = Bm25Index::new();
    index.build(&files, &tmp);
    assert_eq!(index.doc_count(), 3);

    // The set of files that still exist; `a.rs` is deliberately left out.
    let current: std::collections::HashSet<PathBuf> =
        [tmp.join("b.rs"), tmp.join("c.rs")].into_iter().collect();
    // The set is passed by reference (`&current`).
    index.retain_files(&current);

    assert_eq!(index.doc_count(), 2);
    assert!(index.search("alpha", 5).is_empty());

    let _ = fs::remove_dir_all(&tmp);
}
/// Tracks the `doc_freq` entry for the term `shared` while documents are
/// removed one at a time: 2 → 1 → entry absent.
#[test]
fn test_doc_freq_consistency_after_updates() {
    let root = std::env::temp_dir().join("collet_bm25_df");
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(&root).unwrap();

    let file_a = root.join("a.rs");
    let file_b = root.join("b.rs");
    fs::write(&file_a, "fn shared_token() {}").unwrap();
    fs::write(&file_b, "fn shared_token() { unique_b(); }").unwrap();

    let mut index = Bm25Index::new();
    index.build(&[file_a.clone(), file_b.clone()], &root);
    // Both documents contain the term.
    assert_eq!(index.doc_freq.get("shared").copied(), Some(2));

    index.update(&[IndexUpdate::Remove(file_a)], &root);
    assert_eq!(index.doc_freq.get("shared").copied(), Some(1));

    // With the last document gone the entry itself must disappear.
    index.update(&[IndexUpdate::Remove(file_b)], &root);
    assert_eq!(index.doc_freq.get("shared"), None);

    let _ = fs::remove_dir_all(&root);
}
/// Short conversational or command-like inputs must not trigger code search.
#[test]
fn test_needs_code_search_short() {
    for input in ["hi", "push", "ok thanks", "git push"] {
        assert!(
            !needs_code_search(input),
            "`{}` should not need code search",
            input
        );
    }
}
/// Code-related questions and requests, including a Korean one, must trigger
/// code search.
#[test]
fn test_needs_code_search_code_signals() {
    let code_queries = [
        "where is the error handling?",
        "implement a new search function",
        "fix the bug in context.rs",
        "에러 처리 어디서 하는지 찾아줘",
    ];
    for input in code_queries {
        assert!(
            needs_code_search(input),
            "`{}` should need code search",
            input
        );
    }
}
/// Inputs that start with `/` must not trigger code search, even when the
/// rest of the text mentions searching.
#[test]
fn test_needs_code_search_slash_command() {
    for command in ["/help", "/search something"] {
        assert!(
            !needs_code_search(command),
            "`{}` should not need code search",
            command
        );
    }
}
/// A file path or a snake_case identifier in the text must trigger code
/// search.
#[test]
fn test_needs_code_search_path_pattern() {
    let with_path = "look at src/agent/context.rs";
    let with_identifier = "the my_function is broken";
    assert!(needs_code_search(with_path));
    assert!(needs_code_search(with_identifier));
}
/// Explanation-style questions, in English and Korean, classify as
/// `QueryIntent::Docs`.
#[test]
fn test_classify_query_docs() {
    let doc_queries = [
        "explain the architecture of this project",
        "how does the compaction system work?",
        "아키텍처 설명해줘",
    ];
    for query in doc_queries {
        assert_eq!(classify_query(query), QueryIntent::Docs);
    }
}
/// Fix and implement requests classify as `QueryIntent::Code`.
#[test]
fn test_classify_query_code() {
    let code_queries = [
        "fix the error in context.rs",
        "implement BM25 search function",
    ];
    for query in code_queries {
        assert_eq!(classify_query(query), QueryIntent::Code);
    }
}
/// One-word acknowledgements and commands, including a Korean one, classify
/// as `QueryIntent::Skip`.
#[test]
fn test_classify_query_skip() {
    for query in ["thanks", "push", "고마워"] {
        assert_eq!(classify_query(query), QueryIntent::Skip);
    }
}
/// `search_fast` for "search handler" must rank `a.rs` first.
///
/// The test name refers to avoiding disk I/O; what is asserted here is only
/// the ranking of the returned entries.
#[test]
fn test_search_fast_no_disk_io() {
    let root = std::env::temp_dir().join("collet_bm25_fast");
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(&root).unwrap();

    let handler_file = root.join("a.rs");
    let other_file = root.join("b.rs");
    fs::write(&handler_file, "fn search_handler() {}").unwrap();
    fs::write(&other_file, "fn other_thing() {}").unwrap();

    let mut index = Bm25Index::new();
    index.build(&[handler_file, other_file], &root);

    let ranked = index.search_fast("search handler", 5);
    assert!(!ranked.is_empty());
    // The first tuple field is compared against the relative file path.
    assert_eq!(ranked[0].0, "a.rs");

    let _ = fs::remove_dir_all(&root);
}
}