use super::runtime::executable_model_roots;
use super::*;
use crate::db::{IndexDb, NewSymbol};
use crate::embedding_store::{EmbeddingChunk, ScoredChunk};
use std::sync::Mutex;
static MODEL_LOCK: Mutex<()> = Mutex::new(());
static ENV_LOCK: Mutex<()> = Mutex::new(());
macro_rules! skip_without_embedding_model {
() => {
if !super::embedding_model_assets_available() {
eprintln!("skipping embedding test: CodeSearchNet model assets unavailable");
return;
}
};
}
fn make_project_with_source() -> (tempfile::TempDir, ProjectRoot) {
let dir = tempfile::tempdir().unwrap();
let root = dir.path();
let source = "def hello():\n print('hi')\n\ndef world():\n return 42\n";
write_python_file_with_symbols(
root,
"main.py",
source,
"hash1",
&[
("hello", "def hello():", "hello"),
("world", "def world():", "world"),
],
);
let project = ProjectRoot::new_exact(root).unwrap();
(dir, project)
}
fn write_python_file_with_symbols(
root: &std::path::Path,
relative_path: &str,
source: &str,
hash: &str,
symbols: &[(&str, &str, &str)],
) {
std::fs::write(root.join(relative_path), source).unwrap();
let db_path = crate::db::index_db_path(root);
let db = IndexDb::open(&db_path).unwrap();
let file_id = db
.upsert_file(relative_path, 100, hash, source.len() as i64, Some("py"))
.unwrap();
let new_symbols: Vec<NewSymbol<'_>> = symbols
.iter()
.map(|(name, signature, name_path)| {
let start = source.find(signature).unwrap() as i64;
let end = source[start as usize..]
.find("\n\ndef ")
.map(|offset| start + offset as i64)
.unwrap_or(source.len() as i64);
let line = source[..start as usize]
.bytes()
.filter(|&b| b == b'\n')
.count() as i64
+ 1;
NewSymbol {
name,
kind: "function",
line,
column_num: 0,
start_byte: start,
end_byte: end,
signature,
name_path,
parent_id: None,
}
})
.collect();
db.insert_symbols(file_id, &new_symbols).unwrap();
}
fn replace_file_embeddings_with_sentinels(
engine: &EmbeddingEngine,
file_path: &str,
sentinels: &[(&str, f32)],
) {
let mut chunks = engine.store.embeddings_for_files(&[file_path]).unwrap();
for chunk in &mut chunks {
if let Some((_, value)) = sentinels
.iter()
.find(|(symbol_name, _)| *symbol_name == chunk.symbol_name)
{
chunk.embedding = vec![*value; chunk.embedding.len()];
}
}
engine.store.delete_by_file(&[file_path]).unwrap();
engine.store.insert(&chunks).unwrap();
}
fn write_minimal_model_assets(model_dir: &std::path::Path) {
std::fs::create_dir_all(model_dir).unwrap();
for asset in [
"model.onnx",
"tokenizer.json",
"config.json",
"special_tokens_map.json",
"tokenizer_config.json",
] {
std::fs::write(model_dir.join(asset), b"{}").unwrap();
}
}
#[test]
fn build_embedding_text_with_signature() {
let sym = crate::db::SymbolWithFile {
name: "hello".into(),
kind: "function".into(),
file_path: "main.py".into(),
line: 1,
signature: "def hello():".into(),
name_path: "hello".into(),
start_byte: 0,
end_byte: 10,
};
let text = build_embedding_text(&sym, Some("def hello(): pass"));
assert_eq!(text, "function hello in main.py: def hello():");
}
#[test]
fn build_embedding_text_without_source() {
let sym = crate::db::SymbolWithFile {
name: "MyClass".into(),
kind: "class".into(),
file_path: "app.py".into(),
line: 5,
signature: "class MyClass:".into(),
name_path: "MyClass".into(),
start_byte: 0,
end_byte: 50,
};
let text = build_embedding_text(&sym, None);
assert_eq!(text, "class MyClass (My Class) in app.py: class MyClass:");
}
#[test]
fn build_embedding_text_empty_signature() {
let sym = crate::db::SymbolWithFile {
name: "CONFIG".into(),
kind: "variable".into(),
file_path: "config.py".into(),
line: 1,
signature: String::new(),
name_path: "CONFIG".into(),
start_byte: 0,
end_byte: 0,
};
let text = build_embedding_text(&sym, None);
assert_eq!(text, "variable CONFIG in config.py");
}
#[test]
fn filters_direct_test_symbols_from_embedding_index() {
let source = "#[test]\nfn alias_case() {}\n";
let sym = crate::db::SymbolWithFile {
name: "alias_case".into(),
kind: "function".into(),
file_path: "src/lib.rs".into(),
line: 2,
signature: "fn alias_case() {}".into(),
name_path: "alias_case".into(),
start_byte: source.find("fn alias_case").unwrap() as i64,
end_byte: source.len() as i64,
};
assert!(is_test_only_symbol(&sym, Some(source)));
}
#[test]
fn filters_cfg_test_module_symbols_from_embedding_index() {
let source = "#[cfg(all(test, feature = \"semantic\"))]\nmod semantic_tests {\n fn helper_case() {}\n}\n";
let sym = crate::db::SymbolWithFile {
name: "helper_case".into(),
kind: "function".into(),
file_path: "src/lib.rs".into(),
line: 3,
signature: "fn helper_case() {}".into(),
name_path: "helper_case".into(),
start_byte: source.find("fn helper_case").unwrap() as i64,
end_byte: source.len() as i64,
};
assert!(is_test_only_symbol(&sym, Some(source)));
}
#[test]
fn extract_python_docstring() {
let source =
"def greet(name):\n \"\"\"Say hello to a person.\"\"\"\n print(f'hi {name}')\n";
let doc = extract_leading_doc(source, 0, source.len()).unwrap();
assert!(doc.contains("Say hello to a person"));
}
#[test]
fn extract_rust_doc_comment() {
let source = "fn dispatch_tool() {\n /// Route incoming tool requests.\n /// Handles all MCP methods.\n let x = 1;\n}\n";
let doc = extract_leading_doc(source, 0, source.len()).unwrap();
assert!(doc.contains("Route incoming tool requests"));
assert!(doc.contains("Handles all MCP methods"));
}
#[test]
fn extract_leading_doc_returns_none_for_no_doc() {
let source = "def f():\n return 1\n";
assert!(extract_leading_doc(source, 0, source.len()).is_none());
}
#[test]
fn extract_body_hint_finds_first_meaningful_line() {
let source = "pub fn parse_symbols(\n project: &ProjectRoot,\n) -> Vec<SymbolInfo> {\n let mut parser = tree_sitter::Parser::new();\n parser.set_language(lang);\n}\n";
let hint = extract_body_hint(source, 0, source.len());
assert!(hint.is_some());
assert!(hint.unwrap().contains("tree_sitter::Parser"));
}
#[test]
fn extract_body_hint_skips_comments() {
let source = "fn foo() {\n // setup\n let x = bar();\n}\n";
let hint = extract_body_hint(source, 0, source.len());
assert_eq!(hint.unwrap(), "let x = bar();");
}
#[test]
fn extract_body_hint_returns_none_for_empty() {
let source = "fn empty() {\n}\n";
let hint = extract_body_hint(source, 0, source.len());
assert!(hint.is_none());
}
#[test]
fn extract_body_hint_multi_line_collection_via_env_override() {
let previous_lines = std::env::var("CODELENS_EMBED_HINT_LINES").ok();
let previous_chars = std::env::var("CODELENS_EMBED_HINT_CHARS").ok();
unsafe {
std::env::set_var("CODELENS_EMBED_HINT_LINES", "3");
std::env::set_var("CODELENS_EMBED_HINT_CHARS", "200");
}
let source = "\
fn route_request() {
let kind = detect_request_kind();
let target = dispatch_table.get(&kind);
return target.handle();
}
";
let hint = extract_body_hint(source, 0, source.len()).expect("hint present");
let env_restore = || unsafe {
match &previous_lines {
Some(value) => std::env::set_var("CODELENS_EMBED_HINT_LINES", value),
None => std::env::remove_var("CODELENS_EMBED_HINT_LINES"),
}
match &previous_chars {
Some(value) => std::env::set_var("CODELENS_EMBED_HINT_CHARS", value),
None => std::env::remove_var("CODELENS_EMBED_HINT_CHARS"),
}
};
let all_three = hint.contains("detect_request_kind")
&& hint.contains("dispatch_table")
&& hint.contains("target.handle");
let has_separator = hint.contains(" · ");
env_restore();
assert!(all_three, "missing one of three body lines: {hint}");
assert!(has_separator, "missing · separator: {hint}");
}
#[test]
fn hint_line_budget_respects_env_override() {
let previous = std::env::var("CODELENS_EMBED_HINT_LINES").ok();
unsafe {
std::env::set_var("CODELENS_EMBED_HINT_LINES", "5");
}
let budget = super::hint_line_budget();
unsafe {
match previous {
Some(value) => std::env::set_var("CODELENS_EMBED_HINT_LINES", value),
None => std::env::remove_var("CODELENS_EMBED_HINT_LINES"),
}
}
assert_eq!(budget, 5);
}
#[test]
fn is_nl_shaped_accepts_multi_word_prose() {
assert!(super::is_nl_shaped("skip comments and string literals"));
assert!(super::is_nl_shaped("failed to open database"));
assert!(super::is_nl_shaped("detect client version"));
}
#[test]
fn is_nl_shaped_rejects_code_and_paths() {
assert!(!super::is_nl_shaped("crates/codelens-engine/src"));
assert!(!super::is_nl_shaped("C:\\Users\\foo"));
assert!(!super::is_nl_shaped("std::sync::Mutex"));
assert!(!super::is_nl_shaped("detect_client"));
assert!(!super::is_nl_shaped("ok"));
assert!(!super::is_nl_shaped(""));
assert!(!super::is_nl_shaped("1 2 3 4 5"));
}
#[test]
fn extract_comment_body_strips_comment_markers() {
assert_eq!(
super::extract_comment_body("/// rust doc comment"),
Some("rust doc comment".to_string())
);
assert_eq!(
super::extract_comment_body("// regular line comment"),
Some("regular line comment".to_string())
);
assert_eq!(
super::extract_comment_body("# python line comment"),
Some("python line comment".to_string())
);
assert_eq!(
super::extract_comment_body("/* inline block */"),
Some("inline block".to_string())
);
assert_eq!(
super::extract_comment_body("* continuation line"),
Some("continuation line".to_string())
);
}
#[test]
fn extract_comment_body_rejects_rust_attributes_and_shebangs() {
assert!(super::extract_comment_body("#[derive(Debug)]").is_none());
assert!(super::extract_comment_body("#[test]").is_none());
assert!(super::extract_comment_body("#!/usr/bin/env python").is_none());
}
#[test]
fn extract_nl_tokens_gated_off_by_default() {
let _env_guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner());
let previous = std::env::var("CODELENS_EMBED_HINT_INCLUDE_COMMENTS").ok();
unsafe {
std::env::remove_var("CODELENS_EMBED_HINT_INCLUDE_COMMENTS");
}
let source = "\
fn skip_things() {
// skip comments and string literals during search
let lit = \"scan for matching tokens\";
}
";
let result = extract_nl_tokens(source, 0, source.len());
unsafe {
if let Some(value) = previous {
std::env::set_var("CODELENS_EMBED_HINT_INCLUDE_COMMENTS", value);
}
}
assert!(result.is_none(), "gate leaked: {result:?}");
}
#[test]
fn auto_hint_mode_defaults_on_unless_explicit_off() {
let _env_guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner());
let previous = std::env::var("CODELENS_EMBED_HINT_AUTO").ok();
unsafe {
std::env::remove_var("CODELENS_EMBED_HINT_AUTO");
}
let default_enabled = super::auto_hint_mode_enabled();
assert!(
default_enabled,
"v1.6.0 default flip: auto hint mode should be ON when env unset"
);
unsafe {
std::env::set_var("CODELENS_EMBED_HINT_AUTO", "0");
}
let explicit_off = super::auto_hint_mode_enabled();
assert!(
!explicit_off,
"explicit CODELENS_EMBED_HINT_AUTO=0 must still disable (opt-out escape hatch)"
);
unsafe {
std::env::set_var("CODELENS_EMBED_HINT_AUTO", "1");
}
let explicit_on = super::auto_hint_mode_enabled();
assert!(
explicit_on,
"explicit CODELENS_EMBED_HINT_AUTO=1 must still enable"
);
unsafe {
match previous {
Some(v) => std::env::set_var("CODELENS_EMBED_HINT_AUTO", v),
None => std::env::remove_var("CODELENS_EMBED_HINT_AUTO"),
}
}
}
#[test]
fn language_supports_nl_stack_classifies_correctly() {
assert!(super::language_supports_nl_stack("rs"));
assert!(super::language_supports_nl_stack("rust"));
assert!(super::language_supports_nl_stack("cpp"));
assert!(super::language_supports_nl_stack("c++"));
assert!(super::language_supports_nl_stack("c"));
assert!(super::language_supports_nl_stack("go"));
assert!(super::language_supports_nl_stack("golang"));
assert!(super::language_supports_nl_stack("java"));
assert!(super::language_supports_nl_stack("kt"));
assert!(super::language_supports_nl_stack("kotlin"));
assert!(super::language_supports_nl_stack("scala"));
assert!(super::language_supports_nl_stack("cs"));
assert!(super::language_supports_nl_stack("csharp"));
assert!(super::language_supports_nl_stack("ts"));
assert!(super::language_supports_nl_stack("typescript"));
assert!(super::language_supports_nl_stack("tsx"));
assert!(super::language_supports_nl_stack("js"));
assert!(super::language_supports_nl_stack("javascript"));
assert!(super::language_supports_nl_stack("jsx"));
assert!(super::language_supports_nl_stack("Rust"));
assert!(super::language_supports_nl_stack("RUST"));
assert!(super::language_supports_nl_stack("TypeScript"));
assert!(super::language_supports_nl_stack(" rust "));
assert!(super::language_supports_nl_stack(" ts "));
assert!(!super::language_supports_nl_stack("py"));
assert!(!super::language_supports_nl_stack("python"));
assert!(!super::language_supports_nl_stack("rb"));
assert!(!super::language_supports_nl_stack("ruby"));
assert!(!super::language_supports_nl_stack("php"));
assert!(!super::language_supports_nl_stack("lua"));
assert!(!super::language_supports_nl_stack("sh"));
assert!(!super::language_supports_nl_stack("klingon"));
assert!(!super::language_supports_nl_stack(""));
}
#[test]
fn language_supports_sparse_weighting_classifies_correctly() {
assert!(super::language_supports_sparse_weighting("rs"));
assert!(super::language_supports_sparse_weighting("rust"));
assert!(super::language_supports_sparse_weighting("cpp"));
assert!(super::language_supports_sparse_weighting("go"));
assert!(super::language_supports_sparse_weighting("java"));
assert!(super::language_supports_sparse_weighting("kotlin"));
assert!(super::language_supports_sparse_weighting("csharp"));
assert!(!super::language_supports_sparse_weighting("ts"));
assert!(!super::language_supports_sparse_weighting("typescript"));
assert!(!super::language_supports_sparse_weighting("tsx"));
assert!(!super::language_supports_sparse_weighting("js"));
assert!(!super::language_supports_sparse_weighting("javascript"));
assert!(!super::language_supports_sparse_weighting("jsx"));
assert!(!super::language_supports_sparse_weighting("py"));
assert!(!super::language_supports_sparse_weighting("python"));
assert!(!super::language_supports_sparse_weighting("klingon"));
assert!(!super::language_supports_sparse_weighting(""));
}
#[test]
fn auto_hint_should_enable_requires_both_gate_and_supported_lang() {
let _env_guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner());
let prev_auto = std::env::var("CODELENS_EMBED_HINT_AUTO").ok();
let prev_lang = std::env::var("CODELENS_EMBED_HINT_AUTO_LANG").ok();
unsafe {
std::env::set_var("CODELENS_EMBED_HINT_AUTO", "0");
std::env::set_var("CODELENS_EMBED_HINT_AUTO_LANG", "rust");
}
assert!(
!super::auto_hint_should_enable(),
"gate-off (explicit =0) with lang=rust must stay disabled"
);
unsafe {
std::env::set_var("CODELENS_EMBED_HINT_AUTO", "1");
std::env::set_var("CODELENS_EMBED_HINT_AUTO_LANG", "rust");
}
assert!(
super::auto_hint_should_enable(),
"gate-on + lang=rust must enable"
);
unsafe {
std::env::set_var("CODELENS_EMBED_HINT_AUTO", "1");
std::env::set_var("CODELENS_EMBED_HINT_AUTO_LANG", "typescript");
}
assert!(
super::auto_hint_should_enable(),
"gate-on + lang=typescript must keep Phase 2b/2c enabled"
);
unsafe {
std::env::set_var("CODELENS_EMBED_HINT_AUTO", "1");
std::env::set_var("CODELENS_EMBED_HINT_AUTO_LANG", "python");
}
assert!(
!super::auto_hint_should_enable(),
"gate-on + lang=python must stay disabled"
);
unsafe {
std::env::set_var("CODELENS_EMBED_HINT_AUTO", "1");
std::env::remove_var("CODELENS_EMBED_HINT_AUTO_LANG");
}
assert!(
!super::auto_hint_should_enable(),
"gate-on + no lang tag must stay disabled"
);
unsafe {
match prev_auto {
Some(v) => std::env::set_var("CODELENS_EMBED_HINT_AUTO", v),
None => std::env::remove_var("CODELENS_EMBED_HINT_AUTO"),
}
match prev_lang {
Some(v) => std::env::set_var("CODELENS_EMBED_HINT_AUTO_LANG", v),
None => std::env::remove_var("CODELENS_EMBED_HINT_AUTO_LANG"),
}
}
}
#[test]
fn auto_sparse_should_enable_requires_both_gate_and_sparse_supported_lang() {
let _env_guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner());
let prev_auto = std::env::var("CODELENS_EMBED_HINT_AUTO").ok();
let prev_lang = std::env::var("CODELENS_EMBED_HINT_AUTO_LANG").ok();
unsafe {
std::env::set_var("CODELENS_EMBED_HINT_AUTO", "0");
std::env::set_var("CODELENS_EMBED_HINT_AUTO_LANG", "rust");
}
assert!(
!super::auto_sparse_should_enable(),
"gate-off (explicit =0) must disable sparse auto gate"
);
unsafe {
std::env::set_var("CODELENS_EMBED_HINT_AUTO", "1");
std::env::set_var("CODELENS_EMBED_HINT_AUTO_LANG", "rust");
}
assert!(
super::auto_sparse_should_enable(),
"gate-on + lang=rust must enable sparse auto gate"
);
unsafe {
std::env::set_var("CODELENS_EMBED_HINT_AUTO", "1");
std::env::set_var("CODELENS_EMBED_HINT_AUTO_LANG", "typescript");
}
assert!(
!super::auto_sparse_should_enable(),
"gate-on + lang=typescript must keep sparse auto gate disabled"
);
unsafe {
std::env::set_var("CODELENS_EMBED_HINT_AUTO", "1");
std::env::set_var("CODELENS_EMBED_HINT_AUTO_LANG", "python");
}
assert!(
!super::auto_sparse_should_enable(),
"gate-on + lang=python must keep sparse auto gate disabled"
);
unsafe {
std::env::set_var("CODELENS_EMBED_HINT_AUTO", "1");
std::env::remove_var("CODELENS_EMBED_HINT_AUTO_LANG");
}
assert!(
!super::auto_sparse_should_enable(),
"gate-on + no lang tag must keep sparse auto gate disabled"
);
unsafe {
match prev_auto {
Some(v) => std::env::set_var("CODELENS_EMBED_HINT_AUTO", v),
None => std::env::remove_var("CODELENS_EMBED_HINT_AUTO"),
}
match prev_lang {
Some(v) => std::env::set_var("CODELENS_EMBED_HINT_AUTO_LANG", v),
None => std::env::remove_var("CODELENS_EMBED_HINT_AUTO_LANG"),
}
}
}
#[test]
fn nl_tokens_enabled_explicit_env_wins_over_auto() {
let _env_guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner());
let prev_explicit = std::env::var("CODELENS_EMBED_HINT_INCLUDE_COMMENTS").ok();
let prev_auto = std::env::var("CODELENS_EMBED_HINT_AUTO").ok();
let prev_lang = std::env::var("CODELENS_EMBED_HINT_AUTO_LANG").ok();
unsafe {
std::env::set_var("CODELENS_EMBED_HINT_INCLUDE_COMMENTS", "1");
std::env::set_var("CODELENS_EMBED_HINT_AUTO", "1");
std::env::set_var("CODELENS_EMBED_HINT_AUTO_LANG", "python");
}
assert!(
super::nl_tokens_enabled(),
"explicit=1 must win over auto+python=off"
);
unsafe {
std::env::set_var("CODELENS_EMBED_HINT_INCLUDE_COMMENTS", "0");
std::env::set_var("CODELENS_EMBED_HINT_AUTO", "1");
std::env::set_var("CODELENS_EMBED_HINT_AUTO_LANG", "rust");
}
assert!(
!super::nl_tokens_enabled(),
"explicit=0 must win over auto+rust=on"
);
unsafe {
std::env::remove_var("CODELENS_EMBED_HINT_INCLUDE_COMMENTS");
std::env::set_var("CODELENS_EMBED_HINT_AUTO", "1");
std::env::set_var("CODELENS_EMBED_HINT_AUTO_LANG", "rust");
}
assert!(
super::nl_tokens_enabled(),
"no explicit + auto+rust must enable"
);
unsafe {
std::env::remove_var("CODELENS_EMBED_HINT_INCLUDE_COMMENTS");
std::env::set_var("CODELENS_EMBED_HINT_AUTO", "1");
std::env::set_var("CODELENS_EMBED_HINT_AUTO_LANG", "python");
}
assert!(
!super::nl_tokens_enabled(),
"no explicit + auto+python must stay disabled"
);
unsafe {
match prev_explicit {
Some(v) => std::env::set_var("CODELENS_EMBED_HINT_INCLUDE_COMMENTS", v),
None => std::env::remove_var("CODELENS_EMBED_HINT_INCLUDE_COMMENTS"),
}
match prev_auto {
Some(v) => std::env::set_var("CODELENS_EMBED_HINT_AUTO", v),
None => std::env::remove_var("CODELENS_EMBED_HINT_AUTO"),
}
match prev_lang {
Some(v) => std::env::set_var("CODELENS_EMBED_HINT_AUTO_LANG", v),
None => std::env::remove_var("CODELENS_EMBED_HINT_AUTO_LANG"),
}
}
}
#[test]
fn strict_comments_gated_off_by_default() {
let _env_guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner());
let previous = std::env::var("CODELENS_EMBED_HINT_STRICT_COMMENTS").ok();
unsafe {
std::env::remove_var("CODELENS_EMBED_HINT_STRICT_COMMENTS");
}
let enabled = super::strict_comments_enabled();
unsafe {
if let Some(value) = previous {
std::env::set_var("CODELENS_EMBED_HINT_STRICT_COMMENTS", value);
}
}
assert!(!enabled, "strict comments gate leaked");
}
#[test]
fn looks_like_meta_annotation_detects_rejected_prefixes() {
assert!(super::looks_like_meta_annotation("TODO: fix later"));
assert!(super::looks_like_meta_annotation("todo handle edge case"));
assert!(super::looks_like_meta_annotation("FIXME this is broken"));
assert!(super::looks_like_meta_annotation(
"HACK: workaround for bug"
));
assert!(super::looks_like_meta_annotation("XXX not implemented yet"));
assert!(super::looks_like_meta_annotation(
"BUG in the upstream crate"
));
assert!(super::looks_like_meta_annotation("REVIEW before merging"));
assert!(super::looks_like_meta_annotation(
"REFACTOR this block later"
));
assert!(super::looks_like_meta_annotation("TEMP: remove before v2"));
assert!(super::looks_like_meta_annotation(
"DEPRECATED use new_api instead"
));
assert!(super::looks_like_meta_annotation(
" TODO: with leading ws"
));
}
#[test]
fn looks_like_meta_annotation_preserves_behaviour_prefixes() {
assert!(!super::looks_like_meta_annotation(
"NOTE: this branch handles empty input"
));
assert!(!super::looks_like_meta_annotation(
"WARN: overflow is possible"
));
assert!(!super::looks_like_meta_annotation(
"SAFETY: caller must hold the lock"
));
assert!(!super::looks_like_meta_annotation(
"PANIC: unreachable by construction"
));
assert!(!super::looks_like_meta_annotation(
"parse json body from request"
));
assert!(!super::looks_like_meta_annotation(
"walk directory respecting gitignore"
));
assert!(!super::looks_like_meta_annotation(
"compute cosine similarity between vectors"
));
assert!(!super::looks_like_meta_annotation(""));
assert!(!super::looks_like_meta_annotation(" "));
assert!(!super::looks_like_meta_annotation("123 numeric prefix"));
}
#[test]
fn strict_comments_filters_meta_annotations_during_extraction() {
let _env_guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner());
let previous = std::env::var("CODELENS_EMBED_HINT_STRICT_COMMENTS").ok();
unsafe {
std::env::set_var("CODELENS_EMBED_HINT_STRICT_COMMENTS", "1");
}
let source = "\
fn handle_request() {
// TODO: handle the error path properly
// parse json body from the incoming request
// FIXME: this can panic on empty input
// walk directory respecting the gitignore rules
let _x = 1;
}
";
let result = super::extract_nl_tokens_inner(source, 0, source.len());
unsafe {
match previous {
Some(value) => std::env::set_var("CODELENS_EMBED_HINT_STRICT_COMMENTS", value),
None => std::env::remove_var("CODELENS_EMBED_HINT_STRICT_COMMENTS"),
}
}
let hint = result.expect("behaviour comments must survive");
assert!(
hint.contains("parse json body"),
"behaviour comment dropped: {hint}"
);
assert!(!hint.contains("TODO"), "TODO annotation leaked: {hint}");
assert!(!hint.contains("FIXME"), "FIXME annotation leaked: {hint}");
}
#[test]
fn strict_comments_is_orthogonal_to_strict_literals() {
let _env_guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner());
let prev_c = std::env::var("CODELENS_EMBED_HINT_STRICT_COMMENTS").ok();
let prev_l = std::env::var("CODELENS_EMBED_HINT_STRICT_LITERALS").ok();
unsafe {
std::env::set_var("CODELENS_EMBED_HINT_STRICT_COMMENTS", "1");
std::env::remove_var("CODELENS_EMBED_HINT_STRICT_LITERALS");
}
let source = "\
fn handle() {
// handles real behaviour
let fmt = \"format error string\";
}
";
let result = super::extract_nl_tokens_inner(source, 0, source.len());
unsafe {
match prev_c {
Some(v) => std::env::set_var("CODELENS_EMBED_HINT_STRICT_COMMENTS", v),
None => std::env::remove_var("CODELENS_EMBED_HINT_STRICT_COMMENTS"),
}
match prev_l {
Some(v) => std::env::set_var("CODELENS_EMBED_HINT_STRICT_LITERALS", v),
None => std::env::remove_var("CODELENS_EMBED_HINT_STRICT_LITERALS"),
}
}
let hint = result.expect("tokens must exist");
assert!(hint.contains("handles real"), "comment dropped: {hint}");
assert!(
hint.contains("format error string"),
"literal dropped: {hint}"
);
}
#[test]
fn strict_literal_filter_gated_off_by_default() {
let _env_guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner());
let previous = std::env::var("CODELENS_EMBED_HINT_STRICT_LITERALS").ok();
unsafe {
std::env::remove_var("CODELENS_EMBED_HINT_STRICT_LITERALS");
}
let enabled = super::strict_literal_filter_enabled();
unsafe {
if let Some(value) = previous {
std::env::set_var("CODELENS_EMBED_HINT_STRICT_LITERALS", value);
}
}
assert!(!enabled, "strict literal filter gate leaked");
}
#[test]
fn contains_format_specifier_detects_c_and_python_style() {
assert!(super::contains_format_specifier("Invalid URL %s"));
assert!(super::contains_format_specifier("got %d matches"));
assert!(super::contains_format_specifier("value=%r"));
assert!(super::contains_format_specifier("size=%f"));
assert!(super::contains_format_specifier("sending request to {url}"));
assert!(super::contains_format_specifier("got {0} items"));
assert!(super::contains_format_specifier("{:?}"));
assert!(super::contains_format_specifier("value: {x:.2f}"));
assert!(super::contains_format_specifier("{}"));
assert!(!super::contains_format_specifier(
"skip comments and string literals"
));
assert!(!super::contains_format_specifier("failed to open database"));
assert!(!super::contains_format_specifier("{name: foo, id: 1}"));
}
#[test]
fn looks_like_error_or_log_prefix_rejects_common_patterns() {
assert!(super::looks_like_error_or_log_prefix("Invalid URL format"));
assert!(super::looks_like_error_or_log_prefix(
"Cannot decode response"
));
assert!(super::looks_like_error_or_log_prefix("could not open file"));
assert!(super::looks_like_error_or_log_prefix(
"Failed to send request"
));
assert!(super::looks_like_error_or_log_prefix(
"Expected int, got str"
));
assert!(super::looks_like_error_or_log_prefix(
"sending request to server"
));
assert!(super::looks_like_error_or_log_prefix(
"received response headers"
));
assert!(super::looks_like_error_or_log_prefix(
"starting worker pool"
));
assert!(!super::looks_like_error_or_log_prefix(
"parse json body from request"
));
assert!(!super::looks_like_error_or_log_prefix(
"compute cosine similarity between vectors"
));
assert!(!super::looks_like_error_or_log_prefix(
"walk directory tree respecting gitignore"
));
}
#[test]
fn strict_mode_rejects_format_and_error_literals_during_extraction() {
let _env_guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner());
let previous = std::env::var("CODELENS_EMBED_HINT_STRICT_LITERALS").ok();
unsafe {
std::env::set_var("CODELENS_EMBED_HINT_STRICT_LITERALS", "1");
}
let source = "\
fn handle_request() {
let err = \"Invalid URL %s\";
let log = \"sending request to the upstream server\";
let fmt = \"received {count} items in batch\";
let real = \"parse json body from the incoming request\";
}
";
let result = super::extract_nl_tokens_inner(source, 0, source.len());
unsafe {
match previous {
Some(value) => std::env::set_var("CODELENS_EMBED_HINT_STRICT_LITERALS", value),
None => std::env::remove_var("CODELENS_EMBED_HINT_STRICT_LITERALS"),
}
}
let hint = result.expect("some token should survive");
assert!(
hint.contains("parse json body"),
"real literal was filtered out: {hint}"
);
assert!(
!hint.contains("Invalid URL"),
"format-specifier literal leaked: {hint}"
);
assert!(
!hint.contains("sending request"),
"log-prefix literal leaked: {hint}"
);
assert!(
!hint.contains("received {count}"),
"python fstring literal leaked: {hint}"
);
}
#[test]
fn strict_mode_leaves_comments_untouched() {
let _env_guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner());
let previous = std::env::var("CODELENS_EMBED_HINT_STRICT_LITERALS").ok();
unsafe {
std::env::set_var("CODELENS_EMBED_HINT_STRICT_LITERALS", "1");
}
let source = "\
fn do_work() {
// Invalid inputs are rejected by this guard clause.
// sending requests in parallel across worker threads.
let _lit = \"format spec %s\";
}
";
let result = super::extract_nl_tokens_inner(source, 0, source.len());
unsafe {
match previous {
Some(value) => std::env::set_var("CODELENS_EMBED_HINT_STRICT_LITERALS", value),
None => std::env::remove_var("CODELENS_EMBED_HINT_STRICT_LITERALS"),
}
}
let hint = result.expect("comments should survive strict mode");
assert!(
hint.contains("Invalid inputs") || hint.contains("rejected by this guard"),
"strict mode swallowed a comment: {hint}"
);
assert!(
!hint.contains("format spec"),
"format-specifier literal leaked under strict mode: {hint}"
);
}
#[test]
fn should_reject_literal_strict_composes_format_and_prefix() {
assert!(super::should_reject_literal_strict("Invalid URL %s"));
assert!(super::should_reject_literal_strict(
"sending request to server"
));
assert!(super::should_reject_literal_strict("value: {x:.2f}"));
assert!(!super::should_reject_literal_strict(
"parse json body from the incoming request"
));
assert!(!super::should_reject_literal_strict(
"compute cosine similarity between vectors"
));
}
#[test]
fn is_static_method_ident_accepts_pascal_and_rejects_snake() {
assert!(super::is_static_method_ident("HashMap"));
assert!(super::is_static_method_ident("Parser"));
assert!(super::is_static_method_ident("A"));
assert!(!super::is_static_method_ident("std"));
assert!(!super::is_static_method_ident("fs"));
assert!(!super::is_static_method_ident("_private"));
assert!(!super::is_static_method_ident(""));
}
#[test]
fn extract_api_calls_gated_off_by_default() {
let _env_guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner());
let previous = std::env::var("CODELENS_EMBED_HINT_INCLUDE_API_CALLS").ok();
unsafe {
std::env::remove_var("CODELENS_EMBED_HINT_INCLUDE_API_CALLS");
}
let source = "\
fn make_parser() {
let p = Parser::new();
let _ = HashMap::with_capacity(8);
}
";
let result = extract_api_calls(source, 0, source.len());
unsafe {
if let Some(value) = previous {
std::env::set_var("CODELENS_EMBED_HINT_INCLUDE_API_CALLS", value);
}
}
assert!(result.is_none(), "gate leaked: {result:?}");
}
#[test]
fn extract_api_calls_captures_type_method_patterns() {
let source = "\
fn open_db() {
let p = Parser::new();
let map = HashMap::with_capacity(16);
let _ = tree_sitter::Parser::new();
}
";
let hint = super::extract_api_calls_inner(source, 0, source.len())
.expect("api calls should be produced");
assert!(hint.contains("Parser::new"), "missing Parser::new: {hint}");
assert!(
hint.contains("HashMap::with_capacity"),
"missing HashMap::with_capacity: {hint}"
);
}
#[test]
fn extract_api_calls_rejects_module_prefixed_free_functions() {
let source = "\
fn read_config() {
let _ = std::fs::read_to_string(\"foo\");
let _ = crate::util::parse();
}
";
let hint = super::extract_api_calls_inner(source, 0, source.len());
if let Some(hint) = hint {
assert!(!hint.contains("std::fs"), "lowercase module leaked: {hint}");
assert!(
!hint.contains("fs::read_to_string"),
"module-prefixed free function leaked: {hint}"
);
assert!(!hint.contains("crate::util"), "crate path leaked: {hint}");
}
}
#[test]
fn extract_api_calls_deduplicates_repeated_calls() {
let source = "\
fn hot_loop() {
for _ in 0..10 {
let _ = Parser::new();
let _ = Parser::new();
}
let _ = Parser::new();
}
";
let hint = super::extract_api_calls_inner(source, 0, source.len())
.expect("api calls should be produced");
let first = hint.find("Parser::new").expect("hit");
let rest = &hint[first + "Parser::new".len()..];
assert!(
!rest.contains("Parser::new"),
"duplicate not deduplicated: {hint}"
);
}
#[test]
fn extract_api_calls_returns_none_when_body_has_no_type_calls() {
let source = "\
fn plain() {
let x = 1;
let y = x + 2;
}
";
assert!(super::extract_api_calls_inner(source, 0, source.len()).is_none());
}
#[test]
fn extract_nl_tokens_collects_comments_and_string_literals() {
let source = "\
fn search_for_matches() {
// skip comments and string literals during search
let error = \"failed to open database\";
let single = \"tok\";
let path = \"src/foo/bar\";
let keyword = match kind {
Kind::Ident => \"detect client version\",
_ => \"\",
};
}
";
let hint = super::extract_nl_tokens_inner(source, 0, source.len())
.expect("nl tokens should be produced");
let has_first_nl_signal = hint.contains("skip comments")
|| hint.contains("failed to open")
|| hint.contains("detect client");
assert!(has_first_nl_signal, "no NL signal produced: {hint}");
assert!(!hint.contains(" tok "), "short literal leaked: {hint}");
assert!(!hint.contains("src/foo/bar"), "path literal leaked: {hint}");
}
#[test]
fn hint_char_budget_respects_env_override() {
let previous = std::env::var("CODELENS_EMBED_HINT_CHARS").ok();
unsafe {
std::env::set_var("CODELENS_EMBED_HINT_CHARS", "120");
}
let budget = super::hint_char_budget();
unsafe {
match previous {
Some(value) => std::env::set_var("CODELENS_EMBED_HINT_CHARS", value),
None => std::env::remove_var("CODELENS_EMBED_HINT_CHARS"),
}
}
assert_eq!(budget, 120);
}
#[test]
fn embedding_to_bytes_roundtrip() {
let floats = vec![1.0f32, -0.5, 0.0, 3.25];
let bytes = embedding_to_bytes(&floats);
assert_eq!(bytes.len(), 4 * 4);
let recovered: Vec<f32> = bytes
.chunks_exact(4)
.map(|chunk| f32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]))
.collect();
assert_eq!(floats, recovered);
}
#[test]
fn duplicate_pair_key_is_order_independent() {
let a = duplicate_pair_key("a.py", "foo", "b.py", "bar");
let b = duplicate_pair_key("b.py", "bar", "a.py", "foo");
assert_eq!(a, b);
}
#[test]
fn text_embedding_cache_updates_recency() {
let mut cache = TextEmbeddingCache::new(2);
cache.insert("a".into(), vec![1.0]);
cache.insert("b".into(), vec![2.0]);
assert_eq!(cache.get("a"), Some(vec![1.0]));
cache.insert("c".into(), vec![3.0]);
assert_eq!(cache.get("a"), Some(vec![1.0]));
assert_eq!(cache.get("b"), None);
assert_eq!(cache.get("c"), Some(vec![3.0]));
}
#[test]
fn text_embedding_cache_can_be_disabled() {
let mut cache = TextEmbeddingCache::new(0);
cache.insert("a".into(), vec![1.0]);
assert_eq!(cache.get("a"), None);
}
#[test]
fn engine_new_and_index() {
let _lock = MODEL_LOCK.lock().unwrap();
skip_without_embedding_model!();
let (_dir, project) = make_project_with_source();
let engine = EmbeddingEngine::new(&project).expect("engine should load");
assert!(!engine.is_indexed());
let count = engine.index_from_project(&project).unwrap();
assert_eq!(count, 2, "should index 2 symbols");
assert!(engine.is_indexed());
}
#[test]
fn engine_search_returns_results() {
let _lock = MODEL_LOCK.lock().unwrap();
skip_without_embedding_model!();
let (_dir, project) = make_project_with_source();
let engine = EmbeddingEngine::new(&project).unwrap();
engine.index_from_project(&project).unwrap();
let results = engine.search("hello function", 10).unwrap();
assert!(!results.is_empty(), "search should return results");
for r in &results {
assert!(
r.score >= -1.0 && r.score <= 1.0,
"score should be in [-1,1]: {}",
r.score
);
}
}
#[test]
fn engine_incremental_index() {
let _lock = MODEL_LOCK.lock().unwrap();
skip_without_embedding_model!();
let (_dir, project) = make_project_with_source();
let engine = EmbeddingEngine::new(&project).unwrap();
engine.index_from_project(&project).unwrap();
assert_eq!(engine.store.count().unwrap(), 2);
let count = engine.index_changed_files(&project, &["main.py"]).unwrap();
assert_eq!(count, 2);
assert_eq!(engine.store.count().unwrap(), 2);
}
#[test]
fn engine_reindex_preserves_symbol_count() {
let _lock = MODEL_LOCK.lock().unwrap();
skip_without_embedding_model!();
let (_dir, project) = make_project_with_source();
let engine = EmbeddingEngine::new(&project).unwrap();
engine.index_from_project(&project).unwrap();
assert_eq!(engine.store.count().unwrap(), 2);
let count = engine.index_from_project(&project).unwrap();
assert_eq!(count, 2);
assert_eq!(engine.store.count().unwrap(), 2);
}
#[test]
fn full_reindex_reuses_unchanged_embeddings() {
let _lock = MODEL_LOCK.lock().unwrap();
skip_without_embedding_model!();
let (_dir, project) = make_project_with_source();
let engine = EmbeddingEngine::new(&project).unwrap();
engine.index_from_project(&project).unwrap();
replace_file_embeddings_with_sentinels(&engine, "main.py", &[("hello", 11.0), ("world", 22.0)]);
let count = engine.index_from_project(&project).unwrap();
assert_eq!(count, 2);
let hello = engine
.store
.get_embedding("main.py", "hello")
.unwrap()
.expect("hello should exist");
let world = engine
.store
.get_embedding("main.py", "world")
.unwrap()
.expect("world should exist");
assert!(hello.embedding.iter().all(|value| *value == 11.0));
assert!(world.embedding.iter().all(|value| *value == 22.0));
}
#[test]
fn full_reindex_reuses_unchanged_sibling_after_edit() {
let _lock = MODEL_LOCK.lock().unwrap();
skip_without_embedding_model!();
let (dir, project) = make_project_with_source();
let engine = EmbeddingEngine::new(&project).unwrap();
engine.index_from_project(&project).unwrap();
replace_file_embeddings_with_sentinels(&engine, "main.py", &[("hello", 11.0), ("world", 22.0)]);
let updated_source =
"def hello():\n print('hi')\n\ndef world(name):\n return name.upper()\n";
write_python_file_with_symbols(
dir.path(),
"main.py",
updated_source,
"hash2",
&[
("hello", "def hello():", "hello"),
("world", "def world(name):", "world"),
],
);
let count = engine.index_from_project(&project).unwrap();
assert_eq!(count, 2);
let hello = engine
.store
.get_embedding("main.py", "hello")
.unwrap()
.expect("hello should exist");
let world = engine
.store
.get_embedding("main.py", "world")
.unwrap()
.expect("world should exist");
assert!(hello.embedding.iter().all(|value| *value == 11.0));
assert!(world.embedding.iter().any(|value| *value != 22.0));
assert_eq!(engine.store.count().unwrap(), 2);
}
#[test]
fn full_reindex_removes_deleted_files() {
let _lock = MODEL_LOCK.lock().unwrap();
skip_without_embedding_model!();
let (dir, project) = make_project_with_source();
write_python_file_with_symbols(
dir.path(),
"extra.py",
"def bonus():\n return 7\n",
"hash-extra",
&[("bonus", "def bonus():", "bonus")],
);
let engine = EmbeddingEngine::new(&project).unwrap();
engine.index_from_project(&project).unwrap();
assert_eq!(engine.store.count().unwrap(), 3);
std::fs::remove_file(dir.path().join("extra.py")).unwrap();
let db_path = crate::db::index_db_path(dir.path());
let db = IndexDb::open(&db_path).unwrap();
db.delete_file("extra.py").unwrap();
let count = engine.index_from_project(&project).unwrap();
assert_eq!(count, 2);
assert_eq!(engine.store.count().unwrap(), 2);
assert!(
engine
.store
.embeddings_for_files(&["extra.py"])
.unwrap()
.is_empty()
);
}
#[test]
fn ensure_index_fresh_updates_changed_symbol_rows() {
let _lock = MODEL_LOCK.lock().unwrap();
skip_without_embedding_model!();
let (dir, project) = make_project_with_source();
let engine = EmbeddingEngine::new(&project).unwrap();
engine.index_from_project(&project).unwrap();
let stale_hello = engine
.store
.get_embedding("main.py", "hello")
.unwrap()
.expect("hello should exist");
assert_eq!(stale_hello.signature, "def hello():");
let changed_source = "def hello(name):\n print(name)\n\ndef world():\n return 42\n";
write_python_file_with_symbols(
dir.path(),
"main.py",
changed_source,
"hash2",
&[
("hello", "def hello(name):", "hello"),
("world", "def world():", "world"),
],
);
let still_stale = engine
.store
.get_embedding("main.py", "hello")
.unwrap()
.expect("hello should still exist before freshness reconciliation");
assert_eq!(still_stale.signature, "def hello():");
let report = engine.ensure_index_fresh_for_project(&project).unwrap();
assert_eq!(report.checked_files, 1);
assert_eq!(report.refreshed_files, 1);
assert_eq!(report.removed_files, 0);
assert_eq!(report.indexed_symbols, 2);
let fresh_hello = engine
.store
.get_embedding("main.py", "hello")
.unwrap()
.expect("hello should exist after freshness reconciliation");
assert_eq!(fresh_hello.signature, "def hello(name):");
assert_ne!(fresh_hello.text, stale_hello.text);
}
#[test]
fn engine_model_change_recreates_db() {
let _lock = MODEL_LOCK.lock().unwrap();
skip_without_embedding_model!();
let (_dir, project) = make_project_with_source();
let engine1 = EmbeddingEngine::new(&project).unwrap();
engine1.index_from_project(&project).unwrap();
assert_eq!(engine1.store.count().unwrap(), 2);
drop(engine1);
let engine2 = EmbeddingEngine::new(&project).unwrap();
assert!(engine2.store.count().unwrap() >= 2);
}
#[test]
fn inspect_existing_index_returns_model_and_count() {
let _lock = MODEL_LOCK.lock().unwrap();
skip_without_embedding_model!();
let (_dir, project) = make_project_with_source();
let engine = EmbeddingEngine::new(&project).unwrap();
engine.index_from_project(&project).unwrap();
let info = EmbeddingEngine::inspect_existing_index(&project)
.unwrap()
.expect("index info should exist");
assert_eq!(info.model_name, engine.model_name());
assert_eq!(info.indexed_symbols, 2);
}
#[test]
fn inspect_existing_index_recovers_from_corrupt_db() {
let (_dir, project) = make_project_with_source();
let index_dir = project.as_path().join(".codelens/index");
let db_path = index_dir.join("embeddings.db");
let wal_path = index_dir.join("embeddings.db-wal");
let shm_path = index_dir.join("embeddings.db-shm");
std::fs::write(&db_path, b"not a sqlite database").unwrap();
std::fs::write(&wal_path, b"bad wal").unwrap();
std::fs::write(&shm_path, b"bad shm").unwrap();
let info = EmbeddingEngine::inspect_existing_index(&project).unwrap();
assert!(info.is_none());
assert!(db_path.is_file());
let backup_names: Vec<String> = std::fs::read_dir(&index_dir)
.unwrap()
.map(|entry| entry.unwrap().file_name().to_string_lossy().into_owned())
.filter(|name| name.contains(".corrupt-"))
.collect();
assert!(
backup_names
.iter()
.any(|name| name.starts_with("embeddings.db.corrupt-")),
"expected quarantined embedding db, found {backup_names:?}"
);
}
#[test]
fn store_can_fetch_single_embedding_without_loading_all() {
let _lock = MODEL_LOCK.lock().unwrap();
skip_without_embedding_model!();
let (_dir, project) = make_project_with_source();
let engine = EmbeddingEngine::new(&project).unwrap();
engine.index_from_project(&project).unwrap();
let chunk = engine
.store
.get_embedding("main.py", "hello")
.unwrap()
.expect("embedding should exist");
assert_eq!(chunk.file_path, "main.py");
assert_eq!(chunk.symbol_name, "hello");
assert!(!chunk.embedding.is_empty());
}
#[test]
fn find_similar_code_uses_index_and_excludes_target_symbol() {
let _lock = MODEL_LOCK.lock().unwrap();
skip_without_embedding_model!();
let (_dir, project) = make_project_with_source();
let engine = EmbeddingEngine::new(&project).unwrap();
engine.index_from_project(&project).unwrap();
let matches = engine.find_similar_code("main.py", "hello", 5).unwrap();
assert!(!matches.is_empty());
assert!(
matches
.iter()
.all(|m| !(m.file_path == "main.py" && m.symbol_name == "hello"))
);
}
#[test]
fn delete_by_file_removes_rows_in_one_batch() {
let _lock = MODEL_LOCK.lock().unwrap();
skip_without_embedding_model!();
let (_dir, project) = make_project_with_source();
let engine = EmbeddingEngine::new(&project).unwrap();
engine.index_from_project(&project).unwrap();
let deleted = engine.store.delete_by_file(&["main.py"]).unwrap();
assert_eq!(deleted, 2);
assert_eq!(engine.store.count().unwrap(), 0);
}
#[test]
fn store_streams_embeddings_grouped_by_file() {
let _lock = MODEL_LOCK.lock().unwrap();
skip_without_embedding_model!();
let (_dir, project) = make_project_with_source();
let engine = EmbeddingEngine::new(&project).unwrap();
engine.index_from_project(&project).unwrap();
let mut groups = Vec::new();
engine
.store
.for_each_file_embeddings(&mut |file_path, chunks| {
groups.push((file_path, chunks.len()));
Ok(())
})
.unwrap();
assert_eq!(groups, vec![("main.py".to_string(), 2)]);
}
#[test]
fn store_fetches_embeddings_for_specific_files() {
let _lock = MODEL_LOCK.lock().unwrap();
skip_without_embedding_model!();
let (_dir, project) = make_project_with_source();
let engine = EmbeddingEngine::new(&project).unwrap();
engine.index_from_project(&project).unwrap();
let chunks = engine.store.embeddings_for_files(&["main.py"]).unwrap();
assert_eq!(chunks.len(), 2);
assert!(chunks.iter().all(|chunk| chunk.file_path == "main.py"));
}
#[test]
fn store_scoped_search_filters_with_vec_metadata() {
let dir = tempfile::tempdir().unwrap();
let store = super::vec_store::SqliteVecStore::new(
dir.path().join("embeddings.db").as_path(),
2,
"test",
)
.unwrap();
let chunks = vec![
EmbeddingChunk {
file_path: "outside.py".to_owned(),
symbol_name: "outside".to_owned(),
kind: "function".to_owned(),
line: 1,
signature: "def outside():".to_owned(),
name_path: "outside".to_owned(),
text: "outside".to_owned(),
embedding: vec![1.0, 0.0],
doc_embedding: None,
},
EmbeddingChunk {
file_path: "crates/scoped.py".to_owned(),
symbol_name: "scoped".to_owned(),
kind: "function".to_owned(),
line: 1,
signature: "def scoped():".to_owned(),
name_path: "scoped".to_owned(),
text: "scoped".to_owned(),
embedding: vec![1.0, 0.0],
doc_embedding: None,
},
EmbeddingChunk {
file_path: "crates_nested/not_scoped.py".to_owned(),
symbol_name: "not_scoped".to_owned(),
kind: "function".to_owned(),
line: 1,
signature: "def not_scoped():".to_owned(),
name_path: "not_scoped".to_owned(),
text: "not_scoped".to_owned(),
embedding: vec![1.0, 0.0],
doc_embedding: None,
},
];
store.insert(&chunks).unwrap();
let scoped = store
.search_scoped(&[1.0, 0.0], 10, Some("crates"))
.unwrap();
assert_eq!(scoped.len(), 1);
assert_eq!(scoped[0].file_path, "crates/scoped.py");
assert_eq!(scoped[0].symbol_name, "scoped");
}
#[test]
fn store_fetches_embeddings_for_scored_chunks() {
let _lock = MODEL_LOCK.lock().unwrap();
skip_without_embedding_model!();
let (_dir, project) = make_project_with_source();
let engine = EmbeddingEngine::new(&project).unwrap();
engine.index_from_project(&project).unwrap();
let scored = engine.search_scored("hello world function", 2).unwrap();
let chunks = engine.store.embeddings_for_scored_chunks(&scored).unwrap();
assert_eq!(chunks.len(), scored.len());
assert!(scored.iter().all(|candidate| chunks.iter().any(|chunk| {
chunk.file_path == candidate.file_path
&& chunk.symbol_name == candidate.symbol_name
&& chunk.line == candidate.line
&& chunk.signature == candidate.signature
&& chunk.name_path == candidate.name_path
})));
}
#[test]
fn store_fetches_many_scored_chunks_without_expression_depth_overflow() {
let dir = tempfile::tempdir().unwrap();
let store = super::vec_store::SqliteVecStore::new(
dir.path().join("embeddings.db").as_path(),
2,
"test",
)
.unwrap();
let chunks: Vec<_> = (0..1_100)
.map(|index| EmbeddingChunk {
file_path: format!("src/file_{index}.rs"),
symbol_name: format!("symbol_{index}"),
kind: "function".to_owned(),
line: index + 1,
signature: format!("fn symbol_{index}()"),
name_path: format!("symbol_{index}"),
text: format!("fn symbol_{index}() {{}}"),
embedding: vec![index as f32, 1.0],
doc_embedding: None,
})
.collect();
store.insert(&chunks).unwrap();
let scored: Vec<_> = chunks
.iter()
.map(|chunk| ScoredChunk {
file_path: chunk.file_path.clone(),
symbol_name: chunk.symbol_name.clone(),
kind: chunk.kind.clone(),
line: chunk.line,
signature: chunk.signature.clone(),
name_path: chunk.name_path.clone(),
score: 1.0,
})
.collect();
let resolved = store.embeddings_for_scored_chunks(&scored).unwrap();
assert_eq!(resolved.len(), scored.len());
}
#[test]
fn store_batches_embedding_anchors_by_scope() {
let dir = tempfile::tempdir().unwrap();
let store = super::vec_store::SqliteVecStore::new(
dir.path().join("embeddings.db").as_path(),
2,
"test",
)
.unwrap();
let chunks = vec![
EmbeddingChunk {
file_path: ".github/workflows/build.rs".to_owned(),
symbol_name: "outside".to_owned(),
kind: "function".to_owned(),
line: 1,
signature: "fn outside()".to_owned(),
name_path: "outside".to_owned(),
text: "fn outside() {}".to_owned(),
embedding: vec![1.0, 0.0],
doc_embedding: None,
},
EmbeddingChunk {
file_path: "crates/codelens-mcp/src/tools/session/a.rs".to_owned(),
symbol_name: "inside_a".to_owned(),
kind: "function".to_owned(),
line: 2,
signature: "fn inside_a()".to_owned(),
name_path: "inside_a".to_owned(),
text: "fn inside_a() {}".to_owned(),
embedding: vec![0.0, 1.0],
doc_embedding: None,
},
EmbeddingChunk {
file_path: "crates/codelens-mcp/src/tools/session/nested/b.rs".to_owned(),
symbol_name: "inside_b".to_owned(),
kind: "function".to_owned(),
line: 3,
signature: "fn inside_b()".to_owned(),
name_path: "inside_b".to_owned(),
text: "fn inside_b() {}".to_owned(),
embedding: vec![0.0, 2.0],
doc_embedding: None,
},
];
store.insert(&chunks).unwrap();
let mut visited = Vec::new();
store
.for_each_embedding_batch_in_scope(
"crates/codelens-mcp/src/tools/session",
1,
&mut |batch: Vec<EmbeddingChunk>| {
visited.extend(batch.into_iter().map(|chunk| chunk.file_path));
Ok(())
},
)
.unwrap();
assert_eq!(
visited,
vec![
"crates/codelens-mcp/src/tools/session/a.rs",
"crates/codelens-mcp/src/tools/session/nested/b.rs",
]
);
}
#[test]
fn find_misplaced_code_returns_per_file_outliers() {
let _lock = MODEL_LOCK.lock().unwrap();
skip_without_embedding_model!();
let (_dir, project) = make_project_with_source();
let engine = EmbeddingEngine::new(&project).unwrap();
engine.index_from_project(&project).unwrap();
let outliers = engine.find_misplaced_code(5).unwrap();
assert_eq!(outliers.len(), 2);
assert!(outliers.iter().all(|item| item.file_path == "main.py"));
}
#[test]
fn find_duplicates_uses_batched_candidate_embeddings() {
let _lock = MODEL_LOCK.lock().unwrap();
skip_without_embedding_model!();
let (_dir, project) = make_project_with_source();
let engine = EmbeddingEngine::new(&project).unwrap();
engine.index_from_project(&project).unwrap();
replace_file_embeddings_with_sentinels(&engine, "main.py", &[("hello", 5.0), ("world", 5.0)]);
let duplicates = engine.find_duplicates(0.99, 4).unwrap();
assert!(!duplicates.is_empty());
assert!(duplicates.iter().any(|pair| {
(pair.symbol_a == "main.py:hello" && pair.symbol_b == "main.py:world")
|| (pair.symbol_a == "main.py:world" && pair.symbol_b == "main.py:hello")
}));
}
#[test]
fn find_duplicates_in_scope_continues_past_global_pairs() {
let _lock = MODEL_LOCK.lock().unwrap();
skip_without_embedding_model!();
let dir = tempfile::tempdir().unwrap();
let root = dir.path();
std::fs::create_dir_all(root.join(".github/workflows")).unwrap();
std::fs::create_dir_all(root.join("crates")).unwrap();
write_python_file_with_symbols(
root,
".github/workflows/outside_a.py",
"def outside_a():\n return 1\n",
"outside-a",
&[("outside_a", "def outside_a():", "outside_a")],
);
write_python_file_with_symbols(
root,
".github/workflows/outside_b.py",
"def outside_b():\n return 2\n",
"outside-b",
&[("outside_b", "def outside_b():", "outside_b")],
);
write_python_file_with_symbols(
root,
"crates/scoped_a.py",
"def scoped_a():\n return 3\n",
"scoped-a",
&[("scoped_a", "def scoped_a():", "scoped_a")],
);
write_python_file_with_symbols(
root,
"crates/scoped_b.py",
"def scoped_b():\n return 4\n",
"scoped-b",
&[("scoped_b", "def scoped_b():", "scoped_b")],
);
let project = ProjectRoot::new_exact(root).unwrap();
let engine = EmbeddingEngine::new(&project).unwrap();
engine.index_from_project(&project).unwrap();
replace_file_embeddings_with_sentinels(
&engine,
".github/workflows/outside_a.py",
&[("outside_a", 1.0)],
);
replace_file_embeddings_with_sentinels(
&engine,
".github/workflows/outside_b.py",
&[("outside_b", 1.0)],
);
replace_file_embeddings_with_sentinels(&engine, "crates/scoped_a.py", &[("scoped_a", -1.0)]);
replace_file_embeddings_with_sentinels(&engine, "crates/scoped_b.py", &[("scoped_b", -1.0)]);
let global = engine.find_duplicates(0.99, 1).unwrap();
assert_eq!(global.len(), 1);
assert_eq!(global[0].file_a, ".github/workflows/outside_a.py");
assert_eq!(global[0].file_b, ".github/workflows/outside_b.py");
let scoped = engine
.find_duplicates_in_scope(0.99, 1, Some("crates"))
.unwrap();
assert_eq!(scoped.len(), 1);
assert!(
scoped
.iter()
.all(|pair| pair.file_a.starts_with("crates/") || pair.file_b.starts_with("crates/"))
);
assert_eq!(scoped[0].file_a, "crates/scoped_a.py");
assert_eq!(scoped[0].file_b, "crates/scoped_b.py");
}
#[test]
fn find_duplicates_in_scope_keeps_cross_boundary_candidates() {
let _lock = MODEL_LOCK.lock().unwrap();
skip_without_embedding_model!();
let dir = tempfile::tempdir().unwrap();
let root = dir.path();
std::fs::create_dir_all(root.join("crates")).unwrap();
write_python_file_with_symbols(
root,
"outside.py",
"def outside():\n return 1\n",
"outside",
&[("outside", "def outside():", "outside")],
);
write_python_file_with_symbols(
root,
"crates/scoped.py",
"def scoped():\n return 2\n",
"scoped",
&[("scoped", "def scoped():", "scoped")],
);
let project = ProjectRoot::new_exact(root).unwrap();
let engine = EmbeddingEngine::new(&project).unwrap();
engine.index_from_project(&project).unwrap();
replace_file_embeddings_with_sentinels(&engine, "outside.py", &[("outside", 3.0)]);
replace_file_embeddings_with_sentinels(&engine, "crates/scoped.py", &[("scoped", 3.0)]);
let scoped = engine
.find_duplicates_in_scope(0.99, 1, Some("crates"))
.unwrap();
assert_eq!(scoped.len(), 1);
assert!(scoped[0].file_a == "crates/scoped.py" || scoped[0].file_b == "crates/scoped.py");
assert!(scoped[0].file_a == "outside.py" || scoped[0].file_b == "outside.py");
}
#[test]
fn search_scored_returns_raw_chunks() {
let _lock = MODEL_LOCK.lock().unwrap();
skip_without_embedding_model!();
let (_dir, project) = make_project_with_source();
let engine = EmbeddingEngine::new(&project).unwrap();
engine.index_from_project(&project).unwrap();
let chunks = engine.search_scored("world function", 5).unwrap();
assert!(!chunks.is_empty());
for c in &chunks {
assert!(!c.file_path.is_empty());
assert!(!c.symbol_name.is_empty());
}
}
#[test]
fn configured_embedding_model_name_defaults_to_codesearchnet() {
let _lock = MODEL_LOCK.lock().unwrap();
let previous_dir = std::env::var("CODELENS_MODEL_DIR").ok();
let previous_model = std::env::var("CODELENS_EMBED_MODEL").ok();
unsafe {
std::env::remove_var("CODELENS_MODEL_DIR");
std::env::remove_var("CODELENS_EMBED_MODEL");
}
assert_eq!(configured_embedding_model_name(), CODESEARCH_MODEL_NAME);
unsafe {
match previous_dir {
Some(value) => std::env::set_var("CODELENS_MODEL_DIR", value),
None => std::env::remove_var("CODELENS_MODEL_DIR"),
}
match previous_model {
Some(value) => std::env::set_var("CODELENS_EMBED_MODEL", value),
None => std::env::remove_var("CODELENS_EMBED_MODEL"),
}
}
}
#[test]
fn resolve_model_dir_accepts_direct_model_dir_override() {
let _lock = MODEL_LOCK.lock().unwrap();
let dir = tempfile::tempdir().unwrap();
let model_dir = dir.path().join("merged-lora-model");
write_minimal_model_assets(&model_dir);
let previous = std::env::var("CODELENS_MODEL_DIR").ok();
unsafe {
std::env::set_var("CODELENS_MODEL_DIR", &model_dir);
}
let resolved = resolve_model_dir().unwrap();
unsafe {
match previous {
Some(value) => std::env::set_var("CODELENS_MODEL_DIR", value),
None => std::env::remove_var("CODELENS_MODEL_DIR"),
}
}
assert_eq!(resolved, model_dir);
}
#[test]
fn resolve_model_dir_accepts_release_variant_split_onnx_layout() {
let _lock = MODEL_LOCK.lock().unwrap();
let dir = tempfile::tempdir().unwrap();
let variant_dir = dir.path().join("models").join("codelens-code-search").join(
if cfg!(target_arch = "aarch64") {
"arm64"
} else {
"avx2"
},
);
std::fs::create_dir_all(variant_dir.join("onnx")).unwrap();
std::fs::write(variant_dir.join("onnx").join("model.onnx"), b"{}").unwrap();
for asset in [
"tokenizer.json",
"config.json",
"special_tokens_map.json",
"tokenizer_config.json",
] {
std::fs::write(variant_dir.join(asset), b"{}").unwrap();
}
let previous = std::env::var("CODELENS_MODEL_DIR").ok();
unsafe {
std::env::set_var("CODELENS_MODEL_DIR", dir.path().join("models"));
}
let resolved = resolve_model_dir().unwrap();
unsafe {
match previous {
Some(value) => std::env::set_var("CODELENS_MODEL_DIR", value),
None => std::env::remove_var("CODELENS_MODEL_DIR"),
}
}
assert_eq!(resolved, variant_dir);
}
#[test]
fn executable_model_roots_include_installed_sidecar_layouts() {
let exe_dir = std::path::Path::new("/opt/codelens/bin");
let roots = executable_model_roots(exe_dir);
assert!(roots.contains(&std::path::PathBuf::from("/opt/codelens/bin/models")));
assert!(roots.contains(&std::path::PathBuf::from("/opt/codelens/models")));
assert!(roots.contains(&std::path::PathBuf::from(
"/opt/codelens/share/codelens/models"
)));
}
#[test]
fn configured_embedding_model_name_prefers_manifest_name_from_model_dir() {
let _lock = MODEL_LOCK.lock().unwrap();
let dir = tempfile::tempdir().unwrap();
let model_dir = dir.path().join("merged-lora-model");
write_minimal_model_assets(&model_dir);
std::fs::write(
model_dir.join("model-manifest.json"),
r#"{
"model_name": "MiniLM-L12-CodeSearchNet-LoRA-Merged-v1",
"base_model": "MiniLM-L12-CodeSearchNet-INT8",
"adapter_type": "lora",
"lora_merged_from": "scripts/finetune/output/lora-python/model",
"export_backend": "onnx"
}"#,
)
.unwrap();
let previous_dir = std::env::var("CODELENS_MODEL_DIR").ok();
let previous_model = std::env::var("CODELENS_EMBED_MODEL").ok();
unsafe {
std::env::set_var("CODELENS_MODEL_DIR", &model_dir);
std::env::remove_var("CODELENS_EMBED_MODEL");
}
let configured = configured_embedding_model_name();
unsafe {
match previous_dir {
Some(value) => std::env::set_var("CODELENS_MODEL_DIR", value),
None => std::env::remove_var("CODELENS_MODEL_DIR"),
}
match previous_model {
Some(value) => std::env::set_var("CODELENS_EMBED_MODEL", value),
None => std::env::remove_var("CODELENS_EMBED_MODEL"),
}
}
assert_eq!(configured, "MiniLM-L12-CodeSearchNet-LoRA-Merged-v1");
}
#[test]
fn requested_embedding_model_override_ignores_default_model_name() {
let _lock = MODEL_LOCK.lock().unwrap();
let previous = std::env::var("CODELENS_EMBED_MODEL").ok();
unsafe {
std::env::set_var("CODELENS_EMBED_MODEL", CODESEARCH_MODEL_NAME);
}
let result = requested_embedding_model_override().unwrap();
unsafe {
match previous {
Some(value) => std::env::set_var("CODELENS_EMBED_MODEL", value),
None => std::env::remove_var("CODELENS_EMBED_MODEL"),
}
}
assert_eq!(result, None);
}
#[cfg(not(feature = "model-bakeoff"))]
#[test]
fn requested_embedding_model_override_requires_bakeoff_feature() {
let _lock = MODEL_LOCK.lock().unwrap();
let previous = std::env::var("CODELENS_EMBED_MODEL").ok();
unsafe {
std::env::set_var("CODELENS_EMBED_MODEL", "all-MiniLM-L12-v2");
}
let err = requested_embedding_model_override().unwrap_err();
unsafe {
match previous {
Some(value) => std::env::set_var("CODELENS_EMBED_MODEL", value),
None => std::env::remove_var("CODELENS_EMBED_MODEL"),
}
}
assert!(err.to_string().contains("model-bakeoff"));
}
#[cfg(feature = "model-bakeoff")]
#[test]
fn requested_embedding_model_override_accepts_alternative_model() {
let _lock = MODEL_LOCK.lock().unwrap();
let previous = std::env::var("CODELENS_EMBED_MODEL").ok();
unsafe {
std::env::set_var("CODELENS_EMBED_MODEL", "all-MiniLM-L12-v2");
}
let result = requested_embedding_model_override().unwrap();
unsafe {
match previous {
Some(value) => std::env::set_var("CODELENS_EMBED_MODEL", value),
None => std::env::remove_var("CODELENS_EMBED_MODEL"),
}
}
assert_eq!(result.as_deref(), Some("all-MiniLM-L12-v2"));
}
#[test]
fn recommended_embed_threads_caps_macos_style_load() {
let _env_guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner());
let threads = recommended_embed_threads();
assert!(threads >= 1);
assert!(threads <= 8);
}
#[cfg(all(target_os = "macos", not(feature = "coreml")))]
#[test]
fn macos_runtime_preference_reports_cpu_when_coreml_is_not_compiled() {
let _env_guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner());
let previous = std::env::var("CODELENS_EMBED_PROVIDER").ok();
unsafe {
std::env::remove_var("CODELENS_EMBED_PROVIDER");
}
let default_preference = configured_embedding_runtime_preference();
unsafe {
std::env::set_var("CODELENS_EMBED_PROVIDER", "coreml");
}
let explicit_coreml_preference = configured_embedding_runtime_preference();
unsafe {
match previous {
Some(value) => std::env::set_var("CODELENS_EMBED_PROVIDER", value),
None => std::env::remove_var("CODELENS_EMBED_PROVIDER"),
}
}
assert_eq!(default_preference, "cpu");
assert_eq!(explicit_coreml_preference, "cpu");
}
#[cfg(all(target_os = "macos", feature = "coreml"))]
#[test]
fn macos_runtime_preference_reports_coreml_when_coreml_is_compiled() {
let _env_guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner());
let previous = std::env::var("CODELENS_EMBED_PROVIDER").ok();
unsafe {
std::env::remove_var("CODELENS_EMBED_PROVIDER");
}
let default_preference = configured_embedding_runtime_preference();
unsafe {
std::env::set_var("CODELENS_EMBED_PROVIDER", "coreml");
}
let explicit_coreml_preference = configured_embedding_runtime_preference();
unsafe {
match previous {
Some(value) => std::env::set_var("CODELENS_EMBED_PROVIDER", value),
None => std::env::remove_var("CODELENS_EMBED_PROVIDER"),
}
}
assert_eq!(default_preference, "coreml_preferred");
assert_eq!(explicit_coreml_preference, "coreml");
}
#[test]
fn embed_batch_size_has_safe_default_floor() {
let _env_guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner());
assert!(embed_batch_size() >= 1);
if cfg!(target_os = "macos") {
assert!(embed_batch_size() <= DEFAULT_MACOS_EMBED_BATCH_SIZE);
}
}
#[test]
fn low_power_embedding_profile_caps_default_threads_and_batch() {
let _env_guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner());
let previous_profile = std::env::var("CODELENS_EMBED_RESOURCE_PROFILE").ok();
let previous_threads = std::env::var("CODELENS_EMBED_THREADS").ok();
let previous_batch = std::env::var("CODELENS_EMBED_BATCH_SIZE").ok();
let previous_provider = std::env::var("CODELENS_EMBED_PROVIDER").ok();
unsafe {
std::env::set_var("CODELENS_EMBED_RESOURCE_PROFILE", "low_power");
std::env::remove_var("CODELENS_EMBED_THREADS");
std::env::remove_var("CODELENS_EMBED_BATCH_SIZE");
std::env::remove_var("CODELENS_EMBED_PROVIDER");
}
let threads = recommended_embed_threads();
let batch = embed_batch_size();
let provider = configured_embedding_runtime_preference();
unsafe {
match previous_profile {
Some(value) => std::env::set_var("CODELENS_EMBED_RESOURCE_PROFILE", value),
None => std::env::remove_var("CODELENS_EMBED_RESOURCE_PROFILE"),
}
match previous_threads {
Some(value) => std::env::set_var("CODELENS_EMBED_THREADS", value),
None => std::env::remove_var("CODELENS_EMBED_THREADS"),
}
match previous_batch {
Some(value) => std::env::set_var("CODELENS_EMBED_BATCH_SIZE", value),
None => std::env::remove_var("CODELENS_EMBED_BATCH_SIZE"),
}
match previous_provider {
Some(value) => std::env::set_var("CODELENS_EMBED_PROVIDER", value),
None => std::env::remove_var("CODELENS_EMBED_PROVIDER"),
}
}
assert!((1..=2).contains(&threads));
assert_eq!(batch, 32);
assert_eq!(provider, "cpu");
}
#[test]
fn explicit_embedding_knobs_override_low_power_profile() {
let _env_guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner());
let previous_profile = std::env::var("CODELENS_EMBED_RESOURCE_PROFILE").ok();
let previous_threads = std::env::var("CODELENS_EMBED_THREADS").ok();
let previous_batch = std::env::var("CODELENS_EMBED_BATCH_SIZE").ok();
let previous_provider = std::env::var("CODELENS_EMBED_PROVIDER").ok();
unsafe {
std::env::set_var("CODELENS_EMBED_RESOURCE_PROFILE", "low_power");
std::env::set_var("CODELENS_EMBED_THREADS", "5");
std::env::set_var("CODELENS_EMBED_BATCH_SIZE", "64");
std::env::set_var("CODELENS_EMBED_PROVIDER", "cpu");
}
let threads = recommended_embed_threads();
let batch = embed_batch_size();
let provider = configured_embedding_runtime_preference();
unsafe {
match previous_profile {
Some(value) => std::env::set_var("CODELENS_EMBED_RESOURCE_PROFILE", value),
None => std::env::remove_var("CODELENS_EMBED_RESOURCE_PROFILE"),
}
match previous_threads {
Some(value) => std::env::set_var("CODELENS_EMBED_THREADS", value),
None => std::env::remove_var("CODELENS_EMBED_THREADS"),
}
match previous_batch {
Some(value) => std::env::set_var("CODELENS_EMBED_BATCH_SIZE", value),
None => std::env::remove_var("CODELENS_EMBED_BATCH_SIZE"),
}
match previous_provider {
Some(value) => std::env::set_var("CODELENS_EMBED_PROVIDER", value),
None => std::env::remove_var("CODELENS_EMBED_PROVIDER"),
}
}
assert_eq!(threads, 5);
assert_eq!(batch, 64);
assert_eq!(provider, "cpu");
}