1use anyhow::{Context, Result};
2#[cfg(target_os = "macos")]
3use fastembed::ExecutionProviderDispatch;
4use fastembed::{InitOptionsUserDefined, TextEmbedding, TokenizerFiles, UserDefinedEmbeddingModel};
5use std::sync::Once;
6use std::thread::available_parallelism;
7use tracing::debug;
8
9use super::EmbeddingRuntimeInfo;
10use super::ffi;
11
/// Guards the one-time ONNX Runtime global-environment initialization
/// performed in `configure_embedding_runtime`.
pub static ORT_ENV_INIT: Once = Once::new();

/// Default embedding batch size on non-macOS hosts (see `embed_batch_size`).
pub const DEFAULT_EMBED_BATCH_SIZE: usize = 128;
/// Default embedding batch size on macOS (currently equal to the generic default).
pub const DEFAULT_MACOS_EMBED_BATCH_SIZE: usize = 128;
/// Default text-embedding cache size on non-macOS hosts.
pub const DEFAULT_TEXT_EMBED_CACHE_SIZE: usize = 256;
/// Larger default text-embedding cache size on macOS.
pub const DEFAULT_MACOS_TEXT_EMBED_CACHE_SIZE: usize = 1024;
/// Output vector dimension of the bundled CodeSearchNet model.
pub const CODESEARCH_DIMENSION: usize = 384;
/// Default cap on how many symbols get embedded per run (see `max_embed_symbols`).
pub const DEFAULT_MAX_EMBED_SYMBOLS: usize = 50_000;
/// Chunk size for changed-file queries. NOTE(review): consumer is not visible
/// in this file — confirm semantics at the call site.
pub const CHANGED_FILE_QUERY_CHUNK: usize = 128;
/// Batch size for duplicate-scan passes. NOTE(review): consumer is not visible
/// in this file — confirm semantics at the call site.
pub const DEFAULT_DUPLICATE_SCAN_BATCH_SIZE: usize = 128;

/// Reported identifier of the bundled INT8-quantized CodeSearchNet model.
pub const CODESEARCH_MODEL_NAME: &str = "MiniLM-L12-CodeSearchNet-INT8";
26
27pub fn resolve_model_dir() -> Result<std::path::PathBuf> {
35 if let Ok(dir) = std::env::var("CODELENS_MODEL_DIR") {
37 let p = std::path::PathBuf::from(dir).join("codesearch");
38 if p.join("model.onnx").exists() {
39 return Ok(p);
40 }
41 }
42
43 if let Ok(exe) = std::env::current_exe()
45 && let Some(exe_dir) = exe.parent()
46 {
47 let p = exe_dir.join("models").join("codesearch");
48 if p.join("model.onnx").exists() {
49 return Ok(p);
50 }
51 }
52
53 if let Some(home) = dirs_fallback() {
55 let p = home
56 .join(".cache")
57 .join("codelens")
58 .join("models")
59 .join("codesearch");
60 if p.join("model.onnx").exists() {
61 return Ok(p);
62 }
63 }
64
65 let dev_path = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
67 .join("models")
68 .join("codesearch");
69 if dev_path.join("model.onnx").exists() {
70 return Ok(dev_path);
71 }
72
73 anyhow::bail!(
74 "CodeSearchNet model not found. Place model files in one of:\n\
75 - $CODELENS_MODEL_DIR/codesearch/\n\
76 - <executable>/models/codesearch/\n\
77 - ~/.cache/codelens/models/codesearch/\n\
78 Required files: model.onnx, tokenizer.json, config.json, special_tokens_map.json, tokenizer_config.json"
79 )
80}
81
/// Minimal stand-in for the `dirs` crate: the user's home directory taken
/// from the `HOME` environment variable, or `None` when it is unset.
pub fn dirs_fallback() -> Option<std::path::PathBuf> {
    let home = std::env::var_os("HOME")?;
    Some(std::path::PathBuf::from(home))
}
85
/// Read environment variable `name` as a strictly positive integer.
///
/// Returns `None` when the variable is unset, unparsable after trimming, or
/// equal to zero.
pub fn parse_usize_env(name: &str) -> Option<usize> {
    let raw = std::env::var(name).ok()?;
    match raw.trim().parse::<usize>() {
        Ok(value) if value > 0 => Some(value),
        _ => None,
    }
}
92
/// Read environment variable `name` as a boolean flag.
///
/// Accepts (case-insensitively, after trimming) `1/true/yes/on` as `true`
/// and `0/false/no/off` as `false`; anything else — including an unset
/// variable — yields `None`.
pub fn parse_bool_env(name: &str) -> Option<bool> {
    let raw = std::env::var(name).ok()?;
    match raw.trim().to_ascii_lowercase().as_str() {
        "1" | "true" | "yes" | "on" => Some(true),
        "0" | "false" | "no" | "off" => Some(false),
        _ => None,
    }
}
103
/// Number of Apple-silicon performance cores, queried via `sysctl`.
///
/// Prefers `hw.perflevel0.physicalcpu` (the performance cluster) and falls
/// back to `hw.physicalcpu` (all physical cores); returns `None` when
/// neither sysctl reports a positive value.
#[cfg(target_os = "macos")]
pub fn apple_perf_cores() -> Option<usize> {
    ffi::sysctl_usize(b"hw.perflevel0.physicalcpu\0")
        .filter(|value| *value > 0)
        .or_else(|| ffi::sysctl_usize(b"hw.physicalcpu\0").filter(|value| *value > 0))
}
110
/// Non-macOS stub: performance-core detection is only implemented for macOS,
/// so callers always fall back to `available_parallelism`.
#[cfg(not(target_os = "macos"))]
pub fn apple_perf_cores() -> Option<usize> {
    None
}
115
/// Resolve the embedding execution-provider preference.
///
/// `CODELENS_EMBED_PROVIDER` (trimmed, case-insensitive) may force `"cpu"`,
/// or request `"coreml"` (honored on macOS only). With no valid request,
/// macOS defaults to `"coreml_preferred"` and every other platform to
/// `"cpu"`.
pub fn configured_embedding_runtime_preference() -> String {
    let on_macos = cfg!(target_os = "macos");
    let requested = std::env::var("CODELENS_EMBED_PROVIDER")
        .map(|value| value.trim().to_ascii_lowercase())
        .unwrap_or_default();

    let resolved = if requested == "cpu" {
        "cpu"
    } else if requested == "coreml" {
        // CoreML is only available on macOS; elsewhere the request degrades to CPU.
        if on_macos { "coreml" } else { "cpu" }
    } else if on_macos {
        "coreml_preferred"
    } else {
        "cpu"
    };
    resolved.to_string()
}
129
/// Thread count for the embedding runtime; delegates to
/// `recommended_embed_threads()` (which also honors `CODELENS_EMBED_THREADS`).
pub fn configured_embedding_threads() -> usize {
    recommended_embed_threads()
}
133
134pub fn configured_embedding_max_length() -> usize {
135 parse_usize_env("CODELENS_EMBED_MAX_LENGTH")
136 .unwrap_or(256)
137 .clamp(32, 512)
138}
139
140pub fn configured_embedding_text_cache_size() -> usize {
141 std::env::var("CODELENS_EMBED_TEXT_CACHE_SIZE")
142 .ok()
143 .and_then(|value| value.trim().parse::<usize>().ok())
144 .unwrap_or({
145 if cfg!(target_os = "macos") {
146 DEFAULT_MACOS_TEXT_EMBED_CACHE_SIZE
147 } else {
148 DEFAULT_TEXT_EMBED_CACHE_SIZE
149 }
150 })
151 .min(8192)
152}
153
/// Canonical CoreML compute-units selection from
/// `CODELENS_EMBED_COREML_COMPUTE_UNITS` (trimmed, case-insensitive).
///
/// Accepts several aliases and defaults to `"cpu_and_neural_engine"` for
/// anything unset or unrecognized.
#[cfg(target_os = "macos")]
pub fn configured_coreml_compute_units_name() -> String {
    let requested = std::env::var("CODELENS_EMBED_COREML_COMPUTE_UNITS")
        .map(|value| value.trim().to_ascii_lowercase())
        .unwrap_or_default();
    let canonical = match requested.as_str() {
        "all" => "all",
        "cpu" | "cpu_only" => "cpu_only",
        "gpu" | "cpu_and_gpu" => "cpu_and_gpu",
        // "ane"/"neural_engine" aliases and the fallback share one value.
        _ => "cpu_and_neural_engine",
    };
    canonical.to_string()
}
170
/// CoreML model format from `CODELENS_EMBED_COREML_MODEL_FORMAT`:
/// `"neural_network"` when explicitly requested (either spelling),
/// `"mlprogram"` otherwise.
#[cfg(target_os = "macos")]
pub fn configured_coreml_model_format_name() -> String {
    let requested = std::env::var("CODELENS_EMBED_COREML_MODEL_FORMAT")
        .map(|value| value.trim().to_ascii_lowercase())
        .unwrap_or_default();
    if matches!(requested.as_str(), "neuralnetwork" | "neural_network") {
        "neural_network".to_string()
    } else {
        "mlprogram".to_string()
    }
}
182
/// Whether to request a CoreML compute-plan profile; toggled via
/// `CODELENS_EMBED_COREML_PROFILE_PLAN`, off by default.
#[cfg(target_os = "macos")]
pub fn configured_coreml_profile_compute_plan() -> bool {
    parse_bool_env("CODELENS_EMBED_COREML_PROFILE_PLAN").unwrap_or(false)
}
187
/// Whether CoreML should assume static input shapes; toggled via
/// `CODELENS_EMBED_COREML_STATIC_INPUT_SHAPES`, on by default.
#[cfg(target_os = "macos")]
pub fn configured_coreml_static_input_shapes() -> bool {
    parse_bool_env("CODELENS_EMBED_COREML_STATIC_INPUT_SHAPES").unwrap_or(true)
}
192
/// CoreML specialization strategy from `CODELENS_EMBED_COREML_SPECIALIZATION`:
/// `"default"` only when explicitly requested, otherwise `"fast_prediction"`.
#[cfg(target_os = "macos")]
pub fn configured_coreml_specialization_strategy_name() -> String {
    let requested = std::env::var("CODELENS_EMBED_COREML_SPECIALIZATION")
        .map(|value| value.trim().to_ascii_lowercase())
        .unwrap_or_default();
    if requested == "default" {
        "default".to_string()
    } else {
        "fast_prediction".to_string()
    }
}
204
/// Directory for cached CoreML-compiled model artifacts:
/// `~/.cache/codelens/coreml-cache/codesearch`, rooted in the temp dir when
/// `HOME` is unavailable.
#[cfg(target_os = "macos")]
pub fn configured_coreml_model_cache_dir() -> std::path::PathBuf {
    let mut dir = dirs_fallback().unwrap_or_else(std::env::temp_dir);
    for segment in [".cache", "codelens", "coreml-cache", "codesearch"] {
        dir.push(segment);
    }
    dir
}
214
215pub fn recommended_embed_threads() -> usize {
216 if let Some(explicit) = parse_usize_env("CODELENS_EMBED_THREADS") {
217 return explicit.max(1);
218 }
219
220 let available = available_parallelism().map(|n| n.get()).unwrap_or(1);
221 if cfg!(target_os = "macos") {
222 apple_perf_cores()
223 .unwrap_or(available)
224 .min(available)
225 .clamp(1, 8)
226 } else {
227 available.div_ceil(2).clamp(1, 8)
228 }
229}
230
231pub fn embed_batch_size() -> usize {
232 parse_usize_env("CODELENS_EMBED_BATCH_SIZE").unwrap_or({
233 if cfg!(target_os = "macos") {
234 DEFAULT_MACOS_EMBED_BATCH_SIZE
235 } else {
236 DEFAULT_EMBED_BATCH_SIZE
237 }
238 })
239}
240
/// Cap on the number of symbols to embed; overridable with a positive
/// `CODELENS_MAX_EMBED_SYMBOLS`, defaulting to `DEFAULT_MAX_EMBED_SYMBOLS`.
pub fn max_embed_symbols() -> usize {
    parse_usize_env("CODELENS_MAX_EMBED_SYMBOLS").unwrap_or(DEFAULT_MAX_EMBED_SYMBOLS)
}
244
/// Set process environment variable `name` to `value`, but only when it is
/// not already present — user-provided settings always win.
fn set_env_if_unset(name: &str, value: impl Into<String>) {
    if std::env::var_os(name).is_some() {
        return;
    }
    // SAFETY: `set_var` mutates the process environment, which is unsound if
    // other threads read it concurrently. NOTE(review): assumed to run during
    // single-threaded startup via `configure_embedding_runtime` — confirm.
    unsafe {
        std::env::set_var(name, value.into());
    }
}
254
/// One-time, process-wide setup for the embedding runtime.
///
/// Seeds threading-related environment variables (only where the user has
/// not already set them) and initializes the ONNX Runtime global environment
/// with a shared thread pool. Safe to call repeatedly: the ORT init runs at
/// most once via `ORT_ENV_INIT`.
pub fn configure_embedding_runtime() {
    let threads = recommended_embed_threads();
    let runtime_preference = configured_embedding_runtime_preference();

    // OpenMP / tokenizers knobs: cap worker threads and avoid busy-waiting.
    set_env_if_unset("OMP_NUM_THREADS", threads.to_string());
    set_env_if_unset("OMP_WAIT_POLICY", "PASSIVE");
    set_env_if_unset("OMP_DYNAMIC", "FALSE");
    set_env_if_unset("TOKENIZERS_PARALLELISM", "false");
    if cfg!(target_os = "macos") {
        // Caps Accelerate/vecLib threading on macOS.
        set_env_if_unset("VECLIB_MAXIMUM_THREADS", threads.to_string());
    }

    ORT_ENV_INIT.call_once(|| {
        // Global ORT thread pool: `threads` intra-op workers, one inter-op
        // thread, spinning disabled.
        let pool = ort::environment::GlobalThreadPoolOptions::default()
            .with_intra_threads(threads)
            .and_then(|pool| pool.with_inter_threads(1))
            .and_then(|pool| pool.with_spin_control(false));

        // Errors are deliberately swallowed: if pool construction or the
        // commit fails, ORT falls back to its own defaults.
        if let Ok(pool) = pool {
            let _ = ort::init()
                .with_name("codelens-embedding")
                .with_telemetry(false)
                .with_global_thread_pool(pool)
                .commit();
        }
    });

    debug!(
        threads,
        runtime_preference = %runtime_preference,
        "configured embedding runtime"
    );
}
290
291pub fn requested_embedding_model_override() -> Result<Option<String>> {
292 let env_model = std::env::var("CODELENS_EMBED_MODEL").ok();
293 let Some(model_id) = env_model else {
294 return Ok(None);
295 };
296 if model_id.is_empty() || model_id == CODESEARCH_MODEL_NAME {
297 return Ok(None);
298 }
299
300 #[cfg(feature = "model-bakeoff")]
301 {
302 return Ok(Some(model_id));
303 }
304
305 #[cfg(not(feature = "model-bakeoff"))]
306 {
307 anyhow::bail!(
308 "CODELENS_EMBED_MODEL={model_id} requires the `model-bakeoff` feature; \
309 rebuild the binary with `--features model-bakeoff` to run alternative model bake-offs"
310 );
311 }
312}
313
/// Snapshot of the embedding runtime configuration *before* any model is
/// loaded — `backend` is therefore reported as `"not_loaded"`.
///
/// On macOS the CoreML-specific fields are populated only when the runtime
/// preference does not force CPU; on other platforms they are always `None`.
pub fn configured_embedding_runtime_info() -> EmbeddingRuntimeInfo {
    let runtime_preference = configured_embedding_runtime_preference();
    let threads = configured_embedding_threads();

    #[cfg(target_os = "macos")]
    {
        // CoreML details are only meaningful when CoreML may actually be used.
        let coreml_enabled = runtime_preference != "cpu";
        EmbeddingRuntimeInfo {
            runtime_preference,
            backend: "not_loaded".to_string(),
            threads,
            max_length: configured_embedding_max_length(),
            coreml_model_format: coreml_enabled.then(configured_coreml_model_format_name),
            coreml_compute_units: coreml_enabled.then(configured_coreml_compute_units_name),
            coreml_static_input_shapes: coreml_enabled.then(configured_coreml_static_input_shapes),
            coreml_profile_compute_plan: coreml_enabled
                .then(configured_coreml_profile_compute_plan),
            coreml_specialization_strategy: coreml_enabled
                .then(configured_coreml_specialization_strategy_name),
            coreml_model_cache_dir: coreml_enabled
                .then(|| configured_coreml_model_cache_dir().display().to_string()),
            fallback_reason: None,
        }
    }

    #[cfg(not(target_os = "macos"))]
    {
        EmbeddingRuntimeInfo {
            runtime_preference,
            backend: "not_loaded".to_string(),
            threads,
            max_length: configured_embedding_max_length(),
            coreml_model_format: None,
            coreml_compute_units: None,
            coreml_static_input_shapes: None,
            coreml_profile_compute_plan: None,
            coreml_specialization_strategy: None,
            coreml_model_cache_dir: None,
            fallback_reason: None,
        }
    }
}
356
/// Assemble the CoreML execution provider from the configured knobs
/// (compute units, model format, specialization strategy, static shapes,
/// compute-plan profiling, cache directory).
///
/// `error_on_failure()` makes provider registration failures surface as load
/// errors rather than a silent CPU fallback inside ORT.
#[cfg(target_os = "macos")]
pub fn build_coreml_execution_provider() -> ExecutionProviderDispatch {
    use ort::ep::{
        CoreML,
        coreml::{ComputeUnits, ModelFormat, SpecializationStrategy},
    };

    let cache_dir = configured_coreml_model_cache_dir();
    // Best-effort: a missing cache dir only costs recompilation, not a failure.
    let _ = std::fs::create_dir_all(&cache_dir);

    let units = match configured_coreml_compute_units_name().as_str() {
        "cpu_only" => ComputeUnits::CPUOnly,
        "cpu_and_gpu" => ComputeUnits::CPUAndGPU,
        "all" => ComputeUnits::All,
        _ => ComputeUnits::CPUAndNeuralEngine,
    };
    let format = if configured_coreml_model_format_name() == "neural_network" {
        ModelFormat::NeuralNetwork
    } else {
        ModelFormat::MLProgram
    };
    let strategy = if configured_coreml_specialization_strategy_name() == "default" {
        SpecializationStrategy::Default
    } else {
        SpecializationStrategy::FastPrediction
    };

    CoreML::default()
        .with_model_format(format)
        .with_compute_units(units)
        .with_static_input_shapes(configured_coreml_static_input_shapes())
        .with_specialization_strategy(strategy)
        .with_profile_compute_plan(configured_coreml_profile_compute_plan())
        .with_model_cache_dir(cache_dir.display().to_string())
        .build()
        .error_on_failure()
}
391
/// Build an `EmbeddingRuntimeInfo` describing a CPU-backed runtime.
///
/// `fallback_reason` is `Some` when CPU was chosen because another backend
/// failed; all CoreML-specific fields are `None`.
pub fn cpu_runtime_info(
    runtime_preference: String,
    fallback_reason: Option<String>,
) -> EmbeddingRuntimeInfo {
    EmbeddingRuntimeInfo {
        runtime_preference,
        backend: "cpu".to_string(),
        threads: configured_embedding_threads(),
        max_length: configured_embedding_max_length(),
        coreml_model_format: None,
        coreml_compute_units: None,
        coreml_static_input_shapes: None,
        coreml_profile_compute_plan: None,
        coreml_specialization_strategy: None,
        coreml_model_cache_dir: None,
        fallback_reason,
    }
}
410
/// Build an `EmbeddingRuntimeInfo` for a CoreML-preferred load attempt.
///
/// `fallback_reason: Some(_)` means the CoreML load failed and the model is
/// actually running on CPU — `backend` reflects that — while the CoreML
/// configuration fields still record what was attempted.
#[cfg(target_os = "macos")]
pub fn coreml_runtime_info(
    runtime_preference: String,
    fallback_reason: Option<String>,
) -> EmbeddingRuntimeInfo {
    EmbeddingRuntimeInfo {
        runtime_preference,
        // A recorded fallback reason implies the CPU path ended up in use.
        backend: if fallback_reason.is_some() {
            "cpu".to_string()
        } else {
            "coreml".to_string()
        },
        threads: configured_embedding_threads(),
        max_length: configured_embedding_max_length(),
        coreml_model_format: Some(configured_coreml_model_format_name()),
        coreml_compute_units: Some(configured_coreml_compute_units_name()),
        coreml_static_input_shapes: Some(configured_coreml_static_input_shapes()),
        coreml_profile_compute_plan: Some(configured_coreml_profile_compute_plan()),
        coreml_specialization_strategy: Some(configured_coreml_specialization_strategy_name()),
        coreml_model_cache_dir: Some(configured_coreml_model_cache_dir().display().to_string()),
        fallback_reason,
    }
}
434
/// Load one of fastembed's built-in models for A/B bake-off comparisons
/// (`model-bakeoff` builds only).
///
/// Returns `(model, dimension, model_name, runtime_info)`. Model weights are
/// downloaded into a temp-dir cache; the runtime info is always reported as
/// CPU here (no CoreML path for bake-off models).
///
/// # Errors
/// Fails on an unrecognized `model_id` or when fastembed cannot initialize
/// the model (e.g. download failure).
#[cfg(feature = "model-bakeoff")]
pub fn load_fastembed_builtin(
    model_id: &str,
) -> Result<(TextEmbedding, usize, String, EmbeddingRuntimeInfo)> {
    use fastembed::EmbeddingModel;

    // Accept both short names and HuggingFace-style org/name identifiers.
    let (model_enum, expected_dim) = match model_id {
        "all-MiniLM-L6-v2" | "sentence-transformers/all-MiniLM-L6-v2" => {
            (EmbeddingModel::AllMiniLML6V2, 384)
        }
        "all-MiniLM-L12-v2" | "sentence-transformers/all-MiniLM-L12-v2" => {
            (EmbeddingModel::AllMiniLML12V2, 384)
        }
        "bge-small-en-v1.5" | "BAAI/bge-small-en-v1.5" => (EmbeddingModel::BGESmallENV15, 384),
        "bge-base-en-v1.5" | "BAAI/bge-base-en-v1.5" => (EmbeddingModel::BGEBaseENV15, 768),
        "nomic-embed-text-v1.5" | "nomic-ai/nomic-embed-text-v1.5" => {
            (EmbeddingModel::NomicEmbedTextV15, 768)
        }
        other => {
            anyhow::bail!(
                "Unknown fastembed model: {other}. \
                 Supported: all-MiniLM-L6-v2, all-MiniLM-L12-v2, bge-small-en-v1.5, \
                 bge-base-en-v1.5, nomic-embed-text-v1.5"
            );
        }
    };

    let init = fastembed::InitOptionsWithLength::new(model_enum)
        .with_max_length(configured_embedding_max_length())
        .with_cache_dir(std::env::temp_dir().join("codelens-fastembed-cache"))
        .with_show_download_progress(true);
    let model =
        TextEmbedding::try_new(init).with_context(|| format!("failed to load {model_id}"))?;

    let runtime_info = cpu_runtime_info("cpu".to_string(), None);

    tracing::info!(
        model = model_id,
        dimension = expected_dim,
        "loaded fastembed built-in model for A/B comparison"
    );

    Ok((model, expected_dim, model_id.to_string(), runtime_info))
}
484
/// Load the bundled CodeSearchNet embedding model from disk.
///
/// Returns `(model, dimension, model_name, runtime_info)`. Honors a
/// `CODELENS_EMBED_MODEL` override (bake-off builds only); on macOS it first
/// attempts a CoreML-backed load when the runtime preference allows it,
/// falling back to CPU — with the failure recorded in the runtime info — if
/// that fails.
///
/// # Errors
/// Fails when the model directory cannot be resolved, any asset file cannot
/// be read, or the model cannot be initialized on CPU.
pub fn load_codesearch_model() -> Result<(TextEmbedding, usize, String, EmbeddingRuntimeInfo)> {
    configure_embedding_runtime();

    // `model_id` is only consumed in bake-off builds; the allow silences the
    // unused-variable warning in the default configuration.
    #[allow(unused_variables)]
    if let Some(model_id) = requested_embedding_model_override()? {
        #[cfg(feature = "model-bakeoff")]
        {
            return load_fastembed_builtin(&model_id);
        }

        #[cfg(not(feature = "model-bakeoff"))]
        {
            // Without the feature, the override check above already bailed.
            let _ = model_id;
            unreachable!("alternative embedding model override should have errored");
        }
    }

    let model_dir = resolve_model_dir()?;

    // All five asset files are required; each read gets its own context so a
    // missing file is immediately identifiable.
    let onnx_bytes =
        std::fs::read(model_dir.join("model.onnx")).context("failed to read model.onnx")?;
    let tokenizer_bytes =
        std::fs::read(model_dir.join("tokenizer.json")).context("failed to read tokenizer.json")?;
    let config_bytes =
        std::fs::read(model_dir.join("config.json")).context("failed to read config.json")?;
    let special_tokens_bytes = std::fs::read(model_dir.join("special_tokens_map.json"))
        .context("failed to read special_tokens_map.json")?;
    let tokenizer_config_bytes = std::fs::read(model_dir.join("tokenizer_config.json"))
        .context("failed to read tokenizer_config.json")?;

    let user_model = UserDefinedEmbeddingModel::new(
        onnx_bytes,
        TokenizerFiles {
            tokenizer_file: tokenizer_bytes,
            config_file: config_bytes,
            special_tokens_map_file: special_tokens_bytes,
            tokenizer_config_file: tokenizer_config_bytes,
        },
    );

    let runtime_preference = configured_embedding_runtime_preference();

    // macOS CoreML-first path; the clone keeps `user_model` available for the
    // CPU fallback below if the CoreML load fails.
    #[cfg(target_os = "macos")]
    if runtime_preference != "cpu" {
        let init_opts = InitOptionsUserDefined::new()
            .with_max_length(configured_embedding_max_length())
            .with_execution_providers(vec![build_coreml_execution_provider()]);
        match TextEmbedding::try_new_from_user_defined(user_model.clone(), init_opts) {
            Ok(model) => {
                let runtime_info = coreml_runtime_info(runtime_preference.clone(), None);
                debug!(
                    threads = runtime_info.threads,
                    runtime_preference = %runtime_info.runtime_preference,
                    backend = %runtime_info.backend,
                    coreml_compute_units = ?runtime_info.coreml_compute_units,
                    coreml_static_input_shapes = ?runtime_info.coreml_static_input_shapes,
                    coreml_profile_compute_plan = ?runtime_info.coreml_profile_compute_plan,
                    coreml_specialization_strategy = ?runtime_info.coreml_specialization_strategy,
                    coreml_model_cache_dir = ?runtime_info.coreml_model_cache_dir,
                    "loaded CodeSearchNet embedding model"
                );
                return Ok((
                    model,
                    CODESEARCH_DIMENSION,
                    CODESEARCH_MODEL_NAME.to_string(),
                    runtime_info,
                ));
            }
            Err(err) => {
                // CoreML failed: retry on CPU and record why in the info.
                let reason = err.to_string();
                debug!(
                    runtime_preference = %runtime_preference,
                    fallback_reason = %reason,
                    "CoreML embedding load failed; falling back to CPU"
                );
                let model = TextEmbedding::try_new_from_user_defined(
                    user_model,
                    InitOptionsUserDefined::new()
                        .with_max_length(configured_embedding_max_length()),
                )
                .context("failed to load CodeSearchNet embedding model")?;
                let runtime_info = coreml_runtime_info(runtime_preference.clone(), Some(reason));
                debug!(
                    threads = runtime_info.threads,
                    runtime_preference = %runtime_info.runtime_preference,
                    backend = %runtime_info.backend,
                    coreml_compute_units = ?runtime_info.coreml_compute_units,
                    coreml_static_input_shapes = ?runtime_info.coreml_static_input_shapes,
                    coreml_profile_compute_plan = ?runtime_info.coreml_profile_compute_plan,
                    coreml_specialization_strategy = ?runtime_info.coreml_specialization_strategy,
                    coreml_model_cache_dir = ?runtime_info.coreml_model_cache_dir,
                    fallback_reason = ?runtime_info.fallback_reason,
                    "loaded CodeSearchNet embedding model"
                );
                return Ok((
                    model,
                    CODESEARCH_DIMENSION,
                    CODESEARCH_MODEL_NAME.to_string(),
                    runtime_info,
                ));
            }
        }
    }

    // Plain CPU path (non-macOS, or preference forced to CPU).
    let model = TextEmbedding::try_new_from_user_defined(
        user_model,
        InitOptionsUserDefined::new().with_max_length(configured_embedding_max_length()),
    )
    .context("failed to load CodeSearchNet embedding model")?;
    let runtime_info = cpu_runtime_info(runtime_preference.clone(), None);

    debug!(
        threads = runtime_info.threads,
        runtime_preference = %runtime_info.runtime_preference,
        backend = %runtime_info.backend,
        "loaded CodeSearchNet embedding model"
    );

    Ok((
        model,
        CODESEARCH_DIMENSION,
        CODESEARCH_MODEL_NAME.to_string(),
        runtime_info,
    ))
}
612
613pub fn configured_embedding_model_name() -> String {
614 std::env::var("CODELENS_EMBED_MODEL").unwrap_or_else(|_| CODESEARCH_MODEL_NAME.to_string())
615}
616
/// Blend weight used when mixing rerank scores with retrieval scores, read
/// from `CODELENS_RERANK_BLEND`.
///
/// Accepts values in `[0.0, 1.0]`; unset, unparsable, or out-of-range values
/// fall back to 0.75. The raw value is trimmed first, matching
/// `parse_usize_env` / `parse_bool_env`.
///
/// NOTE(review): which score the weight applies to is decided at the call
/// site — not visible in this file.
pub fn configured_rerank_blend() -> f64 {
    std::env::var("CODELENS_RERANK_BLEND")
        .ok()
        .and_then(|v| v.trim().parse::<f64>().ok())
        // `filter` replaces the hand-rolled `and_then` range check; NaN fails
        // `contains` and therefore also falls back to the default.
        .filter(|v| (0.0..=1.0).contains(v))
        .unwrap_or(0.75)
}
630
/// Cheap preflight: `true` when the CodeSearchNet model files can be located
/// on disk (see `resolve_model_dir` for the search order).
pub fn embedding_model_assets_available() -> bool {
    resolve_model_dir().is_ok()
}