1use anyhow::{Context, Result};
2#[cfg(all(target_os = "macos", feature = "coreml"))]
3use fastembed::ExecutionProviderDispatch;
4use fastembed::{InitOptionsUserDefined, TextEmbedding, TokenizerFiles, UserDefinedEmbeddingModel};
5use serde::Deserialize;
6use std::sync::Once;
7use std::thread::available_parallelism;
8use tracing::debug;
9
10use super::EmbeddingRuntimeInfo;
11#[cfg(target_os = "macos")]
12use super::ffi;
13
/// One-time guard around the ONNX Runtime environment setup performed by
/// `configure_embedding_runtime`.
pub static ORT_ENV_INIT: Once = Once::new();

/// Batch size returned by `embed_batch_size` on non-macOS targets when no override applies.
pub const DEFAULT_EMBED_BATCH_SIZE: usize = 128;
/// Batch size returned by `embed_batch_size` on macOS when no override applies.
pub const DEFAULT_MACOS_EMBED_BATCH_SIZE: usize = 128;
/// Text-embedding cache size used on non-macOS targets when the env override is absent or invalid.
pub const DEFAULT_TEXT_EMBED_CACHE_SIZE: usize = 256;
/// Text-embedding cache size used on macOS when the env override is absent or invalid.
pub const DEFAULT_MACOS_TEXT_EMBED_CACHE_SIZE: usize = 1024;
/// Embedding dimension that `load_codesearch_model` reports for the bundled model.
pub const CODESEARCH_DIMENSION: usize = 384;
/// Value returned by `max_embed_symbols` when `CODELENS_MAX_EMBED_SYMBOLS` is unset or invalid.
pub const DEFAULT_MAX_EMBED_SYMBOLS: usize = 50_000;
// NOTE(review): not referenced in this part of the file; presumably the chunk
// size for changed-file queries — confirm at the use sites.
pub const CHANGED_FILE_QUERY_CHUNK: usize = 128;
// NOTE(review): not referenced in this part of the file; presumably the batch
// size for duplicate scanning — confirm at the use sites.
pub const DEFAULT_DUPLICATE_SCAN_BATCH_SIZE: usize = 128;

/// Model name reported when a model directory has no manifest or the manifest
/// does not provide `model_name`.
pub const CODESEARCH_MODEL_NAME: &str = "MiniLM-L12-CodeSearchNet-INT8";
/// Files that must all be found before a directory is accepted as a model directory.
const REQUIRED_MODEL_ASSETS: &[&str] = &[
    "model.onnx",
    "tokenizer.json",
    "config.json",
    "special_tokens_map.json",
    "tokenizer_config.json",
];
35
/// Metadata parsed from a model directory's `model-manifest.json`.
///
/// Every field is optional. Within this file only `model_name` is read (by
/// `configured_model_name_for_dir`); the remaining fields are parsed but unused here.
#[derive(Debug, Clone, Deserialize, Default)]
struct EmbeddingModelManifest {
    /// Name to report for the model instead of `CODESEARCH_MODEL_NAME`.
    model_name: Option<String>,
    // The fields below are deserialized but never read in this file, hence
    // the `dead_code` allowances.
    #[allow(dead_code)]
    base_model: Option<String>,
    #[allow(dead_code)]
    fine_tuned_from: Option<String>,
    #[allow(dead_code)]
    adapter_type: Option<String>,
    #[allow(dead_code)]
    lora_merged_from: Option<String>,
    #[allow(dead_code)]
    export_backend: Option<String>,
    #[allow(dead_code)]
    export_revision: Option<String>,
}
52
/// Names the architecture-specific export subdirectory to probe for model files.
///
/// Returns `"arm64"` when compiled for `aarch64` and `"avx2"` for every other target.
fn preferred_export_variant() -> &'static str {
    // Resolved at compile time; the target architecture never changes at runtime.
    const VARIANT: &str = if cfg!(target_arch = "aarch64") {
        "arm64"
    } else {
        "avx2"
    };
    VARIANT
}
60
61fn model_dir_candidates(base: &std::path::Path) -> Vec<std::path::PathBuf> {
62 let variant = preferred_export_variant();
63 let mut candidates = vec![
64 base.to_path_buf(),
65 base.join("codesearch"),
66 base.join("onnx"),
67 base.join(variant),
68 base.join("codelens-code-search"),
69 base.join("codelens-code-search").join(variant),
70 ];
71 candidates.dedup();
72 candidates
73}
74
75fn model_dir_has_assets(dir: &std::path::Path) -> bool {
76 REQUIRED_MODEL_ASSETS
77 .iter()
78 .all(|name| model_asset_path(dir, name).exists())
79}
80
/// Resolves where `asset` lives for `model_dir`.
///
/// The flat location `model_dir/asset` is preferred. Only for `model.onnx`,
/// when the flat file is missing, `model_dir/onnx/model.onnx` is returned if
/// it exists. In every other case the flat path is returned, whether or not
/// the file exists.
fn model_asset_path(model_dir: &std::path::Path, asset: &str) -> std::path::PathBuf {
    let flat = model_dir.join(asset);
    if asset != "model.onnx" || flat.exists() {
        return flat;
    }

    let nested = model_dir.join("onnx").join(asset);
    if nested.exists() {
        nested
    } else {
        flat
    }
}
94
95fn first_model_dir_with_assets(base: &std::path::Path) -> Option<std::path::PathBuf> {
96 model_dir_candidates(base)
97 .into_iter()
98 .find(|dir| model_dir_has_assets(dir))
99}
100
/// Lists the model root directories derived from the executable's directory.
///
/// Always includes `exe_dir/models`. When `exe_dir` has a parent, the
/// parent's `models` and `share/codelens/models` directories follow, in that
/// order. Adjacent duplicates are dropped.
pub(crate) fn executable_model_roots(exe_dir: &std::path::Path) -> Vec<std::path::PathBuf> {
    let beside_exe = std::iter::once(exe_dir.join("models"));
    let under_prefix = exe_dir.parent().into_iter().flat_map(|prefix| {
        [
            prefix.join("models"),
            prefix.join("share").join("codelens").join("models"),
        ]
    });

    let mut roots: Vec<std::path::PathBuf> = beside_exe.chain(under_prefix).collect();
    roots.dedup();
    roots
}
110
111fn read_model_manifest(model_dir: &std::path::Path) -> Option<EmbeddingModelManifest> {
112 let manifest_path = model_dir.join("model-manifest.json");
113 let content = std::fs::read_to_string(manifest_path).ok()?;
114 serde_json::from_str::<EmbeddingModelManifest>(&content).ok()
115}
116
117fn configured_model_name_for_dir(model_dir: &std::path::Path) -> String {
118 read_model_manifest(model_dir)
119 .and_then(|manifest| manifest.model_name)
120 .unwrap_or_else(|| CODESEARCH_MODEL_NAME.to_string())
121}
122
123pub fn resolve_model_dir() -> Result<std::path::PathBuf> {
131 if let Ok(dir) = std::env::var("CODELENS_MODEL_DIR") {
133 let base = std::path::PathBuf::from(dir);
134 if let Some(found) = first_model_dir_with_assets(&base) {
135 return Ok(found);
136 }
137 }
138
139 if let Ok(exe) = std::env::current_exe()
141 && let Some(exe_dir) = exe.parent()
142 {
143 for base in executable_model_roots(exe_dir) {
144 if let Some(found) = first_model_dir_with_assets(&base) {
145 return Ok(found);
146 }
147 }
148 }
149
150 if let Some(home) = dirs_fallback() {
152 let base = home.join(".cache").join("codelens").join("models");
153 if let Some(found) = first_model_dir_with_assets(&base) {
154 return Ok(found);
155 }
156 }
157
158 let dev_root = std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("models");
160 if let Some(found) = first_model_dir_with_assets(&dev_root) {
161 return Ok(found);
162 }
163
164 anyhow::bail!(
165 "CodeSearchNet model not found. Place model files in one of these directories or variant subdirectories:\n\
166 - $CODELENS_MODEL_DIR/\n\
167 - $CODELENS_MODEL_DIR/codesearch/\n\
168 - $CODELENS_MODEL_DIR/onnx/\n\
169 - $CODELENS_MODEL_DIR/arm64/ or $CODELENS_MODEL_DIR/avx2/\n\
170 - $CODELENS_MODEL_DIR/codelens-code-search/<arch>/ with onnx/model.onnx\n\
171 - <executable>/models/...\n\
172 - ~/.cache/codelens/models/...\n\
173 Required files: model.onnx, tokenizer.json, config.json, special_tokens_map.json, tokenizer_config.json"
174 )
175}
176
/// Returns the user's home directory as given by the `HOME` environment variable.
///
/// Returns `None` when `HOME` is unset or empty. An empty value is treated as
/// unset because joining onto an empty path would silently produce paths
/// relative to the current working directory.
pub fn dirs_fallback() -> Option<std::path::PathBuf> {
    std::env::var_os("HOME")
        .filter(|home| !home.is_empty())
        .map(std::path::PathBuf::from)
}
180
/// Reads the environment variable `name` as a strictly positive `usize`.
///
/// Surrounding whitespace is ignored. Returns `None` when the variable is
/// unset, not valid Unicode, not a valid `usize`, or equal to zero.
pub fn parse_usize_env(name: &str) -> Option<usize> {
    let raw = std::env::var(name).ok()?;
    match raw.trim().parse::<usize>() {
        Ok(value) if value > 0 => Some(value),
        _ => None,
    }
}
187
/// Reads the environment variable `name` as a boolean flag.
///
/// After trimming and ASCII-lowercasing, `1`/`true`/`yes`/`on` map to
/// `Some(true)` and `0`/`false`/`no`/`off` map to `Some(false)`. Any other
/// value, or an unset or non-Unicode variable, yields `None`.
pub fn parse_bool_env(name: &str) -> Option<bool> {
    const TRUTHY: [&str; 4] = ["1", "true", "yes", "on"];
    const FALSY: [&str; 4] = ["0", "false", "no", "off"];

    let raw = std::env::var(name).ok()?;
    let token = raw.trim().to_ascii_lowercase();
    if TRUTHY.contains(&token.as_str()) {
        Some(true)
    } else if FALSY.contains(&token.as_str()) {
        Some(false)
    } else {
        None
    }
}
198
/// Normalises `CODELENS_EMBED_RESOURCE_PROFILE` to one of `"low_power"`,
/// `"throughput"`, or `"balanced"`.
///
/// The value is trimmed and ASCII-lowercased before matching. Unset or
/// unrecognised values resolve to `"balanced"`.
fn configured_embedding_resource_profile() -> String {
    let requested = std::env::var("CODELENS_EMBED_RESOURCE_PROFILE")
        .map(|value| value.trim().to_ascii_lowercase())
        .unwrap_or_default();

    let profile = match requested.as_str() {
        "low_power" | "low-power" | "low" | "eco" => "low_power",
        "throughput" | "fast" => "throughput",
        _ => "balanced",
    };
    profile.to_string()
}
212
/// Returns a positive core count read from sysctl on macOS.
///
/// Queries `hw.perflevel0.physicalcpu` first and falls back to
/// `hw.physicalcpu` when the first value is missing or zero.
/// NOTE(review): `perflevel0` presumably denotes the performance cores —
/// confirm against Apple's sysctl documentation.
#[cfg(target_os = "macos")]
pub fn apple_perf_cores() -> Option<usize> {
    match ffi::sysctl_usize(b"hw.perflevel0.physicalcpu\0") {
        Some(perf) if perf > 0 => Some(perf),
        _ => ffi::sysctl_usize(b"hw.physicalcpu\0").filter(|total| *total > 0),
    }
}

/// Non-macOS targets have no value to report, so this always returns `None`.
#[cfg(not(target_os = "macos"))]
pub fn apple_perf_cores() -> Option<usize> {
    None
}
224
225pub fn configured_embedding_runtime_preference() -> String {
226 let requested = std::env::var("CODELENS_EMBED_PROVIDER")
227 .ok()
228 .map(|value| value.trim().to_ascii_lowercase());
229 let resource_profile = configured_embedding_resource_profile();
230
231 match requested.as_deref() {
232 Some("cpu") => "cpu".to_string(),
233 Some("coreml") if cfg!(all(target_os = "macos", feature = "coreml")) => {
234 "coreml".to_string()
235 }
236 Some("coreml") => "cpu".to_string(),
237 _ if resource_profile == "low_power" => "cpu".to_string(),
238 _ if cfg!(all(target_os = "macos", feature = "coreml")) => "coreml_preferred".to_string(),
239 _ => "cpu".to_string(),
240 }
241}
242
/// Returns the thread count to use for embedding work.
///
/// This simply delegates to `recommended_embed_threads`.
pub fn configured_embedding_threads() -> usize {
    recommended_embed_threads()
}
246
247pub fn configured_embedding_max_length() -> usize {
248 parse_usize_env("CODELENS_EMBED_MAX_LENGTH")
249 .unwrap_or(256)
250 .clamp(32, 512)
251}
252
253pub fn configured_embedding_text_cache_size() -> usize {
254 std::env::var("CODELENS_EMBED_TEXT_CACHE_SIZE")
255 .ok()
256 .and_then(|value| value.trim().parse::<usize>().ok())
257 .unwrap_or({
258 if cfg!(target_os = "macos") {
259 DEFAULT_MACOS_TEXT_EMBED_CACHE_SIZE
260 } else {
261 DEFAULT_TEXT_EMBED_CACHE_SIZE
262 }
263 })
264 .min(8192)
265}
266
/// Normalises `CODELENS_EMBED_COREML_COMPUTE_UNITS` to a canonical name.
///
/// The value is trimmed and ASCII-lowercased. `all` maps to `"all"`,
/// `cpu`/`cpu_only` to `"cpu_only"`, and `gpu`/`cpu_and_gpu` to
/// `"cpu_and_gpu"`. Everything else, including an unset variable and the
/// explicit `ane`/`neural_engine`/`cpu_and_neural_engine` spellings, maps to
/// `"cpu_and_neural_engine"`.
#[cfg(target_os = "macos")]
pub fn configured_coreml_compute_units_name() -> String {
    let requested = std::env::var("CODELENS_EMBED_COREML_COMPUTE_UNITS")
        .map(|value| value.trim().to_ascii_lowercase())
        .unwrap_or_default();

    let units = match requested.as_str() {
        "all" => "all",
        "cpu" | "cpu_only" => "cpu_only",
        "gpu" | "cpu_and_gpu" => "cpu_and_gpu",
        // The neural-engine spellings share the default outcome.
        _ => "cpu_and_neural_engine",
    };
    units.to_string()
}
283
/// Normalises `CODELENS_EMBED_COREML_MODEL_FORMAT` to `"neural_network"` or
/// `"mlprogram"`.
///
/// Only the trimmed, ASCII-lowercased spellings `neuralnetwork` and
/// `neural_network` select `"neural_network"`; anything else, including an
/// unset variable, selects `"mlprogram"`.
#[cfg(target_os = "macos")]
pub fn configured_coreml_model_format_name() -> String {
    let requested = std::env::var("CODELENS_EMBED_COREML_MODEL_FORMAT")
        .map(|value| value.trim().to_ascii_lowercase())
        .unwrap_or_default();

    let format = if matches!(requested.as_str(), "neuralnetwork" | "neural_network") {
        "neural_network"
    } else {
        "mlprogram"
    };
    format.to_string()
}
295
/// Reports whether `CODELENS_EMBED_COREML_PROFILE_PLAN` enables the CoreML
/// compute-plan profiling option.
///
/// Defaults to `false` when the variable is unset or not a recognised boolean.
#[cfg(target_os = "macos")]
pub fn configured_coreml_profile_compute_plan() -> bool {
    // Only an explicit truthy value turns this on.
    matches!(
        parse_bool_env("CODELENS_EMBED_COREML_PROFILE_PLAN"),
        Some(true)
    )
}
300
/// Reports whether `CODELENS_EMBED_COREML_STATIC_INPUT_SHAPES` enables the
/// CoreML static-input-shapes option.
///
/// Defaults to `true` when the variable is unset or not a recognised boolean.
#[cfg(target_os = "macos")]
pub fn configured_coreml_static_input_shapes() -> bool {
    // Only an explicit falsy value turns this off.
    parse_bool_env("CODELENS_EMBED_COREML_STATIC_INPUT_SHAPES") != Some(false)
}
305
/// Normalises `CODELENS_EMBED_COREML_SPECIALIZATION` to `"default"` or
/// `"fast_prediction"`.
///
/// Only the trimmed, ASCII-lowercased value `default` selects `"default"`;
/// anything else, including an unset variable, selects `"fast_prediction"`.
#[cfg(target_os = "macos")]
pub fn configured_coreml_specialization_strategy_name() -> String {
    let requested = std::env::var("CODELENS_EMBED_COREML_SPECIALIZATION")
        .map(|value| value.trim().to_ascii_lowercase())
        .unwrap_or_default();

    let strategy = if requested == "default" {
        "default"
    } else {
        "fast_prediction"
    };
    strategy.to_string()
}
317
/// Returns the directory used for the CoreML model cache.
///
/// The path is `.cache/codelens/coreml-cache/codesearch` beneath the home
/// directory from `dirs_fallback`, or beneath the system temporary directory
/// when no home directory is available. The directory is not created here.
#[cfg(target_os = "macos")]
pub fn configured_coreml_model_cache_dir() -> std::path::PathBuf {
    let mut cache_dir = match dirs_fallback() {
        Some(home) => home,
        None => std::env::temp_dir(),
    };
    cache_dir.extend([".cache", "codelens", "coreml-cache", "codesearch"]);
    cache_dir
}
327
328pub fn recommended_embed_threads() -> usize {
329 if let Some(explicit) = parse_usize_env("CODELENS_EMBED_THREADS") {
330 return explicit.max(1);
331 }
332
333 let available = available_parallelism().map(|n| n.get()).unwrap_or(1);
334 let resource_profile = configured_embedding_resource_profile();
335 if resource_profile == "low_power" {
336 return available.clamp(1, 2);
337 }
338 if cfg!(target_os = "macos") {
339 let base = apple_perf_cores()
340 .unwrap_or(available)
341 .min(available)
342 .clamp(1, 8);
343 if resource_profile == "throughput" {
344 base.max(available.min(8))
345 } else {
346 base
347 }
348 } else {
349 let base = available.div_ceil(2).clamp(1, 8);
350 if resource_profile == "throughput" {
351 available.clamp(1, 8)
352 } else {
353 base
354 }
355 }
356}
357
358pub fn embed_batch_size() -> usize {
359 parse_usize_env("CODELENS_EMBED_BATCH_SIZE").unwrap_or_else(|| {
360 if configured_embedding_resource_profile() == "low_power" {
361 32
362 } else if cfg!(target_os = "macos") {
363 DEFAULT_MACOS_EMBED_BATCH_SIZE
364 } else {
365 DEFAULT_EMBED_BATCH_SIZE
366 }
367 })
368}
369
370pub fn max_embed_symbols() -> usize {
371 parse_usize_env("CODELENS_MAX_EMBED_SYMBOLS").unwrap_or(DEFAULT_MAX_EMBED_SYMBOLS)
372}
373
/// Sets the process environment variable `name` to `value` unless it is
/// already present; an existing value (even an empty one) is left untouched.
fn set_env_if_unset(name: &str, value: impl Into<String>) {
    if std::env::var_os(name).is_some() {
        return;
    }

    let value: String = value.into();
    // SAFETY: `set_var` is only sound while no other thread is reading or
    // writing the process environment.
    // NOTE(review): this relies on callers invoking it before worker threads
    // touch the environment — confirm at the call sites.
    unsafe {
        std::env::set_var(name, value);
    }
}
383
384pub fn configure_embedding_runtime() {
385 let threads = recommended_embed_threads();
386 let runtime_preference = configured_embedding_runtime_preference();
387
388 set_env_if_unset("OMP_NUM_THREADS", threads.to_string());
391 set_env_if_unset("OMP_WAIT_POLICY", "PASSIVE");
392 set_env_if_unset("OMP_DYNAMIC", "FALSE");
393 set_env_if_unset("TOKENIZERS_PARALLELISM", "false");
394 if cfg!(target_os = "macos") {
395 set_env_if_unset("VECLIB_MAXIMUM_THREADS", threads.to_string());
396 }
397
398 ORT_ENV_INIT.call_once(|| {
399 let pool = ort::environment::GlobalThreadPoolOptions::default()
400 .with_intra_threads(threads)
401 .and_then(|pool| pool.with_inter_threads(1))
402 .and_then(|pool| pool.with_spin_control(false));
403
404 if let Ok(pool) = pool {
405 let _ = ort::init()
406 .with_name("codelens-embedding")
407 .with_telemetry(false)
408 .with_global_thread_pool(pool)
409 .commit();
410 }
411 });
412
413 debug!(
414 threads,
415 runtime_preference = %runtime_preference,
416 "configured embedding runtime"
417 );
418}
419
420pub fn requested_embedding_model_override() -> Result<Option<String>> {
421 let env_model = std::env::var("CODELENS_EMBED_MODEL").ok();
422 let Some(model_id) = env_model else {
423 return Ok(None);
424 };
425 if model_id.is_empty() || model_id == CODESEARCH_MODEL_NAME {
426 return Ok(None);
427 }
428
429 #[cfg(feature = "model-bakeoff")]
430 {
431 return Ok(Some(model_id));
432 }
433
434 #[cfg(not(feature = "model-bakeoff"))]
435 {
436 anyhow::bail!(
437 "CODELENS_EMBED_MODEL={model_id} requires the `model-bakeoff` feature; \
438 rebuild the binary with `--features model-bakeoff` to run alternative model bake-offs"
439 );
440 }
441}
442
443pub fn configured_embedding_runtime_info() -> EmbeddingRuntimeInfo {
444 let runtime_preference = configured_embedding_runtime_preference();
445 let threads = configured_embedding_threads();
446
447 #[cfg(target_os = "macos")]
448 {
449 let coreml_enabled = runtime_preference != "cpu";
450 EmbeddingRuntimeInfo {
451 runtime_preference,
452 backend: "not_loaded".to_string(),
453 threads,
454 max_length: configured_embedding_max_length(),
455 coreml_model_format: coreml_enabled.then(configured_coreml_model_format_name),
456 coreml_compute_units: coreml_enabled.then(configured_coreml_compute_units_name),
457 coreml_static_input_shapes: coreml_enabled.then(configured_coreml_static_input_shapes),
458 coreml_profile_compute_plan: coreml_enabled
459 .then(configured_coreml_profile_compute_plan),
460 coreml_specialization_strategy: coreml_enabled
461 .then(configured_coreml_specialization_strategy_name),
462 coreml_model_cache_dir: coreml_enabled
463 .then(|| configured_coreml_model_cache_dir().display().to_string()),
464 fallback_reason: None,
465 }
466 }
467
468 #[cfg(not(target_os = "macos"))]
469 {
470 EmbeddingRuntimeInfo {
471 runtime_preference,
472 backend: "not_loaded".to_string(),
473 threads,
474 max_length: configured_embedding_max_length(),
475 coreml_model_format: None,
476 coreml_compute_units: None,
477 coreml_static_input_shapes: None,
478 coreml_profile_compute_plan: None,
479 coreml_specialization_strategy: None,
480 coreml_model_cache_dir: None,
481 fallback_reason: None,
482 }
483 }
484}
485
/// Builds the CoreML execution provider from the `CODELENS_EMBED_COREML_*` settings.
///
/// Compute units, model format, specialization strategy, static input shapes,
/// compute-plan profiling, and the model cache directory all come from the
/// corresponding `configured_coreml_*` helpers. The cache directory is
/// created on a best-effort basis; a creation failure is ignored. The
/// provider is built with `error_on_failure()`.
#[cfg(all(target_os = "macos", feature = "coreml"))]
pub fn build_coreml_execution_provider() -> ExecutionProviderDispatch {
    use ort::ep::{
        CoreML,
        coreml::{ComputeUnits, ModelFormat, SpecializationStrategy},
    };

    let units_name = configured_coreml_compute_units_name();
    let units = match units_name.as_str() {
        "all" => ComputeUnits::All,
        "cpu_only" => ComputeUnits::CPUOnly,
        "cpu_and_gpu" => ComputeUnits::CPUAndGPU,
        _ => ComputeUnits::CPUAndNeuralEngine,
    };

    let format = if configured_coreml_model_format_name() == "neural_network" {
        ModelFormat::NeuralNetwork
    } else {
        ModelFormat::MLProgram
    };

    let strategy = if configured_coreml_specialization_strategy_name() == "default" {
        SpecializationStrategy::Default
    } else {
        SpecializationStrategy::FastPrediction
    };

    let model_cache = configured_coreml_model_cache_dir();
    // Best effort: a missing cache directory is not treated as an error here.
    let _ = std::fs::create_dir_all(&model_cache);

    let static_shapes = configured_coreml_static_input_shapes();
    let profile_plan = configured_coreml_profile_compute_plan();

    CoreML::default()
        .with_model_format(format)
        .with_compute_units(units)
        .with_static_input_shapes(static_shapes)
        .with_specialization_strategy(strategy)
        .with_profile_compute_plan(profile_plan)
        .with_model_cache_dir(model_cache.display().to_string())
        .build()
        .error_on_failure()
}
520
521pub fn cpu_runtime_info(
522 runtime_preference: String,
523 fallback_reason: Option<String>,
524) -> EmbeddingRuntimeInfo {
525 EmbeddingRuntimeInfo {
526 runtime_preference,
527 backend: "cpu".to_string(),
528 threads: configured_embedding_threads(),
529 max_length: configured_embedding_max_length(),
530 coreml_model_format: None,
531 coreml_compute_units: None,
532 coreml_static_input_shapes: None,
533 coreml_profile_compute_plan: None,
534 coreml_specialization_strategy: None,
535 coreml_model_cache_dir: None,
536 fallback_reason,
537 }
538}
539
/// Builds the runtime description for a CoreML load attempt.
///
/// The backend is reported as `"cpu"` when `fallback_reason` is present and
/// as `"coreml"` otherwise. The CoreML fields always reflect the current
/// configuration, even when the backend fell back to CPU.
#[cfg(all(target_os = "macos", feature = "coreml"))]
pub fn coreml_runtime_info(
    runtime_preference: String,
    fallback_reason: Option<String>,
) -> EmbeddingRuntimeInfo {
    let backend = match &fallback_reason {
        Some(_) => "cpu",
        None => "coreml",
    }
    .to_string();
    let threads = configured_embedding_threads();
    let max_length = configured_embedding_max_length();
    let cache_dir = configured_coreml_model_cache_dir().display().to_string();

    EmbeddingRuntimeInfo {
        runtime_preference,
        backend,
        threads,
        max_length,
        coreml_model_format: Some(configured_coreml_model_format_name()),
        coreml_compute_units: Some(configured_coreml_compute_units_name()),
        coreml_static_input_shapes: Some(configured_coreml_static_input_shapes()),
        coreml_profile_compute_plan: Some(configured_coreml_profile_compute_plan()),
        coreml_specialization_strategy: Some(configured_coreml_specialization_strategy_name()),
        coreml_model_cache_dir: Some(cache_dir),
        fallback_reason,
    }
}
563
/// Loads one of fastembed's built-in models by id (bake-off builds only).
///
/// Both the short model id and the organisation-prefixed id are accepted for
/// each supported model. Returns the model, its embedding dimension, the id
/// as passed in, and a CPU runtime description.
///
/// # Errors
///
/// Fails when `model_id` is not one of the supported ids or when fastembed
/// cannot load the model.
#[cfg(feature = "model-bakeoff")]
pub fn load_fastembed_builtin(
    model_id: &str,
) -> Result<(TextEmbedding, usize, String, EmbeddingRuntimeInfo)> {
    use fastembed::EmbeddingModel;

    let resolved = match model_id {
        "all-MiniLM-L6-v2" | "sentence-transformers/all-MiniLM-L6-v2" => {
            Some((EmbeddingModel::AllMiniLML6V2, 384))
        }
        "all-MiniLM-L12-v2" | "sentence-transformers/all-MiniLM-L12-v2" => {
            Some((EmbeddingModel::AllMiniLML12V2, 384))
        }
        "bge-small-en-v1.5" | "BAAI/bge-small-en-v1.5" => {
            Some((EmbeddingModel::BGESmallENV15, 384))
        }
        "bge-base-en-v1.5" | "BAAI/bge-base-en-v1.5" => Some((EmbeddingModel::BGEBaseENV15, 768)),
        "nomic-embed-text-v1.5" | "nomic-ai/nomic-embed-text-v1.5" => {
            Some((EmbeddingModel::NomicEmbedTextV15, 768))
        }
        "jina-embeddings-v2-base-code" | "jinaai/jina-embeddings-v2-base-code" => {
            Some((EmbeddingModel::JinaEmbeddingsV2BaseCode, 768))
        }
        _ => None,
    };
    let Some((builtin, dimension)) = resolved else {
        anyhow::bail!(
            "Unknown fastembed model: {model_id}. \
             Supported: all-MiniLM-L6-v2, all-MiniLM-L12-v2, bge-small-en-v1.5, \
             bge-base-en-v1.5, nomic-embed-text-v1.5, jina-embeddings-v2-base-code"
        );
    };

    let options = fastembed::InitOptionsWithLength::new(builtin)
        .with_max_length(configured_embedding_max_length())
        .with_cache_dir(std::env::temp_dir().join("codelens-fastembed-cache"))
        .with_show_download_progress(true);
    let model =
        TextEmbedding::try_new(options).with_context(|| format!("failed to load {model_id}"))?;

    let runtime_info = cpu_runtime_info("cpu".to_string(), None);

    tracing::info!(
        model = model_id,
        dimension = dimension,
        "loaded fastembed built-in model for A/B comparison"
    );

    Ok((model, dimension, model_id.to_string(), runtime_info))
}
615
616pub fn load_codesearch_model() -> Result<(TextEmbedding, usize, String, EmbeddingRuntimeInfo)> {
618 configure_embedding_runtime();
619
620 if let Some(model_id) = requested_embedding_model_override()? {
622 #[cfg(feature = "model-bakeoff")]
623 {
624 return load_fastembed_builtin(&model_id);
625 }
626
627 #[cfg(not(feature = "model-bakeoff"))]
628 {
629 let _ = model_id;
630 unreachable!("alternative embedding model override should have errored");
631 }
632 }
633
634 let model_dir = resolve_model_dir()?;
635 let model_name = configured_model_name_for_dir(&model_dir);
636
637 let onnx_bytes = std::fs::read(model_asset_path(&model_dir, "model.onnx"))
638 .context("failed to read model.onnx")?;
639 let tokenizer_bytes = std::fs::read(model_asset_path(&model_dir, "tokenizer.json"))
640 .context("failed to read tokenizer.json")?;
641 let config_bytes = std::fs::read(model_asset_path(&model_dir, "config.json"))
642 .context("failed to read config.json")?;
643 let special_tokens_bytes =
644 std::fs::read(model_asset_path(&model_dir, "special_tokens_map.json"))
645 .context("failed to read special_tokens_map.json")?;
646 let tokenizer_config_bytes =
647 std::fs::read(model_asset_path(&model_dir, "tokenizer_config.json"))
648 .context("failed to read tokenizer_config.json")?;
649
650 let user_model = UserDefinedEmbeddingModel::new(
651 onnx_bytes,
652 TokenizerFiles {
653 tokenizer_file: tokenizer_bytes,
654 config_file: config_bytes,
655 special_tokens_map_file: special_tokens_bytes,
656 tokenizer_config_file: tokenizer_config_bytes,
657 },
658 );
659
660 let runtime_preference = configured_embedding_runtime_preference();
661
662 #[cfg(all(target_os = "macos", feature = "coreml"))]
663 if runtime_preference != "cpu" {
664 let init_opts = InitOptionsUserDefined::new()
665 .with_max_length(configured_embedding_max_length())
666 .with_execution_providers(vec![build_coreml_execution_provider()]);
667 match TextEmbedding::try_new_from_user_defined(user_model.clone(), init_opts) {
668 Ok(model) => {
669 let runtime_info = coreml_runtime_info(runtime_preference.clone(), None);
670 debug!(
671 threads = runtime_info.threads,
672 runtime_preference = %runtime_info.runtime_preference,
673 backend = %runtime_info.backend,
674 coreml_compute_units = ?runtime_info.coreml_compute_units,
675 coreml_static_input_shapes = ?runtime_info.coreml_static_input_shapes,
676 coreml_profile_compute_plan = ?runtime_info.coreml_profile_compute_plan,
677 coreml_specialization_strategy = ?runtime_info.coreml_specialization_strategy,
678 coreml_model_cache_dir = ?runtime_info.coreml_model_cache_dir,
679 "loaded CodeSearchNet embedding model"
680 );
681 return Ok((
682 model,
683 CODESEARCH_DIMENSION,
684 model_name.clone(),
685 runtime_info,
686 ));
687 }
688 Err(err) => {
689 let reason = err.to_string();
690 debug!(
691 runtime_preference = %runtime_preference,
692 fallback_reason = %reason,
693 "CoreML embedding load failed; falling back to CPU"
694 );
695 let model = TextEmbedding::try_new_from_user_defined(
696 user_model,
697 InitOptionsUserDefined::new()
698 .with_max_length(configured_embedding_max_length()),
699 )
700 .context("failed to load CodeSearchNet embedding model")?;
701 let runtime_info = coreml_runtime_info(runtime_preference.clone(), Some(reason));
702 debug!(
703 threads = runtime_info.threads,
704 runtime_preference = %runtime_info.runtime_preference,
705 backend = %runtime_info.backend,
706 coreml_compute_units = ?runtime_info.coreml_compute_units,
707 coreml_static_input_shapes = ?runtime_info.coreml_static_input_shapes,
708 coreml_profile_compute_plan = ?runtime_info.coreml_profile_compute_plan,
709 coreml_specialization_strategy = ?runtime_info.coreml_specialization_strategy,
710 coreml_model_cache_dir = ?runtime_info.coreml_model_cache_dir,
711 fallback_reason = ?runtime_info.fallback_reason,
712 "loaded CodeSearchNet embedding model"
713 );
714 return Ok((
715 model,
716 CODESEARCH_DIMENSION,
717 model_name.clone(),
718 runtime_info,
719 ));
720 }
721 }
722 }
723
724 let model = TextEmbedding::try_new_from_user_defined(
725 user_model,
726 InitOptionsUserDefined::new().with_max_length(configured_embedding_max_length()),
727 )
728 .context("failed to load CodeSearchNet embedding model")?;
729 let runtime_info = cpu_runtime_info(runtime_preference.clone(), None);
730
731 debug!(
732 threads = runtime_info.threads,
733 runtime_preference = %runtime_info.runtime_preference,
734 backend = %runtime_info.backend,
735 "loaded CodeSearchNet embedding model"
736 );
737
738 Ok((model, CODESEARCH_DIMENSION, model_name, runtime_info))
739}
740
741pub fn configured_embedding_model_name() -> String {
742 if let Ok(model) = std::env::var("CODELENS_EMBED_MODEL") {
743 return model;
744 }
745 if let Ok(model_dir) = resolve_model_dir() {
746 return configured_model_name_for_dir(&model_dir);
747 }
748 CODESEARCH_MODEL_NAME.to_string()
749}
750
/// Returns the rerank blend factor configured via `CODELENS_RERANK_BLEND`.
///
/// The value is trimmed (like the other numeric settings in this file) and
/// must parse as an `f64` within `0.0..=1.0`. Unset, unparsable, out-of-range,
/// or NaN values fall back to the default of `0.75`.
/// NOTE(review): how the factor is applied is defined by the caller — confirm there.
pub fn configured_rerank_blend() -> f64 {
    const DEFAULT_BLEND: f64 = 0.75;

    std::env::var("CODELENS_RERANK_BLEND")
        .ok()
        .and_then(|raw| raw.trim().parse::<f64>().ok())
        // NaN fails the range check, so it also falls back to the default.
        .filter(|blend| (0.0..=1.0).contains(blend))
        .unwrap_or(DEFAULT_BLEND)
}
764
/// Reports whether a model directory with all required assets can be resolved.
///
/// This is `true` exactly when `resolve_model_dir` succeeds; the resolved
/// path and any error detail are discarded.
pub fn embedding_model_assets_available() -> bool {
    resolve_model_dir().is_ok()
}