codelens_engine/embedding/
mod.rs1use crate::embedding_types::{EmbeddingIndexInfo, EmbeddingRuntimeInfo, SemanticMatch};
5use fastembed::TextEmbedding;
6use serde::Serialize;
7use std::sync::Mutex;
8
9mod cache;
11mod chunk_ops;
12mod duplicates;
13mod engine_impl;
14pub(super) mod ffi;
15mod prompt;
16mod runtime;
17mod vec_store;
18
19use cache::TextEmbeddingCache;
20use vec_store::SqliteVecStore;
21
22pub use chunk_ops::{CategoryScore, DuplicatePair, OutlierSymbol, cosine_similarity};
24pub use prompt::auto_sparse_should_enable;
25pub use runtime::{
26 configured_embedding_model_name, configured_embedding_runtime_info,
27 configured_embedding_runtime_preference, configured_embedding_threads,
28 embedding_model_assets_available,
29};
30
31pub(super) use chunk_ops::embedding_to_bytes;
34pub(super) use runtime::{CHANGED_FILE_QUERY_CHUNK, DEFAULT_DUPLICATE_SCAN_BATCH_SIZE};
36
37#[cfg(test)]
39pub(super) use crate::project::ProjectRoot;
40#[cfg(test)]
41pub(super) use chunk_ops::duplicate_pair_key;
42#[cfg(test)]
43pub(super) use prompt::{
44 auto_hint_mode_enabled, auto_hint_should_enable, build_embedding_text,
45 contains_format_specifier, extract_api_calls, extract_api_calls_inner, extract_body_hint,
46 extract_comment_body, extract_leading_doc, extract_nl_tokens, extract_nl_tokens_inner,
47 hint_char_budget, hint_line_budget, is_nl_shaped, is_static_method_ident, is_test_only_symbol,
48 language_supports_nl_stack, language_supports_sparse_weighting, looks_like_error_or_log_prefix,
49 looks_like_meta_annotation, nl_tokens_enabled, should_reject_literal_strict,
50 strict_comments_enabled, strict_literal_filter_enabled,
51};
52#[cfg(test)]
53pub(super) use runtime::{
54 CODESEARCH_MODEL_NAME, DEFAULT_MACOS_EMBED_BATCH_SIZE, embed_batch_size,
55 recommended_embed_threads, requested_embedding_model_override, resolve_model_dir,
56};
57
/// Embedding engine state: a text-embedding model, a vector store, and
/// bookkeeping shared by the engine's operations.
///
/// The methods are not defined in this section.
/// NOTE(review): they presumably live in the `engine_impl` submodule — confirm.
pub struct EmbeddingEngine {
    // The `fastembed` model, wrapped in a `Mutex` so it can be reached
    // through a shared `&self`.
    model: Mutex<TextEmbedding>,
    // Vector storage backend declared in the `vec_store` submodule.
    store: SqliteVecStore,
    // Name of the embedding model.
    // NOTE(review): presumably the configured model name — confirm where it is set.
    model_name: String,
    // Runtime description (type imported from `crate::embedding_types`).
    runtime_info: EmbeddingRuntimeInfo,
    // Cache type from the `cache` submodule, guarded by its own `Mutex`
    // separately from `model`.
    text_embed_cache: Mutex<TextEmbeddingCache>,
    // Atomic flag.
    // NOTE(review): presumably true while an indexing pass is running — confirm
    // against the code that stores to it.
    indexing: std::sync::atomic::AtomicBool,
}
68
/// Serializable statistics for the query-embedding cache.
///
/// NOTE(review): presumably produced from the engine's text-embedding cache;
/// the producing code is not visible in this section — confirm.
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub struct QueryEmbeddingCacheStats {
    /// Whether the cache is enabled.
    pub enabled: bool,
    /// Number of entries in the cache.
    pub entries: usize,
    /// Maximum number of entries.
    /// NOTE(review): eviction behavior at this limit is not visible here.
    pub max_entries: usize,
}
75
/// Serializable per-category counters for an embedding freshness check.
///
/// Derives `Default`, so a default report has every counter at zero.
/// NOTE(review): the code that fills these counters is not visible in this
/// section; the field descriptions below follow the field names — confirm
/// the exact counting rules against the producer.
#[derive(Debug, Clone, Default, Serialize, PartialEq, Eq)]
pub struct EmbeddingFreshnessReport {
    /// Files examined.
    pub checked_files: usize,
    /// Files found unchanged.
    pub unchanged_files: usize,
    /// Files that were refreshed.
    pub refreshed_files: usize,
    /// Files that were removed.
    pub removed_files: usize,
    /// New files that were skipped.
    /// NOTE(review): the skip condition is not visible here.
    pub skipped_new_files: usize,
    /// Symbols indexed.
    pub indexed_symbols: usize,
}
85
86#[cfg(test)]
88mod tests;