Skip to main content

codelens_engine/embedding/
mod.rs

1//! Semantic search using fastembed + sqlite-vec.
2//! Gated behind the `semantic` feature flag.
3
4use crate::embedding_store::ScoredChunk;
5use fastembed::TextEmbedding;
6use serde::Serialize;
7use std::sync::Mutex;
8
9// ── Sub-modules ───────────────────────────────────────────────────────
10mod cache;
11mod chunk_ops;
12mod engine_impl;
13pub(super) mod ffi;
14mod prompt;
15mod runtime;
16mod vec_store;
17
18use cache::TextEmbeddingCache;
19use vec_store::SqliteVecStore;
20
21// ── Public re-exports ─────────────────────────────────────────────────
22pub use chunk_ops::{CategoryScore, DuplicatePair, OutlierSymbol};
23pub use prompt::auto_sparse_should_enable;
24pub use runtime::{
25    configured_embedding_model_name, configured_embedding_runtime_info,
26    configured_embedding_runtime_preference, configured_embedding_threads,
27    embedding_model_assets_available,
28};
29
30// ── Internal re-exports used by sibling sub-modules ───────────────────
31// vec_store.rs uses embedding_to_bytes via `super::`
32pub(super) use chunk_ops::embedding_to_bytes;
33// engine_impl.rs uses these constants via `super::`
34pub(super) use runtime::{CHANGED_FILE_QUERY_CHUNK, DEFAULT_DUPLICATE_SCAN_BATCH_SIZE};
35
36// ── Test-only re-exports (for tests.rs via `use super::*`) ────────────
37#[cfg(test)]
38pub(super) use crate::project::ProjectRoot;
39#[cfg(test)]
40pub(super) use chunk_ops::duplicate_pair_key;
41#[cfg(test)]
42pub(super) use prompt::{
43    auto_hint_mode_enabled, auto_hint_should_enable, build_embedding_text,
44    contains_format_specifier, extract_api_calls, extract_api_calls_inner, extract_body_hint,
45    extract_comment_body, extract_leading_doc, extract_nl_tokens, extract_nl_tokens_inner,
46    hint_char_budget, hint_line_budget, is_nl_shaped, is_static_method_ident, is_test_only_symbol,
47    language_supports_nl_stack, language_supports_sparse_weighting, looks_like_error_or_log_prefix,
48    looks_like_meta_annotation, nl_tokens_enabled, should_reject_literal_strict,
49    strict_comments_enabled, strict_literal_filter_enabled,
50};
51#[cfg(test)]
52pub(super) use runtime::{
53    CODESEARCH_MODEL_NAME, DEFAULT_MACOS_EMBED_BATCH_SIZE, embed_batch_size,
54    recommended_embed_threads, requested_embedding_model_override,
55};
56
57// ── Result type ───────────────────────────────────────────────────────
58
59/// Result of a semantic search query.
60#[derive(Debug, Clone, Serialize)]
61pub struct SemanticMatch {
62    pub file_path: String,
63    pub symbol_name: String,
64    pub kind: String,
65    pub line: usize,
66    pub signature: String,
67    pub name_path: String,
68    pub score: f64,
69}
70
71impl From<ScoredChunk> for SemanticMatch {
72    fn from(c: ScoredChunk) -> Self {
73        Self {
74            file_path: c.file_path,
75            symbol_name: c.symbol_name,
76            kind: c.kind,
77            line: c.line,
78            signature: c.signature,
79            name_path: c.name_path,
80            score: c.score,
81        }
82    }
83}
84
85// ── Core engine struct ────────────────────────────────────────────────
86
87pub struct EmbeddingEngine {
88    model: Mutex<TextEmbedding>,
89    store: SqliteVecStore,
90    model_name: String,
91    runtime_info: EmbeddingRuntimeInfo,
92    text_embed_cache: Mutex<TextEmbeddingCache>,
93    indexing: std::sync::atomic::AtomicBool,
94}
95
96#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
97pub struct EmbeddingIndexInfo {
98    pub model_name: String,
99    pub indexed_symbols: usize,
100}
101
102#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
103pub struct EmbeddingRuntimeInfo {
104    pub runtime_preference: String,
105    pub backend: String,
106    pub threads: usize,
107    pub max_length: usize,
108    pub coreml_model_format: Option<String>,
109    pub coreml_compute_units: Option<String>,
110    pub coreml_static_input_shapes: Option<bool>,
111    pub coreml_profile_compute_plan: Option<bool>,
112    pub coreml_specialization_strategy: Option<String>,
113    pub coreml_model_cache_dir: Option<String>,
114    pub fallback_reason: Option<String>,
115}
116
117// ── Tests ─────────────────────────────────────────────────────────────
118#[cfg(test)]
119mod tests;