coding-agent-search 0.5.0

Unified TUI search over local coding agent histories
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
//! FastEmbed-based ML embedders.
//!
//! Loads local ONNX model + tokenizer bundles and produces semantic embeddings.
//! This implementation never downloads model assets; it expects the model files
//! to be present on disk and returns a clear error when they are missing.
//!
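//! Supports multiple models:
//! - MiniLM (baseline)
//! - EmbeddingGemma (bake-off candidate)
//! - Qwen3-Embedding (bake-off candidate)
//! - ModernBERT-embed (bake-off candidate)
//! - Snowflake Arctic Embed (bake-off candidate)
//! - Nomic Embed Text (bake-off candidate)
//!
//! Only MiniLM, Snowflake Arctic Embed, and Nomic Embed Text have named entries in
//! `FastEmbedder::config_for` / `FastEmbedder::model_dir_for`; any other model has to be
//! loaded through `FastEmbedder::load_with_config` with an explicit `OnnxEmbedderConfig`.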

use std::fs;
use std::path::{Path, PathBuf};
use std::sync::Mutex;

use fastembed::{
    InitOptionsUserDefined, Pooling, TextEmbedding, TokenizerFiles, UserDefinedEmbeddingModel,
};

use super::embedder::{Embedder, EmbedderError, EmbedderResult};
use frankensearch::{ModelCategory, ModelTier};

// MiniLM constants (baseline)
/// Model identifier used by the default (MiniLM) configuration.
const MINILM_MODEL_ID: &str = "all-minilm-l6-v2";
/// Directory name (under `<data_dir>/models/`) that holds the MiniLM bundle.
const MINILM_DIR_NAME: &str = "all-MiniLM-L6-v2";
/// Embedder identifier used by the default (MiniLM) configuration.
const MINILM_EMBEDDER_ID: &str = "minilm-384";
/// Expected output vector length for MiniLM.
const MINILM_DIMENSION: usize = 384;

// Standard ONNX file names — prefer onnx/ subdir (modern layout), fall back to flat (legacy).
/// ONNX model path relative to the model directory in the modern layout (checked first).
pub const MODEL_ONNX_SUBDIR: &str = "onnx/model.onnx";
/// ONNX model path relative to the model directory in the legacy flat layout (checked second).
pub const MODEL_ONNX_LEGACY: &str = "model.onnx";
// The four files below must all exist next to the model; see `required_model_files`.
const TOKENIZER_JSON: &str = "tokenizer.json";
const CONFIG_JSON: &str = "config.json";
const SPECIAL_TOKENS_JSON: &str = "special_tokens_map.json";
const TOKENIZER_CONFIG_JSON: &str = "tokenizer_config.json";

/// Configuration for loading an ONNX embedder.
///
/// Passed to `FastEmbedder::load_with_config`; the `Default` implementation
/// describes the baseline MiniLM model.
#[derive(Debug, Clone)]
pub struct OnnxEmbedderConfig {
    /// Unique embedder ID (e.g., "minilm-384").
    ///
    /// Becomes the loaded embedder's `id()` and is the name attached to the
    /// errors raised while loading or embedding.
    pub embedder_id: String,
    /// Model identifier for logging.
    ///
    /// Returned by the loaded embedder's `model_id()` / `model_name()`.
    pub model_id: String,
    /// Output embedding dimension.
    ///
    /// Every vector produced by the model is checked against this length; a
    /// mismatch is reported as an embedding failure.
    pub dimension: usize,
    /// Pooling strategy.
    ///
    /// Set on the fastembed user-defined model before it is initialised.
    pub pooling: Pooling,
}

impl Default for OnnxEmbedderConfig {
    fn default() -> Self {
        Self {
            embedder_id: MINILM_EMBEDDER_ID.to_string(),
            model_id: MINILM_MODEL_ID.to_string(),
            dimension: MINILM_DIMENSION,
            pooling: Pooling::Mean,
        }
    }
}

/// FastEmbed-backed semantic embedder.
///
/// Supports multiple ONNX models with configurable dimensions and pooling.
/// Instances are created from on-disk model files via `load_from_dir`,
/// `load_with_config` or `load_by_name`.
pub struct FastEmbedder {
    /// The loaded fastembed model; the mutex is locked for each embed call.
    model: Mutex<TextEmbedding>,
    /// Embedder identifier (taken from `OnnxEmbedderConfig::embedder_id`).
    id: String,
    /// Model identifier (taken from `OnnxEmbedderConfig::model_id`).
    model_id: String,
    /// Expected length of every embedding the model returns.
    dimension: usize,
}

impl FastEmbedder {
    /// Stable embedder identifier for MiniLM (matches vector index naming).
    pub fn embedder_id_static() -> &'static str {
        MINILM_EMBEDDER_ID
    }

    /// Stable model identifier for MiniLM.
    pub fn model_id_static() -> &'static str {
        MINILM_MODEL_ID
    }

    /// Required non-model files for any ONNX embedder.
    ///
    /// The ONNX model itself can live at `onnx/model.onnx` (modern) or
    /// `model.onnx` (legacy) — use [`select_model_file`] to find it.
    pub fn required_model_files() -> &'static [&'static str] {
        &[
            TOKENIZER_JSON,
            CONFIG_JSON,
            SPECIAL_TOKENS_JSON,
            TOKENIZER_CONFIG_JSON,
        ]
    }

    /// Candidate ONNX model locations, ordered from preferred to legacy.
    pub fn model_file_candidates() -> &'static [&'static str] {
        &[MODEL_ONNX_SUBDIR, MODEL_ONNX_LEGACY]
    }

    /// Select the ONNX model file, preferring `onnx/model.onnx` over `model.onnx`.
    pub fn select_model_file(model_dir: &Path) -> Option<PathBuf> {
        for candidate in Self::model_file_candidates() {
            let path = model_dir.join(candidate);
            if path.is_file() {
                return Some(path);
            }
        }
        None
    }

    /// Default MiniLM model directory relative to the cass data dir.
    pub fn default_model_dir(data_dir: &Path) -> PathBuf {
        data_dir.join("models").join(MINILM_DIR_NAME)
    }

    /// Get model directory for a specific embedder name.
    pub fn model_dir_for(data_dir: &Path, embedder_name: &str) -> Option<PathBuf> {
        let dir_name = match embedder_name {
            "minilm" => MINILM_DIR_NAME,
            "snowflake-arctic-s" => "snowflake-arctic-embed-s",
            "nomic-embed" => "nomic-embed-text-v1.5",
            _ => return None,
        };
        Some(data_dir.join("models").join(dir_name))
    }

    /// Get config for a specific embedder by name.
    pub fn config_for(embedder_name: &str) -> Option<OnnxEmbedderConfig> {
        match embedder_name {
            "minilm" => Some(OnnxEmbedderConfig {
                embedder_id: "minilm-384".to_string(),
                model_id: "all-minilm-l6-v2".to_string(),
                dimension: 384,
                pooling: Pooling::Mean,
            }),
            "snowflake-arctic-s" => Some(OnnxEmbedderConfig {
                embedder_id: "snowflake-arctic-s-384".to_string(),
                model_id: "snowflake-arctic-embed-s".to_string(),
                dimension: 384,
                pooling: Pooling::Mean,
            }),
            "nomic-embed" => Some(OnnxEmbedderConfig {
                embedder_id: "nomic-embed-768".to_string(),
                model_id: "nomic-embed-text-v1.5".to_string(),
                dimension: 768,
                pooling: Pooling::Mean,
            }),
            _ => None,
        }
    }

    /// Load the MiniLM model (convenience wrapper).
    pub fn load_from_dir(model_dir: &Path) -> EmbedderResult<Self> {
        Self::load_with_config(model_dir, OnnxEmbedderConfig::default())
    }

    /// Load an ONNX embedder with custom configuration.
    pub fn load_with_config(model_dir: &Path, config: OnnxEmbedderConfig) -> EmbedderResult<Self> {
        if !model_dir.is_dir() {
            return Err(Self::unavailable_error(
                &config.embedder_id,
                format!("model directory not found: {}", model_dir.display()),
            ));
        }

        let onnx_path = Self::select_model_file(model_dir).ok_or_else(|| {
            Self::unavailable_error(
                &config.embedder_id,
                format!(
                    "no ONNX model file in {} (checked {} and {})",
                    model_dir.display(),
                    MODEL_ONNX_SUBDIR,
                    MODEL_ONNX_LEGACY
                ),
            )
        })?;

        let required = Self::required_model_files();
        let mut missing = Vec::new();
        for name in required {
            let path = model_dir.join(name);
            if !path.is_file() {
                missing.push(*name);
            }
        }
        if !missing.is_empty() {
            return Err(Self::unavailable_error(
                &config.embedder_id,
                format!(
                    "model files missing in {}: {}",
                    model_dir.display(),
                    missing.join(", ")
                ),
            ));
        }

        let model_file = Self::read_required(onnx_path, "model.onnx", &config.embedder_id)?;
        let tokenizer_file = Self::read_required(
            model_dir.join(TOKENIZER_JSON),
            TOKENIZER_JSON,
            &config.embedder_id,
        )?;
        let config_file = Self::read_required(
            model_dir.join(CONFIG_JSON),
            CONFIG_JSON,
            &config.embedder_id,
        )?;
        let special_tokens_map_file = Self::read_required(
            model_dir.join(SPECIAL_TOKENS_JSON),
            SPECIAL_TOKENS_JSON,
            &config.embedder_id,
        )?;
        let tokenizer_config_file = Self::read_required(
            model_dir.join(TOKENIZER_CONFIG_JSON),
            TOKENIZER_CONFIG_JSON,
            &config.embedder_id,
        )?;

        let tokenizer_files = TokenizerFiles {
            tokenizer_file,
            config_file,
            special_tokens_map_file,
            tokenizer_config_file,
        };

        let mut model = UserDefinedEmbeddingModel::new(model_file, tokenizer_files);
        model.pooling = Some(config.pooling);

        let init_options = InitOptionsUserDefined::new();

        let model = TextEmbedding::try_new_from_user_defined(model, init_options).map_err(|e| {
            EmbedderError::EmbeddingFailed {
                model: config.embedder_id.clone(),
                source: Box::new(std::io::Error::other(format!("fastembed init failed: {e}"))),
            }
        })?;

        Ok(Self {
            model: Mutex::new(model),
            id: config.embedder_id,
            model_id: config.model_id,
            dimension: config.dimension,
        })
    }

    /// Load an embedder by name from the data directory.
    pub fn load_by_name(data_dir: &Path, embedder_name: &str) -> EmbedderResult<Self> {
        let model_dir = Self::model_dir_for(data_dir, embedder_name).ok_or_else(|| {
            Self::unavailable_error(
                embedder_name,
                format!("unknown embedder: {}", embedder_name),
            )
        })?;
        let config = Self::config_for(embedder_name).ok_or_else(|| {
            Self::unavailable_error(
                embedder_name,
                format!("no config for embedder: {}", embedder_name),
            )
        })?;
        Self::load_with_config(&model_dir, config)
    }

    /// Stable model identifier for compatibility checks.
    pub fn model_id(&self) -> &str {
        &self.model_id
    }

    fn read_required(path: PathBuf, label: &str, model_id: &str) -> EmbedderResult<Vec<u8>> {
        fs::read(&path).map_err(|e| {
            Self::unavailable_error(
                model_id,
                format!("unable to read {label} at {}: {e}", path.display()),
            )
        })
    }

    fn unavailable_error(model: impl Into<String>, reason: impl Into<String>) -> EmbedderError {
        EmbedderError::EmbedderUnavailable {
            model: model.into(),
            reason: reason.into(),
        }
    }

    fn normalize_in_place(embedding: &mut [f32]) {
        let norm_sq: f32 = embedding.iter().map(|x| x * x).sum();
        if norm_sq.is_finite() && norm_sq > f32::EPSILON {
            let inv_norm = 1.0 / norm_sq.sqrt();
            for v in embedding.iter_mut() {
                *v *= inv_norm;
            }
        } else {
            // NaN/Inf contamination — zero out to prevent poisoning similarity search.
            embedding.fill(0.0);
        }
    }
}

impl Embedder for FastEmbedder {
    fn embed_sync(&self, text: &str) -> EmbedderResult<Vec<f32>> {
        if text.is_empty() {
            return Err(EmbedderError::InvalidConfig {
                field: "input_text".to_string(),
                value: "(empty)".to_string(),
                reason: "empty text".to_string(),
            });
        }

        #[allow(unused_mut)]
        let mut model = self
            .model
            .lock()
            .map_err(|_| EmbedderError::SubsystemError {
                subsystem: "embedder",
                source: Box::new(std::io::Error::other("fastembed lock poisoned")),
            })?;

        let embeddings =
            model
                .embed(vec![text], None)
                .map_err(|e| EmbedderError::EmbeddingFailed {
                    model: self.id.clone(),
                    source: Box::new(std::io::Error::other(format!(
                        "fastembed embed failed: {e}"
                    ))),
                })?;

        let mut embedding =
            embeddings
                .into_iter()
                .next()
                .ok_or_else(|| EmbedderError::EmbeddingFailed {
                    model: self.id.clone(),
                    source: Box::new(std::io::Error::other("fastembed returned no embedding")),
                })?;

        if embedding.len() != self.dimension {
            return Err(EmbedderError::EmbeddingFailed {
                model: self.id.clone(),
                source: Box::new(std::io::Error::other(format!(
                    "fastembed dimension mismatch: expected {}, got {}",
                    self.dimension,
                    embedding.len()
                ))),
            });
        }

        Self::normalize_in_place(&mut embedding);
        Ok(embedding)
    }

    fn embed_batch_sync(&self, texts: &[&str]) -> EmbedderResult<Vec<Vec<f32>>> {
        for text in texts {
            if text.is_empty() {
                return Err(EmbedderError::InvalidConfig {
                    field: "input_text".to_string(),
                    value: "(empty)".to_string(),
                    reason: "empty text in batch".to_string(),
                });
            }
        }

        if texts.is_empty() {
            return Ok(Vec::new());
        }

        #[allow(unused_mut)]
        let mut model = self
            .model
            .lock()
            .map_err(|_| EmbedderError::SubsystemError {
                subsystem: "embedder",
                source: Box::new(std::io::Error::other("fastembed lock poisoned")),
            })?;

        let inputs = texts.to_vec();
        let mut embeddings =
            model
                .embed(inputs, None)
                .map_err(|e| EmbedderError::EmbeddingFailed {
                    model: self.id.clone(),
                    source: Box::new(std::io::Error::other(format!(
                        "fastembed embed failed: {e}"
                    ))),
                })?;

        for embedding in embeddings.iter_mut() {
            if embedding.len() != self.dimension {
                return Err(EmbedderError::EmbeddingFailed {
                    model: self.id.clone(),
                    source: Box::new(std::io::Error::other(format!(
                        "fastembed dimension mismatch: expected {}, got {}",
                        self.dimension,
                        embedding.len()
                    ))),
                });
            }
            Self::normalize_in_place(embedding);
        }

        Ok(embeddings)
    }

    fn dimension(&self) -> usize {
        self.dimension
    }

    fn id(&self) -> &str {
        &self.id
    }

    fn model_name(&self) -> &str {
        &self.model_id
    }

    fn is_semantic(&self) -> bool {
        true
    }

    fn category(&self) -> ModelCategory {
        ModelCategory::TransformerEmbedder
    }

    fn tier(&self) -> ModelTier {
        ModelTier::Quality
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Names accepted by both `config_for` and `model_dir_for`.
    const KNOWN_EMBEDDERS: [&str; 3] = ["minilm", "snowflake-arctic-s", "nomic-embed"];

    #[test]
    fn fastembed_missing_files_returns_unavailable() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let err = FastEmbedder::load_from_dir(tmp.path())
            .err()
            .expect("missing model should fail");
        assert!(
            matches!(err, EmbedderError::EmbedderUnavailable { .. }),
            "expected EmbedderUnavailable, got {err:?}"
        );
    }

    #[test]
    fn missing_tokenizer_files_are_all_listed() {
        // Only the ONNX file is present, so every companion file must be reported.
        let tmp = tempfile::tempdir().expect("tempdir");
        std::fs::write(tmp.path().join(MODEL_ONNX_LEGACY), b"not a real model").unwrap();

        let err = FastEmbedder::load_from_dir(tmp.path())
            .err()
            .expect("missing tokenizer files should fail");
        match err {
            EmbedderError::EmbedderUnavailable { reason, .. } => {
                assert!(reason.contains("model files missing"), "reason: {reason}");
                for name in FastEmbedder::required_model_files() {
                    assert!(reason.contains(name), "{name} not listed in: {reason}");
                }
            }
            other => panic!("expected EmbedderUnavailable, got {other:?}"),
        }
    }

    #[test]
    fn unavailable_error_preserves_shape() {
        let err = FastEmbedder::unavailable_error("test-model", "missing files");
        assert!(std::error::Error::source(&err).is_none());
        match err {
            EmbedderError::EmbedderUnavailable { model, reason } => {
                assert_eq!(model, "test-model");
                assert_eq!(reason, "missing files");
            }
            other => panic!("expected EmbedderUnavailable, got {other:?}"),
        }
    }

    #[test]
    fn select_model_file_prefers_modern_onnx_layout() {
        let tmp = tempfile::tempdir().expect("tempdir");
        std::fs::create_dir_all(tmp.path().join("onnx")).unwrap();
        std::fs::write(tmp.path().join("onnx/model.onnx"), b"modern").unwrap();
        std::fs::write(tmp.path().join("model.onnx"), b"legacy").unwrap();

        let selected = FastEmbedder::select_model_file(tmp.path()).unwrap();
        assert_eq!(
            selected,
            tmp.path().join(MODEL_ONNX_SUBDIR),
            "should prefer onnx/ subdir"
        );
    }

    #[test]
    fn select_model_file_falls_back_to_legacy() {
        let tmp = tempfile::tempdir().expect("tempdir");
        std::fs::write(tmp.path().join("model.onnx"), b"legacy").unwrap();

        let selected = FastEmbedder::select_model_file(tmp.path()).unwrap();
        assert_eq!(
            selected,
            tmp.path().join(MODEL_ONNX_LEGACY),
            "should fall back to legacy"
        );
    }

    #[test]
    fn select_model_file_returns_none_for_empty_dir() {
        let tmp = tempfile::tempdir().expect("tempdir");
        assert!(FastEmbedder::select_model_file(tmp.path()).is_none());
    }

    #[test]
    fn config_for_known_models() {
        let minilm = FastEmbedder::config_for("minilm").unwrap();
        assert_eq!(minilm.dimension, 384);

        let snowflake = FastEmbedder::config_for("snowflake-arctic-s").unwrap();
        assert_eq!(snowflake.dimension, 384);

        let nomic = FastEmbedder::config_for("nomic-embed").unwrap();
        assert_eq!(nomic.dimension, 768);

        assert!(FastEmbedder::config_for("unknown").is_none());
    }

    #[test]
    fn minilm_config_matches_default_and_static_ids() {
        let named = FastEmbedder::config_for("minilm").unwrap();
        let default = OnnxEmbedderConfig::default();
        assert_eq!(named.embedder_id, default.embedder_id);
        assert_eq!(named.model_id, default.model_id);
        assert_eq!(named.dimension, default.dimension);
        assert_eq!(named.embedder_id, FastEmbedder::embedder_id_static());
        assert_eq!(named.model_id, FastEmbedder::model_id_static());
    }

    #[test]
    fn model_dir_for_maps_known_names() {
        let data_dir = Path::new("data");
        assert_eq!(
            FastEmbedder::model_dir_for(data_dir, "minilm"),
            Some(FastEmbedder::default_model_dir(data_dir))
        );
        assert_eq!(
            FastEmbedder::model_dir_for(data_dir, "snowflake-arctic-s"),
            Some(data_dir.join("models").join("snowflake-arctic-embed-s"))
        );
        assert_eq!(
            FastEmbedder::model_dir_for(data_dir, "nomic-embed"),
            Some(data_dir.join("models").join("nomic-embed-text-v1.5"))
        );
        assert!(FastEmbedder::model_dir_for(data_dir, "unknown").is_none());
    }

    #[test]
    fn every_named_embedder_has_dir_and_config() {
        let data_dir = Path::new("data");
        for name in KNOWN_EMBEDDERS {
            assert!(
                FastEmbedder::config_for(name).is_some(),
                "no config for {name}"
            );
            assert!(
                FastEmbedder::model_dir_for(data_dir, name).is_some(),
                "no model dir for {name}"
            );
        }
    }

    #[test]
    fn load_by_name_rejects_unknown_embedder() {
        let err = FastEmbedder::load_by_name(Path::new("unused"), "no-such-embedder")
            .err()
            .expect("unknown embedder should fail");
        match err {
            EmbedderError::EmbedderUnavailable { model, reason } => {
                assert_eq!(model, "no-such-embedder");
                assert_eq!(reason, "unknown embedder: no-such-embedder");
            }
            other => panic!("expected EmbedderUnavailable, got {other:?}"),
        }
    }

    #[test]
    fn normalize_in_place_scales_to_unit_length() {
        let mut v = [3.0_f32, 4.0];
        FastEmbedder::normalize_in_place(&mut v);
        assert!((v[0] - 0.6).abs() < 1e-6, "got {v:?}");
        assert!((v[1] - 0.8).abs() < 1e-6, "got {v:?}");
    }

    #[test]
    fn normalize_in_place_zeroes_degenerate_vectors() {
        let mut zero = [0.0_f32; 4];
        FastEmbedder::normalize_in_place(&mut zero);
        assert_eq!(zero, [0.0; 4]);

        let mut with_nan = [f32::NAN, 1.0];
        FastEmbedder::normalize_in_place(&mut with_nan);
        assert_eq!(with_nan, [0.0, 0.0]);

        let mut with_inf = [f32::INFINITY, 1.0];
        FastEmbedder::normalize_in_place(&mut with_inf);
        assert_eq!(with_inf, [0.0, 0.0]);
    }
}