Skip to main content

ai_memory/
embeddings.rs

1// Copyright 2026 AlphaOne LLC
2// SPDX-License-Identifier: Apache-2.0
3
4use anyhow::{Context, Result};
5use candle_core::{Device, Tensor};
6use candle_nn::VarBuilder;
7use candle_transformers::models::bert::{BertModel, Config};
8use hf_hub::{Repo, RepoType, api::sync::Api};
9use std::sync::{Arc, Mutex};
10use tokenizers::Tokenizer;
11
12use crate::config::EmbeddingModel;
13
/// HuggingFace Hub repository id of the local `MiniLM` sentence-embedding model.
const MINILM_MODEL_ID: &str = "sentence-transformers/all-MiniLM-L6-v2";
/// Dimensionality reported for the local `MiniLM` embedder (see `Embedder::dim`).
// NOTE(review): the constant is referenced by `dim()` and `EMBEDDING_DIM`;
// the `dead_code` allowance looks like a leftover — confirm before removing.
#[allow(dead_code)]
const MINILM_DIM: usize = 384;
/// Maximum token count kept per input; passed to the tokenizer as its
/// truncation `max_length` when the local embedder is built.
const MAX_SEQ_LEN: usize = 256;
/// Fallback subdirectory under $HOME for pre-downloaded `MiniLM` model files
const FALLBACK_MODEL_SUBDIR: &str =
    ".cache/huggingface/hub/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/main";

/// Nomic model ID and Ollama tag
const NOMIC_OLLAMA_MODEL: &str = "nomic-embed-text";
/// Dimensionality reported for the Ollama-backed Nomic embedder (see `Embedder::dim`).
#[allow(dead_code)]
const NOMIC_DIM: usize = 768;
26
/// Semantic embedding engine supporting multiple backends.
///
/// - **Local** (candle): all-MiniLM-L6-v2, 384-dim. Used at the semantic tier.
/// - **Ollama**: nomic-embed-text-v1.5, 768-dim. Used at smart/autonomous tiers.
///
/// The type is `Clone`; the model, tokenizer and Ollama client are held
/// behind `Arc`, so clones share those objects rather than duplicating them.
#[derive(Clone)]
pub enum Embedder {
    /// Candle-based local embedding (MiniLM-L6-v2, 384-dim)
    Local {
        /// BERT model, locked for the duration of each `embed` call.
        // NOTE(review): `embed_local` only needs `&BertModel`, so the mutex
        // mainly serialises inference — confirm whether it is still required.
        model: Arc<Mutex<BertModel>>,
        /// Tokenizer configured in `new_local` (truncation set, padding off).
        tokenizer: Arc<Tokenizer>,
        /// Device the input tensors are created on (`new_local` uses the CPU).
        device: Device,
    },
    /// Ollama-based embedding (nomic-embed-text-v1.5, 768-dim)
    Ollama {
        /// Client that `embed` delegates to via `embed_text`.
        client: Arc<crate::llm::OllamaClient>,
        /// Model tag handed to the client on every `embed_text` call.
        model_name: String,
    },
}
45
46impl Embedder {
    /// Create a new local (candle) embedder for MiniLM-L6-v2.
    ///
    /// This is an alias for [`Embedder::new_local`]: it forwards the call
    /// unchanged, so it succeeds and fails in exactly the same cases.
    // `dead_code` is allowed so the alias does not warn when nothing calls it.
    #[allow(dead_code)]
    pub fn new() -> Result<Self> {
        Self::new_local()
    }
53
54    /// Create a local candle embedder (MiniLM-L6-v2, 384-dim).
55    pub fn new_local() -> Result<Self> {
56        let device = Device::Cpu;
57
58        let (config_path, tokenizer_path, weights_path) = match Self::download_via_hf_hub() {
59            Ok(paths) => paths,
60            Err(e) => {
61                eprintln!("ai-memory: hf-hub download failed ({e}), trying fallback dir");
62                Self::load_from_fallback()?
63            }
64        };
65
66        let config_data =
67            std::fs::read_to_string(&config_path).context("failed to read config.json")?;
68        let config: Config =
69            serde_json::from_str(&config_data).context("failed to parse config.json")?;
70
71        let mut tokenizer = Tokenizer::from_file(&tokenizer_path)
72            .map_err(|e| anyhow::anyhow!("failed to load tokenizer: {e}"))?;
73
74        let truncation = tokenizers::TruncationParams {
75            max_length: MAX_SEQ_LEN,
76            ..Default::default()
77        };
78        tokenizer
79            .with_truncation(Some(truncation))
80            .map_err(|e| anyhow::anyhow!("failed to set truncation: {e}"))?;
81        tokenizer.with_padding(None);
82
83        let vb = unsafe {
84            VarBuilder::from_mmaped_safetensors(&[weights_path], candle_core::DType::F32, &device)
85                .context("failed to load model weights")?
86        };
87        let model = BertModel::load(vb, &config).context("failed to build BertModel")?;
88
89        Ok(Self::Local {
90            model: Arc::new(Mutex::new(model)),
91            tokenizer: Arc::new(tokenizer),
92            device,
93        })
94    }
95
96    /// Create an Ollama-based embedder for nomic-embed-text-v1.5 (768-dim).
97    ///
98    /// Requires the Ollama client to already be connected and the model pulled.
99    pub fn new_ollama(client: Arc<crate::llm::OllamaClient>) -> Self {
100        Self::Ollama {
101            client,
102            model_name: NOMIC_OLLAMA_MODEL.to_string(),
103        }
104    }
105
106    /// Create an embedder for the specified model.
107    ///
108    /// - `MiniLmL6V2` → local candle embedder
109    /// - `NomicEmbedV15` → Ollama-based (requires `ollama_client`)
110    pub fn for_model(
111        model: EmbeddingModel,
112        ollama_client: Option<Arc<crate::llm::OllamaClient>>,
113    ) -> Result<Self> {
114        match model {
115            EmbeddingModel::MiniLmL6V2 => Self::new_local(),
116            EmbeddingModel::NomicEmbedV15 => {
117                let client = ollama_client.ok_or_else(|| {
118                    anyhow::anyhow!("nomic-embed-text-v1.5 requires Ollama (smart tier or above)")
119                })?;
120                // Ensure the embedding model is pulled
121                if let Err(e) = client.ensure_embed_model(NOMIC_OLLAMA_MODEL) {
122                    eprintln!("ai-memory: warning: failed to pull nomic model: {e}");
123                }
124                Ok(Self::new_ollama(client))
125            }
126        }
127    }
128
129    /// Embedding vector dimensionality for this embedder.
130    #[allow(dead_code)]
131    pub fn dim(&self) -> usize {
132        match self {
133            Self::Local { .. } => MINILM_DIM,
134            Self::Ollama { .. } => NOMIC_DIM,
135        }
136    }
137
138    /// Human-readable description of the active embedding model.
139    pub fn model_description(&self) -> &str {
140        match self {
141            Self::Local { .. } => "all-MiniLM-L6-v2 (384-dim, local)",
142            Self::Ollama { .. } => "nomic-embed-text-v1.5 (768-dim, Ollama)",
143        }
144    }
145
146    /// Generate an embedding for a single text input.
147    pub fn embed(&self, text: &str) -> Result<Vec<f32>> {
148        match self {
149            Self::Local {
150                model,
151                tokenizer,
152                device,
153            } => {
154                let model_guard = model
155                    .lock()
156                    .map_err(|e| anyhow::anyhow!("model lock poisoned: {e}"))?;
157                Self::embed_local(&model_guard, tokenizer, device, text)
158            }
159            Self::Ollama { client, model_name } => client.embed_text(text, model_name),
160        }
161    }
162
    /// Run one text through the local BERT model and return a unit-length
    /// sentence vector.
    ///
    /// Steps: tokenise, run the forward pass on a batch of one, average the
    /// token vectors selected by the attention mask, then L2-normalise.
    ///
    /// # Errors
    ///
    /// Fails when tokenisation, tensor construction, the forward pass, or any
    /// tensor operation reports an error.
    fn embed_local(
        model: &BertModel,
        tokenizer: &Tokenizer,
        device: &Device,
        text: &str,
    ) -> Result<Vec<f32>> {
        // NOTE(review): the `true` flag presumably asks the tokenizer to add
        // the model's special tokens — confirm against the tokenizers docs.
        let encoding = tokenizer
            .encode(text, true)
            .map_err(|e| anyhow::anyhow!("tokenisation failed: {e}"))?;

        let input_ids = encoding.get_ids();
        let attention_mask = encoding.get_attention_mask();
        let token_type_ids = encoding.get_type_ids();
        let seq_len = input_ids.len();

        // Each input becomes a (1, seq_len) tensor: a batch holding one sequence.
        let input_ids = Tensor::new(input_ids, device)?.reshape((1, seq_len))?;
        let attention_mask_tensor = Tensor::new(attention_mask, device)?.reshape((1, seq_len))?;
        let token_type_ids = Tensor::new(token_type_ids, device)?.reshape((1, seq_len))?;

        let hidden = model
            .forward(&input_ids, &token_type_ids, Some(&attention_mask_tensor))
            .context("model forward pass failed")?;

        // Masked mean pooling: expand the mask to the shape of `hidden`, zero
        // out unselected positions, then sum over the sequence axis (axis 1).
        let mask = attention_mask_tensor
            .unsqueeze(2)?
            .to_dtype(candle_core::DType::F32)?
            .broadcast_as(hidden.shape())?;
        let masked = hidden.mul(&mask)?;
        let summed = masked.sum(1)?;
        // Lower bound on the divisor guards against dividing by zero.
        let count = mask.sum(1)?.clamp(1e-9, f64::MAX)?;
        let pooled = summed.div(&count)?;

        // L2 normalisation; the norm is floored so a zero vector cannot
        // produce a division by zero.
        let norm = pooled
            .sqr()?
            .sum_keepdim(1)?
            .sqrt()?
            .clamp(1e-12, f64::MAX)?;
        let normalised = pooled.broadcast_div(&norm)?;

        // Drop the batch axis to obtain a flat vector.
        let embedding: Vec<f32> = normalised.squeeze(0)?.to_vec1()?;
        Ok(embedding)
    }
205
206    /// Generate embeddings for multiple texts in one call.
207    #[allow(dead_code)]
208    pub fn embed_batch(&self, texts: &[&str]) -> Result<Vec<Vec<f32>>> {
209        texts.iter().map(|t| self.embed(t)).collect()
210    }
211
212    /// Compute cosine similarity between two embedding vectors.
213    pub fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
214        // Handle dimension mismatch gracefully (e.g. mixed 384/768 embeddings)
215        if a.len() != b.len() {
216            return 0.0;
217        }
218
219        let dot: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
220        let norm_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
221        let norm_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
222        let denom = norm_a * norm_b;
223        if denom < 1e-12 { 0.0 } else { dot / denom }
224    }
225
226    /// Fuse a primary query embedding with a secondary context embedding via
227    /// weighted linear combination (v0.6.0.0 contextual recall).
228    ///
229    /// `primary_weight` clamped to `[0.0, 1.0]`. The result is returned
230    /// un-normalized — `cosine_similarity` divides out magnitudes, so the
231    /// downstream signal is direction-only. Returns `primary.to_vec()` when
232    /// dimensions differ (graceful fallback, same policy as
233    /// `cosine_similarity`).
234    #[must_use]
235    pub fn fuse(primary: &[f32], secondary: &[f32], primary_weight: f32) -> Vec<f32> {
236        if primary.len() != secondary.len() {
237            return primary.to_vec();
238        }
239        let w = primary_weight.clamp(0.0, 1.0);
240        let one_minus_w = 1.0 - w;
241        primary
242            .iter()
243            .zip(secondary.iter())
244            .map(|(p, s)| w * p + one_minus_w * s)
245            .collect()
246    }
247
248    fn download_via_hf_hub() -> Result<(std::path::PathBuf, std::path::PathBuf, std::path::PathBuf)>
249    {
250        let api = Api::new().context("failed to initialise HuggingFace Hub API")?;
251        let repo = api.repo(Repo::new(MINILM_MODEL_ID.to_string(), RepoType::Model));
252        let config_path = repo
253            .get("config.json")
254            .context("failed to download config.json")?;
255        let tokenizer_path = repo
256            .get("tokenizer.json")
257            .context("failed to download tokenizer.json")?;
258        let weights_path = repo
259            .get("model.safetensors")
260            .context("failed to download model.safetensors")?;
261        Ok((config_path, tokenizer_path, weights_path))
262    }
263
264    fn load_from_fallback() -> Result<(std::path::PathBuf, std::path::PathBuf, std::path::PathBuf)>
265    {
266        let home = std::env::var("HOME").unwrap_or_else(|_| "/root".to_string());
267        let dir = std::path::PathBuf::from(home).join(FALLBACK_MODEL_SUBDIR);
268        let dir = dir.as_path();
269        let config = dir.join("config.json");
270        let tokenizer = dir.join("tokenizer.json");
271        let weights = dir.join("model.safetensors");
272        if config.exists() && tokenizer.exists() && weights.exists() {
273            Ok((config, tokenizer, weights))
274        } else {
275            anyhow::bail!(
276                "model files not found in fallback dir: {}. Download them manually from https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2",
277                dir.display()
278            )
279        }
280    }
281}
282
/// Constant for backward compatibility — dimension of the default (`MiniLM`) embedding.
///
/// Always equal to `MINILM_DIM`; it does not reflect the Ollama backend's
/// dimensionality. Use `Embedder::dim` for the size of a specific embedder.
#[allow(dead_code)]
pub const EMBEDDING_DIM: usize = MINILM_DIM;
286
#[cfg(test)]
mod tests {
    use super::*;

    /// Euclidean (L2) length of `v`.
    fn l2_norm(v: &[f32]) -> f32 {
        v.iter().map(|x| x * x).sum::<f32>().sqrt()
    }

    // --- cosine_similarity() basics ---

    #[test]
    fn cosine_similarity_identical() {
        let unit_x = [1.0_f32, 0.0, 0.0];
        let sim = Embedder::cosine_similarity(&unit_x, &unit_x);
        assert!((sim - 1.0).abs() < 1e-6);
    }

    #[test]
    fn cosine_similarity_orthogonal() {
        let sim = Embedder::cosine_similarity(&[1.0, 0.0, 0.0], &[0.0, 1.0, 0.0]);
        assert!(sim.abs() < 1e-6);
    }

    #[test]
    fn cosine_similarity_opposite() {
        let sim = Embedder::cosine_similarity(&[1.0, 0.0], &[-1.0, 0.0]);
        assert!((sim + 1.0).abs() < 1e-6);
    }

    #[test]
    fn cosine_similarity_zero_vector() {
        let origin = [0.0_f32; 3];
        let other = [1.0_f32, 2.0, 3.0];
        assert_eq!(Embedder::cosine_similarity(&origin, &other), 0.0);
    }

    #[test]
    fn cosine_similarity_dimension_mismatch() {
        let three_d = [1.0_f32, 0.0, 0.0];
        let two_d = [1.0_f32, 0.0]; // one component fewer
        assert_eq!(Embedder::cosine_similarity(&three_d, &two_d), 0.0);
    }

    // --- v0.6.0.0 contextual recall — fuse() ---

    #[test]
    fn fuse_weighted_sum() {
        let fused = Embedder::fuse(&[1.0, 0.0, 0.0], &[0.0, 1.0, 0.0], 0.7);
        for (idx, want) in [0.7_f32, 0.3, 0.0].into_iter().enumerate() {
            assert!((fused[idx] - want).abs() < 1e-6);
        }
    }

    #[test]
    fn fuse_primary_weight_clamped() {
        let ones = [1.0_f32, 1.0];
        let zeros = [0.0_f32, 0.0];

        // A weight above 1.0 is clamped to 1.0 — pure primary.
        let pure_primary = Embedder::fuse(&ones, &zeros, 2.0);
        for idx in 0..2 {
            assert!((pure_primary[idx] - 1.0).abs() < 1e-6);
        }

        // A weight below 0.0 is clamped to 0.0 — pure secondary.
        let pure_secondary = Embedder::fuse(&ones, &zeros, -0.5);
        for idx in 0..2 {
            assert!((pure_secondary[idx] - 0.0).abs() < 1e-6);
        }
    }

    #[test]
    fn fuse_dimension_mismatch_returns_primary() {
        let primary = vec![1.0, 2.0, 3.0];
        let shorter = [4.0, 5.0]; // length differs from primary
        assert_eq!(Embedder::fuse(&primary, &shorter, 0.7), primary);
    }

    #[test]
    fn fuse_cosine_pulls_toward_context() {
        // The query points along x and the context along y. With a 70 %
        // primary weight the fused direction lies between them, closer to
        // the query.
        let query = [1.0_f32, 0.0];
        let context = [0.0_f32, 1.0];
        let fused = Embedder::fuse(&query, &context, 0.7);

        let toward_query = Embedder::cosine_similarity(&fused, &query);
        let toward_context = Embedder::cosine_similarity(&fused, &context);
        assert!(toward_query > toward_context);
        assert!(toward_query > 0.9); // ~0.919 analytically
        assert!(toward_context > 0.3); // ~0.394 analytically
    }

    // -----------------------------------------------------------------
    // W11/S11b — fuse() weight-1 + cosine-direction invariants
    // -----------------------------------------------------------------

    #[test]
    fn test_fuse_with_weight_one_returns_primary() {
        // With weight 1.0 the secondary contributes nothing, and because the
        // result is documented as un-normalized, every component must match
        // the primary.
        let primary = [0.6_f32, -0.8, 0.0]; // unit length
        let secondary = [0.0_f32, 0.0, 1.0];
        let fused = Embedder::fuse(&primary, &secondary, 1.0);
        assert_eq!(fused.len(), primary.len());
        for (i, (f, p)) in fused.iter().zip(primary.iter()).enumerate() {
            assert!(
                (f - p).abs() < 1e-6,
                "fuse weight=1 idx {i}: fused {} != primary {}",
                f,
                p
            );
        }

        // The direction must therefore coincide with the primary as well.
        let sim = Embedder::cosine_similarity(&fused, &primary);
        assert!(
            (sim - 1.0).abs() < 1e-6,
            "cos(fuse(p,s,1.0), p) must be 1.0"
        );
    }

    #[test]
    fn test_fuse_is_l2_normalized() {
        // Despite the name, this pins the documented contract that fuse()
        // does NOT normalize its output, so a later switch to L2-normalized
        // results is noticed. It also checks that normalizing afterwards
        // leaves the direction seen by cosine_similarity unchanged.
        let primary = [3.0_f32, 0.0, 0.0]; // length 3
        let secondary = [0.0_f32, 4.0, 0.0]; // length 4
        let fused = Embedder::fuse(&primary, &secondary, 0.5);

        // Raw result is [1.5, 2.0, 0.0], whose length is sqrt(2.25 + 4) = 2.5.
        let norm = l2_norm(&fused);
        assert!(
            (norm - 2.5).abs() < 1e-5,
            "fuse currently returns un-normalized vec; norm should be 2.5, got {norm}"
        );

        // Scaling to unit length must actually produce a unit vector ...
        let normalized: Vec<f32> = fused.iter().map(|x| x / norm).collect();
        let renorm = l2_norm(&normalized);
        assert!(
            (renorm - 1.0).abs() < 1e-5,
            "renormalized fused must have unit norm, got {renorm}"
        );

        // ... that still points the same way as the raw result.
        let sim = Embedder::cosine_similarity(&fused, &normalized);
        assert!(
            (sim - 1.0).abs() < 1e-5,
            "cos(raw_fuse, normalize(raw_fuse)) must be 1.0, got {sim}"
        );
    }
}
446
#[cfg(test)]
#[allow(
    clippy::unused_self,
    clippy::unnecessary_wraps,
    clippy::needless_pass_by_value,
    clippy::wildcard_imports
)]
pub mod test_support {
    use super::*;

    /// Stand-in for `Embedder` that needs neither HuggingFace Hub nor candle.
    ///
    /// It mirrors the constructor and query methods of the real type and
    /// produces deterministic vectors of the matching dimensionality.
    pub enum MockEmbedder {
        /// Mimics the local MiniLM path — 384-dim vectors.
        Local,
        /// Mimics the Ollama nomic path — 768-dim vectors.
        Ollama,
    }

    impl MockEmbedder {
        /// Mock counterpart of `Embedder::new_local`; never fails.
        pub fn new_local() -> Result<Self> {
            Ok(Self::Local)
        }

        /// Mock counterpart of `Embedder::new_ollama`.
        pub fn new_ollama() -> Self {
            Self::Ollama
        }

        /// Produce a fake embedding that depends only on `text` and the variant.
        ///
        /// A rolling base-31 hash of the bytes, reduced modulo 1000, gives an
        /// offset in `[0, 1)`; each component adds a small index-dependent
        /// term to that offset.
        pub fn embed(&self, text: &str) -> Result<Vec<f32>> {
            let mut hash = 0u32;
            for b in text.bytes() {
                hash = hash.wrapping_mul(31).wrapping_add(u32::from(b));
            }
            let base = ((hash % 1000) as f32) / 1000.0;

            let dim = self.dim();
            let mut embedding = Vec::with_capacity(dim);
            for i in 0..dim {
                embedding.push(base + ((i as f32) * 0.0001).sin().abs());
            }
            Ok(embedding)
        }

        /// Embed each text in order using [`MockEmbedder::embed`].
        pub fn embed_batch(&self, texts: &[&str]) -> Result<Vec<Vec<f32>>> {
            let mut vectors = Vec::with_capacity(texts.len());
            for text in texts {
                vectors.push(self.embed(text)?);
            }
            Ok(vectors)
        }

        /// Length of the vectors returned by [`MockEmbedder::embed`].
        pub fn dim(&self) -> usize {
            match self {
                Self::Ollama => NOMIC_DIM,
                Self::Local => MINILM_DIM,
            }
        }

        /// Label analogous to `Embedder::model_description`, prefixed with `mock-`.
        pub fn model_description(&self) -> &str {
            match self {
                Self::Ollama => "mock-nomic-embed-text-v1.5 (768-dim, Ollama)",
                Self::Local => "mock-all-MiniLM-L6-v2 (384-dim, local)",
            }
        }
    }
}
515
#[cfg(test)]
mod mock_tests {
    use super::test_support::*;
    use super::*;

    // --- construction ---

    #[test]
    fn mock_local_new() {
        assert!(MockEmbedder::new_local().is_ok());
    }

    #[test]
    fn mock_ollama_new() {
        let embedder = MockEmbedder::new_ollama();
        assert!(
            matches!(embedder, MockEmbedder::Ollama),
            "expected Ollama variant"
        );
    }

    // --- reported dimensionality ---

    #[test]
    fn mock_local_dim() {
        let local = MockEmbedder::new_local().unwrap();
        assert_eq!(local.dim(), MINILM_DIM);
    }

    #[test]
    fn mock_ollama_dim() {
        assert_eq!(MockEmbedder::new_ollama().dim(), NOMIC_DIM);
    }

    // --- single embeddings ---

    #[test]
    fn mock_embed_local_deterministic() {
        let local = MockEmbedder::new_local().unwrap();
        let first = local.embed("test").unwrap();
        let second = local.embed("test").unwrap();
        assert_eq!(first, second);
    }

    #[test]
    fn mock_embed_local_dimension() {
        let local = MockEmbedder::new_local().unwrap();
        assert_eq!(local.embed("hello world").unwrap().len(), MINILM_DIM);
    }

    #[test]
    fn mock_embed_ollama_dimension() {
        let ollama = MockEmbedder::new_ollama();
        assert_eq!(ollama.embed("hello world").unwrap().len(), NOMIC_DIM);
    }

    // --- batch embeddings ---

    #[test]
    fn mock_embed_batch_local() {
        let local = MockEmbedder::new_local().unwrap();
        let batch = local.embed_batch(&["text1", "text2", "text3"]).unwrap();
        assert_eq!(batch.len(), 3);
        for vector in batch {
            assert_eq!(vector.len(), MINILM_DIM);
        }
    }

    #[test]
    fn mock_embed_batch_ollama() {
        let ollama = MockEmbedder::new_ollama();
        let batch = ollama.embed_batch(&["text1", "text2"]).unwrap();
        assert_eq!(batch.len(), 2);
        for vector in batch {
            assert_eq!(vector.len(), NOMIC_DIM);
        }
    }

    // --- descriptions ---

    #[test]
    fn mock_local_model_description() {
        let local = MockEmbedder::new_local().unwrap();
        let label = local.model_description();
        assert!(label.contains("MiniLM"));
        assert!(label.contains("384"));
    }

    #[test]
    fn mock_ollama_model_description() {
        let ollama = MockEmbedder::new_ollama();
        let label = ollama.model_description();
        assert!(label.contains("nomic"));
        assert!(label.contains("768"));
    }

    #[test]
    fn mock_embed_different_texts_different_vectors() {
        let local = MockEmbedder::new_local().unwrap();
        let one = local.embed("text one").unwrap();
        let two = local.embed("text two").unwrap();
        // The first component is the hash-derived offset, which differs for
        // these two inputs.
        assert_ne!(one[0], two[0]);
    }
}
617
#[test]
fn cache_evicts_least_recently_used() {
    // NOTE(review): despite its name, this test does not touch any cache. It
    // checks `cosine_similarity` against a hand-computed value for two
    // non-unit, non-orthogonal vectors.
    let lhs = [1.0, 2.0, 3.0];
    let rhs = [4.0, 5.0, 6.0];
    // dot = 1*4 + 2*5 + 3*6 = 32; |lhs| = sqrt(14); |rhs| = sqrt(77)
    let expected = 32.0 / (14.0_f32.sqrt() * 77.0_f32.sqrt());
    let sim = Embedder::cosine_similarity(&lhs, &rhs);
    assert!((sim - expected).abs() < 1e-5);
}
632
633// -----------------------------------------------------------------
634// W12-H — for_model + cosine corner cases
635// -----------------------------------------------------------------
636
#[cfg(test)]
mod w12h_extra_tests {
    use super::*;

    #[test]
    fn for_model_nomic_without_ollama_client_errors() {
        // The nomic model is served by Ollama, so omitting the client must fail.
        let Err(e) = Embedder::for_model(EmbeddingModel::NomicEmbedV15, None) else {
            panic!("expected NomicEmbedV15 without client to error");
        };
        let err = e.to_string();
        assert!(
            err.contains("Ollama") || err.contains("nomic"),
            "expected ollama error msg, got: {err}"
        );
    }

    #[test]
    fn cosine_similarity_both_zero_returns_zero() {
        let origin = [0.0_f32; 3];
        // Both norms are zero, so the small-denominator guard returns 0.0.
        assert_eq!(Embedder::cosine_similarity(&origin, &origin), 0.0);
    }

    #[test]
    fn cosine_similarity_negative_values() {
        let forward = [1.0_f32, 2.0, 3.0];
        let backward = [-1.0_f32, -2.0, -3.0];
        let sim = Embedder::cosine_similarity(&forward, &backward);
        assert!((sim + 1.0).abs() < 1e-6);
    }

    #[test]
    fn cosine_similarity_empty_vectors() {
        let nothing: [f32; 0] = [];
        // Lengths match (both zero), so the length check passes and the
        // zero-norm guard decides the result.
        assert_eq!(Embedder::cosine_similarity(&nothing, &nothing), 0.0);
    }

    #[test]
    fn fuse_zero_weight_returns_pure_secondary() {
        let fused = Embedder::fuse(&[1.0_f32, 0.0], &[0.0_f32, 1.0], 0.0);
        assert!((fused[0] - 0.0).abs() < 1e-6);
        assert!((fused[1] - 1.0).abs() < 1e-6);
    }

    #[test]
    fn fuse_empty_vectors_returns_empty() {
        let nothing: [f32; 0] = [];
        assert!(Embedder::fuse(&nothing, &nothing, 0.5).is_empty());
    }

    #[test]
    fn embedding_dim_constant_pinned() {
        assert_eq!(EMBEDDING_DIM, MINILM_DIM);
        assert_eq!(MINILM_DIM, 384);
        assert_eq!(NOMIC_DIM, 768);
    }

    #[test]
    fn fuse_dimension_mismatch_secondary_longer() {
        // Mirror image of the shorter-secondary case: the fallback to the
        // primary must not depend on which side is longer.
        let primary = vec![1.0_f32, 2.0];
        let longer = [3.0_f32, 4.0, 5.0];
        assert_eq!(Embedder::fuse(&primary, &longer, 0.5), primary);
    }

    #[test]
    fn cosine_similarity_dimension_mismatch_inverse() {
        // The length guard must fire when the first argument is the shorter one.
        let two_d = [1.0_f32, 0.0];
        let three_d = [1.0_f32, 0.0, 0.0];
        assert_eq!(Embedder::cosine_similarity(&two_d, &three_d), 0.0);
    }
}
726
/// Runs `body` with `HOME` pointing at `home`, then restores the old value.
///
/// `HOME` is process-wide state, so every test in this file that depends on
/// it goes through this helper. One shared lock serialises those tests, and
/// a drop guard restores the previous value even if `body` panics.
#[cfg(test)]
fn with_home_override<T>(home: &std::path::Path, body: impl FnOnce() -> T) -> T {
    static HOME_ENV_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());

    /// Puts the saved `HOME` value back (or unsets it) when dropped.
    struct RestoreHome(Option<std::ffi::OsString>);

    impl Drop for RestoreHome {
        fn drop(&mut self) {
            // SAFETY: runs while `HOME_ENV_LOCK` is still held (this guard is
            // declared after the lock guard, so it is dropped first).
            unsafe {
                match self.0.take() {
                    Some(previous) => std::env::set_var("HOME", previous),
                    None => std::env::remove_var("HOME"),
                }
            }
        }
    }

    // A panic in an earlier test must not wedge the remaining ones.
    let _serial = HOME_ENV_LOCK
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner);
    let _restore = RestoreHome(std::env::var_os("HOME"));
    // SAFETY: serialised via `HOME_ENV_LOCK`; tests in this file only touch
    // HOME through this helper.
    unsafe {
        std::env::set_var("HOME", home);
    }
    body()
}

#[test]
fn embedder_returns_unreachable_when_model_path_missing() {
    // HOME is pointed at an empty temp dir, so the error branch is exercised
    // whether or not this machine has the real model cached.
    let tmp = std::env::temp_dir().join(format!("ai-memory-w12h-missing-{}", std::process::id()));
    std::fs::create_dir_all(&tmp).expect("mk empty home");

    let result = with_home_override(&tmp, Embedder::load_from_fallback);
    let _ = std::fs::remove_dir_all(&tmp);

    let err_msg = result
        .expect_err("an empty HOME must not satisfy load_from_fallback")
        .to_string();
    assert!(
        err_msg.contains("not found") || err_msg.contains("fallback"),
        "error should mention missing model files: {err_msg}"
    );
}

#[test]
fn load_from_fallback_succeeds_when_files_present() {
    // Placeholder files are enough to exercise the success branch, because
    // `load_from_fallback` checks only that the files are present and never
    // parses them. The path is spelled out rather than taken from
    // `FALLBACK_MODEL_SUBDIR`, so the expected layout is pinned independently.
    let tmp = std::env::temp_dir().join(format!("ai-memory-w12h-fallback-{}", std::process::id()));
    let model_dir = tmp.join(
        ".cache/huggingface/hub/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/main",
    );
    std::fs::create_dir_all(&model_dir).expect("mk model dir");
    for name in ["config.json", "tokenizer.json", "model.safetensors"] {
        std::fs::write(model_dir.join(name), b"{}").expect("write placeholder");
    }

    let result = with_home_override(&tmp, Embedder::load_from_fallback);
    // Clean up before asserting so a failure does not leave the temp dir behind.
    let _ = std::fs::remove_dir_all(&tmp);

    let (cfg, tok, w) = result.expect("placeholder files satisfy load_from_fallback");
    assert!(cfg.ends_with("config.json"));
    assert!(tok.ends_with("tokenizer.json"));
    assert!(w.ends_with("model.safetensors"));
}