Skip to main content

fathomdb_embedder/
lib.rs

1use std::path::PathBuf;
2
3use fathomdb_embedder_api::{Embedder, EmbedderError, EmbedderIdentity, Vector};
4
5#[cfg(feature = "default-embedder")]
6pub mod loader;
7
8/// Structured event surfaced through `OpenReport.embedder_events`
9/// (`dev/design/embedder.md` §7).
10///
11/// Defined unconditionally at the crate root so the engine can reference
12/// it regardless of the `default-embedder` feature; the loader (under
13/// `default-embedder`) emits these variants and re-exports the enum for
14/// ergonomic in-module use.
15#[derive(Debug, Clone, PartialEq, Eq)]
16pub enum EmbedderEvent {
17    /// A file was fetched from the network and written to the cache.
18    DefaultEmbedderDownload {
19        file: String,
20        url: String,
21        bytes: u64,
22        sha256: String,
23        cache_path: PathBuf,
24        duration_ms: u64,
25    },
26    /// A file was found in the cache and verified by sha256. No network.
27    DefaultEmbedderCacheHit { file: String, sha256: String, cache_path: PathBuf },
28    /// EU-5a2 — emitted at the commit that materializes the per-workspace
29    /// mean vector into `_fathomdb_embedder_profiles.mean_vec`. `dim`
30    /// matches the default embedder identity's dimension; `doc_count` is
31    /// the number of pre-pin rows the same transaction's re-quantize
32    /// pass updated (per `dev/design/embedder.md` §0.5, §7).
33    ///
34    /// EU-5a2's only live identity is NoopEmbedder, which does NOT
35    /// request mean-centering, so this event is dormant until EU-5b
36    /// flips the default identity. Defined now so EU-5b is a no-op
37    /// addition to this enum.
38    MeanVecPinned { dim: u32, doc_count: u64 },
39    /// 0.7.2 PR-2b — emitted after the transaction that REFRESHES an
40    /// already-pinned `mean_vec` is durable. `dim` is the embedder
41    /// identity dimension; `doc_count` is the number of rows the
42    /// re-quantize pass re-centered; `trigger` records what drove the
43    /// refresh. As of 0.7.2 PR-2bc the only trigger is the explicit
44    /// `doctor recompute-mean` verb (`Manual`); the automatic in-ingest
45    /// drift detector was carved out and deferred to 0.8.x. See
46    /// `dev/design/embedder.md` §0.3/§0.5 and
47    /// `dev/design/embedder-decision.md` §3.4.
48    MeanVecRecomputed { dim: u32, doc_count: u64, trigger: MeanRecomputeTrigger },
49}
50
/// 0.7.2 PR-2b — the cause behind an [`EmbedderEvent::MeanVecRecomputed`].
///
/// Since 0.7.2 PR-2bc, `Manual` (the explicit `doctor recompute-mean` CLI
/// verb) is the only variant. A `DriftAuto` variant used to cover the
/// automatic in-ingest drift detector; it was REMOVED when that path was
/// carved out and deferred to 0.8.x (see
/// `dev/plans/prompts/0.8.x-auto-mean-drift-DEFERRED.md`). The type stays an
/// enum instead of being collapsed to a unit so that bringing the auto path
/// back in 0.8.x only requires adding a variant and its tag.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MeanRecomputeTrigger {
    /// Requested explicitly through the `doctor recompute-mean` CLI verb.
    Manual,
}

impl MeanRecomputeTrigger {
    /// Returns the stable lowercase tag for this trigger, as used by
    /// machine-readable surfaces (CLI/py/napi).
    #[must_use]
    pub fn as_str(&self) -> &'static str {
        // Deliberately exhaustive (no `_` arm): adding a variant must force
        // a decision about its tag at compile time.
        match *self {
            Self::Manual => "manual",
        }
    }
}
75
76#[cfg(feature = "default-embedder")]
77mod candle_bge;
78
79#[cfg(feature = "default-embedder")]
80pub use candle_bge::{CandleBgeEmbedder, DEFAULT_EMBEDDER_DIM, DEFAULT_EMBEDDER_NAME};
81
82#[derive(Clone, Debug)]
83pub struct NoopEmbedder {
84    identity: EmbedderIdentity,
85}
86
87impl Default for NoopEmbedder {
88    fn default() -> Self {
89        Self { identity: EmbedderIdentity::new("fathomdb-noop", "0.6.0-scaffold", 384) }
90    }
91}
92
93impl Embedder for NoopEmbedder {
94    fn identity(&self) -> EmbedderIdentity {
95        self.identity.clone()
96    }
97
98    fn embed(&self, _input: &str) -> Result<Vector, EmbedderError> {
99        let mut vector = vec![0.0_f32; self.identity.dimension as usize];
100        if let Some(first) = vector.first_mut() {
101            *first = 1.0;
102        }
103        Ok(vector)
104    }
105}