// fathomdb_embedder/lib.rs
use std::path::PathBuf;

use fathomdb_embedder_api::{Embedder, EmbedderError, EmbedderIdentity, Vector};

// `loader` is only compiled when the `default-embedder` feature is enabled.
#[cfg(feature = "default-embedder")]
pub mod loader;

8/// Structured event surfaced through `OpenReport.embedder_events`
9/// (`dev/design/embedder.md` §7).
10///
11/// Defined unconditionally at the crate root so the engine can reference
12/// it regardless of the `default-embedder` feature; the loader (under
13/// `default-embedder`) emits these variants and re-exports the enum for
14/// ergonomic in-module use.
15#[derive(Debug, Clone, PartialEq, Eq)]
16pub enum EmbedderEvent {
17 /// A file was fetched from the network and written to the cache.
18 DefaultEmbedderDownload {
19 file: String,
20 url: String,
21 bytes: u64,
22 sha256: String,
23 cache_path: PathBuf,
24 duration_ms: u64,
25 },
26 /// A file was found in the cache and verified by sha256. No network.
27 DefaultEmbedderCacheHit { file: String, sha256: String, cache_path: PathBuf },
28 /// EU-5a2 — emitted at the commit that materializes the per-workspace
29 /// mean vector into `_fathomdb_embedder_profiles.mean_vec`. `dim`
30 /// matches the default embedder identity's dimension; `doc_count` is
31 /// the number of pre-pin rows the same transaction's re-quantize
32 /// pass updated (per `dev/design/embedder.md` §0.5, §7).
33 ///
34 /// EU-5a2's only live identity is NoopEmbedder, which does NOT
35 /// request mean-centering, so this event is dormant until EU-5b
36 /// flips the default identity. Defined now so EU-5b is a no-op
37 /// addition to this enum.
38 MeanVecPinned { dim: u32, doc_count: u64 },
39 /// 0.7.2 PR-2b — emitted after the transaction that REFRESHES an
40 /// already-pinned `mean_vec` is durable. `dim` is the embedder
41 /// identity dimension; `doc_count` is the number of rows the
42 /// re-quantize pass re-centered; `trigger` records what drove the
43 /// refresh. As of 0.7.2 PR-2bc the only trigger is the explicit
44 /// `doctor recompute-mean` verb (`Manual`); the automatic in-ingest
45 /// drift detector was carved out and deferred to 0.8.x. See
46 /// `dev/design/embedder.md` §0.3/§0.5 and
47 /// `dev/design/embedder-decision.md` §3.4.
48 MeanVecRecomputed { dim: u32, doc_count: u64, trigger: MeanRecomputeTrigger },
49}
50
/// 0.7.2 PR-2b — what drove a [`EmbedderEvent::MeanVecRecomputed`].
///
/// As of 0.7.2 PR-2bc the only variant is `Manual` (the explicit
/// `doctor recompute-mean` CLI verb). The `DriftAuto` variant for the
/// automatic in-ingest drift detector was REMOVED when that path was carved
/// out and deferred to 0.8.x (see
/// `dev/plans/prompts/0.8.x-auto-mean-drift-DEFERRED.md`); the enum is kept
/// (rather than collapsed to a unit) so reviving the auto path in 0.8.x is a
/// pure additive re-introduction of a variant + tag.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MeanRecomputeTrigger {
    /// Fired explicitly by the `doctor recompute-mean` CLI verb.
    Manual,
}

66impl MeanRecomputeTrigger {
67 /// Stable lowercase tag used in machine-readable surfaces (CLI/py/napi).
68 #[must_use]
69 pub fn as_str(&self) -> &'static str {
70 match self {
71 MeanRecomputeTrigger::Manual => "manual",
72 }
73 }
74}
75
// The private `candle_bge` module and the crate-root re-exports of its
// items below are only compiled with the `default-embedder` feature.
#[cfg(feature = "default-embedder")]
mod candle_bge;

#[cfg(feature = "default-embedder")]
pub use candle_bge::{CandleBgeEmbedder, DEFAULT_EMBEDDER_DIM, DEFAULT_EMBEDDER_NAME};

82#[derive(Clone, Debug)]
83pub struct NoopEmbedder {
84 identity: EmbedderIdentity,
85}
86
87impl Default for NoopEmbedder {
88 fn default() -> Self {
89 Self { identity: EmbedderIdentity::new("fathomdb-noop", "0.6.0-scaffold", 384) }
90 }
91}
92
93impl Embedder for NoopEmbedder {
94 fn identity(&self) -> EmbedderIdentity {
95 self.identity.clone()
96 }
97
98 fn embed(&self, _input: &str) -> Result<Vector, EmbedderError> {
99 let mut vector = vec![0.0_f32; self.identity.dimension as usize];
100 if let Some(first) = vector.first_mut() {
101 *first = 1.0;
102 }
103 Ok(vector)
104 }
105}