// nornir 0.4.33 - Docs.rs

//! Shared embedder support — the **selected** model's identity, [`ModelProfile`]
//! construction, and the tokenize-independent post-processing (mean-pool +
//! L2-normalize) used by **both** the tract (`embed`) and ort (`embed_ort`)
//! backends.
//!
//! Keeping this in one place guarantees the two runtimes produce the *same*
//! `model_profile` and the same vectors for the same text, so embeddings made
//! by one backend are reusable by the other (the warehouse dedup key is
//! backend-agnostic).
//!
//! ## Model is now selectable (plan item G3)
//!
//! The model is **no longer a hard-wired constant**. It is chosen from
//! [`super::embed_registry`] via `$NORNIR_EMBED_MODEL` (default
//! `jina-v2-base-code`, 768-dim). [`active_model`] returns the selected
//! [`EmbedModel`]; its `dim` / `max_tokens` / `model_name` flow everywhere a
//! constant used to. The same env var is read by `build.rs`, so the fetched
//! weights match the loaded model.
//!
//! ## Re-embed awareness
//!
//! The selected model's name + `dim` enter [`ModelProfile`] and therefore the
//! warehouse `model_profile` key. **Different models ⇒ different keys ⇒ vectors
//! never mix**; switching the model requires a fresh `nornir vector index`
//! before semantic search returns hits. See [`super::embed_registry`].
//!
//! Compiled when either embed feature is on.

use super::embed_registry::{self, EmbedModel};
use super::store::ModelProfile;

/// Returns the model the embedder will load and run.
///
/// Selection is delegated to [`embed_registry::selected`], i.e. the id in
/// `$NORNIR_EMBED_MODEL` or the registry default when it is unset.
///
/// # Panics
///
/// Panics with the registry's own error message when selection fails (an
/// unknown id). That id is the same one `build.rs` validated, so a failure here
/// is a misconfiguration the operator must fix — silently falling back to a
/// different model would be worse than stopping.
pub fn active_model() -> &'static EmbedModel {
    match embed_registry::selected() {
        Ok(model) => model,
        Err(e) => panic!("{e}"),
    }
}

/// Fully-qualified name of the selected model
/// (e.g. `jinaai/jina-embeddings-v2-base-code`), read from [`active_model`].
pub fn model_name() -> &'static str {
    let model = active_model();
    model.model_name
}

/// Output dimensionality of the selected model, read from [`active_model`].
///
/// This is **dynamic**: it replaces the former `DIM` const and comes from the
/// registry, so a different model carries its own dim through the index and
/// the warehouse `dim` column.
pub fn dim() -> usize {
    let model = active_model();
    model.dim
}

/// Maximum number of input tokens the selected model accepts, read from
/// [`active_model`]. Longer inputs are truncated.
pub fn max_tokens() -> usize {
    let model = active_model();
    model.max_tokens
}

/// Where `build.rs` placed the weights of the **selected** model, and their
/// content hashes. build.rs fetches the env-selected model into its own cache
/// subdir and exports that dir + SHAs here, so these always describe
/// [`active_model`].
///
/// This value is the `NORNIR_MODEL_DIR` environment variable as it was at
/// **compile time** (`env!`); use [`model_dir`] for the runtime-resolved path.
pub const MODEL_DIR: &str = env!("NORNIR_MODEL_DIR");
/// Content hash of the fetched model weights, baked in at compile time from
/// the `NORNIR_MODEL_WEIGHTS_SHA` environment variable. Used as the
/// `weights_sha` field of [`profile`].
pub const WEIGHTS_SHA: &str = env!("NORNIR_MODEL_WEIGHTS_SHA");
/// Content hash of the fetched tokenizer, baked in at compile time from the
/// `NORNIR_MODEL_TOKENIZER_SHA` environment variable. Used as the
/// `tokenizer_sha` field of [`profile`].
pub const TOKENIZER_SHA: &str = env!("NORNIR_MODEL_TOKENIZER_SHA");

/// Resolve the model directory **at runtime**.
///
/// The baked [`MODEL_DIR`] points at the *building* user's `~/.cache`, which a
/// service user — e.g. `nornir` — can't read; that EACCES is exactly what broke
/// server-side Vector.Search. Precedence:
///
///   1. `$NORNIR_MODEL_DIR` (runtime override — the service install can set
///      this); an unset or empty value is ignored,
///   2. `/opt/nornir/models` if it holds the model, detected by the presence of
///      `tokenizer.json` (shared, world-readable, mirrors the
///      `/opt/nornir/cuda` convention),
///   3. the build-time cache (interactive use on the building user's box).
///
/// The returned path is not checked for existence or readability in cases 1
/// and 3; callers see the I/O error when they open the model files.
pub fn model_dir() -> std::path::PathBuf {
    // `var_os` rather than `var`: a path need not be valid UTF-8, and `var`
    // would turn such an override into an `Err` that silently falls through to
    // the lower-precedence locations.
    if let Some(dir) = std::env::var_os("NORNIR_MODEL_DIR").filter(|d| !d.is_empty()) {
        return std::path::PathBuf::from(dir);
    }

    let shared = std::path::Path::new("/opt/nornir/models");
    if shared.join("tokenizer.json").exists() {
        return shared.to_path_buf();
    }

    std::path::PathBuf::from(MODEL_DIR)
}

/// Builds the [`ModelProfile`] identifying the selected model for both backends.
///
/// `model_name` and `dim` come from the registry (via [`model_name`] and
/// [`dim`]); `weights_sha` is the SHA of whichever model artifact build.rs
/// fetched (the ONNX export). The same file feeds tract and ort, so the profile
/// — and thus the dedup key — matches across backends but **differs across
/// models**, which keeps their vectors from colliding in the warehouse.
///
/// Pooling is always `"mean"`, normalization is always on, and the dtype is
/// always `"f32"`.
pub fn profile() -> ModelProfile {
    let name = String::from(model_name());
    let out_dim = dim();
    ModelProfile {
        model_name: name,
        weights_sha: String::from(WEIGHTS_SHA),
        tokenizer_sha: String::from(TOKENIZER_SHA),
        pooling: String::from("mean"),
        normalize: true,
        dim: out_dim,
        dtype: String::from("f32"),
    }
}

/// Mean-pool a hidden state over its tokens, then L2-normalize the result.
///
/// `hidden` is a flattened, row-major `[1, n_tokens, dim]` tensor, so element
/// `(t, d)` lives at `hidden[t * dim + d]`. `dim` is the selected model's
/// dimensionality ([`dim`]); callers pass it in so this function stays
/// model-agnostic.
///
/// Returns the `dim`-length embedding. With `n_tokens == 0` the sum is all
/// zeros and is returned as-is (the divisor is clamped to 1 and a zero vector
/// is not normalized).
///
/// # Panics
///
/// In debug builds, panics if `hidden.len() != n_tokens * dim`. In any build,
/// panics on an out-of-range slice if `hidden` is shorter than
/// `n_tokens * dim`.
pub fn pool_and_normalize(hidden: &[f32], n_tokens: usize, dim: usize) -> Vec<f32> {
    debug_assert_eq!(hidden.len(), n_tokens * dim);

    // Sum the token rows component-wise, in token order.
    let mut pooled = vec![0f32; dim];
    for token in 0..n_tokens {
        let start = token * dim;
        let row = &hidden[start..start + dim];
        pooled
            .iter_mut()
            .zip(row)
            .for_each(|(sum, &value)| *sum += value);
    }

    // Turn the sums into means; `max(1)` avoids dividing by zero tokens.
    let scale = 1.0 / n_tokens.max(1) as f32;
    pooled.iter_mut().for_each(|component| *component *= scale);

    l2_normalize(&mut pooled);
    pooled
}

/// Scales `v` in place to unit Euclidean (L2) length.
///
/// The vector is left untouched when its norm is not strictly positive — that
/// covers the zero vector, the empty slice, and a NaN norm.
pub fn l2_normalize(v: &mut [f32]) {
    let sum_of_squares = v.iter().map(|&x| x * x).sum::<f32>();
    let norm = sum_of_squares.sqrt();
    if norm > 0.0 {
        v.iter_mut().for_each(|x| *x /= norm);
    }
}

/// Truncate raw token ids to at most `max` tokens and widen them to `i64` for
/// the ONNX `input_ids` tensor, paired with an all-ones attention mask of the
/// same length. `max` is the selected model's [`max_tokens`].
///
/// The result always holds **at least one token**:
///
/// * `max == 0` is treated as `1`, so one token is still kept;
/// * an empty `raw` yields a single placeholder id `0` (with mask `1`) instead
///   of panicking on an out-of-range slice, so callers never build a
///   zero-length tensor.
///   NOTE(review): `0` is assumed to be a harmless filler id for the selected
///   tokenizer — confirm, or have callers reject empty input before this point.
///
/// Returns `(input_ids, attention_mask)`.
pub fn prepare_tokens(raw: &[u32], max: usize) -> (Vec<i64>, Vec<i64>) {
    let limit = max.max(1);
    // `i64::from` is a lossless widening of `u32`.
    let mut ids: Vec<i64> = raw.iter().take(limit).map(|&id| i64::from(id)).collect();
    if ids.is_empty() {
        ids.push(0);
    }
    let mask = vec![1i64; ids.len()];
    (ids, mask)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// With no env override the registry default is the active model, and
    /// `dim()`, `model_name()` and `profile()` must all report that model's
    /// values (768-dim jina). This pins the concrete dim and name rather than
    /// merely checking that nothing panicked.
    ///
    /// Relies on `NORNIR_EMBED_MODEL` being unset in the test environment.
    #[test]
    fn default_model_profile_matches_registry() {
        let expected = super::embed_registry::default_model();

        // Free-function accessors.
        assert_eq!(dim(), expected.dim);
        assert_eq!(dim(), 768, "default jina stays 768-dim");
        assert_eq!(model_name(), expected.model_name);

        // The assembled profile must carry the same identity.
        let reported = profile();
        assert_eq!(reported.dim, expected.dim);
        assert_eq!(reported.model_name, expected.model_name);
    }

    /// Pooling uses the `dim` argument it is given (not a global const), so a
    /// model with a different width — e.g. 384-dim — pools correctly.
    #[test]
    fn pool_respects_dim() {
        const WIDTH: usize = 4;
        const TOKENS: usize = 2;

        // Two tokens of [1,1,1,1]: mean is [1,1,1,1], whose L2 norm is 2, so
        // every normalized component is 0.5.
        let hidden = vec![1.0f32; TOKENS * WIDTH];
        let pooled = pool_and_normalize(&hidden, TOKENS, WIDTH);

        assert_eq!(pooled.len(), WIDTH);
        pooled
            .iter()
            .for_each(|x| assert!((x - 0.5).abs() < 1e-6, "{x}"));
    }
}