fastembed 5.13.2

Library for generating vector embeddings and reranking locally.
Documentation
use ndarray::{s, Array2, ArrayView, Dim, Dimension, IxDynImpl};

/// Selects a pooling strategy.
///
/// The [`Default`] value is [`Self::Cls`]; it is derived from the `#[default]`
/// marker on that variant.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub enum Pooling {
    /// CLS pooling.
    ///
    /// Move the `#[default]` marker to define the default pooling strategy.
    /// Currently this is set to [`Self::Cls`] for backward compatibility.
    // NOTE(review): presumably handled by the `cls` function in this file;
    // the dispatch on this enum is not visible here.
    #[default]
    Cls,
    /// Mean pooling.
    // NOTE(review): presumably handled by the `mean` function in this file;
    // the dispatch on this enum is not visible here.
    Mean,
}

pub fn cls(tensor: &ArrayView<f32, Dim<IxDynImpl>>) -> anyhow::Result<Array2<f32>> {
    match tensor.dim().ndim() {
        2 => Ok(tensor.slice(s![.., ..]).to_owned()),
        3 => Ok(tensor.slice(s![.., 0, ..]).to_owned()),
        _ => Err(anyhow::Error::msg(format!(
            "Invalid output shape: {shape:?}. Expected 2D or 3D tensor.",
            shape = tensor.dim()
        ))),
    }
}

/// Mean-pool token-level embeddings, weighting every position by the attention mask.
///
/// * `token_embeddings` - token embeddings in form of a tensor output of the encoding; 2D or 3D.
/// * `attention_mask_array` - the same mask generated by the Tokenizer and used for encoding.
///
/// A 2D input is returned as an owned copy and the mask is not consulted.
/// For a 3D input the mask gains a trailing axis, is broadcast to the shape of
/// the embeddings and converted to `f32`. The masked sum over `Axis(1)` is then
/// divided element-wise by the mask sum over `Axis(1)`, where a mask sum of
/// zero is replaced by `1.0` so the division never divides by zero.
///
/// # Errors
///
/// Returns an error if the input is neither 2D nor 3D, or if the mask cannot
/// be broadcast to the shape of the embeddings.
// Reference Python implementation:
// https://github.com/UKPLab/sentence-transformers/blob/c0fc0e8238f7f48a1e92dc90f6f96c86f69f1e02/sentence_transformers/models/Pooling.py#L151
pub fn mean(
    token_embeddings: &ArrayView<f32, Dim<IxDynImpl>>,
    attention_mask_array: Array2<i64>,
) -> anyhow::Result<Array2<f32>> {
    let embeddings = match token_embeddings.dim().ndim() {
        // Without Axis(1) there is nothing to average over. Such an input is
        // typically (batch_size, feature_count), i.e. the model has already
        // pooled its output.
        2 => return Ok(token_embeddings.slice(s![.., ..]).to_owned()),
        // Slicing with three full ranges pins the view to a 3D dimension type.
        3 => token_embeddings.slice(s![.., .., ..]),
        _ => {
            return Err(anyhow::Error::msg(format!(
                "Invalid output shape: {shape:?}. Expected 2D or 3D tensor.",
                shape = token_embeddings.dim()
            )));
        }
    };

    let mask_shape = attention_mask_array.dim();
    let target_shape = embeddings.dim();

    // Append a trailing axis to the mask, then stretch it over the embedding
    // shape and convert it to floating-point weights.
    let expanded_mask = attention_mask_array.insert_axis(ndarray::Axis(2));
    let weights = match expanded_mask.broadcast(target_shape) {
        Some(stretched) => stretched.mapv(|flag| flag as f32),
        None => {
            return Err(anyhow::Error::msg(format!(
                "Could not broadcast attention mask from {:?} to {:?}",
                mask_shape, target_shape
            )));
        }
    };

    let weighted_sum = (&weights * &embeddings).sum_axis(ndarray::Axis(1));

    // A mask sum of zero is replaced by 1.0 to keep the division well defined.
    let weight_totals = weights
        .sum_axis(ndarray::Axis(1))
        .mapv(|total| if total == 0f32 { 1.0 } else { total });

    Ok(&weighted_sum / &weight_totals)
}