fastembed/sparse_text_embedding/
init.rs

1use ort::session::Session;
2use tokenizers::Tokenizer;
3
4use crate::{
5    init::{HasMaxLength, InitOptionsWithLength},
6    models::sparse::SparseModel,
7    TokenizerFiles,
8};
9
10use super::DEFAULT_MAX_LENGTH;
11
12impl HasMaxLength for SparseModel {
13    const MAX_LENGTH: usize = DEFAULT_MAX_LENGTH;
14}
15
16/// Options for initializing the SparseTextEmbedding model
17pub type SparseInitOptions = InitOptionsWithLength<SparseModel>;
18
19/// Struct for "bring your own" embedding models
20///
21/// The onnx_file and tokenizer_files are expecting the files' bytes
22#[derive(Debug, Clone, PartialEq, Eq)]
23#[non_exhaustive]
24pub struct UserDefinedSparseModel {
25    pub onnx_file: Vec<u8>,
26    pub tokenizer_files: TokenizerFiles,
27}
28
29impl UserDefinedSparseModel {
30    pub fn new(onnx_file: Vec<u8>, tokenizer_files: TokenizerFiles) -> Self {
31        Self {
32            onnx_file,
33            tokenizer_files,
34        }
35    }
36}
37
38/// Rust representation of the SparseTextEmbedding model
39pub struct SparseTextEmbedding {
40    pub tokenizer: Tokenizer,
41    pub(crate) session: Session,
42    pub(crate) need_token_type_ids: bool,
43    pub(crate) model: SparseModel,
44}