use std::path::{Path, PathBuf};
use ort::{execution_providers::ExecutionProviderDispatch, session::Session};
use tokenizers::Tokenizer;
use crate::{models::sparse::SparseModel, TokenizerFiles, DEFAULT_CACHE_DIR};
use super::{DEFAULT_EMBEDDING_MODEL, DEFAULT_MAX_LENGTH};
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct SparseInitOptions {
pub model_name: SparseModel,
pub execution_providers: Vec<ExecutionProviderDispatch>,
pub max_length: usize,
pub cache_dir: PathBuf,
pub show_download_progress: bool,
}
impl SparseInitOptions {
pub fn new(model_name: SparseModel) -> Self {
Self {
model_name,
..Default::default()
}
}
pub fn with_max_length(mut self, max_length: usize) -> Self {
self.max_length = max_length;
self
}
pub fn with_cache_dir(mut self, cache_dir: PathBuf) -> Self {
self.cache_dir = cache_dir;
self
}
pub fn with_execution_providers(
mut self,
execution_providers: Vec<ExecutionProviderDispatch>,
) -> Self {
self.execution_providers = execution_providers;
self
}
pub fn with_show_download_progress(mut self, show_download_progress: bool) -> Self {
self.show_download_progress = show_download_progress;
self
}
}
impl Default for SparseInitOptions {
fn default() -> Self {
Self {
model_name: DEFAULT_EMBEDDING_MODEL,
execution_providers: Default::default(),
max_length: DEFAULT_MAX_LENGTH,
cache_dir: Path::new(DEFAULT_CACHE_DIR).to_path_buf(),
show_download_progress: true,
}
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub struct UserDefinedSparseModel {
pub onnx_file: Vec<u8>,
pub tokenizer_files: TokenizerFiles,
}
impl UserDefinedSparseModel {
pub fn new(onnx_file: Vec<u8>, tokenizer_files: TokenizerFiles) -> Self {
Self {
onnx_file,
tokenizer_files,
}
}
}
pub struct SparseTextEmbedding {
pub tokenizer: Tokenizer,
pub(crate) session: Session,
pub(crate) need_token_type_ids: bool,
pub(crate) model: SparseModel,
}