reflex/embedding/sinter/
config.rs

1use std::path::PathBuf;
2
3use crate::embedding::error::EmbeddingError;
4
5/// Default Sinter embedding dimension.
6pub const SINTER_EMBEDDING_DIM: usize = crate::constants::DEFAULT_EMBEDDING_DIM;
7
8/// Default Sinter max sequence length.
9pub const SINTER_MAX_SEQ_LEN: usize = crate::constants::DEFAULT_MAX_SEQ_LEN;
10
/// Settings consumed by [`SinterEmbedder`](super::SinterEmbedder).
///
/// All fields are public, so a config can be built with struct-update syntax
/// on top of `SinterConfig::default()`.
#[derive(Debug, Clone)]
pub struct SinterConfig {
    /// Location of the GGUF model file.
    pub model_path: PathBuf,
    /// Location of `tokenizer.json`.
    pub tokenizer_path: PathBuf,
    /// Upper bound on the number of tokens to consider.
    pub max_seq_len: usize,
    /// Dimension of the output embedding.
    pub embedding_dim: usize,
    /// When `true`, run in deterministic stub mode (no model files required).
    pub testing_stub: bool,
}
25
26impl Default for SinterConfig {
27    fn default() -> Self {
28        Self {
29            model_path: PathBuf::new(),
30            tokenizer_path: PathBuf::new(),
31            max_seq_len: SINTER_MAX_SEQ_LEN,
32            embedding_dim: SINTER_EMBEDDING_DIM,
33            testing_stub: false,
34        }
35    }
36}
37
38impl SinterConfig {
39    /// Env var used to locate the model file.
40    pub const ENV_MODEL_PATH: &'static str = "REFLEX_MODEL_PATH";
41    /// Env var used to locate the tokenizer file.
42    pub const ENV_TOKENIZER_PATH: &'static str = "REFLEX_TOKENIZER_PATH";
43
44    /// Loads config from environment variables (missing values become empty paths).
45    pub fn from_env() -> Result<Self, EmbeddingError> {
46        let model_path = std::env::var(Self::ENV_MODEL_PATH)
47            .ok()
48            .map(|v| v.trim().to_string())
49            .filter(|v| !v.is_empty())
50            .map(PathBuf::from)
51            .unwrap_or_default();
52
53        let tokenizer_path = std::env::var(Self::ENV_TOKENIZER_PATH)
54            .ok()
55            .map(|v| v.trim().to_string())
56            .filter(|v| !v.is_empty())
57            .map(PathBuf::from)
58            .unwrap_or_else(|| {
59                if !model_path.as_os_str().is_empty() {
60                    let parent = model_path.parent().unwrap_or(model_path.as_path());
61                    parent.join("tokenizer.json")
62                } else {
63                    PathBuf::new()
64                }
65            });
66
67        Ok(Self {
68            model_path,
69            tokenizer_path,
70            ..Default::default()
71        })
72    }
73
74    /// Creates a config for a model file, inferring `tokenizer.json` from its directory.
75    pub fn new<P: Into<PathBuf>>(model_path: P) -> Self {
76        let model_path = model_path.into();
77        let tokenizer_path = model_path
78            .parent()
79            .map(|p| p.join("tokenizer.json"))
80            .unwrap_or_default();
81
82        Self {
83            model_path,
84            tokenizer_path,
85            ..Default::default()
86        }
87    }
88
89    /// Creates a stub config (no model files; produces deterministic embeddings).
90    pub fn stub() -> Self {
91        Self {
92            testing_stub: true,
93            ..Default::default()
94        }
95    }
96
97    /// Validates required fields for non-stub mode.
98    pub fn validate(&self) -> Result<(), EmbeddingError> {
99        if self.testing_stub {
100            return Ok(());
101        }
102
103        if self.model_path.as_os_str().is_empty() {
104            return Err(EmbeddingError::InvalidConfig {
105                reason: "model_path is required (stubbing is disabled)".to_string(),
106            });
107        }
108
109        if !self.model_path.exists() {
110            return Err(EmbeddingError::ModelNotFound {
111                path: self.model_path.clone(),
112            });
113        }
114
115        Ok(())
116    }
117
118    /// Returns `true` if the model file path exists.
119    pub fn model_available(&self) -> bool {
120        !self.model_path.as_os_str().is_empty() && self.model_path.exists()
121    }
122
123    /// Returns `true` if the tokenizer path exists.
124    pub fn tokenizer_available(&self) -> bool {
125        !self.tokenizer_path.as_os_str().is_empty() && self.tokenizer_path.exists()
126    }
127}