1use serde::{Deserialize, Serialize};
4
5#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
7pub enum FastEmbedModel {
8 BGESmallENV15,
11
12 BGEBaseENV15,
15
16 BGELargeENV15,
19
20 AllMiniLML6V2,
23
24 AllMiniLML12V2,
27
28 MultilingualE5Small,
31
32 MultilingualE5Base,
35
36 MultilingualE5Large,
39
40 Custom {
42 name: String,
43 dimensions: usize,
44 max_sequence_length: Option<usize>,
45 },
46}
47
48impl FastEmbedModel {
49 pub fn model_name(&self) -> &str {
51 match self {
52 FastEmbedModel::BGESmallENV15 => "BAAI/bge-small-en-v1.5",
53 FastEmbedModel::BGEBaseENV15 => "BAAI/bge-base-en-v1.5",
54 FastEmbedModel::BGELargeENV15 => "BAAI/bge-large-en-v1.5",
55 FastEmbedModel::AllMiniLML6V2 => "sentence-transformers/all-MiniLM-L6-v2",
56 FastEmbedModel::AllMiniLML12V2 => "sentence-transformers/all-MiniLM-L12-v2",
57 FastEmbedModel::MultilingualE5Small => "intfloat/multilingual-e5-small",
58 FastEmbedModel::MultilingualE5Base => "intfloat/multilingual-e5-base",
59 FastEmbedModel::MultilingualE5Large => "intfloat/multilingual-e5-large",
60 FastEmbedModel::Custom { name, .. } => name,
61 }
62 }
63
64 pub fn dimensions(&self) -> usize {
66 match self {
67 FastEmbedModel::BGESmallENV15 => 384,
68 FastEmbedModel::BGEBaseENV15 => 768,
69 FastEmbedModel::BGELargeENV15 => 1024,
70 FastEmbedModel::AllMiniLML6V2 => 384,
71 FastEmbedModel::AllMiniLML12V2 => 384,
72 FastEmbedModel::MultilingualE5Small => 384,
73 FastEmbedModel::MultilingualE5Base => 768,
74 FastEmbedModel::MultilingualE5Large => 1024,
75 FastEmbedModel::Custom { dimensions, .. } => *dimensions,
76 }
77 }
78
79 pub fn max_sequence_length(&self) -> usize {
81 match self {
82 FastEmbedModel::BGESmallENV15 => 512,
83 FastEmbedModel::BGEBaseENV15 => 512,
84 FastEmbedModel::BGELargeENV15 => 512,
85 FastEmbedModel::AllMiniLML6V2 => 256,
86 FastEmbedModel::AllMiniLML12V2 => 256,
87 FastEmbedModel::MultilingualE5Small => 512,
88 FastEmbedModel::MultilingualE5Base => 512,
89 FastEmbedModel::MultilingualE5Large => 512,
90 FastEmbedModel::Custom { max_sequence_length, .. } => {
91 max_sequence_length.unwrap_or(512)
92 }
93 }
94 }
95
96 pub fn description(&self) -> &str {
98 match self {
99 FastEmbedModel::BGESmallENV15 => {
100 "BGE Small English v1.5 - Fast and efficient for general purpose embedding tasks"
101 }
102 FastEmbedModel::BGEBaseENV15 => {
103 "BGE Base English v1.5 - Balanced performance and quality for English text"
104 }
105 FastEmbedModel::BGELargeENV15 => {
106 "BGE Large English v1.5 - High quality embeddings with larger model size"
107 }
108 FastEmbedModel::AllMiniLML6V2 => {
109 "All MiniLM L6 v2 - Lightweight model optimized for semantic search"
110 }
111 FastEmbedModel::AllMiniLML12V2 => {
112 "All MiniLM L12 v2 - Better quality than L6 while maintaining efficiency"
113 }
114 FastEmbedModel::MultilingualE5Small => {
115 "Multilingual E5 Small - Supports 100+ languages with good performance"
116 }
117 FastEmbedModel::MultilingualE5Base => {
118 "Multilingual E5 Base - High-quality multilingual embeddings"
119 }
120 FastEmbedModel::MultilingualE5Large => {
121 "Multilingual E5 Large - Best multilingual quality with larger model size"
122 }
123 FastEmbedModel::Custom { name, .. } => name,
124 }
125 }
126
127 pub fn language_support(&self) -> Vec<&str> {
129 match self {
130 FastEmbedModel::BGESmallENV15
131 | FastEmbedModel::BGEBaseENV15
132 | FastEmbedModel::BGELargeENV15
133 | FastEmbedModel::AllMiniLML6V2
134 | FastEmbedModel::AllMiniLML12V2 => {
135 vec!["en"] }
137 FastEmbedModel::MultilingualE5Small
138 | FastEmbedModel::MultilingualE5Base
139 | FastEmbedModel::MultilingualE5Large => {
140 vec![
141 "en", "zh", "es", "fr", "de", "it", "pt", "ru", "ja", "ko",
142 "ar", "hi", "th", "vi", "id", "ms", "tl", "nl", "sv", "da",
143 "no", "fi", "pl", "cs", "sk", "hu", "ro", "bg", "hr", "sl",
144 "et", "lv", "lt", "mt", "ga", "eu", "ca", "gl", "cy", "is",
145 "mk", "sq", "sr", "bs", "me", "hr", "sl", "sk", "cs", "pl",
146 ]
148 }
149 FastEmbedModel::Custom { .. } => {
150 vec!["unknown"] }
152 }
153 }
154
155 pub fn supports_language(&self, language: &str) -> bool {
157 self.language_support().contains(&language)
158 }
159
160 pub fn model_family(&self) -> ModelFamily {
162 match self {
163 FastEmbedModel::BGESmallENV15
164 | FastEmbedModel::BGEBaseENV15
165 | FastEmbedModel::BGELargeENV15 => ModelFamily::BGE,
166
167 FastEmbedModel::AllMiniLML6V2
168 | FastEmbedModel::AllMiniLML12V2 => ModelFamily::MiniLM,
169
170 FastEmbedModel::MultilingualE5Small
171 | FastEmbedModel::MultilingualE5Base
172 | FastEmbedModel::MultilingualE5Large => ModelFamily::E5,
173
174 FastEmbedModel::Custom { .. } => ModelFamily::Custom,
175 }
176 }
177
178 pub fn to_fastembed_model(&self) -> fastembed::EmbeddingModel {
180 match self {
181 FastEmbedModel::BGESmallENV15 => fastembed::EmbeddingModel::BGESmallENV15,
182 FastEmbedModel::BGEBaseENV15 => fastembed::EmbeddingModel::BGEBaseENV15,
183 FastEmbedModel::BGELargeENV15 => fastembed::EmbeddingModel::BGELargeENV15,
184 FastEmbedModel::AllMiniLML6V2 => fastembed::EmbeddingModel::AllMiniLML6V2,
185 FastEmbedModel::AllMiniLML12V2 => fastembed::EmbeddingModel::AllMiniLML12V2,
186 FastEmbedModel::MultilingualE5Small => fastembed::EmbeddingModel::MultilingualE5Small,
187 FastEmbedModel::MultilingualE5Base => fastembed::EmbeddingModel::MultilingualE5Base,
188 FastEmbedModel::MultilingualE5Large => fastembed::EmbeddingModel::MultilingualE5Large,
189 FastEmbedModel::Custom { .. } => {
190 fastembed::EmbeddingModel::BGESmallENV15
193 }
194 }
195 }
196
197 pub fn use_cases(&self) -> Vec<&str> {
199 match self {
200 FastEmbedModel::BGESmallENV15 => {
201 vec!["semantic search", "document similarity", "clustering", "classification"]
202 }
203 FastEmbedModel::BGEBaseENV15 => {
204 vec!["semantic search", "document similarity", "RAG systems", "question answering"]
205 }
206 FastEmbedModel::BGELargeENV15 => {
207 vec!["high-quality RAG", "research applications", "complex similarity tasks"]
208 }
209 FastEmbedModel::AllMiniLML6V2 => {
210 vec!["fast semantic search", "real-time applications", "mobile deployment"]
211 }
212 FastEmbedModel::AllMiniLML12V2 => {
213 vec!["semantic search", "document clustering", "content recommendation"]
214 }
215 FastEmbedModel::MultilingualE5Small => {
216 vec!["multilingual search", "cross-language retrieval", "international applications"]
217 }
218 FastEmbedModel::MultilingualE5Base => {
219 vec!["multilingual RAG", "cross-language similarity", "global content analysis"]
220 }
221 FastEmbedModel::MultilingualE5Large => {
222 vec!["high-quality multilingual RAG", "research", "enterprise multilingual systems"]
223 }
224 FastEmbedModel::Custom { .. } => {
225 vec!["custom applications"]
226 }
227 }
228 }
229}
230
231#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
233pub enum ModelFamily {
234 BGE,
236 MiniLM,
238 E5,
240 Custom,
242}
243
244#[derive(Debug, Clone, Serialize, Deserialize)]
246pub struct ModelInfo {
247 pub name: String,
249 pub dimensions: usize,
251 pub max_sequence_length: usize,
253 pub description: String,
255 pub language_support: Vec<String>,
257}
258
259impl ModelInfo {
260 pub fn from_model(model: &FastEmbedModel) -> Self {
262 Self {
263 name: model.model_name().to_string(),
264 dimensions: model.dimensions(),
265 max_sequence_length: model.max_sequence_length(),
266 description: model.description().to_string(),
267 language_support: model.language_support().iter().map(|s| s.to_string()).collect(),
268 }
269 }
270}
271
#[cfg(test)]
mod tests {
    use super::*;

    /// A built-in English model reports its fixed metadata and supports only "en".
    #[test]
    fn test_model_properties() {
        let model = FastEmbedModel::BGESmallENV15;
        assert_eq!(model.model_name(), "BAAI/bge-small-en-v1.5");
        assert_eq!(model.dimensions(), 384);
        assert_eq!(model.max_sequence_length(), 512);
        assert!(model.supports_language("en"));
        assert!(!model.supports_language("zh"));
    }

    /// A multilingual model accepts several language codes and is in the E5 family.
    #[test]
    fn test_multilingual_model() {
        let model = FastEmbedModel::MultilingualE5Small;
        assert!(model.supports_language("en"));
        assert!(model.supports_language("zh"));
        assert!(model.supports_language("es"));
        assert_eq!(model.model_family(), ModelFamily::E5);
    }

    /// A custom model echoes back the metadata it was constructed with.
    #[test]
    fn test_custom_model() {
        let model = FastEmbedModel::Custom {
            name: "custom-model".to_string(),
            dimensions: 512,
            max_sequence_length: Some(1024),
        };

        assert_eq!(model.model_name(), "custom-model");
        assert_eq!(model.dimensions(), 512);
        assert_eq!(model.max_sequence_length(), 1024);
        assert_eq!(model.model_family(), ModelFamily::Custom);
    }

    /// A custom model without an explicit limit uses the documented fallbacks.
    #[test]
    fn test_custom_model_defaults() {
        let model = FastEmbedModel::Custom {
            name: "custom-model".to_string(),
            dimensions: 128,
            max_sequence_length: None,
        };

        assert_eq!(model.max_sequence_length(), 512);
        assert_eq!(model.description(), "custom-model");
        assert!(model.supports_language("unknown"));
        assert!(!model.supports_language("en"));
        assert_eq!(model.use_cases(), vec!["custom applications"]);
    }

    /// Built-in variants are grouped into the expected families.
    #[test]
    fn test_model_families() {
        assert_eq!(FastEmbedModel::BGELargeENV15.model_family(), ModelFamily::BGE);
        assert_eq!(FastEmbedModel::AllMiniLML6V2.model_family(), ModelFamily::MiniLM);
        assert_eq!(FastEmbedModel::AllMiniLML12V2.model_family(), ModelFamily::MiniLM);
    }

    /// `ModelInfo::from_model` copies every accessor into the snapshot.
    #[test]
    fn test_model_info() {
        let model = FastEmbedModel::BGEBaseENV15;
        let info = ModelInfo::from_model(&model);

        assert_eq!(info.name, "BAAI/bge-base-en-v1.5");
        assert_eq!(info.dimensions, 768);
        assert_eq!(info.max_sequence_length, 512);
        assert_eq!(
            info.description,
            "BGE Base English v1.5 - Balanced performance and quality for English text"
        );
        assert!(info.language_support.contains(&"en".to_string()));
        assert_eq!(info.language_support, vec!["en".to_string()]);
    }
}