post_cortex_embeddings/embeddings/config.rs
1// Copyright (c) 2025 Julius ML
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy
4// of this software and associated documentation files (the "Software"), to deal
5// in the Software without restriction, including without limitation the rights
6// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7// copies of the Software, and to permit persons to whom the Software is
8// furnished to do so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in all
11// copies or substantial portions of the Software.
12
13//! Embedding-engine configuration and supported model types.
14
15use serde::{Deserialize, Serialize};
16use std::path::PathBuf;
17
18/// Configuration for the embedding engine
19#[derive(Debug, Clone, Serialize, Deserialize)]
20pub struct EmbeddingConfig {
21 /// Model type for embeddings
22 pub model_type: EmbeddingModelType,
23 /// Maximum batch size for processing
24 pub max_batch_size: usize,
25 /// Enable adaptive batch sizing
26 pub adaptive_batching: bool,
27 /// Memory pool size for vector reuse
28 pub memory_pool_size: usize,
29 /// Maximum concurrent operations
30 pub max_concurrent_ops: usize,
31 /// Enable performance monitoring
32 pub enable_performance_monitoring: bool,
33 /// Model cache directory
34 pub cache_dir: PathBuf,
35 /// Enable model caching
36 pub enable_caching: bool,
37 /// Operation timeout in seconds
38 pub operation_timeout_secs: u64,
39}
40
41impl Default for EmbeddingConfig {
42 fn default() -> Self {
43 Self {
44 model_type: EmbeddingModelType::default(),
45 max_batch_size: 32,
46 adaptive_batching: true,
47 memory_pool_size: 1000,
48 max_concurrent_ops: num_cpus::get() * 2,
49 enable_performance_monitoring: true,
50 cache_dir: PathBuf::from("./models_cache"),
51 enable_caching: true,
52 operation_timeout_secs: 30,
53 }
54 }
55}
56
57/// Embedding model types.
58///
59/// `PotionMultilingual` is the default — Model2Vec static embeddings, small
60/// on disk, ms-per-text inference, multilingual (Latin / Cyrillic / CJK / …).
61/// The BERT variants are kept for users who need transformer-grade semantic
62/// quality and are willing to pay the GPU/CPU cost.
63#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)]
64#[derive(Default)]
65pub enum EmbeddingModelType {
66 /// Hash-based pseudo-embeddings — kept for backwards compatibility and
67 /// as a last-resort fallback. **Does not** produce semantically
68 /// meaningful vectors; only use when all real backends are disabled.
69 StaticSimilarityMRL,
70 /// MiniLM model (balanced performance, English-only). Requires the
71 /// `bert` feature.
72 MiniLM,
73 /// Multilingual MiniLM model — 50+ languages including Bulgarian.
74 /// Requires the `bert` feature.
75 MultilingualMiniLM,
76 /// TinyBERT model (smallest BERT variant). Requires the `bert` feature.
77 TinyBERT,
78 /// BGE Small model (balanced BERT). Requires the `bert` feature.
79 BGESmall,
80 /// `minishlab/potion-multilingual-128M` — Model2Vec static embeddings,
81 /// multilingual (Latin/Cyrillic/CJK/…). Default — small, fast, no GPU.
82 /// Requires the `model2vec` feature (on by default).
83 #[default]
84 PotionMultilingual,
85 /// `minishlab/potion-code-16M` — Model2Vec static embeddings tuned for
86 /// source code (English-leaning). Requires the `model2vec` feature.
87 PotionCode,
88}
89
90
91impl EmbeddingModelType {
92 /// Get embedding dimension for this model type.
93 ///
94 /// The Potion dimensions are taken from each model's published
95 /// config — `Model2VecBackend` verifies the runtime dimension at load
96 /// time and panics if they disagree, so this constant stays the source
97 /// of truth for HNSW index sizing.
98 pub fn embedding_dimension(&self) -> usize {
99 match self {
100 Self::StaticSimilarityMRL => 1024,
101 Self::MiniLM | Self::MultilingualMiniLM | Self::BGESmall => 384,
102 Self::TinyBERT => 312,
103 // potion-multilingual-128M outputs 256-dim vectors.
104 Self::PotionMultilingual => 256,
105 // potion-code-16M outputs 512-dim vectors.
106 Self::PotionCode => 512,
107 }
108 }
109
110 /// Get model ID for HuggingFace Hub.
111 pub fn model_id(&self) -> &'static str {
112 match self {
113 Self::StaticSimilarityMRL | Self::MiniLM => "sentence-transformers/all-MiniLM-L6-v2",
114 Self::MultilingualMiniLM => {
115 "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
116 }
117 Self::TinyBERT => "huawei-noah/TinyBERT_General_6L_312D",
118 Self::BGESmall => "BAAI/bge-small-en-v1.5",
119 Self::PotionMultilingual => "minishlab/potion-multilingual-128M",
120 Self::PotionCode => "minishlab/potion-code-16M",
121 }
122 }
123
124 /// Check if this is a BERT-based model (Candle transformer).
125 pub fn is_bert_based(&self) -> bool {
126 matches!(
127 self,
128 Self::MiniLM | Self::MultilingualMiniLM | Self::TinyBERT | Self::BGESmall
129 )
130 }
131
132 /// Check if this is a Model2Vec static-embedding model.
133 pub fn is_model2vec(&self) -> bool {
134 matches!(self, Self::PotionMultilingual | Self::PotionCode)
135 }
136}