post_cortex_embeddings/embeddings/config.rs
1// Copyright (c) 2025 Julius ML
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy
4// of this software and associated documentation files (the "Software"), to deal
5// in the Software without restriction, including without limitation the rights
6// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7// copies of the Software, and to permit persons to whom the Software is
8// furnished to do so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in all
11// copies or substantial portions of the Software.
12
13//! Embedding-engine configuration and supported model types.
14
15use serde::{Deserialize, Serialize};
16use std::path::PathBuf;
17
18/// Configuration for the embedding engine
19#[derive(Debug, Clone, Serialize, Deserialize)]
20pub struct EmbeddingConfig {
21 /// Model type for embeddings
22 pub model_type: EmbeddingModelType,
23 /// Maximum batch size for processing
24 pub max_batch_size: usize,
25 /// Enable adaptive batch sizing
26 pub adaptive_batching: bool,
27 /// Memory pool size for vector reuse
28 pub memory_pool_size: usize,
29 /// Maximum concurrent operations
30 pub max_concurrent_ops: usize,
31 /// Enable performance monitoring
32 pub enable_performance_monitoring: bool,
33 /// Model cache directory
34 pub cache_dir: PathBuf,
35 /// Enable model caching
36 pub enable_caching: bool,
37 /// Operation timeout in seconds
38 pub operation_timeout_secs: u64,
39}
40
41impl Default for EmbeddingConfig {
42 fn default() -> Self {
43 Self {
44 model_type: EmbeddingModelType::default(),
45 max_batch_size: 32,
46 adaptive_batching: true,
47 memory_pool_size: 1000,
48 max_concurrent_ops: num_cpus::get() * 2,
49 enable_performance_monitoring: true,
50 cache_dir: PathBuf::from("./models_cache"),
51 enable_caching: true,
52 operation_timeout_secs: 30,
53 }
54 }
55}
56
57/// Embedding model types.
58///
59/// `PotionMultilingual` is the default — Model2Vec static embeddings, small
60/// on disk, ms-per-text inference, multilingual (Latin / Cyrillic / CJK / …).
61/// The BERT variants are kept for users who need transformer-grade semantic
62/// quality and are willing to pay the GPU/CPU cost.
63#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Default)]
64pub enum EmbeddingModelType {
65 /// Hash-based pseudo-embeddings — kept for backwards compatibility and
66 /// as a last-resort fallback. **Does not** produce semantically
67 /// meaningful vectors; only use when all real backends are disabled.
68 StaticSimilarityMRL,
69 /// MiniLM model (balanced performance, English-only). Requires the
70 /// `bert` feature.
71 MiniLM,
72 /// Multilingual MiniLM model — 50+ languages including Bulgarian.
73 /// Requires the `bert` feature.
74 MultilingualMiniLM,
75 /// TinyBERT model (smallest BERT variant). Requires the `bert` feature.
76 TinyBERT,
77 /// BGE Small model (balanced BERT). Requires the `bert` feature.
78 BGESmall,
79 /// `minishlab/potion-multilingual-128M` — Model2Vec static embeddings,
80 /// multilingual (Latin/Cyrillic/CJK/…). Default — small, fast, no GPU.
81 /// Requires the `model2vec` feature (on by default).
82 #[default]
83 PotionMultilingual,
84 /// `minishlab/potion-code-16M` — Model2Vec static embeddings tuned for
85 /// source code (English-leaning). Requires the `model2vec` feature.
86 PotionCode,
87}
88
89impl EmbeddingModelType {
90 /// Get embedding dimension for this model type.
91 ///
92 /// The Potion dimensions are taken from each model's published
93 /// config — `Model2VecBackend` verifies the runtime dimension at load
94 /// time and panics if they disagree, so this constant stays the source
95 /// of truth for HNSW index sizing.
96 pub fn embedding_dimension(&self) -> usize {
97 match self {
98 Self::StaticSimilarityMRL => 1024,
99 Self::MiniLM | Self::MultilingualMiniLM | Self::BGESmall => 384,
100 Self::TinyBERT => 312,
101 // potion-multilingual-128M outputs 256-dim vectors.
102 Self::PotionMultilingual => 256,
103 // potion-code-16M outputs 512-dim vectors.
104 Self::PotionCode => 512,
105 }
106 }
107
108 /// Get model ID for HuggingFace Hub.
109 pub fn model_id(&self) -> &'static str {
110 match self {
111 Self::StaticSimilarityMRL | Self::MiniLM => "sentence-transformers/all-MiniLM-L6-v2",
112 Self::MultilingualMiniLM => {
113 "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
114 }
115 Self::TinyBERT => "huawei-noah/TinyBERT_General_6L_312D",
116 Self::BGESmall => "BAAI/bge-small-en-v1.5",
117 Self::PotionMultilingual => "minishlab/potion-multilingual-128M",
118 Self::PotionCode => "minishlab/potion-code-16M",
119 }
120 }
121
122 /// Check if this is a BERT-based model (Candle transformer).
123 pub fn is_bert_based(&self) -> bool {
124 matches!(
125 self,
126 Self::MiniLM | Self::MultilingualMiniLM | Self::TinyBERT | Self::BGESmall
127 )
128 }
129
130 /// Check if this is a Model2Vec static-embedding model.
131 pub fn is_model2vec(&self) -> bool {
132 matches!(self, Self::PotionMultilingual | Self::PotionCode)
133 }
134}