Skip to main content

post_cortex_embeddings/embeddings/
config.rs

1// Copyright (c) 2025 Julius ML
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy
4// of this software and associated documentation files (the "Software"), to deal
5// in the Software without restriction, including without limitation the rights
6// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7// copies of the Software, and to permit persons to whom the Software is
8// furnished to do so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in all
11// copies or substantial portions of the Software.
12
13//! Embedding-engine configuration and supported model types.
14
15use serde::{Deserialize, Serialize};
16use std::path::PathBuf;
17
18/// Configuration for the embedding engine
19#[derive(Debug, Clone, Serialize, Deserialize)]
20pub struct EmbeddingConfig {
21    /// Model type for embeddings
22    pub model_type: EmbeddingModelType,
23    /// Maximum batch size for processing
24    pub max_batch_size: usize,
25    /// Enable adaptive batch sizing
26    pub adaptive_batching: bool,
27    /// Memory pool size for vector reuse
28    pub memory_pool_size: usize,
29    /// Maximum concurrent operations
30    pub max_concurrent_ops: usize,
31    /// Enable performance monitoring
32    pub enable_performance_monitoring: bool,
33    /// Model cache directory
34    pub cache_dir: PathBuf,
35    /// Enable model caching
36    pub enable_caching: bool,
37    /// Operation timeout in seconds
38    pub operation_timeout_secs: u64,
39}
40
41impl Default for EmbeddingConfig {
42    fn default() -> Self {
43        Self {
44            model_type: EmbeddingModelType::default(),
45            max_batch_size: 32,
46            adaptive_batching: true,
47            memory_pool_size: 1000,
48            max_concurrent_ops: num_cpus::get() * 2,
49            enable_performance_monitoring: true,
50            cache_dir: PathBuf::from("./models_cache"),
51            enable_caching: true,
52            operation_timeout_secs: 30,
53        }
54    }
55}
56
57/// Embedding model types.
58///
59/// `PotionMultilingual` is the default — Model2Vec static embeddings, small
60/// on disk, ms-per-text inference, multilingual (Latin / Cyrillic / CJK / …).
61/// The BERT variants are kept for users who need transformer-grade semantic
62/// quality and are willing to pay the GPU/CPU cost.
63#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Default)]
64pub enum EmbeddingModelType {
65    /// Hash-based pseudo-embeddings — kept for backwards compatibility and
66    /// as a last-resort fallback. **Does not** produce semantically
67    /// meaningful vectors; only use when all real backends are disabled.
68    StaticSimilarityMRL,
69    /// MiniLM model (balanced performance, English-only). Requires the
70    /// `bert` feature.
71    MiniLM,
72    /// Multilingual MiniLM model — 50+ languages including Bulgarian.
73    /// Requires the `bert` feature.
74    MultilingualMiniLM,
75    /// TinyBERT model (smallest BERT variant). Requires the `bert` feature.
76    TinyBERT,
77    /// BGE Small model (balanced BERT). Requires the `bert` feature.
78    BGESmall,
79    /// `minishlab/potion-multilingual-128M` — Model2Vec static embeddings,
80    /// multilingual (Latin/Cyrillic/CJK/…). Default — small, fast, no GPU.
81    /// Requires the `model2vec` feature (on by default).
82    #[default]
83    PotionMultilingual,
84    /// `minishlab/potion-code-16M` — Model2Vec static embeddings tuned for
85    /// source code (English-leaning). Requires the `model2vec` feature.
86    PotionCode,
87}
88
89impl EmbeddingModelType {
90    /// Get embedding dimension for this model type.
91    ///
92    /// The Potion dimensions are taken from each model's published
93    /// config — `Model2VecBackend` verifies the runtime dimension at load
94    /// time and panics if they disagree, so this constant stays the source
95    /// of truth for HNSW index sizing.
96    pub fn embedding_dimension(&self) -> usize {
97        match self {
98            Self::StaticSimilarityMRL => 1024,
99            Self::MiniLM | Self::MultilingualMiniLM | Self::BGESmall => 384,
100            Self::TinyBERT => 312,
101            // potion-multilingual-128M outputs 256-dim vectors.
102            Self::PotionMultilingual => 256,
103            // potion-code-16M outputs 512-dim vectors.
104            Self::PotionCode => 512,
105        }
106    }
107
108    /// Get model ID for HuggingFace Hub.
109    pub fn model_id(&self) -> &'static str {
110        match self {
111            Self::StaticSimilarityMRL | Self::MiniLM => "sentence-transformers/all-MiniLM-L6-v2",
112            Self::MultilingualMiniLM => {
113                "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
114            }
115            Self::TinyBERT => "huawei-noah/TinyBERT_General_6L_312D",
116            Self::BGESmall => "BAAI/bge-small-en-v1.5",
117            Self::PotionMultilingual => "minishlab/potion-multilingual-128M",
118            Self::PotionCode => "minishlab/potion-code-16M",
119        }
120    }
121
122    /// Check if this is a BERT-based model (Candle transformer).
123    pub fn is_bert_based(&self) -> bool {
124        matches!(
125            self,
126            Self::MiniLM | Self::MultilingualMiniLM | Self::TinyBERT | Self::BGESmall
127        )
128    }
129
130    /// Check if this is a Model2Vec static-embedding model.
131    pub fn is_model2vec(&self) -> bool {
132        matches!(self, Self::PotionMultilingual | Self::PotionCode)
133    }
134}