contrag_core/
config.rs

1use serde::{Deserialize, Serialize};
2use crate::error::{ContragError, Result};
3
4/// Main configuration for ContRAG
5#[derive(Clone, Debug, Serialize, Deserialize)]
6pub struct ContragConfig {
7    /// Entity configurations
8    pub entities: Vec<EntityConfig>,
9    
10    /// Embedder configuration
11    pub embedder: EmbedderConfigDef,
12    
13    /// Chunking configuration
14    pub chunking: ChunkingConfig,
15    
16    /// Vector store configuration
17    pub vector_store: VectorStoreConfig,
18    
19    /// Optional system prompt for context generation
20    pub system_prompt: Option<String>,
21}
22
23/// Entity configuration
24#[derive(Clone, Debug, Serialize, Deserialize)]
25pub struct EntityConfig {
26    /// Entity type name (e.g., "User", "Order")
27    pub name: String,
28    
29    /// Canister ID where this entity's data lives
30    pub canister_id: String,
31    
32    /// Method name to call to fetch a single entity (e.g., "get_user")
33    pub fetch_method: String,
34    
35    /// Method name to call to fetch multiple entities (e.g., "get_users")
36    pub fetch_many_method: Option<String>,
37    
38    /// Relationships to other entities
39    pub relationships: Vec<RelationshipConfig>,
40    
41    /// Whether to include this entity in automatic context building
42    pub auto_include: bool,
43}
44
45/// Relationship configuration
46#[derive(Clone, Debug, Serialize, Deserialize)]
47pub struct RelationshipConfig {
48    /// Field name in the source entity that contains the reference
49    pub field_name: String,
50    
51    /// Target entity type
52    pub target_entity: String,
53    
54    /// Relationship type
55    pub relationship_type: String, // "one_to_one", "one_to_many", etc.
56}
57
58/// Embedder provider configuration (from config file)
59#[derive(Clone, Debug, Serialize, Deserialize)]
60pub struct EmbedderConfigDef {
61    /// Provider: "openai" or "gemini"
62    pub provider: String,
63    
64    /// Model name
65    pub model: String,
66    
67    /// Expected dimensions
68    pub dimensions: usize,
69    
70    /// API endpoint (optional, uses default if not provided)
71    pub api_endpoint: Option<String>,
72}
73
74/// Chunking configuration
75#[derive(Clone, Debug, Serialize, Deserialize)]
76pub struct ChunkingConfig {
77    /// Chunk size in characters
78    pub chunk_size: usize,
79    
80    /// Overlap between chunks in characters
81    pub overlap: usize,
82    
83    /// Whether to include field names in chunks
84    pub include_field_names: bool,
85}
86
87impl Default for ChunkingConfig {
88    fn default() -> Self {
89        Self {
90            chunk_size: 1000,
91            overlap: 100,
92            include_field_names: true,
93        }
94    }
95}
96
97/// Vector store configuration
98#[derive(Clone, Debug, Serialize, Deserialize)]
99pub struct VectorStoreConfig {
100    /// Storage type: "stable_memory" or "hybrid"
101    pub storage_type: String,
102    
103    /// Maximum vectors to keep in hot storage (for hybrid mode)
104    pub max_hot_vectors: Option<usize>,
105    
106    /// Whether to enable caching
107    pub enable_cache: bool,
108}
109
110impl Default for VectorStoreConfig {
111    fn default() -> Self {
112        Self {
113            storage_type: "stable_memory".to_string(),
114            max_hot_vectors: Some(10000),
115            enable_cache: true,
116        }
117    }
118}
119
/// Values sourced from environment variables.
#[derive(Debug, Clone)]
pub struct EnvVars {
    /// OpenAI API key, if one was provided.
    pub openai_api_key: Option<String>,
    /// Gemini API key, if one was provided.
    pub gemini_api_key: Option<String>,
}
126
127/// Load configuration from a JSON file
128/// 
129/// This expects the config file to NOT contain API keys.
130/// API keys should be loaded separately from environment variables.
131pub fn load_config_from_json(json_str: &str) -> Result<ContragConfig> {
132    serde_json::from_str(json_str)
133        .map_err(|e| ContragError::ConfigError(format!("Failed to parse config: {}", e)))
134}
135
136/// Validate configuration
137pub fn validate_config(config: &ContragConfig) -> Result<()> {
138    if config.entities.is_empty() {
139        return Err(ContragError::InvalidConfig(
140            "At least one entity must be configured".to_string(),
141        ));
142    }
143
144    if config.embedder.dimensions == 0 {
145        return Err(ContragError::InvalidConfig(
146            "Embedder dimensions must be greater than 0".to_string(),
147        ));
148    }
149
150    if config.chunking.chunk_size == 0 {
151        return Err(ContragError::InvalidConfig(
152            "Chunk size must be greater than 0".to_string(),
153        ));
154    }
155
156    if config.chunking.overlap >= config.chunking.chunk_size {
157        return Err(ContragError::InvalidConfig(
158            "Overlap must be less than chunk size".to_string(),
159        ));
160    }
161
162    // Validate entity configurations
163    for entity in &config.entities {
164        if entity.name.is_empty() {
165            return Err(ContragError::InvalidConfig(
166                "Entity name cannot be empty".to_string(),
167            ));
168        }
169        if entity.canister_id.is_empty() {
170            return Err(ContragError::InvalidConfig(
171                format!("Canister ID for entity '{}' cannot be empty", entity.name),
172            ));
173        }
174    }
175
176    Ok(())
177}
178
179/// Helper to create a minimal config for testing
180pub fn create_default_config() -> ContragConfig {
181    ContragConfig {
182        entities: vec![],
183        embedder: EmbedderConfigDef {
184            provider: "openai".to_string(),
185            model: "text-embedding-3-small".to_string(),
186            dimensions: 1536,
187            api_endpoint: None,
188        },
189        chunking: ChunkingConfig::default(),
190        vector_store: VectorStoreConfig::default(),
191        system_prompt: None,
192    }
193}
194
195/// Load configuration with merged environment variables
196/// 
197/// This is the main entry point for loading configuration.
198/// It reads the config file and merges it with environment variables.
199pub fn load_config(config_json: &str, env_vars: EnvVars) -> Result<ContragConfig> {
200    let config = load_config_from_json(config_json)?;
201    validate_config(&config)?;
202    Ok(config)
203}