codanna/documents/
config.rs1use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5use std::path::PathBuf;
6
7#[derive(Debug, Clone, Default, Serialize, Deserialize)]
9pub struct DocumentsConfig {
10 #[serde(default)]
12 pub enabled: bool,
13
14 #[serde(default)]
16 pub defaults: ChunkingConfig,
17
18 #[serde(default)]
20 pub search: SearchConfig,
21
22 #[serde(default)]
24 pub collections: HashMap<String, CollectionConfig>,
25}
26
27#[derive(Debug, Clone, Serialize, Deserialize)]
29pub struct SearchConfig {
30 #[serde(default)]
32 pub preview_mode: PreviewMode,
33
34 #[serde(default = "default_preview_chars")]
36 pub preview_chars: usize,
37
38 #[serde(default = "default_highlight")]
40 pub highlight: bool,
41}
42
/// Fallback for `SearchConfig::preview_chars` when the field is omitted.
fn default_preview_chars() -> usize {
    const PREVIEW_CHARS: usize = 600;
    PREVIEW_CHARS
}
46
/// Fallback for `SearchConfig::highlight` when the field is omitted:
/// highlighting is on unless explicitly disabled.
fn default_highlight() -> bool {
    const HIGHLIGHT: bool = true;
    HIGHLIGHT
}
50
51impl Default for SearchConfig {
52 fn default() -> Self {
53 Self {
54 preview_mode: PreviewMode::default(),
55 preview_chars: default_preview_chars(),
56 highlight: default_highlight(),
57 }
58 }
59}
60
61#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
63#[serde(rename_all = "lowercase")]
64pub enum PreviewMode {
65 Full,
67 #[default]
69 Kwic,
70}
71
72#[derive(Debug, Clone, Serialize, Deserialize)]
74pub struct CollectionConfig {
75 #[serde(default)]
77 pub paths: Vec<PathBuf>,
78
79 #[serde(default)]
81 pub patterns: Vec<String>,
82
83 pub strategy: Option<ChunkingStrategy>,
85
86 pub min_chunk_chars: Option<usize>,
88
89 pub max_chunk_chars: Option<usize>,
91
92 pub overlap_chars: Option<usize>,
94}
95
96impl CollectionConfig {
97 pub fn effective_chunking(&self, defaults: &ChunkingConfig) -> ChunkingConfig {
99 ChunkingConfig {
100 strategy: self.strategy.clone().unwrap_or(defaults.strategy.clone()),
101 min_chunk_chars: self.min_chunk_chars.unwrap_or(defaults.min_chunk_chars),
102 max_chunk_chars: self.max_chunk_chars.unwrap_or(defaults.max_chunk_chars),
103 overlap_chars: self.overlap_chars.unwrap_or(defaults.overlap_chars),
104 }
105 }
106
107 pub fn effective_patterns(&self) -> Vec<String> {
109 if self.patterns.is_empty() {
110 vec!["**/*.md".to_string(), "**/*.txt".to_string()]
111 } else {
112 self.patterns.clone()
113 }
114 }
115}
116
117impl Default for CollectionConfig {
118 fn default() -> Self {
119 Self {
120 paths: Vec::new(),
121 patterns: vec!["**/*.md".to_string()],
122 strategy: None,
123 min_chunk_chars: None,
124 max_chunk_chars: None,
125 overlap_chars: None,
126 }
127 }
128}
129
130#[derive(Debug, Clone, Serialize, Deserialize)]
132pub struct ChunkingConfig {
133 #[serde(default)]
135 pub strategy: ChunkingStrategy,
136
137 #[serde(default = "default_min_chunk_chars")]
139 pub min_chunk_chars: usize,
140
141 #[serde(default = "default_max_chunk_chars")]
143 pub max_chunk_chars: usize,
144
145 #[serde(default = "default_overlap_chars")]
147 pub overlap_chars: usize,
148}
149
/// Fallback for `ChunkingConfig::min_chunk_chars` when the field is omitted.
fn default_min_chunk_chars() -> usize {
    const MIN_CHUNK_CHARS: usize = 200;
    MIN_CHUNK_CHARS
}
153
/// Fallback for `ChunkingConfig::max_chunk_chars` when the field is omitted.
fn default_max_chunk_chars() -> usize {
    const MAX_CHUNK_CHARS: usize = 1500;
    MAX_CHUNK_CHARS
}
157
/// Fallback for `ChunkingConfig::overlap_chars` when the field is omitted.
fn default_overlap_chars() -> usize {
    const OVERLAP_CHARS: usize = 100;
    OVERLAP_CHARS
}
161
162impl Default for ChunkingConfig {
163 fn default() -> Self {
164 Self {
165 strategy: ChunkingStrategy::default(),
166 min_chunk_chars: default_min_chunk_chars(),
167 max_chunk_chars: default_max_chunk_chars(),
168 overlap_chars: default_overlap_chars(),
169 }
170 }
171}
172
173impl ChunkingConfig {
174 pub fn validate(&self) -> Result<(), String> {
176 if self.min_chunk_chars >= self.max_chunk_chars {
177 return Err(format!(
178 "min_chunk_chars ({}) must be less than max_chunk_chars ({})",
179 self.min_chunk_chars, self.max_chunk_chars
180 ));
181 }
182
183 if self.overlap_chars >= self.min_chunk_chars {
184 return Err(format!(
185 "overlap_chars ({}) should be less than min_chunk_chars ({})",
186 self.overlap_chars, self.min_chunk_chars
187 ));
188 }
189
190 Ok(())
191 }
192}
193
194#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
196#[serde(rename_all = "lowercase")]
197pub enum ChunkingStrategy {
198 #[default]
201 Hybrid,
202}
203
#[cfg(test)]
mod tests {
    use super::*;

    /// `ChunkingConfig::default()` yields 200 / 1500 / 100 and the hybrid
    /// strategy.
    #[test]
    fn test_chunking_config_defaults() {
        let ChunkingConfig {
            strategy,
            min_chunk_chars,
            max_chunk_chars,
            overlap_chars,
        } = ChunkingConfig::default();

        assert_eq!(min_chunk_chars, 200);
        assert_eq!(max_chunk_chars, 1500);
        assert_eq!(overlap_chars, 100);
        assert_eq!(strategy, ChunkingStrategy::Hybrid);
    }

    /// `validate` accepts the defaults and rejects each broken size rule.
    #[test]
    fn test_chunking_config_validation() {
        // The stock configuration is valid.
        let stock = ChunkingConfig::default();
        assert!(stock.validate().is_ok());

        // min (2000) is not below max (1500).
        let min_above_max = ChunkingConfig {
            min_chunk_chars: 2000,
            ..ChunkingConfig::default()
        };
        assert!(min_above_max.validate().is_err());

        // overlap (300) is not below min (200).
        let overlap_above_min = ChunkingConfig {
            min_chunk_chars: 200,
            overlap_chars: 300,
            ..ChunkingConfig::default()
        };
        assert!(overlap_above_min.validate().is_err());
    }

    /// Collection overrides win; fields left as `None` come from the defaults.
    #[test]
    fn test_collection_effective_chunking() {
        let defaults = ChunkingConfig::default();

        // No overrides: the defaults pass straight through.
        let plain = CollectionConfig::default();
        let resolved = plain.effective_chunking(&defaults);
        assert_eq!(resolved.max_chunk_chars, 1500);

        // Only `max_chunk_chars` is overridden; `min_chunk_chars` still
        // comes from the defaults.
        let overridden = CollectionConfig {
            max_chunk_chars: Some(2000),
            ..Default::default()
        };
        let resolved = overridden.effective_chunking(&defaults);
        assert_eq!(resolved.max_chunk_chars, 2000);
        assert_eq!(resolved.min_chunk_chars, 200);
    }
}