use serde::{Deserialize, Serialize};
/// Settings that control how the chunker sizes its chunks.
///
/// Every field is optional in serialized input: a missing key is filled from
/// the matching private `ChunkerConfig::default_*` function. Because of
/// `deny_unknown_fields`, deserialization fails on any key other than the
/// three fields declared here.
///
/// Deserialization does not enforce relationships between the fields; call
/// `ChunkerConfig::validate` afterwards.
///
/// NOTE(review): the messages produced by `validate` use an `indexer.` prefix,
/// so this presumably sits under an `indexer` config section — confirm.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct ChunkerConfig {
/// Chunk size setting; `validate` rejects a value of zero.
///
/// NOTE(review): presumably a token count, going by the name — confirm
/// against the chunker that consumes it.
#[serde(default = "ChunkerConfig::default_chunk_tokens")]
pub chunk_tokens: usize,
/// Overlap setting; `validate` requires it to be strictly less than
/// `chunk_tokens`.
#[serde(default = "ChunkerConfig::default_chunk_overlap")]
pub chunk_overlap: usize,
/// Minimum-size setting; not checked by `validate`.
///
/// NOTE(review): presumably the smallest chunk worth emitting — confirm
/// against the chunker that consumes it.
#[serde(default = "ChunkerConfig::default_chunk_min_tokens")]
pub chunk_min_tokens: usize,
}
impl ChunkerConfig {
pub fn validate(&self) -> Result<(), String> {
if self.chunk_tokens == 0 {
return Err("indexer.chunk_tokens must be greater than 0".to_string());
}
if self.chunk_overlap >= self.chunk_tokens {
return Err("indexer.chunk_overlap must be less than indexer.chunk_tokens".to_string());
}
Ok(())
}
#[must_use]
const fn default_chunk_tokens() -> usize {
crate::search::constants::EMBED_CHUNK_TOKENS_DEFAULT
}
#[must_use]
const fn default_chunk_overlap() -> usize {
crate::search::constants::EMBED_CHUNK_OVERLAP_DEFAULT
}
#[must_use]
const fn default_chunk_min_tokens() -> usize {
crate::search::constants::CHUNK_MIN_TOKENS_DEFAULT
}
}
impl Default for ChunkerConfig {
fn default() -> Self {
Self {
chunk_tokens: Self::default_chunk_tokens(),
chunk_overlap: Self::default_chunk_overlap(),
chunk_min_tokens: Self::default_chunk_min_tokens(),
}
}
}