pub mod code;
pub mod fixed;
pub mod parallel;
pub mod semantic;
pub mod traits;
pub use code::CodeChunker;
pub use fixed::FixedChunker;
pub use parallel::ParallelChunker;
pub use semantic::SemanticChunker;
pub use traits::{ChunkMetadata as ChunkerMetadata, Chunker};
/// Default chunk size exposed by this module.
///
/// NOTE(review): the unit (bytes, characters, or tokens) and the places where
/// this default is applied are not visible in this file; confirm against the
/// individual chunker implementations.
pub const DEFAULT_CHUNK_SIZE: usize = 3_000;
/// Default overlap exposed by this module.
///
/// NOTE(review): presumably the amount of content shared between consecutive
/// chunks, in the same unit as [`DEFAULT_CHUNK_SIZE`]; confirm against the
/// chunker implementations.
pub const DEFAULT_OVERLAP: usize = 500;
/// Maximum chunk size exposed by this module.
///
/// NOTE(review): where (or whether) this limit is enforced is not visible in
/// this file; confirm against the chunker implementations.
pub const MAX_CHUNK_SIZE: usize = 50_000;
/// Returns the chunker this module treats as its default: a
/// [`SemanticChunker`] built with `SemanticChunker::new()`.
///
/// Declared `const fn`, so it can also be called in constant contexts.
#[must_use]
pub const fn default_chunker() -> SemanticChunker {
SemanticChunker::new()
}
pub fn create_chunker(name: &str) -> crate::error::Result<Box<dyn Chunker>> {
match name.to_lowercase().as_str() {
"fixed" => Ok(Box::new(FixedChunker::new())),
"semantic" => Ok(Box::new(SemanticChunker::new())),
"code" | "ast" => Ok(Box::new(CodeChunker::new())),
"parallel" => Ok(Box::new(ParallelChunker::new(SemanticChunker::new()))),
_ => Err(crate::error::ChunkingError::UnknownStrategy {
name: name.to_string(),
}
.into()),
}
}
/// Lists the canonical strategy names, in a fixed order:
/// `"fixed"`, `"semantic"`, `"code"`, `"parallel"`.
///
/// A fresh `Vec` is allocated on every call.
#[must_use]
pub fn available_strategies() -> Vec<&'static str> {
    const STRATEGIES: [&str; 4] = ["fixed", "semantic", "code", "parallel"];
    STRATEGIES.to_vec()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Creates the chunker registered under `requested` and checks the name it
    /// reports. `#[track_caller]` keeps failure locations pointing at the
    /// calling test rather than at this helper.
    #[track_caller]
    fn assert_created_name(requested: &str, expected: &str) {
        let chunker = create_chunker(requested).unwrap();
        assert_eq!(chunker.name(), expected);
    }

    #[test]
    fn test_default_chunker() {
        assert_eq!(default_chunker().name(), "semantic");
    }

    #[test]
    fn test_create_chunker_fixed() {
        assert_created_name("fixed", "fixed");
    }

    #[test]
    fn test_create_chunker_semantic() {
        assert_created_name("semantic", "semantic");
    }

    #[test]
    fn test_create_chunker_parallel() {
        assert_created_name("parallel", "parallel");
    }

    #[test]
    fn test_create_chunker_unknown() {
        assert!(create_chunker("unknown").is_err());
    }

    #[test]
    fn test_create_chunker_case_insensitive() {
        // Upper-case input must resolve to the same strategy.
        assert_created_name("FIXED", "fixed");
    }

    #[test]
    fn test_available_strategies() {
        let listed = available_strategies();
        assert_eq!(listed.len(), 4);
        for expected in ["fixed", "semantic", "code", "parallel"] {
            assert!(listed.contains(&expected));
        }
    }

    #[test]
    fn test_create_chunker_code() {
        assert_created_name("code", "code");
    }

    #[test]
    fn test_create_chunker_ast_alias() {
        // "ast" is an alias that resolves to the code chunker.
        assert_created_name("ast", "code");
    }
}