1pub mod code;
12pub mod fixed;
13pub mod parallel;
14pub mod semantic;
15pub mod traits;
16
17pub use code::CodeChunker;
18pub use fixed::FixedChunker;
19pub use parallel::ParallelChunker;
20pub use semantic::SemanticChunker;
21pub use traits::{ChunkMetadata as ChunkerMetadata, Chunker};
22
/// Default size of a single chunk.
///
/// NOTE(review): the unit (bytes vs. characters) and the exact consumers of
/// this value are not visible from this module — confirm against the chunker
/// implementations.
pub const DEFAULT_CHUNK_SIZE: usize = 3_000;
26
/// Default overlap value for chunking.
///
/// NOTE(review): presumably the amount of content shared between two
/// consecutive chunks, in the same unit as the chunk size — confirm against
/// the chunker implementations.
pub const DEFAULT_OVERLAP: usize = 500;
29
/// Upper limit for the size of a single chunk.
///
/// NOTE(review): where (and whether) this limit is enforced is not visible
/// from this module — confirm against the chunker implementations.
pub const MAX_CHUNK_SIZE: usize = 50_000;
32
33#[must_use]
35pub const fn default_chunker() -> SemanticChunker {
36 SemanticChunker::new()
37}
38
39pub fn create_chunker(name: &str) -> crate::error::Result<Box<dyn Chunker>> {
53 match name.to_lowercase().as_str() {
54 "fixed" => Ok(Box::new(FixedChunker::new())),
55 "semantic" => Ok(Box::new(SemanticChunker::new())),
56 "code" | "ast" => Ok(Box::new(CodeChunker::new())),
57 "parallel" => Ok(Box::new(ParallelChunker::new(SemanticChunker::new()))),
58 _ => Err(crate::error::ChunkingError::UnknownStrategy {
59 name: name.to_string(),
60 }
61 .into()),
62 }
63}
64
/// Lists the canonical strategy names, in a fixed order.
///
/// The returned names are `"fixed"`, `"semantic"`, `"code"` and `"parallel"`.
/// Aliases are not included: `create_chunker` also accepts `"ast"` for the
/// code strategy, but only `"code"` is listed here.
#[must_use]
pub fn available_strategies() -> Vec<&'static str> {
    vec!["fixed", "semantic", "code", "parallel"]
}
70
#[cfg(test)]
mod tests {
    use super::*;

    /// The default chunker reports itself as the semantic strategy.
    #[test]
    fn test_default_chunker() {
        let chunker = default_chunker();
        assert_eq!(chunker.name(), "semantic");
    }

    #[test]
    fn test_create_chunker_fixed() {
        let chunker = create_chunker("fixed").unwrap();
        assert_eq!(chunker.name(), "fixed");
    }

    #[test]
    fn test_create_chunker_semantic() {
        let chunker = create_chunker("semantic").unwrap();
        assert_eq!(chunker.name(), "semantic");
    }

    #[test]
    fn test_create_chunker_parallel() {
        let chunker = create_chunker("parallel").unwrap();
        assert_eq!(chunker.name(), "parallel");
    }

    /// A name that matches no strategy must be rejected, not defaulted.
    #[test]
    fn test_create_chunker_unknown() {
        let result = create_chunker("unknown");
        assert!(result.is_err());
    }

    /// Strategy lookup ignores the case of the requested name.
    #[test]
    fn test_create_chunker_case_insensitive() {
        let chunker = create_chunker("FIXED").unwrap();
        assert_eq!(chunker.name(), "fixed");
    }

    #[test]
    fn test_available_strategies() {
        let strategies = available_strategies();
        assert_eq!(strategies.len(), 4);
        assert!(strategies.contains(&"fixed"));
        assert!(strategies.contains(&"semantic"));
        assert!(strategies.contains(&"code"));
        assert!(strategies.contains(&"parallel"));
    }

    #[test]
    fn test_create_chunker_code() {
        let chunker = create_chunker("code").unwrap();
        assert_eq!(chunker.name(), "code");
    }

    /// `"ast"` is an alias that resolves to the code chunker.
    #[test]
    fn test_create_chunker_ast_alias() {
        let chunker = create_chunker("ast").unwrap();
        assert_eq!(chunker.name(), "code");
    }

    /// Every advertised strategy name must be constructible and must report
    /// itself under that same name, keeping the list and the factory in sync.
    #[test]
    fn test_available_strategies_are_creatable() {
        for name in available_strategies() {
            let chunker = create_chunker(name).unwrap();
            assert_eq!(chunker.name(), name);
        }
    }
}