snipsplit-core
Pure-Rust core for snipsplit: a token-aware text chunker for RAG ingestion.
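```rust
use snipsplit_core::{ChunkConfig, Chunker};

let chunker = Chunker::new(ChunkConfig {
    max_tokens: 256,
    overlap_tokens: 32,
    ..Default::default()
})?;
let chunks = chunker.split("Some long document...")?;
for c in &chunks {
    println!("{}..{} ({} tokens)", c.start, c.end, c.token_count);
}
# Ok::<(), snipsplit_core::ChunkerError>(())
```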
License
Dual-licensed under MIT or Apache-2.0.
Pure-Rust core for snipsplit: a token-aware text chunker for RAG ingestion.
```rust
use snipsplit_core::{ChunkConfig, Chunker};

let chunker = Chunker::new(ChunkConfig {
    max_tokens: 256,
    overlap_tokens: 32,
    ..Default::default()
})?;
let chunks = chunker.split("Some long document...")?;
for c in &chunks {
    println!("{}..{} ({} tokens)", c.start, c.end, c.token_count);
}
# Ok::<(), snipsplit_core::ChunkerError>(())
```
Dual-licensed under MIT or Apache-2.0.