/// Splits `text` into overlapping chunks using `trueno_rag`'s `RecursiveChunker`
/// with its default configuration.
///
/// * Empty input yields an empty vector.
/// * Input whose **byte** length (`str::len`) is at most `chunk_size` is returned
///   unchanged as a single chunk, without invoking the chunker.
/// * Otherwise the text is wrapped in a `Document` and handed to
///   `RecursiveChunker::new(chunk_size, overlap)`; the `content` of every
///   resulting chunk is returned in order.
///
/// If the chunker reports an error, the error is discarded and the result of
/// [`chunk_text_fixed`] with the same arguments is returned instead.
///
/// NOTE(review): whether the chunker measures `chunk_size`/`overlap` in bytes or
/// in characters is not visible from here — confirm against `trueno_rag`.
pub fn chunk_text_with_overlap(text: &str, chunk_size: usize, overlap: usize) -> Vec<String> {
    use trueno_rag::chunk::{Chunker, RecursiveChunker};
    use trueno_rag::Document;

    if text.is_empty() {
        return Vec::new();
    }
    // Short-circuit: the whole input already fits in one chunk (byte length).
    if text.len() <= chunk_size {
        return vec![text.to_string()];
    }

    let splitter = RecursiveChunker::new(chunk_size, overlap);
    let document = Document::new(text);

    splitter
        .chunk(&document)
        .map(|pieces| {
            pieces
                .into_iter()
                .map(|piece: trueno_rag::Chunk| piece.content)
                .collect::<Vec<String>>()
        })
        // Best-effort: any chunker failure degrades to fixed-size windows.
        .unwrap_or_else(|_| chunk_text_fixed(text, chunk_size, overlap))
}
/// Splits `text` into overlapping chunks using `trueno_rag`'s `RecursiveChunker`
/// configured with an explicit separator list.
///
/// The separators passed to the chunker are, in this order: blank line
/// (`"\n\n"`), newline (`"\n"`), sentence end (`". "`), comma (`", "`) and a
/// single space (`" "`).
///
/// * Empty input yields an empty vector.
/// * Input whose **byte** length (`str::len`) is at most `chunk_size` is returned
///   unchanged as a single chunk, without invoking the chunker.
/// * Otherwise the `content` of every chunk produced by the chunker is returned
///   in order.
///
/// If the chunker reports an error, the error is discarded and the result of
/// [`chunk_text_with_overlap`] with the same arguments is returned instead.
///
/// NOTE(review): whether the chunker measures `chunk_size`/`overlap` in bytes or
/// in characters is not visible from here — confirm against `trueno_rag`.
pub fn chunk_text_recursive(text: &str, chunk_size: usize, overlap: usize) -> Vec<String> {
    use trueno_rag::chunk::{Chunker, RecursiveChunker};
    use trueno_rag::Document;

    if text.is_empty() {
        return Vec::new();
    }
    // Short-circuit: the whole input already fits in one chunk (byte length).
    if text.len() <= chunk_size {
        return vec![text.to_string()];
    }

    let separators: Vec<String> = ["\n\n", "\n", ". ", ", ", " "]
        .iter()
        .map(|sep| String::from(*sep))
        .collect();
    let splitter = RecursiveChunker::new(chunk_size, overlap).with_separators(separators);
    let document = Document::new(text);

    splitter
        .chunk(&document)
        .map(|pieces| {
            pieces
                .into_iter()
                .map(|piece: trueno_rag::Chunk| piece.content)
                .collect::<Vec<String>>()
        })
        // Best-effort: any chunker failure degrades to the default chunker path.
        .unwrap_or_else(|_| chunk_text_with_overlap(text, chunk_size, overlap))
}
/// Splits `text` into fixed-size windows of `chunk_size` characters, where each
/// window starts `chunk_size - overlap` characters after the previous one.
///
/// Sizes are counted in Unicode scalar values (`char`s), not bytes, so chunks
/// never split a multi-byte character.
///
/// # Behaviour
///
/// * Empty `text` yields an empty vector.
/// * `text` with at most `chunk_size` characters is returned as a single chunk.
/// * `chunk_size == 0` cannot partition anything, so the whole text is returned
///   as a single chunk rather than a list of empty strings.
/// * If `overlap >= chunk_size` the window still advances by one character per
///   chunk, which guarantees termination.
/// * The final chunk is the tail of the text and may be shorter than
///   `chunk_size`.
pub fn chunk_text_fixed(text: &str, chunk_size: usize, overlap: usize) -> Vec<String> {
    if text.is_empty() {
        return Vec::new();
    }
    // A zero-width window would only ever emit empty strings; keep the content.
    if chunk_size == 0 {
        return vec![text.to_string()];
    }

    // Always advance by at least one character so the loop makes progress.
    let step = chunk_size.saturating_sub(overlap).max(1);

    // Byte offset at which the `n`-th character of `s` starts, or `None` when
    // `s` has `n` or fewer characters.
    let byte_offset_of_char =
        |s: &str, n: usize| s.char_indices().nth(n).map(|(offset, _)| offset);

    let mut chunks = Vec::new();
    let mut rest = text;
    // While more than `chunk_size` characters remain, emit a full window and
    // slide forward by `step` characters.
    while let Some(end) = byte_offset_of_char(rest, chunk_size) {
        chunks.push(rest[..end].to_string());
        // `step <= chunk_size`, so this offset always exists here; `end` is
        // only a defensive fallback.
        let advance = byte_offset_of_char(rest, step).unwrap_or(end);
        rest = &rest[advance..];
    }
    // What is left fits in one window: it is the final (possibly short) chunk.
    chunks.push(rest.to_string());
    chunks
}