use chonkier::chunker::recursive::RecursiveChunker;
use chonkier::types::RecursiveRules;
#[cfg(feature = "tokenizers")]
use chonkier::tokenizer::hftokenizers::HFTokenizer;
use std::fs::{self, File};
use std::io::{BufWriter, Write};
use serde::Serialize;
/// Splits `examples/data/rag.txt` into chunks and writes them to `chunks.jsonl`,
/// one JSON-serialized chunk per line.
///
/// The chunker is a [`RecursiveChunker`] built from an [`HFTokenizer`] obtained
/// via `from_pretrained("gpt2")`, the value `128`, and `RecursiveRules::default()`.
///
/// # Errors
///
/// Returns an error if the input file cannot be read, the output file cannot be
/// created or written, or a chunk fails to serialize to JSON.
#[cfg(feature = "tokenizers")]
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let text = fs::read_to_string("examples/data/rag.txt")?;

    // NOTE(review): 128 is presumably the maximum chunk size in tokens —
    // confirm against the `RecursiveChunker::new` signature.
    let chunker = RecursiveChunker::new(
        HFTokenizer::from_pretrained("gpt2"),
        128,
        RecursiveRules::default(),
    );
    let chunks = chunker.chunk(&text);

    // Buffer the output so each line does not become its own write to the file.
    let mut writer = BufWriter::new(File::create("chunks.jsonl")?);
    for chunk in chunks {
        writeln!(writer, "{}", serde_json::to_string(&chunk)?)?;
    }
    // Flush explicitly so a failed final write is reported instead of being
    // lost when the buffered writer is dropped.
    writer.flush()?;

    println!("Chunks written to chunks.jsonl");
    Ok(())
}

/// Fallback entry point used when the `tokenizers` feature is disabled.
///
/// Without it this file would define no `main` at all and the build would fail
/// with a "main function not found" error; instead, explain what is missing and
/// exit with a non-zero status.
#[cfg(not(feature = "tokenizers"))]
fn main() {
    eprintln!(
        "This example requires the `tokenizers` feature; rerun with `--features tokenizers`."
    );
    std::process::exit(1);
}