processors_rs/
markdown_processor.rs1use crate::processor::{Document, DocumentProcessor};
2use text_splitter::{Characters, ChunkConfig, ChunkConfigError, MarkdownSplitter};
3
4pub struct MarkdownProcessor {
6 splitter: MarkdownSplitter<Characters>,
7}
8
9impl MarkdownProcessor {
10 pub fn new(chunk_size: usize, overlap: usize) -> Result<MarkdownProcessor, ChunkConfigError> {
11 let splitter_config = ChunkConfig::new(chunk_size).with_overlap(overlap)?;
12 let splitter = MarkdownSplitter::new(splitter_config);
13 Ok(MarkdownProcessor { splitter })
14 }
15}
16
17impl DocumentProcessor for MarkdownProcessor {
18 fn process_document(&self, content: &str) -> anyhow::Result<Document> {
19 let chunks = self
20 .splitter
21 .chunks(content)
22 .map(|x| x.to_string())
23 .collect();
24 Ok(Document { chunks })
25 }
26}