processors_rs/
markdown_processor.rs

1use crate::processor::{Document, DocumentProcessor};
2use text_splitter::{Characters, ChunkConfig, ChunkConfigError, MarkdownSplitter};
3
4/// A struct that provides functionality to process Markdown files.
5pub struct MarkdownProcessor {
6    splitter: MarkdownSplitter<Characters>,
7}
8
9impl MarkdownProcessor {
10    pub fn new(chunk_size: usize, overlap: usize) -> Result<MarkdownProcessor, ChunkConfigError> {
11        let splitter_config = ChunkConfig::new(chunk_size).with_overlap(overlap)?;
12        let splitter = MarkdownSplitter::new(splitter_config);
13        Ok(MarkdownProcessor { splitter })
14    }
15}
16
17impl DocumentProcessor for MarkdownProcessor {
18    fn process_document(&self, content: &str) -> anyhow::Result<Document> {
19        let chunks = self
20            .splitter
21            .chunks(content)
22            .map(|x| x.to_string())
23            .collect();
24        Ok(Document { chunks })
25    }
26}