processors_rs/
markdown_processor.rs

1use text_splitter::{Characters, ChunkConfig, ChunkConfigError, MarkdownSplitter};
2use crate::processor::{Document, DocumentProcessor};
3
4/// A struct that provides functionality to process Markdown files.
5pub struct MarkdownProcessor {
6    splitter: MarkdownSplitter<Characters>
7}
8
9impl MarkdownProcessor {
10    pub fn new(chunk_size: usize, overlap: usize) -> Result<MarkdownProcessor, ChunkConfigError> {
11        let splitter_config = ChunkConfig::new(chunk_size)
12            .with_overlap(overlap)?;
13        let splitter = MarkdownSplitter::new(splitter_config);
14        Ok(MarkdownProcessor {
15            splitter
16        })
17    }
18}
19
20impl DocumentProcessor for MarkdownProcessor {
21
22    fn process_document(&self, content: &str) -> anyhow::Result<Document> {
23        let chunks = self.splitter.chunks(content)
24            .map(|x| x.to_string())
25            .collect();
26        Ok(Document {
27            chunks
28        })
29    }
30}