processors_rs/
markdown_processor.rs1use text_splitter::{Characters, ChunkConfig, ChunkConfigError, MarkdownSplitter};
2use crate::processor::{Document, DocumentProcessor};
3
4pub struct MarkdownProcessor {
6 splitter: MarkdownSplitter<Characters>
7}
8
9impl MarkdownProcessor {
10 pub fn new(chunk_size: usize, overlap: usize) -> Result<MarkdownProcessor, ChunkConfigError> {
11 let splitter_config = ChunkConfig::new(chunk_size)
12 .with_overlap(overlap)?;
13 let splitter = MarkdownSplitter::new(splitter_config);
14 Ok(MarkdownProcessor {
15 splitter
16 })
17 }
18}
19
20impl DocumentProcessor for MarkdownProcessor {
21
22 fn process_document(&self, content: &str) -> anyhow::Result<Document> {
23 let chunks = self.splitter.chunks(content)
24 .map(|x| x.to_string())
25 .collect();
26 Ok(Document {
27 chunks
28 })
29 }
30}