sh_layer3/document_loaders/
markdown.rs1use crate::document_loaders::{DocumentLoader, LoadOptions};
6use crate::retriever_engine::Document;
7use crate::types::Layer3Result;
8use async_trait::async_trait;
9use std::path::PathBuf;
10
11#[allow(dead_code)]
13pub struct MarkdownLoader {
14 #[allow(dead_code)]
15 options: LoadOptions,
16}
17
18impl MarkdownLoader {
19 pub fn new() -> Self {
20 Self {
21 options: LoadOptions::default(),
22 }
23 }
24}
25
26impl Default for MarkdownLoader {
27 fn default() -> Self {
28 Self::new()
29 }
30}
31
32#[async_trait]
33impl DocumentLoader for MarkdownLoader {
34 async fn load(&self, path: PathBuf) -> Layer3Result<Document> {
35 let content = tokio::fs::read_to_string(&path).await?;
36 Ok(Document::new(content).with_source(path.to_string_lossy().to_string()))
37 }
38
39 async fn load_and_split(&self, path: PathBuf) -> Layer3Result<Vec<Document>> {
40 let content = tokio::fs::read_to_string(&path).await?;
41
42 let mut documents = Vec::new();
44 let mut current_section = String::new();
45 let mut current_title = String::from("intro");
46
47 for line in content.lines() {
48 if line.starts_with("#") {
49 if !current_section.trim().is_empty() {
51 documents.push(
52 Document::new(current_section.trim().to_string()).with_source(format!(
53 "{}#{}",
54 path.to_string_lossy(),
55 current_title
56 )),
57 );
58 }
59 current_title = line.trim_start_matches('#').trim().to_string();
60 current_section = format!("{}\n\n", line);
61 } else {
62 current_section.push_str(line);
63 current_section.push('\n');
64 }
65 }
66
67 if !current_section.trim().is_empty() {
69 documents.push(
70 Document::new(current_section.trim().to_string()).with_source(format!(
71 "{}#{}",
72 path.to_string_lossy(),
73 current_title
74 )),
75 );
76 }
77
78 Ok(documents)
79 }
80
81 fn supports(&self, path: &std::path::Path) -> bool {
82 path.extension()
83 .and_then(|e| e.to_str())
84 .map(|e| e == "md" || e == "markdown")
85 .unwrap_or(false)
86 }
87
88 fn extensions(&self) -> &[&str] {
89 &["md", "markdown"]
90 }
91}
92
#[cfg(test)]
mod tests {
    use super::*;

    /// The loader must advertise the canonical `md` extension.
    #[test]
    fn test_markdown_loader_extensions() {
        let loader = MarkdownLoader::new();
        let advertises_md = loader.extensions().iter().any(|ext| *ext == "md");
        assert!(advertises_md);
    }
}