sh_layer3/document_loaders/
text.rs1use crate::document_loaders::{DocumentLoader, LoadOptions};
6use crate::retriever_engine::Document;
7use crate::types::Layer3Result;
8use async_trait::async_trait;
9use std::path::PathBuf;
10
11#[allow(dead_code)]
13pub struct TextLoader {
14 extensions: Vec<&'static str>,
15 #[allow(dead_code)]
16 options: LoadOptions,
17}
18
19impl TextLoader {
20 pub fn new() -> Self {
21 Self {
22 extensions: vec!["txt", "text", "log", "md"],
23 options: LoadOptions::default(),
24 }
25 }
26
27 pub fn with_options(options: LoadOptions) -> Self {
28 Self {
29 extensions: vec!["txt", "text", "log", "md"],
30 options,
31 }
32 }
33}
34
35impl Default for TextLoader {
36 fn default() -> Self {
37 Self::new()
38 }
39}
40
41#[async_trait]
42impl DocumentLoader for TextLoader {
43 async fn load(&self, path: PathBuf) -> Layer3Result<Document> {
44 let content = tokio::fs::read_to_string(&path).await?;
45 Ok(Document::new(content).with_source(path.to_string_lossy().to_string()))
46 }
47
48 async fn load_and_split(&self, path: PathBuf) -> Layer3Result<Vec<Document>> {
49 let content = tokio::fs::read_to_string(&path).await?;
50 let paragraphs: Vec<&str> = content.split("\n\n").collect();
52 Ok(paragraphs
53 .into_iter()
54 .filter(|p| !p.trim().is_empty())
55 .enumerate()
56 .map(|(i, p)| {
57 Document::new(p.to_string()).with_source(format!(
58 "{}#{}",
59 path.to_string_lossy(),
60 i
61 ))
62 })
63 .collect())
64 }
65
66 fn supports(&self, path: &std::path::Path) -> bool {
67 path.extension()
68 .and_then(|e| e.to_str())
69 .map(|e| self.extensions.contains(&e))
70 .unwrap_or(false)
71 }
72
73 fn extensions(&self) -> &[&str] {
74 &self.extensions
75 }
76}
77
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly constructed loader advertises the `txt` and `md` extensions.
    #[test]
    fn test_text_loader_extensions() {
        let loader = TextLoader::new();
        let advertised = loader.extensions();

        for expected in ["txt", "md"] {
            assert!(advertised.contains(&expected));
        }
    }
}