Skip to main content

sh_layer3/document_loaders/
text.rs

1//! # Text Document Loader
2//!
3//! 纯文本文件加载器。
4
5use crate::document_loaders::{DocumentLoader, LoadOptions};
6use crate::retriever_engine::Document;
7use crate::types::Layer3Result;
8use async_trait::async_trait;
9use std::path::PathBuf;
10
11/// Text Loader 实现
12#[allow(dead_code)]
13pub struct TextLoader {
14    extensions: Vec<&'static str>,
15    #[allow(dead_code)]
16    options: LoadOptions,
17}
18
19impl TextLoader {
20    pub fn new() -> Self {
21        Self {
22            extensions: vec!["txt", "text", "log", "md"],
23            options: LoadOptions::default(),
24        }
25    }
26
27    pub fn with_options(options: LoadOptions) -> Self {
28        Self {
29            extensions: vec!["txt", "text", "log", "md"],
30            options,
31        }
32    }
33}
34
35impl Default for TextLoader {
36    fn default() -> Self {
37        Self::new()
38    }
39}
40
41#[async_trait]
42impl DocumentLoader for TextLoader {
43    async fn load(&self, path: PathBuf) -> Layer3Result<Document> {
44        let content = tokio::fs::read_to_string(&path).await?;
45        Ok(Document::new(content).with_source(path.to_string_lossy().to_string()))
46    }
47
48    async fn load_and_split(&self, path: PathBuf) -> Layer3Result<Vec<Document>> {
49        let content = tokio::fs::read_to_string(&path).await?;
50        // 按段落分割
51        let paragraphs: Vec<&str> = content.split("\n\n").collect();
52        Ok(paragraphs
53            .into_iter()
54            .filter(|p| !p.trim().is_empty())
55            .enumerate()
56            .map(|(i, p)| {
57                Document::new(p.to_string()).with_source(format!(
58                    "{}#{}",
59                    path.to_string_lossy(),
60                    i
61                ))
62            })
63            .collect())
64    }
65
66    fn supports(&self, path: &std::path::Path) -> bool {
67        path.extension()
68            .and_then(|e| e.to_str())
69            .map(|e| self.extensions.contains(&e))
70            .unwrap_or(false)
71    }
72
73    fn extensions(&self) -> &[&str] {
74        &self.extensions
75    }
76}
77
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly built loader must advertise the `txt` and `md` extensions.
    #[test]
    fn test_text_loader_extensions() {
        let loader = TextLoader::new();
        let advertised = loader.extensions();
        for expected in ["txt", "md"] {
            assert!(advertised.contains(&expected));
        }
    }
}