Skip to main content

synaptic_loaders/
directory_loader.rs

1use std::collections::HashMap;
2use std::path::{Path, PathBuf};
3
4use crate::Document;
5use async_trait::async_trait;
6use serde_json::Value;
7use synaptic_core::SynapticError;
8
9use crate::Loader;
10
/// Loads documents from files in a directory.
///
/// By default, only reads files in the top-level directory.
/// Use `with_recursive(true)` to include subdirectories.
/// Use `with_glob(pattern)` to restrict which files are loaded by name
/// (e.g., `"*.txt"`).
///
/// Each loaded file becomes one `Document`: its id is the file path relative
/// to the root directory, its content is the file text, and its metadata
/// carries the full path under the `"source"` key.
#[derive(Debug, Clone)]
pub struct DirectoryLoader {
    /// Root directory that is scanned for files.
    path: PathBuf,
    /// Optional file-name filter set via `with_glob`; `None` keeps every file.
    glob_pattern: Option<String>,
    /// Whether subdirectories are scanned as well.
    recursive: bool,
}
21
22impl DirectoryLoader {
23    pub fn new(path: impl Into<PathBuf>) -> Self {
24        Self {
25            path: path.into(),
26            glob_pattern: None,
27            recursive: false,
28        }
29    }
30
31    pub fn with_glob(mut self, pattern: impl Into<String>) -> Self {
32        self.glob_pattern = Some(pattern.into());
33        self
34    }
35
36    pub fn with_recursive(mut self, recursive: bool) -> Self {
37        self.recursive = recursive;
38        self
39    }
40
41    fn collect_files(&self, dir: &Path) -> Result<Vec<PathBuf>, SynapticError> {
42        let mut files = Vec::new();
43        let entries = std::fs::read_dir(dir).map_err(|e| {
44            SynapticError::Loader(format!("cannot read directory {}: {e}", dir.display()))
45        })?;
46
47        for entry in entries {
48            let entry =
49                entry.map_err(|e| SynapticError::Loader(format!("directory entry error: {e}")))?;
50            let path = entry.path();
51
52            if path.is_dir() && self.recursive {
53                files.extend(self.collect_files(&path)?);
54            } else if path.is_file() {
55                if let Some(pattern) = &self.glob_pattern {
56                    if let Some(ext_pattern) = pattern.strip_prefix("*.") {
57                        if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
58                            if ext == ext_pattern {
59                                files.push(path);
60                            }
61                        }
62                    } else {
63                        files.push(path);
64                    }
65                } else {
66                    files.push(path);
67                }
68            }
69        }
70
71        files.sort();
72        Ok(files)
73    }
74}
75
76#[async_trait]
77impl Loader for DirectoryLoader {
78    async fn load(&self) -> Result<Vec<Document>, SynapticError> {
79        let files = self.collect_files(&self.path)?;
80        let mut docs = Vec::new();
81
82        for file_path in files {
83            let content = std::fs::read_to_string(&file_path).map_err(|e| {
84                SynapticError::Loader(format!("cannot read {}: {e}", file_path.display()))
85            })?;
86
87            let id = file_path
88                .strip_prefix(&self.path)
89                .unwrap_or(&file_path)
90                .to_string_lossy()
91                .to_string();
92
93            let mut metadata = HashMap::new();
94            metadata.insert(
95                "source".to_string(),
96                Value::String(file_path.to_string_lossy().to_string()),
97            );
98
99            docs.push(Document::with_metadata(id, content, metadata));
100        }
101
102        Ok(docs)
103    }
104}