agentroot_core/providers/
file.rs

1//! File system provider
2//!
3//! Provides content from local file system using glob patterns.
4
5use super::{ProviderConfig, SourceItem, SourceProvider};
6use crate::db::hash_content;
7use crate::error::Result;
8use crate::index::extract_title;
9use glob::Pattern;
10use std::path::Path;
11use walkdir::{DirEntry, WalkDir};
12
/// Directory names that are pruned while scanning.
///
/// `list_items` hands this list to `should_skip`, which compares each entry
/// against a directory's own file name (not its full path). Only membership
/// matters, so the entries are kept in alphabetical order.
const EXCLUDE_DIRS: &[&str] = &[
    ".cache",
    ".git",
    ".venv",
    "__pycache__",
    "build",
    "dist",
    "node_modules",
    "target",
    "vendor",
];
25
/// File system provider.
///
/// A stateless unit type: all per-call settings (base path, glob pattern,
/// options) arrive through the `ProviderConfig` passed to each trait method.
/// `Debug` and `Clone` are derived so the provider can be logged and
/// duplicated like any other public value type.
#[derive(Debug, Clone)]
pub struct FileProvider;
28
29impl FileProvider {
30    /// Create new file provider
31    pub fn new() -> Self {
32        Self
33    }
34}
35
36impl Default for FileProvider {
37    fn default() -> Self {
38        Self::new()
39    }
40}
41
42#[async_trait::async_trait]
43impl SourceProvider for FileProvider {
44    fn provider_type(&self) -> &'static str {
45        "file"
46    }
47
48    async fn list_items(&self, config: &ProviderConfig) -> Result<Vec<SourceItem>> {
49        let root = Path::new(&config.base_path);
50        let pattern = Pattern::new(&config.pattern)?;
51
52        let exclude_hidden = config
53            .get_option("exclude_hidden")
54            .and_then(|v| v.parse::<bool>().ok())
55            .unwrap_or(true);
56
57        let follow_symlinks = config
58            .get_option("follow_symlinks")
59            .and_then(|v| v.parse::<bool>().ok())
60            .unwrap_or(true);
61
62        let exclude_dirs: Vec<String> = EXCLUDE_DIRS.iter().map(|s| s.to_string()).collect();
63
64        let mut items = Vec::new();
65
66        let walker = WalkDir::new(root)
67            .follow_links(follow_symlinks)
68            .into_iter()
69            .filter_entry(|e| !should_skip(e, &exclude_dirs, exclude_hidden));
70
71        for entry in walker {
72            let entry = entry?;
73            if !entry.file_type().is_file() {
74                continue;
75            }
76
77            let path = entry.path();
78            let relative = path
79                .strip_prefix(root)
80                .map(|p| p.to_string_lossy().to_string())
81                .unwrap_or_else(|_| path.to_string_lossy().to_string());
82
83            if pattern.matches(&relative) {
84                let content = std::fs::read_to_string(path)?;
85                let title = extract_title(&content, &relative);
86                let hash = hash_content(&content);
87
88                items.push(
89                    SourceItem::new(relative, title, content, hash, "file".to_string())
90                        .with_metadata("absolute_path".to_string(), path.display().to_string()),
91                );
92            }
93        }
94
95        Ok(items)
96    }
97
98    async fn fetch_item(&self, uri: &str) -> Result<SourceItem> {
99        let path = Path::new(uri);
100        let content = std::fs::read_to_string(path)?;
101        let title = extract_title(&content, uri);
102        let hash = hash_content(&content);
103
104        Ok(
105            SourceItem::new(uri.to_string(), title, content, hash, "file".to_string())
106                .with_metadata("absolute_path".to_string(), path.display().to_string()),
107        )
108    }
109}
110
111fn should_skip(entry: &DirEntry, exclude_dirs: &[String], exclude_hidden: bool) -> bool {
112    let name = entry.file_name().to_string_lossy();
113
114    if exclude_hidden && name.starts_with('.') {
115        return true;
116    }
117
118    if entry.file_type().is_dir() && exclude_dirs.iter().any(|d| name == *d) {
119        return true;
120    }
121
122    false
123}
124
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Build a `**/*.md` config rooted at `dir` with `exclude_hidden` set to
    /// `"false"`.
    // NOTE(review): presumably hidden-exclusion is disabled because the
    // temporary directory itself may have a dot-prefixed name - confirm.
    fn markdown_config(dir: &Path) -> ProviderConfig {
        ProviderConfig::new(dir.to_string_lossy().to_string(), "**/*.md".to_string())
            .with_option("exclude_hidden".to_string(), "false".to_string())
    }

    /// The provider reports the `"file"` type tag.
    #[test]
    fn test_file_provider_type() {
        assert_eq!(FileProvider::new().provider_type(), "file");
    }

    /// Only files matching the glob are listed, keyed by relative path.
    #[tokio::test]
    async fn test_file_provider_list_items() {
        let scratch = TempDir::new().unwrap();
        let root = scratch.path();

        for (name, body) in [
            ("test1.md", "# Test 1"),
            ("test2.md", "# Test 2"),
            ("ignore.txt", "ignore"),
        ] {
            fs::write(root.join(name), body).unwrap();
        }

        let settings = markdown_config(root);
        let source = FileProvider::new();
        let found = source.list_items(&settings).await.unwrap();

        assert_eq!(found.len(), 2);
        for expected in ["test1.md", "test2.md"] {
            assert!(found.iter().any(|entry| entry.uri == expected));
        }
    }

    /// Fetching a single path returns its content, derived title and type.
    #[tokio::test]
    async fn test_file_provider_fetch_item() {
        let scratch = TempDir::new().unwrap();
        let target = scratch.path().join("test.md");

        fs::write(&target, "# Test Content").unwrap();

        let source = FileProvider::new();
        let fetched = source.fetch_item(target.to_str().unwrap()).await.unwrap();

        assert_eq!(fetched.content, "# Test Content");
        assert_eq!(fetched.title, "Test Content");
        assert_eq!(fetched.source_type, "file");
    }

    /// Listed items can be stored in the database and are counted on the
    /// owning collection.
    #[tokio::test]
    async fn test_file_provider_database_integration() {
        use crate::{db::hash_content, Database};
        use chrono::Utc;

        let scratch = TempDir::new().unwrap();
        let root = scratch.path();

        for (name, body) in [
            ("doc1.md", "# Document 1\nContent for doc 1"),
            ("doc2.md", "# Document 2\nContent for doc 2"),
        ] {
            fs::write(root.join(name), body).unwrap();
        }

        let store = Database::open_in_memory().unwrap();
        store.initialize().unwrap();

        store
            .add_collection("test", &root.to_string_lossy(), "**/*.md", "file", None)
            .unwrap();

        let settings = markdown_config(root);
        let source = FileProvider::new();
        let found = source.list_items(&settings).await.unwrap();

        assert_eq!(found.len(), 2, "Should find 2 .md files");

        for doc in &found {
            let digest = hash_content(&doc.content);
            store.insert_content(&digest, &doc.content).unwrap();

            let stamp = Utc::now().to_rfc3339();
            store
                .insert_document(
                    "test",
                    &doc.uri,
                    &doc.title,
                    &digest,
                    &stamp,
                    &stamp,
                    &doc.source_type,
                    Some(&doc.uri),
                )
                .unwrap();
        }

        let collections = store.list_collections().unwrap();
        assert_eq!(collections.len(), 1);

        let only = &collections[0];
        assert_eq!(only.name, "test");
        assert_eq!(only.provider_type, "file");
        assert_eq!(only.document_count, 2, "document_count should be 2");
        assert_eq!(only.provider_config, None);
    }
}
224}