// agentroot_core/providers/mod.rs

//! Source provider abstraction
//!
//! Provides a unified interface for indexing content from different sources:
//! - File system (local files)
//! - GitHub (repositories, files, gists)
//! - URLs (web pages, PDFs)
//! - Databases (SQL, NoSQL)
//! - Calendar/Notes/Books
//!
//! Each provider implements the [`SourceProvider`] trait to enable seamless
//! integration with agentroot's indexing and search capabilities.

use crate::error::Result;
use std::collections::HashMap;
use std::sync::Arc;

pub mod csv;
pub mod file;
pub mod github;
pub mod json;
pub mod pdf;
pub mod sql;
pub mod url;

pub use csv::CSVProvider;
pub use file::FileProvider;
pub use github::GitHubProvider;
pub use json::JSONProvider;
pub use pdf::PDFProvider;
pub use sql::SQLProvider;
pub use url::URLProvider;

33/// Source provider trait - all content sources must implement this
34#[async_trait::async_trait]
35pub trait SourceProvider: Send + Sync {
36    /// Provider type identifier (e.g., "file", "github", "url")
37    fn provider_type(&self) -> &'static str;
38
39    /// List all items from source (for scanning/indexing)
40    async fn list_items(&self, config: &ProviderConfig) -> Result<Vec<SourceItem>>;
41
42    /// Fetch single item by URI
43    async fn fetch_item(&self, uri: &str) -> Result<SourceItem>;
44}
45
/// Configuration for a provider instance
///
/// Built with [`ProviderConfig::new`] and extended with chained
/// [`ProviderConfig::with_option`] calls.
// NOTE(review): `options` is documented as possibly holding auth tokens, and the
// derived `Debug` prints every option value verbatim — avoid logging a config
// with `{:?}` unless that is acceptable.
#[derive(Debug, Clone)]
pub struct ProviderConfig {
    /// Base path/URL for the provider
    pub base_path: String,

    /// Pattern to match items (glob for files, filter for others)
    pub pattern: String,

    /// Provider-specific options (auth tokens, filters, etc.)
    pub options: HashMap<String, String>,
}

impl ProviderConfig {
    /// Create new provider config with the given base path and pattern and
    /// an empty option map.
    pub fn new(base_path: String, pattern: String) -> Self {
        Self {
            base_path,
            pattern,
            options: HashMap::new(),
        }
    }

    /// Add option to config
    ///
    /// Consumes the config and returns it with `key` set to `value`. If `key`
    /// was already present its previous value is replaced.
    #[must_use = "with_option consumes the config and returns the updated one"]
    pub fn with_option(mut self, key: String, value: String) -> Self {
        self.options.insert(key, value);
        self
    }

    /// Get option value
    ///
    /// Returns `None` when no option named `key` has been set.
    pub fn get_option(&self, key: &str) -> Option<&String> {
        self.options.get(key)
    }
}
/// Item from a source provider
#[derive(Debug, Clone)]
pub struct SourceItem {
    /// Unique identifier within collection (path for files, URL for GitHub)
    pub uri: String,

    /// Display title for the item
    pub title: String,

    /// Full content of the item
    pub content: String,

    /// Content hash (SHA-256)
    ///
    /// Supplied by whoever constructs the item; [`SourceItem::new`] stores it
    /// as given and does not compute or verify it.
    pub hash: String,

    /// Provider type that created this item
    pub source_type: String,

    /// Provider-specific metadata (commit SHA, author, URL, etc.)
    pub metadata: HashMap<String, String>,
}

impl SourceItem {
    /// Create new source item with the given fields and empty metadata.
    ///
    /// All arguments are stored unchanged; in particular `hash` is not
    /// derived from `content`.
    pub fn new(
        uri: String,
        title: String,
        content: String,
        hash: String,
        source_type: String,
    ) -> Self {
        Self {
            uri,
            title,
            content,
            hash,
            source_type,
            metadata: HashMap::new(),
        }
    }

    /// Add metadata to item
    ///
    /// Consumes the item and returns it with `key` set to `value`. If `key`
    /// was already present its previous value is replaced.
    #[must_use = "with_metadata consumes the item and returns the updated one"]
    pub fn with_metadata(mut self, key: String, value: String) -> Self {
        self.metadata.insert(key, value);
        self
    }
}
129/// Registry for managing provider instances
130pub struct ProviderRegistry {
131    providers: HashMap<String, Arc<dyn SourceProvider>>,
132}
133
134impl ProviderRegistry {
135    /// Create new empty registry
136    pub fn new() -> Self {
137        Self {
138            providers: HashMap::new(),
139        }
140    }
141
142    /// Create registry with default providers
143    pub fn with_defaults() -> Self {
144        let mut registry = Self::new();
145        registry.register(Arc::new(CSVProvider::new()));
146        registry.register(Arc::new(FileProvider::new()));
147        registry.register(Arc::new(GitHubProvider::new()));
148        registry.register(Arc::new(JSONProvider::new()));
149        registry.register(Arc::new(PDFProvider::new()));
150        registry.register(Arc::new(SQLProvider::new()));
151        registry.register(Arc::new(URLProvider::new()));
152        registry
153    }
154
155    /// Register a provider
156    pub fn register(&mut self, provider: Arc<dyn SourceProvider>) {
157        self.providers
158            .insert(provider.provider_type().to_string(), provider);
159    }
160
161    /// Get provider by type
162    pub fn get(&self, provider_type: &str) -> Option<Arc<dyn SourceProvider>> {
163        self.providers.get(provider_type).cloned()
164    }
165}
166
167impl Default for ProviderRegistry {
168    fn default() -> Self {
169        Self::with_defaults()
170    }
171}
172
#[cfg(test)]
mod tests {
    use super::*;

    /// Deserialize a JSON object into the flat string-to-string map shape
    /// that `ProviderConfig::options` uses.
    fn parse_options(
        json: &str,
    ) -> serde_json::Result<std::collections::HashMap<String, String>> {
        serde_json::from_str(json)
    }

    #[test]
    fn test_provider_config_json_parsing() {
        let json_config =
            r#"{"exclude_hidden":"false","follow_symlinks":"true","custom_key":"custom_value"}"#;
        let expected = [
            ("exclude_hidden", "false"),
            ("follow_symlinks", "true"),
            ("custom_key", "custom_value"),
        ];

        let config_map = parse_options(json_config).unwrap();
        for &(key, value) in &expected {
            assert_eq!(
                config_map.get(key).map(String::as_str),
                Some(value),
                "parsed map entry {}",
                key
            );
        }

        // Feed every parsed entry through the builder, then read it back.
        let config = config_map.into_iter().fold(
            ProviderConfig::new("/tmp".to_string(), "**/*.md".to_string()),
            |config, (key, value)| config.with_option(key, value),
        );
        for &(key, value) in &expected {
            assert_eq!(
                config.get_option(key).map(String::as_str),
                Some(value),
                "config option {}",
                key
            );
        }
    }

    #[test]
    fn test_provider_config_json_empty() {
        let config_map = parse_options(r#"{}"#).unwrap();
        assert!(config_map.is_empty());
    }

    #[test]
    fn test_provider_config_json_invalid() {
        let result = parse_options(r#"{"key": invalid}"#);
        assert!(result.is_err());
    }

    #[test]
    fn test_provider_config_json_nested_not_supported() {
        let result = parse_options(r#"{"key": {"nested": "value"}}"#);
        assert!(
            result.is_err(),
            "Nested JSON should not parse into HashMap<String, String>"
        );
    }

    #[test]
    fn test_provider_config_special_characters() {
        let json_config =
            r#"{"path":"/tmp/test with spaces","pattern":"**/*.{md,txt}","token":"ghp_abc123"}"#;

        let config_map = parse_options(json_config).unwrap();

        assert_eq!(
            config_map.get("path").map(String::as_str),
            Some("/tmp/test with spaces")
        );
        assert_eq!(
            config_map.get("pattern").map(String::as_str),
            Some("**/*.{md,txt}")
        );
        assert_eq!(
            config_map.get("token").map(String::as_str),
            Some("ghp_abc123")
        );
    }
}