use crate::error::Result;
use std::collections::HashMap;
use std::sync::Arc;
pub mod csv;
pub mod file;
pub mod github;
pub mod json;
pub mod pdf;
pub mod sql;
pub mod url;
pub use csv::CSVProvider;
pub use file::FileProvider;
pub use github::GitHubProvider;
pub use json::JSONProvider;
pub use pdf::PDFProvider;
pub use sql::SQLProvider;
pub use url::URLProvider;
/// Common interface implemented by every content source backend.
///
/// The `Send + Sync` supertraits let implementations be shared as
/// `Arc<dyn SourceProvider>`, which is how `ProviderRegistry` stores them.
/// `#[async_trait]` is what allows the `async fn` methods below to be used
/// through a trait object.
#[async_trait::async_trait]
pub trait SourceProvider: Send + Sync {
/// Returns the static identifier of this provider.
///
/// `ProviderRegistry::register` uses this string as the lookup key, so two
/// providers reporting the same value cannot be registered side by side.
fn provider_type(&self) -> &'static str;
/// Asynchronously produces the items described by `config`.
///
/// NOTE(review): how `base_path`, `pattern` and `options` are interpreted is
/// decided by each implementation and is not visible in this file.
///
/// # Errors
///
/// Returns the crate-level error type on failure; the concrete failure
/// modes depend on the implementation.
async fn list_items(&self, config: &ProviderConfig) -> Result<Vec<SourceItem>>;
/// Asynchronously produces the single item identified by `uri`.
///
/// NOTE(review): the accepted URI format is implementation-specific — confirm
/// against the individual providers.
///
/// # Errors
///
/// Returns the crate-level error type on failure; the concrete failure
/// modes depend on the implementation.
async fn fetch_item(&self, uri: &str) -> Result<SourceItem>;
}
/// Settings passed to `SourceProvider::list_items`.
///
/// Built with [`ProviderConfig::new`] and extended fluently through
/// [`ProviderConfig::with_option`].
#[derive(Debug, Clone)]
pub struct ProviderConfig {
    /// Starting location for the listing.
    /// NOTE(review): interpretation (directory, URL, ...) is provider-specific — confirm.
    pub base_path: String,
    /// Selection pattern applied by the provider.
    /// NOTE(review): the in-file tests use a glob such as `**/*.md`; the actual
    /// syntax is decided by each provider — confirm.
    pub pattern: String,
    /// Free-form string key/value options.
    pub options: HashMap<String, String>,
}

impl ProviderConfig {
    /// Creates a configuration with the given base path and pattern and an
    /// empty option map.
    pub fn new(base_path: String, pattern: String) -> Self {
        let options = HashMap::new();
        Self {
            base_path,
            pattern,
            options,
        }
    }

    /// Consumes the configuration and returns it with `key` set to `value`.
    ///
    /// A value already stored under `key` is replaced.
    pub fn with_option(mut self, key: String, value: String) -> Self {
        // The previously stored value, if any, is intentionally discarded.
        let _replaced = self.options.insert(key, value);
        self
    }

    /// Looks up the option stored under `key`, returning `None` when it is
    /// not set.
    pub fn get_option(&self, key: &str) -> Option<&String> {
        let Self { options, .. } = self;
        options.get(key)
    }
}
/// One unit of content returned by a source provider.
#[derive(Debug, Clone)]
pub struct SourceItem {
    /// Identifier of the item within its source.
    pub uri: String,
    /// Human-readable title.
    pub title: String,
    /// Textual content of the item.
    pub content: String,
    /// Hash string supplied by the creator of the item.
    /// NOTE(review): presumably a digest of `content`; the algorithm is not
    /// visible in this file — confirm.
    pub hash: String,
    /// Kind of source the item came from.
    /// NOTE(review): presumably mirrors `SourceProvider::provider_type` — confirm.
    pub source_type: String,
    /// Free-form string key/value metadata.
    pub metadata: HashMap<String, String>,
}

impl SourceItem {
    /// Creates an item from its required fields, starting with empty metadata.
    pub fn new(
        uri: String,
        title: String,
        content: String,
        hash: String,
        source_type: String,
    ) -> Self {
        let metadata = HashMap::new();
        Self {
            uri,
            title,
            content,
            hash,
            source_type,
            metadata,
        }
    }

    /// Consumes the item and returns it with metadata `key` set to `value`.
    ///
    /// A value already stored under `key` is replaced.
    pub fn with_metadata(self, key: String, value: String) -> Self {
        let mut item = self;
        item.metadata.insert(key, value);
        item
    }
}
/// Lookup table from provider type name to a shared provider instance.
pub struct ProviderRegistry {
    /// Registered providers, keyed by the string each one reports from
    /// `provider_type()`.
    providers: HashMap<String, Arc<dyn SourceProvider>>,
}

impl ProviderRegistry {
    /// Creates a registry with no providers registered.
    ///
    /// Note that the `Default` implementation in this file delegates to
    /// [`ProviderRegistry::with_defaults`] instead, so `default()` is not empty.
    pub fn new() -> Self {
        let providers = HashMap::new();
        Self { providers }
    }

    /// Creates a registry pre-populated with the CSV, file, GitHub, JSON,
    /// PDF, SQL and URL providers, registered in that order.
    pub fn with_defaults() -> Self {
        let mut registry = Self::new();
        // Each provider is constructed and registered before the next one is
        // built, one at a time.
        let mut add = |provider: Arc<dyn SourceProvider>| registry.register(provider);
        add(Arc::new(CSVProvider::new()));
        add(Arc::new(FileProvider::new()));
        add(Arc::new(GitHubProvider::new()));
        add(Arc::new(JSONProvider::new()));
        add(Arc::new(PDFProvider::new()));
        add(Arc::new(SQLProvider::new()));
        add(Arc::new(URLProvider::new()));
        registry
    }

    /// Registers `provider` under the name it reports from `provider_type()`.
    ///
    /// A provider already registered under the same name is replaced.
    pub fn register(&mut self, provider: Arc<dyn SourceProvider>) {
        let key = provider.provider_type().to_string();
        self.providers.insert(key, provider);
    }

    /// Returns a new shared handle to the provider registered under
    /// `provider_type`, or `None` when nothing is registered under that name.
    pub fn get(&self, provider_type: &str) -> Option<Arc<dyn SourceProvider>> {
        self.providers.get(provider_type).map(Arc::clone)
    }
}
impl Default for ProviderRegistry {
fn default() -> Self {
Self::with_defaults()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A flat JSON object of string values decodes into a string map whose
    /// entries can be replayed onto a `ProviderConfig` and read back.
    #[test]
    fn test_provider_config_json_parsing() {
        let raw =
            r#"{"exclude_hidden":"false","follow_symlinks":"true","custom_key":"custom_value"}"#;
        let parsed: std::collections::HashMap<String, String> =
            serde_json::from_str(raw).unwrap();

        let expected = [
            ("exclude_hidden", "false"),
            ("follow_symlinks", "true"),
            ("custom_key", "custom_value"),
        ];
        for &(key, value) in expected.iter() {
            assert_eq!(parsed.get(key).map(String::as_str), Some(value));
        }

        // Thread the config through every parsed entry via the builder method.
        let config = parsed.into_iter().fold(
            ProviderConfig::new("/tmp".to_string(), "**/*.md".to_string()),
            |cfg, (key, value)| cfg.with_option(key, value),
        );
        for &(key, value) in expected.iter() {
            assert_eq!(config.get_option(key).map(String::as_str), Some(value));
        }
    }

    /// An empty JSON object decodes into an empty map.
    #[test]
    fn test_provider_config_json_empty() {
        let parsed: std::collections::HashMap<String, String> =
            serde_json::from_str(r#"{}"#).unwrap();
        assert!(parsed.is_empty());
    }

    /// Malformed JSON is reported as an error rather than panicking.
    #[test]
    fn test_provider_config_json_invalid() {
        let outcome = serde_json::from_str::<std::collections::HashMap<String, String>>(
            r#"{"key": invalid}"#,
        );
        assert!(outcome.is_err());
    }

    /// Values must be plain strings; a nested object is rejected.
    #[test]
    fn test_provider_config_json_nested_not_supported() {
        let outcome = serde_json::from_str::<std::collections::HashMap<String, String>>(
            r#"{"key": {"nested": "value"}}"#,
        );
        assert!(
            outcome.is_err(),
            "Nested JSON should not parse into HashMap<String, String>"
        );
    }

    /// Spaces, glob braces and token-like values survive decoding unchanged.
    #[test]
    fn test_provider_config_special_characters() {
        let raw =
            r#"{"path":"/tmp/test with spaces","pattern":"**/*.{md,txt}","token":"ghp_abc123"}"#;
        let parsed: std::collections::HashMap<String, String> =
            serde_json::from_str(raw).unwrap();

        let expected = [
            ("path", "/tmp/test with spaces"),
            ("pattern", "**/*.{md,txt}"),
            ("token", "ghp_abc123"),
        ];
        for &(key, value) in expected.iter() {
            assert_eq!(parsed.get(key).map(String::as_str), Some(value));
        }
    }
}