agentroot_core/providers/
json.rs

1//! JSON Provider for indexing JSON files with semantic object/array splitting
2
3use crate::db::hash_content;
4use crate::error::{AgentRootError, Result};
5use crate::providers::{ProviderConfig, SourceItem, SourceProvider};
6use async_trait::async_trait;
7use serde_json::Value;
8use std::collections::HashMap;
9use std::fs;
10use std::path::{Path, PathBuf};
11use walkdir::WalkDir;
12
/// Source provider that indexes JSON files.
///
/// The type is a stateless unit struct: which files to read and how each one
/// is split into items is driven by the `ProviderConfig` handed to
/// `list_items`.
pub struct JSONProvider;
15
16impl Default for JSONProvider {
17    fn default() -> Self {
18        Self::new()
19    }
20}
21
22impl JSONProvider {
23    /// Create a new JSONProvider
24    pub fn new() -> Self {
25        Self
26    }
27
28    /// Parse JSON file and return objects/arrays as items
29    fn parse_json_file(&self, path: &Path, config: &ProviderConfig) -> Result<Vec<SourceItem>> {
30        let file_content = fs::read_to_string(path).map_err(|e| {
31            AgentRootError::Io(std::io::Error::new(
32                e.kind(),
33                format!("Failed to read JSON file {:?}: {}", path, e),
34            ))
35        })?;
36
37        let json_value: Value = serde_json::from_str(&file_content).map_err(|e| {
38            AgentRootError::Parse(format!("Failed to parse JSON file {:?}: {}", path, e))
39        })?;
40
41        let filename = path
42            .file_name()
43            .and_then(|s| s.to_str())
44            .unwrap_or("unknown.json");
45
46        let index_mode = config
47            .options
48            .get("index_mode")
49            .map(|s| s.as_str())
50            .unwrap_or("array");
51
52        match index_mode {
53            "array" => self.index_as_array(&json_value, filename, path),
54            "object" => self.index_as_object(&json_value, filename, path),
55            "full" => Ok(vec![self.index_full_document(&json_value, filename, path)]),
56            _ => Err(AgentRootError::Parse(format!(
57                "Invalid index_mode: {}. Expected: array, object, or full",
58                index_mode
59            ))),
60        }
61    }
62
63    /// Index JSON as array (each top-level array item becomes a document)
64    fn index_as_array(
65        &self,
66        json_value: &Value,
67        filename: &str,
68        path: &Path,
69    ) -> Result<Vec<SourceItem>> {
70        match json_value {
71            Value::Array(arr) => {
72                let mut items = Vec::new();
73                for (idx, item) in arr.iter().enumerate() {
74                    let content = serde_json::to_string_pretty(item)?;
75                    let title = self.extract_title(item, filename, idx);
76                    let uri = format!("json://{}/item_{}", path.display(), idx);
77                    let hash = hash_content(&content);
78
79                    let mut metadata = HashMap::new();
80                    metadata.insert("file".to_string(), filename.to_string());
81                    metadata.insert("index".to_string(), idx.to_string());
82                    metadata.insert(
83                        "item_type".to_string(),
84                        self.json_type_name(item).to_string(),
85                    );
86
87                    if let Value::Object(obj) = item {
88                        for (key, value) in obj {
89                            if let Some(str_val) = value.as_str() {
90                                metadata.insert(key.clone(), str_val.to_string());
91                            }
92                        }
93                    }
94
95                    items.push(SourceItem {
96                        uri,
97                        title,
98                        content,
99                        hash,
100                        source_type: "json".to_string(),
101                        metadata,
102                    });
103                }
104                Ok(items)
105            }
106            _ => Err(AgentRootError::Parse(format!(
107                "JSON file {:?} is not an array. Use index_mode=object or index_mode=full",
108                path
109            ))),
110        }
111    }
112
113    /// Index JSON as object (each top-level key becomes a document)
114    fn index_as_object(
115        &self,
116        json_value: &Value,
117        filename: &str,
118        path: &Path,
119    ) -> Result<Vec<SourceItem>> {
120        match json_value {
121            Value::Object(obj) => {
122                let mut items = Vec::new();
123                for (idx, (key, value)) in obj.iter().enumerate() {
124                    let content = serde_json::to_string_pretty(value)?;
125                    let title = format!("{} - {}", filename, key);
126                    let uri = format!("json://{}/key_{}", path.display(), key);
127                    let hash = hash_content(&content);
128
129                    let mut metadata = HashMap::new();
130                    metadata.insert("file".to_string(), filename.to_string());
131                    metadata.insert("key".to_string(), key.clone());
132                    metadata.insert("index".to_string(), idx.to_string());
133                    metadata.insert(
134                        "value_type".to_string(),
135                        self.json_type_name(value).to_string(),
136                    );
137
138                    items.push(SourceItem {
139                        uri,
140                        title,
141                        content,
142                        hash,
143                        source_type: "json".to_string(),
144                        metadata,
145                    });
146                }
147                Ok(items)
148            }
149            _ => Err(AgentRootError::Parse(format!(
150                "JSON file {:?} is not an object. Use index_mode=array or index_mode=full",
151                path
152            ))),
153        }
154    }
155
156    /// Index full JSON document as single item
157    fn index_full_document(&self, json_value: &Value, filename: &str, path: &Path) -> SourceItem {
158        let content = serde_json::to_string_pretty(json_value).unwrap_or_default();
159        let title = filename.to_string();
160        let uri = format!("json://{}", path.display());
161        let hash = hash_content(&content);
162
163        let mut metadata = HashMap::new();
164        metadata.insert("file".to_string(), filename.to_string());
165        metadata.insert(
166            "type".to_string(),
167            self.json_type_name(json_value).to_string(),
168        );
169
170        SourceItem {
171            uri,
172            title,
173            content,
174            hash,
175            source_type: "json".to_string(),
176            metadata,
177        }
178    }
179
180    /// Extract meaningful title from JSON value
181    fn extract_title(&self, value: &Value, filename: &str, idx: usize) -> String {
182        if let Value::Object(obj) = value {
183            if let Some(title) = obj.get("title").and_then(|v| v.as_str()) {
184                return title.to_string();
185            }
186            if let Some(name) = obj.get("name").and_then(|v| v.as_str()) {
187                return name.to_string();
188            }
189            if let Some(id) = obj.get("id") {
190                return format!("{} - ID {}", filename, id);
191            }
192        }
193
194        format!("{} - Item {}", filename, idx)
195    }
196
197    /// Get JSON value type name
198    fn json_type_name(&self, value: &Value) -> &'static str {
199        match value {
200            Value::Null => "null",
201            Value::Bool(_) => "boolean",
202            Value::Number(_) => "number",
203            Value::String(_) => "string",
204            Value::Array(_) => "array",
205            Value::Object(_) => "object",
206        }
207    }
208
209    /// Scan directory for JSON files matching pattern
210    fn scan_directory(&self, base_path: &Path, pattern: &str) -> Result<Vec<PathBuf>> {
211        let glob_pattern = glob::Pattern::new(pattern)?;
212        let mut json_files = Vec::new();
213
214        for entry in WalkDir::new(base_path)
215            .follow_links(true)
216            .into_iter()
217            .filter_entry(|e| {
218                let name = e.file_name().to_string_lossy();
219                !name.starts_with('.')
220                    && !matches!(
221                        name.as_ref(),
222                        "node_modules" | ".git" | ".cache" | "target" | "dist" | "build"
223                    )
224            })
225        {
226            let entry = entry?;
227            if !entry.file_type().is_file() {
228                continue;
229            }
230
231            let path = entry.path();
232            if let Some(ext) = path.extension() {
233                if ext.eq_ignore_ascii_case("json") {
234                    if let Ok(relative) = path.strip_prefix(base_path) {
235                        let relative_str = relative.to_string_lossy();
236                        if glob_pattern.matches(&relative_str) {
237                            json_files.push(path.to_path_buf());
238                        }
239                    }
240                }
241            }
242        }
243
244        Ok(json_files)
245    }
246}
247
248#[async_trait]
249impl SourceProvider for JSONProvider {
    /// Identifier of this provider; always the string `"json"`, the same
    /// value this file writes into `SourceItem::source_type`.
    fn provider_type(&self) -> &'static str {
        "json"
    }
253
254    async fn list_items(&self, config: &ProviderConfig) -> Result<Vec<SourceItem>> {
255        let base_path = Path::new(&config.base_path);
256
257        if base_path.is_file() {
258            if base_path
259                .extension()
260                .map(|e| e.eq_ignore_ascii_case("json"))
261                .unwrap_or(false)
262            {
263                return self.parse_json_file(base_path, config);
264            } else {
265                return Err(AgentRootError::Parse(format!(
266                    "File {:?} is not a JSON file",
267                    base_path
268                )));
269            }
270        }
271
272        if !base_path.exists() {
273            return Err(AgentRootError::Io(std::io::Error::new(
274                std::io::ErrorKind::NotFound,
275                format!("Path not found: {:?}", base_path),
276            )));
277        }
278
279        let json_files = self.scan_directory(base_path, &config.pattern)?;
280        let mut all_items = Vec::new();
281
282        for json_file in json_files {
283            match self.parse_json_file(&json_file, config) {
284                Ok(items) => all_items.extend(items),
285                Err(e) => {
286                    tracing::warn!("Failed to parse JSON file {:?}: {}", json_file, e);
287                }
288            }
289        }
290
291        Ok(all_items)
292    }
293
294    async fn fetch_item(&self, uri: &str) -> Result<SourceItem> {
295        if !uri.starts_with("json://") {
296            return Err(AgentRootError::Parse(format!(
297                "Invalid JSON URI: {}. Expected format: json://path/to/file.json/item_N or json://path/to/file.json/key_X",
298                uri
299            )));
300        }
301
302        let uri_path = &uri[7..];
303
304        if !uri_path.contains("/item_") && !uri_path.contains("/key_") {
305            let file_path = Path::new(uri_path);
306            let config =
307                ProviderConfig::new(file_path.to_string_lossy().to_string(), "**/*".to_string());
308            let items = self.parse_json_file(file_path, &config)?;
309            return items.into_iter().next().ok_or_else(|| {
310                AgentRootError::Parse(format!("No items found in JSON file {:?}", file_path))
311            });
312        }
313
314        let parts: Vec<&str> = uri_path.rsplitn(2, '/').collect();
315        if parts.len() != 2 {
316            return Err(AgentRootError::Parse(format!(
317                "Invalid JSON URI format: {}",
318                uri
319            )));
320        }
321
322        let file_path = Path::new(parts[1]);
323        let config =
324            ProviderConfig::new(file_path.to_string_lossy().to_string(), "**/*".to_string());
325
326        let all_items = self.parse_json_file(file_path, &config)?;
327
328        all_items
329            .into_iter()
330            .find(|item| item.uri == uri)
331            .ok_or_else(|| {
332                AgentRootError::Parse(format!("Item not found in JSON file {:?}", file_path))
333            })
334    }
335}
336
337#[cfg(test)]
338mod tests {
339    use super::*;
340
341    #[test]
342    fn test_provider_type() {
343        let provider = JSONProvider::new();
344        assert_eq!(provider.provider_type(), "json");
345    }
346
347    #[tokio::test]
348    async fn test_parse_json_array() {
349        let provider = JSONProvider::new();
350        let json_content = r#"[
351            {"name": "Alice", "age": 30},
352            {"name": "Bob", "age": 25}
353        ]"#;
354
355        let temp_dir = tempfile::tempdir().unwrap();
356        let json_path = temp_dir.path().join("test.json");
357        fs::write(&json_path, json_content).unwrap();
358
359        let config = ProviderConfig::new(
360            json_path.to_string_lossy().to_string(),
361            "**/*.json".to_string(),
362        );
363        let items = provider.parse_json_file(&json_path, &config).unwrap();
364
365        assert_eq!(items.len(), 2);
366        assert!(items[0].content.contains("Alice"));
367        assert_eq!(items[0].metadata.get("name").unwrap(), "Alice");
368    }
369
370    #[tokio::test]
371    async fn test_parse_json_object() {
372        let provider = JSONProvider::new();
373        let json_content = r#"{
374            "users": {"count": 100},
375            "posts": {"count": 500}
376        }"#;
377
378        let temp_dir = tempfile::tempdir().unwrap();
379        let json_path = temp_dir.path().join("test.json");
380        fs::write(&json_path, json_content).unwrap();
381
382        let mut config = ProviderConfig::new(
383            json_path.to_string_lossy().to_string(),
384            "**/*.json".to_string(),
385        );
386        config
387            .options
388            .insert("index_mode".to_string(), "object".to_string());
389
390        let items = provider.parse_json_file(&json_path, &config).unwrap();
391
392        assert_eq!(items.len(), 2);
393        assert!(
394            items[0].metadata.get("key").unwrap() == "users"
395                || items[0].metadata.get("key").unwrap() == "posts"
396        );
397    }
398
399    #[tokio::test]
400    async fn test_parse_json_full() {
401        let provider = JSONProvider::new();
402        let json_content = r#"{"name": "Alice", "age": 30}"#;
403
404        let temp_dir = tempfile::tempdir().unwrap();
405        let json_path = temp_dir.path().join("test.json");
406        fs::write(&json_path, json_content).unwrap();
407
408        let mut config = ProviderConfig::new(
409            json_path.to_string_lossy().to_string(),
410            "**/*.json".to_string(),
411        );
412        config
413            .options
414            .insert("index_mode".to_string(), "full".to_string());
415
416        let items = provider.parse_json_file(&json_path, &config).unwrap();
417
418        assert_eq!(items.len(), 1);
419        assert!(items[0].content.contains("Alice"));
420    }
421
422    #[tokio::test]
423    async fn test_fetch_item_by_uri() {
424        let provider = JSONProvider::new();
425        let json_content = r#"[{"name": "Alice"}, {"name": "Bob"}]"#;
426
427        let temp_dir = tempfile::tempdir().unwrap();
428        let json_path = temp_dir.path().join("test.json");
429        fs::write(&json_path, json_content).unwrap();
430
431        let uri = format!("json://{}/item_0", json_path.display());
432        let item = provider.fetch_item(&uri).await.unwrap();
433
434        assert!(item.content.contains("Alice"));
435        assert_eq!(item.metadata.get("index").unwrap(), "0");
436    }
437}