agentroot_core/providers/
json.rs1use crate::db::hash_content;
4use crate::error::{AgentRootError, Result};
5use crate::providers::{ProviderConfig, SourceItem, SourceProvider};
6use async_trait::async_trait;
7use serde_json::Value;
8use std::collections::HashMap;
9use std::fs;
10use std::path::{Path, PathBuf};
11use walkdir::WalkDir;
12
/// Source provider that turns JSON files into indexable `SourceItem`s.
///
/// The provider carries no state of its own; every call is driven entirely by
/// the `ProviderConfig` (or URI) passed to it.
#[derive(Debug, Clone)]
pub struct JSONProvider;
15
16impl Default for JSONProvider {
17 fn default() -> Self {
18 Self::new()
19 }
20}
21
22impl JSONProvider {
23 pub fn new() -> Self {
25 Self
26 }
27
28 fn parse_json_file(&self, path: &Path, config: &ProviderConfig) -> Result<Vec<SourceItem>> {
30 let file_content = fs::read_to_string(path).map_err(|e| {
31 AgentRootError::Io(std::io::Error::new(
32 e.kind(),
33 format!("Failed to read JSON file {:?}: {}", path, e),
34 ))
35 })?;
36
37 let json_value: Value = serde_json::from_str(&file_content).map_err(|e| {
38 AgentRootError::Parse(format!("Failed to parse JSON file {:?}: {}", path, e))
39 })?;
40
41 let filename = path
42 .file_name()
43 .and_then(|s| s.to_str())
44 .unwrap_or("unknown.json");
45
46 let index_mode = config
47 .options
48 .get("index_mode")
49 .map(|s| s.as_str())
50 .unwrap_or("array");
51
52 match index_mode {
53 "array" => self.index_as_array(&json_value, filename, path),
54 "object" => self.index_as_object(&json_value, filename, path),
55 "full" => Ok(vec![self.index_full_document(&json_value, filename, path)]),
56 _ => Err(AgentRootError::Parse(format!(
57 "Invalid index_mode: {}. Expected: array, object, or full",
58 index_mode
59 ))),
60 }
61 }
62
63 fn index_as_array(
65 &self,
66 json_value: &Value,
67 filename: &str,
68 path: &Path,
69 ) -> Result<Vec<SourceItem>> {
70 match json_value {
71 Value::Array(arr) => {
72 let mut items = Vec::new();
73 for (idx, item) in arr.iter().enumerate() {
74 let content = serde_json::to_string_pretty(item)?;
75 let title = self.extract_title(item, filename, idx);
76 let uri = format!("json://{}/item_{}", path.display(), idx);
77 let hash = hash_content(&content);
78
79 let mut metadata = HashMap::new();
80 metadata.insert("file".to_string(), filename.to_string());
81 metadata.insert("index".to_string(), idx.to_string());
82 metadata.insert(
83 "item_type".to_string(),
84 self.json_type_name(item).to_string(),
85 );
86
87 if let Value::Object(obj) = item {
88 for (key, value) in obj {
89 if let Some(str_val) = value.as_str() {
90 metadata.insert(key.clone(), str_val.to_string());
91 }
92 }
93 }
94
95 items.push(SourceItem {
96 uri,
97 title,
98 content,
99 hash,
100 source_type: "json".to_string(),
101 metadata,
102 });
103 }
104 Ok(items)
105 }
106 _ => Err(AgentRootError::Parse(format!(
107 "JSON file {:?} is not an array. Use index_mode=object or index_mode=full",
108 path
109 ))),
110 }
111 }
112
113 fn index_as_object(
115 &self,
116 json_value: &Value,
117 filename: &str,
118 path: &Path,
119 ) -> Result<Vec<SourceItem>> {
120 match json_value {
121 Value::Object(obj) => {
122 let mut items = Vec::new();
123 for (idx, (key, value)) in obj.iter().enumerate() {
124 let content = serde_json::to_string_pretty(value)?;
125 let title = format!("{} - {}", filename, key);
126 let uri = format!("json://{}/key_{}", path.display(), key);
127 let hash = hash_content(&content);
128
129 let mut metadata = HashMap::new();
130 metadata.insert("file".to_string(), filename.to_string());
131 metadata.insert("key".to_string(), key.clone());
132 metadata.insert("index".to_string(), idx.to_string());
133 metadata.insert(
134 "value_type".to_string(),
135 self.json_type_name(value).to_string(),
136 );
137
138 items.push(SourceItem {
139 uri,
140 title,
141 content,
142 hash,
143 source_type: "json".to_string(),
144 metadata,
145 });
146 }
147 Ok(items)
148 }
149 _ => Err(AgentRootError::Parse(format!(
150 "JSON file {:?} is not an object. Use index_mode=array or index_mode=full",
151 path
152 ))),
153 }
154 }
155
156 fn index_full_document(&self, json_value: &Value, filename: &str, path: &Path) -> SourceItem {
158 let content = serde_json::to_string_pretty(json_value).unwrap_or_default();
159 let title = filename.to_string();
160 let uri = format!("json://{}", path.display());
161 let hash = hash_content(&content);
162
163 let mut metadata = HashMap::new();
164 metadata.insert("file".to_string(), filename.to_string());
165 metadata.insert(
166 "type".to_string(),
167 self.json_type_name(json_value).to_string(),
168 );
169
170 SourceItem {
171 uri,
172 title,
173 content,
174 hash,
175 source_type: "json".to_string(),
176 metadata,
177 }
178 }
179
180 fn extract_title(&self, value: &Value, filename: &str, idx: usize) -> String {
182 if let Value::Object(obj) = value {
183 if let Some(title) = obj.get("title").and_then(|v| v.as_str()) {
184 return title.to_string();
185 }
186 if let Some(name) = obj.get("name").and_then(|v| v.as_str()) {
187 return name.to_string();
188 }
189 if let Some(id) = obj.get("id") {
190 return format!("{} - ID {}", filename, id);
191 }
192 }
193
194 format!("{} - Item {}", filename, idx)
195 }
196
197 fn json_type_name(&self, value: &Value) -> &'static str {
199 match value {
200 Value::Null => "null",
201 Value::Bool(_) => "boolean",
202 Value::Number(_) => "number",
203 Value::String(_) => "string",
204 Value::Array(_) => "array",
205 Value::Object(_) => "object",
206 }
207 }
208
209 fn scan_directory(&self, base_path: &Path, pattern: &str) -> Result<Vec<PathBuf>> {
211 let glob_pattern = glob::Pattern::new(pattern)?;
212 let mut json_files = Vec::new();
213
214 for entry in WalkDir::new(base_path)
215 .follow_links(true)
216 .into_iter()
217 .filter_entry(|e| {
218 let name = e.file_name().to_string_lossy();
219 !name.starts_with('.')
220 && !matches!(
221 name.as_ref(),
222 "node_modules" | ".git" | ".cache" | "target" | "dist" | "build"
223 )
224 })
225 {
226 let entry = entry?;
227 if !entry.file_type().is_file() {
228 continue;
229 }
230
231 let path = entry.path();
232 if let Some(ext) = path.extension() {
233 if ext.eq_ignore_ascii_case("json") {
234 if let Ok(relative) = path.strip_prefix(base_path) {
235 let relative_str = relative.to_string_lossy();
236 if glob_pattern.matches(&relative_str) {
237 json_files.push(path.to_path_buf());
238 }
239 }
240 }
241 }
242 }
243
244 Ok(json_files)
245 }
246}
247
248#[async_trait]
249impl SourceProvider for JSONProvider {
250 fn provider_type(&self) -> &'static str {
251 "json"
252 }
253
254 async fn list_items(&self, config: &ProviderConfig) -> Result<Vec<SourceItem>> {
255 let base_path = Path::new(&config.base_path);
256
257 if base_path.is_file() {
258 if base_path
259 .extension()
260 .map(|e| e.eq_ignore_ascii_case("json"))
261 .unwrap_or(false)
262 {
263 return self.parse_json_file(base_path, config);
264 } else {
265 return Err(AgentRootError::Parse(format!(
266 "File {:?} is not a JSON file",
267 base_path
268 )));
269 }
270 }
271
272 if !base_path.exists() {
273 return Err(AgentRootError::Io(std::io::Error::new(
274 std::io::ErrorKind::NotFound,
275 format!("Path not found: {:?}", base_path),
276 )));
277 }
278
279 let json_files = self.scan_directory(base_path, &config.pattern)?;
280 let mut all_items = Vec::new();
281
282 for json_file in json_files {
283 match self.parse_json_file(&json_file, config) {
284 Ok(items) => all_items.extend(items),
285 Err(e) => {
286 tracing::warn!("Failed to parse JSON file {:?}: {}", json_file, e);
287 }
288 }
289 }
290
291 Ok(all_items)
292 }
293
294 async fn fetch_item(&self, uri: &str) -> Result<SourceItem> {
295 if !uri.starts_with("json://") {
296 return Err(AgentRootError::Parse(format!(
297 "Invalid JSON URI: {}. Expected format: json://path/to/file.json/item_N or json://path/to/file.json/key_X",
298 uri
299 )));
300 }
301
302 let uri_path = &uri[7..];
303
304 if !uri_path.contains("/item_") && !uri_path.contains("/key_") {
305 let file_path = Path::new(uri_path);
306 let config =
307 ProviderConfig::new(file_path.to_string_lossy().to_string(), "**/*".to_string());
308 let items = self.parse_json_file(file_path, &config)?;
309 return items.into_iter().next().ok_or_else(|| {
310 AgentRootError::Parse(format!("No items found in JSON file {:?}", file_path))
311 });
312 }
313
314 let parts: Vec<&str> = uri_path.rsplitn(2, '/').collect();
315 if parts.len() != 2 {
316 return Err(AgentRootError::Parse(format!(
317 "Invalid JSON URI format: {}",
318 uri
319 )));
320 }
321
322 let file_path = Path::new(parts[1]);
323 let config =
324 ProviderConfig::new(file_path.to_string_lossy().to_string(), "**/*".to_string());
325
326 let all_items = self.parse_json_file(file_path, &config)?;
327
328 all_items
329 .into_iter()
330 .find(|item| item.uri == uri)
331 .ok_or_else(|| {
332 AgentRootError::Parse(format!("Item not found in JSON file {:?}", file_path))
333 })
334 }
335}
336
337#[cfg(test)]
338mod tests {
339 use super::*;
340
341 #[test]
342 fn test_provider_type() {
343 let provider = JSONProvider::new();
344 assert_eq!(provider.provider_type(), "json");
345 }
346
347 #[tokio::test]
348 async fn test_parse_json_array() {
349 let provider = JSONProvider::new();
350 let json_content = r#"[
351 {"name": "Alice", "age": 30},
352 {"name": "Bob", "age": 25}
353 ]"#;
354
355 let temp_dir = tempfile::tempdir().unwrap();
356 let json_path = temp_dir.path().join("test.json");
357 fs::write(&json_path, json_content).unwrap();
358
359 let config = ProviderConfig::new(
360 json_path.to_string_lossy().to_string(),
361 "**/*.json".to_string(),
362 );
363 let items = provider.parse_json_file(&json_path, &config).unwrap();
364
365 assert_eq!(items.len(), 2);
366 assert!(items[0].content.contains("Alice"));
367 assert_eq!(items[0].metadata.get("name").unwrap(), "Alice");
368 }
369
370 #[tokio::test]
371 async fn test_parse_json_object() {
372 let provider = JSONProvider::new();
373 let json_content = r#"{
374 "users": {"count": 100},
375 "posts": {"count": 500}
376 }"#;
377
378 let temp_dir = tempfile::tempdir().unwrap();
379 let json_path = temp_dir.path().join("test.json");
380 fs::write(&json_path, json_content).unwrap();
381
382 let mut config = ProviderConfig::new(
383 json_path.to_string_lossy().to_string(),
384 "**/*.json".to_string(),
385 );
386 config
387 .options
388 .insert("index_mode".to_string(), "object".to_string());
389
390 let items = provider.parse_json_file(&json_path, &config).unwrap();
391
392 assert_eq!(items.len(), 2);
393 assert!(
394 items[0].metadata.get("key").unwrap() == "users"
395 || items[0].metadata.get("key").unwrap() == "posts"
396 );
397 }
398
399 #[tokio::test]
400 async fn test_parse_json_full() {
401 let provider = JSONProvider::new();
402 let json_content = r#"{"name": "Alice", "age": 30}"#;
403
404 let temp_dir = tempfile::tempdir().unwrap();
405 let json_path = temp_dir.path().join("test.json");
406 fs::write(&json_path, json_content).unwrap();
407
408 let mut config = ProviderConfig::new(
409 json_path.to_string_lossy().to_string(),
410 "**/*.json".to_string(),
411 );
412 config
413 .options
414 .insert("index_mode".to_string(), "full".to_string());
415
416 let items = provider.parse_json_file(&json_path, &config).unwrap();
417
418 assert_eq!(items.len(), 1);
419 assert!(items[0].content.contains("Alice"));
420 }
421
422 #[tokio::test]
423 async fn test_fetch_item_by_uri() {
424 let provider = JSONProvider::new();
425 let json_content = r#"[{"name": "Alice"}, {"name": "Bob"}]"#;
426
427 let temp_dir = tempfile::tempdir().unwrap();
428 let json_path = temp_dir.path().join("test.json");
429 fs::write(&json_path, json_content).unwrap();
430
431 let uri = format!("json://{}/item_0", json_path.display());
432 let item = provider.fetch_item(&uri).await.unwrap();
433
434 assert!(item.content.contains("Alice"));
435 assert_eq!(item.metadata.get("index").unwrap(), "0");
436 }
437}