// fraiseql_cli/schema/multi_file_loader.rs
1//! Multi-file schema loader - loads and merges JSON schema files from directories
2//!
3//! Supports flexible schema composition from single files to deeply nested directory structures:
4//! - Load all *.json files from a directory recursively
5//! - Merge types, queries, mutations arrays
6//! - Deduplicate by name with error reporting
7//! - Preserve file path information for error messages
8
9use std::{
10    collections::HashMap,
11    fs,
12    path::{Path, PathBuf},
13};
14
15use anyhow::{Context, Result, bail};
16use serde_json::{Value, json};
17use walkdir::WalkDir;
18
/// Maximum number of JSON schema files accepted from a single directory tree.
///
/// Prevents runaway resource use when pointed at an unexpectedly large directory
/// (e.g. a mounted filesystem root or a node_modules tree). Exceeding this
/// limit is a hard error, not a truncation — see
/// `MultiFileLoader::load_from_directory_with_tracking`.
const MAX_SCHEMA_FILES: usize = 1_000;
24
/// Loads and merges JSON schema files from directories.
///
/// Stateless: all functionality is exposed via associated functions, so this
/// is a unit struct used purely as a namespace.
pub struct MultiFileLoader;
27
/// Result of loading files.
///
/// Wrapped in a struct (rather than returning `Value` directly) so that
/// additional tracking information can be added later without breaking
/// callers of `load_from_directory_with_tracking`.
pub struct LoadResult {
    /// Merged JSON value with "types", "queries", "mutations" arrays.
    pub merged: Value,
}
33
34impl MultiFileLoader {
35    /// Load and merge all JSON files from a directory recursively
36    ///
37    /// # Arguments
38    /// * `dir_path` - Path to directory containing *.json files
39    ///
40    /// # Returns
41    /// Merged Value with "types", "queries", "mutations" as arrays
42    ///
43    /// # Errors
44    /// - If directory doesn't exist
45    /// - If JSON parsing fails
46    /// - If duplicate names are found (with file paths)
47    ///
48    /// # Example
49    /// ```no_run
50    /// // Requires: a "schema/" directory containing JSON schema files on disk.
51    /// use fraiseql_cli::schema::multi_file_loader::MultiFileLoader;
52    ///
53    /// # fn example() -> anyhow::Result<()> {
54    /// let merged = MultiFileLoader::load_from_directory("schema/")?;
55    /// # Ok(())
56    /// # }
57    /// ```
58    pub fn load_from_directory(dir_path: &str) -> Result<Value> {
59        let result = Self::load_from_directory_with_tracking(dir_path)?;
60        Ok(result.merged)
61    }
62
63    /// Load from directory with file path tracking for conflict detection
64    ///
65    /// # Errors
66    ///
67    /// Returns an error if `dir_path` is not a directory, if more than
68    /// `MAX_SCHEMA_FILES` JSON files are found, if any file cannot be read or
69    /// parsed as JSON, or if duplicate type/query/mutation names are detected.
70    pub fn load_from_directory_with_tracking(dir_path: &str) -> Result<LoadResult> {
71        let dir = Path::new(dir_path);
72        if !dir.is_dir() {
73            bail!("Schema directory not found: {dir_path}");
74        }
75
76        let mut types = Vec::new();
77        let mut queries = Vec::new();
78        let mut mutations = Vec::new();
79        let mut name_to_file = HashMap::new();
80
81        // Collect all JSON files and sort for deterministic ordering
82        let mut json_files = Vec::new();
83        for entry in WalkDir::new(dir_path)
84            .into_iter()
85            .filter_map(Result::ok)
86            .filter(|e| e.path().extension().is_some_and(|ext| ext == "json"))
87        {
88            json_files.push(entry.path().to_path_buf());
89            if json_files.len() > MAX_SCHEMA_FILES {
90                bail!(
91                    "Schema directory {dir_path:?} contains more than {MAX_SCHEMA_FILES} JSON \
92                     files. Point --schema-dir at a directory containing only schema files."
93                );
94            }
95        }
96
97        json_files.sort();
98
99        // Load and merge each file
100        for file_path in json_files {
101            let content = fs::read_to_string(&file_path)
102                .context(format!("Failed to read {}", file_path.display()))?;
103            let value: Value = serde_json::from_str(&content)
104                .context(format!("Failed to parse JSON from {}", file_path.display()))?;
105
106            // Track source for each item
107            let file_path_str = file_path.to_string_lossy().to_string();
108
109            // Merge types
110            if let Some(Value::Array(type_items)) = value.get("types") {
111                for item in type_items {
112                    if let Some(name) = item.get("name").and_then(|v| v.as_str()) {
113                        let type_key = format!("type:{name}");
114                        if let Some(existing) = name_to_file.get(&type_key) {
115                            bail!(
116                                "Duplicate type '{name}' found in:\n  - {existing}\n  - {file_path_str}"
117                            );
118                        }
119                        name_to_file.insert(type_key, file_path_str.clone());
120                    }
121                    types.push(item.clone());
122                }
123            }
124
125            // Merge queries
126            if let Some(Value::Array(query_items)) = value.get("queries") {
127                for item in query_items {
128                    if let Some(name) = item.get("name").and_then(|v| v.as_str()) {
129                        let query_key = format!("query:{name}");
130                        if let Some(existing) = name_to_file.get(&query_key) {
131                            bail!(
132                                "Duplicate query '{name}' found in:\n  - {existing}\n  - {file_path_str}"
133                            );
134                        }
135                        name_to_file.insert(query_key, file_path_str.clone());
136                    }
137                    queries.push(item.clone());
138                }
139            }
140
141            // Merge mutations
142            if let Some(Value::Array(mutation_items)) = value.get("mutations") {
143                for item in mutation_items {
144                    if let Some(name) = item.get("name").and_then(|v| v.as_str()) {
145                        let mutation_key = format!("mutation:{name}");
146                        if let Some(existing) = name_to_file.get(&mutation_key) {
147                            bail!(
148                                "Duplicate mutation '{name}' found in:\n  - {existing}\n  - {file_path_str}"
149                            );
150                        }
151                        name_to_file.insert(mutation_key, file_path_str.clone());
152                    }
153                    mutations.push(item.clone());
154                }
155            }
156        }
157
158        let merged = json!({
159            "types": types,
160            "queries": queries,
161            "mutations": mutations,
162        });
163
164        Ok(LoadResult { merged })
165    }
166
167    /// Load specific files and merge them
168    ///
169    /// # Arguments
170    /// * `paths` - Vector of file paths to load
171    ///
172    /// # Returns
173    /// Merged `Value` with "types", "queries", "mutations" as arrays.
174    ///
175    /// # Errors
176    ///
177    /// Returns an error if any path does not exist, cannot be read, or cannot
178    /// be parsed as JSON.
179    pub fn load_from_paths(paths: &[PathBuf]) -> Result<Value> {
180        let mut types = Vec::new();
181        let mut queries = Vec::new();
182        let mut mutations = Vec::new();
183
184        for path in paths {
185            if !path.exists() {
186                bail!("File not found: {}", path.display());
187            }
188
189            let content =
190                fs::read_to_string(path).context(format!("Failed to read {}", path.display()))?;
191            let value: Value = serde_json::from_str(&content)
192                .context(format!("Failed to parse JSON from {}", path.display()))?;
193
194            // Merge types
195            if let Some(Value::Array(type_items)) = value.get("types") {
196                types.extend(type_items.clone());
197            }
198
199            // Merge queries
200            if let Some(Value::Array(query_items)) = value.get("queries") {
201                queries.extend(query_items.clone());
202            }
203
204            // Merge mutations
205            if let Some(Value::Array(mutation_items)) = value.get("mutations") {
206                mutations.extend(mutation_items.clone());
207            }
208        }
209
210        Ok(json!({
211            "types": types,
212            "queries": queries,
213            "mutations": mutations,
214        }))
215    }
216}
217
#[allow(clippy::unwrap_used)] // Reason: test code, panics are acceptable
#[cfg(test)]
mod tests {
    use std::fs;

    use tempfile::TempDir;

    use super::*;

    /// Write `content` to `dir/name`, creating a schema fixture file.
    fn create_test_file(dir: &Path, name: &str, content: &str) -> Result<()> {
        let path = dir.join(name);
        fs::write(path, content)?;
        Ok(())
    }

    #[test]
    fn test_load_single_type_file() -> Result<()> {
        let temp_dir = TempDir::new()?;
        let schema = json!({
            "types": [
                {"name": "User", "fields": []}
            ],
            "queries": [],
            "mutations": []
        });
        create_test_file(temp_dir.path(), "types.json", &schema.to_string())?;

        let result = MultiFileLoader::load_from_directory(temp_dir.path().to_str().unwrap())?;

        assert_eq!(result["types"].as_array().unwrap().len(), 1);
        assert_eq!(result["types"][0]["name"], "User");
        assert_eq!(result["queries"].as_array().unwrap().len(), 0);
        assert_eq!(result["mutations"].as_array().unwrap().len(), 0);

        Ok(())
    }

    #[test]
    fn test_merge_multiple_type_files() -> Result<()> {
        let temp_dir = TempDir::new()?;

        let user_schema = json!({
            "types": [
                {"name": "User", "fields": []}
            ],
            "queries": [],
            "mutations": []
        });
        create_test_file(temp_dir.path(), "user.json", &user_schema.to_string())?;

        let post_schema = json!({
            "types": [
                {"name": "Post", "fields": []}
            ],
            "queries": [],
            "mutations": []
        });
        create_test_file(temp_dir.path(), "post.json", &post_schema.to_string())?;

        let result = MultiFileLoader::load_from_directory(temp_dir.path().to_str().unwrap())?;

        assert_eq!(result["types"].as_array().unwrap().len(), 2);
        let type_names: Vec<&str> = result["types"]
            .as_array()
            .unwrap()
            .iter()
            .filter_map(|t| t["name"].as_str())
            .collect();
        assert!(type_names.contains(&"User"));
        assert!(type_names.contains(&"Post"));

        Ok(())
    }

    #[test]
    fn test_merge_respects_alphabetical_order() -> Result<()> {
        let temp_dir = TempDir::new()?;

        let c_schema = json!({
            "types": [{"name": "C", "fields": []}],
            "queries": [],
            "mutations": []
        });
        create_test_file(temp_dir.path(), "c.json", &c_schema.to_string())?;

        let a_schema = json!({
            "types": [{"name": "A", "fields": []}],
            "queries": [],
            "mutations": []
        });
        create_test_file(temp_dir.path(), "a.json", &a_schema.to_string())?;

        let b_schema = json!({
            "types": [{"name": "B", "fields": []}],
            "queries": [],
            "mutations": []
        });
        create_test_file(temp_dir.path(), "b.json", &b_schema.to_string())?;

        let result = MultiFileLoader::load_from_directory(temp_dir.path().to_str().unwrap())?;

        let type_names: Vec<&str> = result["types"]
            .as_array()
            .unwrap()
            .iter()
            .filter_map(|t| t["name"].as_str())
            .collect();

        // Should be ordered by file load order (a.json, b.json, c.json alphabetically)
        assert_eq!(type_names[0], "A");
        assert_eq!(type_names[1], "B");
        assert_eq!(type_names[2], "C");

        Ok(())
    }

    #[test]
    fn test_merge_queries_and_mutations() -> Result<()> {
        let temp_dir = TempDir::new()?;

        let schema = json!({
            "types": [
                {"name": "User", "fields": []}
            ],
            "queries": [
                {"name": "getUser", "return_type": "User"}
            ],
            "mutations": [
                {"name": "createUser", "return_type": "User"}
            ]
        });
        create_test_file(temp_dir.path(), "schema.json", &schema.to_string())?;

        let result = MultiFileLoader::load_from_directory(temp_dir.path().to_str().unwrap())?;

        assert_eq!(result["types"].as_array().unwrap().len(), 1);
        assert_eq!(result["queries"].as_array().unwrap().len(), 1);
        assert_eq!(result["queries"][0]["name"], "getUser");
        assert_eq!(result["mutations"].as_array().unwrap().len(), 1);
        assert_eq!(result["mutations"][0]["name"], "createUser");

        Ok(())
    }

    #[test]
    fn test_nested_directory_structure() -> Result<()> {
        let temp_dir = TempDir::new()?;

        // Create nested structure
        fs::create_dir_all(temp_dir.path().join("types"))?;
        fs::create_dir_all(temp_dir.path().join("queries"))?;

        let user_type = json!({
            "types": [{"name": "User", "fields": []}],
            "queries": [],
            "mutations": []
        });
        create_test_file(
            temp_dir.path().join("types").as_path(),
            "user.json",
            &user_type.to_string(),
        )?;

        let post_type = json!({
            "types": [{"name": "Post", "fields": []}],
            "queries": [],
            "mutations": []
        });
        create_test_file(
            temp_dir.path().join("types").as_path(),
            "post.json",
            &post_type.to_string(),
        )?;

        let user_queries = json!({
            "types": [],
            "queries": [{"name": "getUser", "return_type": "User"}],
            "mutations": []
        });
        create_test_file(
            temp_dir.path().join("queries").as_path(),
            "user_queries.json",
            &user_queries.to_string(),
        )?;

        let result = MultiFileLoader::load_from_directory(temp_dir.path().to_str().unwrap())?;

        assert_eq!(result["types"].as_array().unwrap().len(), 2);
        assert_eq!(result["queries"].as_array().unwrap().len(), 1);

        Ok(())
    }

    #[test]
    fn test_duplicate_type_names_error() -> Result<()> {
        let temp_dir = TempDir::new()?;

        let file1 = json!({
            "types": [{"name": "User", "fields": []}],
            "queries": [],
            "mutations": []
        });
        create_test_file(temp_dir.path(), "file1.json", &file1.to_string())?;

        let file2 = json!({
            "types": [{"name": "User", "fields": []}],
            "queries": [],
            "mutations": []
        });
        create_test_file(temp_dir.path(), "file2.json", &file2.to_string())?;

        let result = MultiFileLoader::load_from_directory(temp_dir.path().to_str().unwrap());

        assert!(result.is_err(), "expected Err, got: {result:?}");
        // The error must name the duplicate and both offending files.
        let err_msg = result.unwrap_err().to_string();
        assert!(err_msg.contains("Duplicate type 'User'"));
        assert!(err_msg.contains("file1.json"));
        assert!(err_msg.contains("file2.json"));

        Ok(())
    }

    #[test]
    fn test_duplicate_query_names_error() -> Result<()> {
        let temp_dir = TempDir::new()?;

        let file1 = json!({
            "types": [],
            "queries": [{"name": "getUser", "return_type": "User"}],
            "mutations": []
        });
        create_test_file(temp_dir.path(), "file1.json", &file1.to_string())?;

        let file2 = json!({
            "types": [],
            "queries": [{"name": "getUser", "return_type": "User"}],
            "mutations": []
        });
        create_test_file(temp_dir.path(), "file2.json", &file2.to_string())?;

        let result = MultiFileLoader::load_from_directory(temp_dir.path().to_str().unwrap());

        assert!(result.is_err(), "expected Err, got: {result:?}");
        let err_msg = result.unwrap_err().to_string();
        assert!(err_msg.contains("Duplicate query 'getUser'"));

        Ok(())
    }

    #[test]
    fn test_empty_directory() -> Result<()> {
        let temp_dir = TempDir::new()?;

        let result = MultiFileLoader::load_from_directory(temp_dir.path().to_str().unwrap())?;

        assert_eq!(result["types"].as_array().unwrap().len(), 0);
        assert_eq!(result["queries"].as_array().unwrap().len(), 0);
        assert_eq!(result["mutations"].as_array().unwrap().len(), 0);

        Ok(())
    }

    #[test]
    fn test_nonexistent_directory() {
        let result = MultiFileLoader::load_from_directory("/nonexistent/path/to/schema");
        assert!(result.is_err(), "expected Err for nonexistent directory, got: {result:?}");
    }

    #[test]
    fn test_load_from_paths() -> Result<()> {
        let temp_dir = TempDir::new()?;

        let schema1 = json!({
            "types": [{"name": "User", "fields": []}],
            "queries": [],
            "mutations": []
        });
        create_test_file(temp_dir.path(), "schema1.json", &schema1.to_string())?;

        let schema2 = json!({
            "types": [{"name": "Post", "fields": []}],
            "queries": [],
            "mutations": []
        });
        create_test_file(temp_dir.path(), "schema2.json", &schema2.to_string())?;

        let paths = vec![
            temp_dir.path().join("schema1.json"),
            temp_dir.path().join("schema2.json"),
        ];

        let result = MultiFileLoader::load_from_paths(&paths)?;

        assert_eq!(result["types"].as_array().unwrap().len(), 2);

        Ok(())
    }

    #[test]
    fn test_directory_file_count_limit_exceeded() -> Result<()> {
        let temp_dir = TempDir::new()?;

        // Create MAX_SCHEMA_FILES + 1 JSON files — should trip the limit.
        let schema = json!({"types": [], "queries": [], "mutations": []});
        let content = schema.to_string();
        for i in 0..=MAX_SCHEMA_FILES {
            create_test_file(temp_dir.path(), &format!("schema_{i:04}.json"), &content)?;
        }

        let result =
            MultiFileLoader::load_from_directory_with_tracking(temp_dir.path().to_str().unwrap());
        assert!(result.is_err(), "expected error when file count exceeds limit");
        // `unwrap_err` matches the idiom used by the other failure tests above.
        let msg = result.unwrap_err().to_string();
        assert!(msg.contains("more than"), "error should mention the limit: {msg}");
        Ok(())
    }
}
534}