Skip to main content

apcore_toolkit/
binding_loader.rs

1// BindingLoader — parse `.binding.yaml` files back into `ScannedModule`.
2//
3// Inverse of `output::yaml_writer::YAMLWriter`. Unlike apcore's own
4// `BindingLoader` (which imports the target and registers a runtime module),
5// this loader is pure data: it parses YAML into `ScannedModule` objects for
6// validation, merging, diffing, or round-trip workflows. No code is loaded.
7
8use std::collections::HashMap;
9use std::fs;
10use std::path::{Path, PathBuf};
11
12use walkdir::WalkDir;
13
14use apcore::module::{ModuleAnnotations, ModuleExample};
15use serde_json::Value;
16use thiserror::Error;
17use tracing::warn;
18
19use crate::types::ScannedModule;
20
/// `spec_version` values this loader recognises. Any other value only
/// produces a warning in `BindingLoader::check_spec_version`; parsing still
/// proceeds.
const SUPPORTED_SPEC_VERSIONS: &[&str] = &["1.0"];
22
/// Metadata keys that are unsafe for cross-runtime round-trips because they
/// correspond to JS prototype-pollution sinks. They are stripped at parse
/// time so that a malicious or malformed binding YAML cannot carry an
/// attacker-controlled `__proto__` / `constructor` / `prototype` entry into
/// downstream consumers (matches the TypeScript loader's `PROTO_DENY` guard
/// in `src/binding-parser.ts`).
const FORBIDDEN_METADATA_KEYS: &[&str] = &["__proto__", "constructor", "prototype"];
30
/// Maximum size of a single `.binding.yaml` file (16 MiB).
///
/// A binding file is a structured YAML document, not a data store.
/// Files larger than this are almost certainly pathological and would
/// cause the full content to be held in memory in up to three forms
/// (raw text, `serde_yaml_ng::Value`, and `serde_json::Value`).
const MAX_BINDING_FILE_SIZE: u64 = 16 * 1024 * 1024;
38
/// Maximum number of `.binding.yaml` files accepted by one
/// `BindingLoader::load` call.
///
/// The limit is applied to the full list of collected files, so with
/// `recursive = true` it covers the whole tree rather than each
/// subdirectory separately. It prevents a maliciously large directory from
/// causing unbounded memory consumption while results are accumulated.
const MAX_BINDING_FILES_PER_DIR: usize = 10_000;
44
/// Errors produced by [`BindingLoader`].
///
/// Variants that relate to a file carry the offending path as a string.
/// `path` is `None` for documents parsed from in-memory data, in which case
/// the rendered message shows `<inline>`.
#[derive(Debug, Error)]
pub enum BindingLoadError {
    /// The path is neither a regular file nor a directory (this includes
    /// paths that do not exist or cannot be stat'd).
    #[error("path does not exist: {path}")]
    PathNotFound { path: String },

    /// A binding file exceeds the maximum allowed size.
    #[error("binding file {path} is too large ({size} bytes > {max} byte limit)")]
    FileTooLarge { path: String, size: u64, max: u64 },

    /// More binding files were found than the loader's file-count limit.
    #[error("directory {path} contains more than {max} binding files")]
    TooManyFiles { path: String, max: usize },

    /// Failure listing a directory or reading a binding file from disk.
    #[error("failed to read {path}: {source}")]
    FileRead {
        path: String,
        #[source]
        source: std::io::Error,
    },

    /// The file content is not valid YAML.
    #[error("failed to parse YAML in {path}: {source}")]
    YamlParse {
        path: String,
        #[source]
        source: serde_yaml_ng::Error,
    },

    /// A binding entry is missing or has an invalid value for one or more
    /// required fields. Covers three cases: absent key, explicit `null`, and
    /// wrong-type scalar (e.g. `module_id: 42`, `target: true`). All three
    /// are treated as "required field not supplied" rather than silently
    /// coerced to empty strings or zero values downstream.
    ///
    /// `module_id` is `None` when the entry's own `module_id` is absent or
    /// not a string; the message then shows `<unknown>`.
    #[error("missing or invalid required fields {missing_fields:?} (file={}, module_id={})",
        .path.as_deref().unwrap_or("<inline>"),
        .module_id.as_deref().unwrap_or("<unknown>"))]
    MissingFields {
        path: Option<String>,
        module_id: Option<String>,
        missing_fields: Vec<String>,
    },

    /// The document structure is invalid (e.g. top-level is not a mapping,
    /// `bindings` is not a list, or a binding entry is not a mapping).
    #[error("invalid binding structure in {}: {reason}", .path.as_deref().unwrap_or("<inline>"))]
    InvalidStructure {
        path: Option<String>,
        reason: String,
    },
}
98
/// Loads `.binding.yaml` files into [`ScannedModule`] objects.
///
/// # Usage
///
/// ```ignore
/// let loader = BindingLoader;
/// let modules = loader.load(Path::new("bindings/"), false, false)?;
/// let strict = loader.load(Path::new("foo.binding.yaml"), true, false)?;
/// ```
///
/// In loose mode (`strict = false`), only `module_id` and `target`
/// are required; missing optional fields fall back to defaults.
///
/// In strict mode (`strict = true`), `input_schema` and `output_schema` are
/// additionally required.
///
/// The loader is a field-less unit struct, so it holds no state between
/// calls.
#[derive(Debug, Default)]
pub struct BindingLoader;
116
117impl BindingLoader {
118    /// Create a new BindingLoader.
119    pub fn new() -> Self {
120        Self
121    }
122
123    /// Load one file or every `*.binding.yaml` in a directory.
124    ///
125    /// When `recursive` is `true`, subdirectories are traversed depth-first using
126    /// `walkdir`. When `false` (default), only the immediate directory is scanned.
127    ///
128    /// # OS error handling vs. TypeScript
129    ///
130    /// Note: unlike the TypeScript implementation which warns and continues on OS
131    /// permission errors (EACCES/EPERM), this implementation propagates any IO error
132    /// as `BindingLoadError::FileRead` and aborts the entire load. This is the
133    /// fail-fast behavior.
134    pub fn load(
135        &self,
136        path: &Path,
137        strict: bool,
138        recursive: bool,
139    ) -> Result<Vec<ScannedModule>, BindingLoadError> {
140        let files: Vec<PathBuf> = if path.is_file() {
141            vec![path.to_path_buf()]
142        } else if path.is_dir() {
143            let mut entries: Vec<PathBuf> = if recursive {
144                // Surface per-entry traversal failures (permission denied,
145                // broken symlink, I/O errors) rather than silently dropping
146                // them — matches the non-recursive branch's policy so a
147                // caller switching `recursive=false` → `true` gets a
148                // consistent error contract.
149                let mut flat: Vec<PathBuf> = Vec::new();
150                for entry_result in WalkDir::new(path).max_depth(64).follow_links(false) {
151                    let entry = entry_result.map_err(|e| {
152                        let io_err = e
153                            .into_io_error()
154                            .unwrap_or_else(|| std::io::Error::other("walkdir traversal error"));
155                        BindingLoadError::FileRead {
156                            path: path.display().to_string(),
157                            source: io_err,
158                        }
159                    })?;
160                    if entry.file_type().is_file()
161                        && entry
162                            .file_name()
163                            .to_string_lossy()
164                            .ends_with(".binding.yaml")
165                    {
166                        flat.push(entry.into_path());
167                    }
168                }
169                flat
170            } else {
171                let read_dir = fs::read_dir(path).map_err(|e| BindingLoadError::FileRead {
172                    path: path.display().to_string(),
173                    source: e,
174                })?;
175                let mut flat: Vec<PathBuf> = Vec::new();
176                for entry_result in read_dir {
177                    match entry_result {
178                        Ok(entry) => {
179                            let p = entry.path();
180                            let is_binding = p
181                                .file_name()
182                                .and_then(|n| n.to_str())
183                                .is_some_and(|n| n.ends_with(".binding.yaml"));
184                            if is_binding {
185                                flat.push(p);
186                            }
187                        }
188                        Err(e) => {
189                            // Surface per-entry failures rather than silently
190                            // discarding them; a permission error on a single
191                            // file should not make the directory load partial.
192                            return Err(BindingLoadError::FileRead {
193                                path: path.display().to_string(),
194                                source: e,
195                            });
196                        }
197                    }
198                }
199                flat
200            };
201            entries.sort();
202            entries
203        } else {
204            return Err(BindingLoadError::PathNotFound {
205                path: path.display().to_string(),
206            });
207        };
208
209        if files.len() > MAX_BINDING_FILES_PER_DIR {
210            return Err(BindingLoadError::TooManyFiles {
211                path: path.display().to_string(),
212                max: MAX_BINDING_FILES_PER_DIR,
213            });
214        }
215
216        let mut modules: Vec<ScannedModule> = Vec::new();
217        for f in files {
218            let file_size = fs::metadata(&f)
219                .map_err(|e| BindingLoadError::FileRead {
220                    path: f.display().to_string(),
221                    source: e,
222                })?
223                .len();
224            if file_size > MAX_BINDING_FILE_SIZE {
225                return Err(BindingLoadError::FileTooLarge {
226                    path: f.display().to_string(),
227                    size: file_size,
228                    max: MAX_BINDING_FILE_SIZE,
229                });
230            }
231            let content = fs::read_to_string(&f).map_err(|e| BindingLoadError::FileRead {
232                path: f.display().to_string(),
233                source: e,
234            })?;
235            let raw: serde_yaml_ng::Value =
236                serde_yaml_ng::from_str(&content).map_err(|e| BindingLoadError::YamlParse {
237                    path: f.display().to_string(),
238                    source: e,
239                })?;
240            if raw.is_null() {
241                warn!("BindingLoader: {} is empty, skipping", f.display());
242                continue;
243            }
244            let json_value =
245                serde_json::to_value(raw).map_err(|e| BindingLoadError::InvalidStructure {
246                    path: Some(f.display().to_string()),
247                    reason: format!("YAML → JSON conversion failed: {e}"),
248                })?;
249            modules.extend(self.parse_document(
250                &json_value,
251                Some(&f.display().to_string()),
252                strict,
253            )?);
254        }
255        Ok(modules)
256    }
257
258    /// Parse a pre-loaded binding JSON value (`{"bindings": [...]}`).
259    pub fn load_data(
260        &self,
261        data: &Value,
262        strict: bool,
263    ) -> Result<Vec<ScannedModule>, BindingLoadError> {
264        self.parse_document(data, None, strict)
265    }
266
267    // ------------------------------------------------------------------
268    // Internal helpers
269    // ------------------------------------------------------------------
270
271    fn parse_document(
272        &self,
273        raw: &Value,
274        file_path: Option<&str>,
275        strict: bool,
276    ) -> Result<Vec<ScannedModule>, BindingLoadError> {
277        let obj = raw
278            .as_object()
279            .ok_or_else(|| BindingLoadError::InvalidStructure {
280                path: file_path.map(String::from),
281                reason: "top-level binding document must be a mapping".into(),
282            })?;
283
284        Self::check_spec_version(obj.get("spec_version"), file_path);
285
286        let bindings = obj
287            .get("bindings")
288            .and_then(|v| v.as_array())
289            .ok_or_else(|| BindingLoadError::InvalidStructure {
290                path: file_path.map(String::from),
291                reason: "'bindings' key missing or not a list".into(),
292            })?;
293
294        let mut modules: Vec<ScannedModule> = Vec::with_capacity(bindings.len());
295        for entry in bindings {
296            let entry_obj =
297                entry
298                    .as_object()
299                    .ok_or_else(|| BindingLoadError::InvalidStructure {
300                        path: file_path.map(String::from),
301                        reason: "binding entry must be a mapping".into(),
302                    })?;
303            modules.push(Self::parse_entry(entry_obj, file_path, strict)?);
304        }
305        Ok(modules)
306    }
307
308    fn check_spec_version(spec_version: Option<&Value>, file_path: Option<&str>) {
309        let where_str = file_path.unwrap_or("<inline>");
310        match spec_version {
311            None | Some(Value::Null) => {
312                warn!(
313                    "BindingLoader: {} missing 'spec_version'; defaulting to '1.0'.",
314                    where_str
315                );
316            }
317            Some(v) => {
318                let as_str = v.as_str();
319                if !as_str.is_some_and(|s| SUPPORTED_SPEC_VERSIONS.contains(&s)) {
320                    warn!(
321                        "BindingLoader: {} has spec_version={} newer than supported {:?}; proceeding best-effort.",
322                        where_str, v, SUPPORTED_SPEC_VERSIONS
323                    );
324                }
325            }
326        }
327    }
328
329    fn parse_entry(
330        entry: &serde_json::Map<String, Value>,
331        file_path: Option<&str>,
332        strict: bool,
333    ) -> Result<ScannedModule, BindingLoadError> {
334        let required: &[&str] = if strict {
335            &["module_id", "target", "input_schema", "output_schema"]
336        } else {
337            &["module_id", "target"]
338        };
339
340        // A required field is "missing or invalid" when absent, null, or of
341        // the wrong type. Previously only None/Null was rejected, so
342        // `module_id: 42` or `target: true` would silently coerce to an
343        // empty string downstream and corrupt the registered module.
344        let missing: Vec<String> = required
345            .iter()
346            .filter(|f| match entry.get(**f) {
347                None | Some(Value::Null) => true,
348                Some(v) => match **f {
349                    // Schemas must be objects.
350                    "input_schema" | "output_schema" => !v.is_object(),
351                    // Identifiers must be non-empty strings.
352                    _ => v.as_str().is_none_or(|s| s.is_empty()),
353                },
354            })
355            .map(|f| (*f).to_string())
356            .collect();
357        if !missing.is_empty() {
358            return Err(BindingLoadError::MissingFields {
359                path: file_path.map(String::from),
360                module_id: entry
361                    .get("module_id")
362                    .and_then(|v| v.as_str())
363                    .map(String::from),
364                missing_fields: missing,
365            });
366        }
367
368        let module_id = entry
369            .get("module_id")
370            .and_then(|v| v.as_str())
371            .unwrap_or_default()
372            .to_string();
373
374        let target = entry
375            .get("target")
376            .and_then(|v| v.as_str())
377            .unwrap_or_default()
378            .to_string();
379
380        let description = entry
381            .get("description")
382            .and_then(|v| v.as_str())
383            .unwrap_or("")
384            .to_string();
385
386        let version = entry
387            .get("version")
388            .and_then(|v| v.as_str())
389            .unwrap_or("1.0.0")
390            .to_string();
391
392        let documentation = entry
393            .get("documentation")
394            .and_then(|v| v.as_str())
395            .map(String::from);
396
397        let suggested_alias = entry
398            .get("suggested_alias")
399            .and_then(|v| v.as_str())
400            .map(String::from);
401
402        let input_schema = entry
403            .get("input_schema")
404            .filter(|v| !v.is_null())
405            .cloned()
406            .unwrap_or_else(|| Value::Object(serde_json::Map::new()));
407
408        let output_schema = entry
409            .get("output_schema")
410            .filter(|v| !v.is_null())
411            .cloned()
412            .unwrap_or_else(|| Value::Object(serde_json::Map::new()));
413
414        let tags: Vec<String> = entry
415            .get("tags")
416            .and_then(|v| v.as_array())
417            .map(|arr| {
418                arr.iter()
419                    .filter_map(|v| v.as_str().map(String::from))
420                    .collect()
421            })
422            .unwrap_or_default();
423
424        let warnings: Vec<String> = entry
425            .get("warnings")
426            .and_then(|v| v.as_array())
427            .map(|arr| {
428                arr.iter()
429                    .filter_map(|v| v.as_str().map(String::from))
430                    .collect()
431            })
432            .unwrap_or_default();
433
434        let metadata: HashMap<String, Value> = entry
435            .get("metadata")
436            .and_then(|v| v.as_object())
437            .map(|o| {
438                o.iter()
439                    .filter_map(|(k, v)| {
440                        if FORBIDDEN_METADATA_KEYS.contains(&k.as_str()) {
441                            warn!(
442                                module_id = %module_id,
443                                key = %k,
444                                "BindingLoader: dropping forbidden metadata key (prototype-pollution guard)"
445                            );
446                            None
447                        } else {
448                            Some((k.clone(), v.clone()))
449                        }
450                    })
451                    .collect()
452            })
453            .unwrap_or_default();
454
455        let display = Self::parse_display(entry.get("display"), &module_id);
456
457        let annotations = Self::parse_annotations(entry.get("annotations"), &module_id);
458        let examples = Self::parse_examples(entry.get("examples"), &module_id);
459
460        Ok(ScannedModule {
461            module_id,
462            description,
463            input_schema,
464            output_schema,
465            tags,
466            target,
467            version,
468            annotations,
469            documentation,
470            suggested_alias,
471            examples,
472            metadata,
473            display,
474            warnings,
475        })
476    }
477
478    fn parse_display(value: Option<&Value>, module_id: &str) -> Option<Value> {
479        let v = value?;
480        if v.is_null() {
481            return None;
482        }
483        if !v.is_object() {
484            warn!(
485                "BindingLoader: display for module {} is not an object; ignoring",
486                module_id
487            );
488            return None;
489        }
490        Some(v.clone())
491    }
492
493    fn parse_annotations(value: Option<&Value>, module_id: &str) -> Option<ModuleAnnotations> {
494        let v = value?;
495        if v.is_null() {
496            return None;
497        }
498        if !v.is_object() {
499            warn!(
500                "BindingLoader: annotations for module {} is not a dict; treating as None",
501                module_id
502            );
503            return None;
504        }
505        match serde_json::from_value::<ModuleAnnotations>(v.clone()) {
506            Ok(ann) => Some(ann),
507            Err(e) => {
508                warn!(
509                    "BindingLoader: failed to parse annotations for module {}: {}; treating as None",
510                    module_id, e
511                );
512                None
513            }
514        }
515    }
516
517    fn parse_examples(value: Option<&Value>, module_id: &str) -> Vec<ModuleExample> {
518        let Some(v) = value else {
519            return Vec::new();
520        };
521        if v.is_null() {
522            return Vec::new();
523        }
524        let Some(arr) = v.as_array() else {
525            warn!(
526                "BindingLoader: examples for module {} is not a list; ignoring",
527                module_id
528            );
529            return Vec::new();
530        };
531        let mut result = Vec::with_capacity(arr.len());
532        for (i, ex) in arr.iter().enumerate() {
533            if !ex.is_object() {
534                warn!(
535                    "BindingLoader: examples[{}] of module {} is not a dict; ignoring",
536                    i, module_id
537                );
538                continue;
539            }
540            match serde_json::from_value::<ModuleExample>(ex.clone()) {
541                Ok(parsed) => result.push(parsed),
542                Err(e) => warn!(
543                    "BindingLoader: examples[{}] of module {} malformed: {}; ignoring",
544                    i, module_id, e
545                ),
546            }
547        }
548        result
549    }
550}
551
552#[cfg(test)]
553mod tests {
554    use super::*;
555    use serde_json::json;
556    use std::fs;
557    use tempfile::TempDir;
558
559    fn minimal_entry() -> Value {
560        json!({"module_id": "x.y", "target": "pkg:func"})
561    }
562
    /// Entry that sets every field `parse_entry` reads, for tests that check
    /// optional fields are carried through.
    fn full_entry() -> Value {
        json!({
            "module_id": "users.get_user",
            "target": "myapp.views:get_user",
            "description": "Get a user",
            "documentation": "Returns a user by ID.",
            "tags": ["users", "get"],
            "version": "2.0.0",
            "annotations": {"readonly": true, "cacheable": true, "cache_ttl": 60},
            "examples": [
                {"title": "happy", "inputs": {"id": 1}, "output": {"name": "alice"}}
            ],
            "metadata": {"http_method": "GET"},
            "input_schema": {"type": "object"},
            "output_schema": {"type": "object"},
            "display": {"mcp": {"alias": "users_get"}, "alias": "users.get"},
            "suggested_alias": "users.get.alt",
            "warnings": ["stale"]
        })
    }
583
584    #[test]
585    fn test_loose_minimum_entry() {
586        let loader = BindingLoader::new();
587        let modules = loader
588            .load_data(&json!({"bindings": [minimal_entry()]}), false)
589            .unwrap();
590        assert_eq!(modules.len(), 1);
591        let m = &modules[0];
592        assert_eq!(m.module_id, "x.y");
593        assert_eq!(m.target, "pkg:func");
594        assert_eq!(m.description, "");
595        assert_eq!(m.version, "1.0.0");
596        assert!(m.annotations.is_none());
597        assert!(m.display.is_none());
598        assert!(m.tags.is_empty());
599        assert_eq!(m.input_schema, json!({}));
600        assert_eq!(m.output_schema, json!({}));
601    }
602
603    // D11-5b regression: `__proto__` / `constructor` / `prototype` keys in
604    // binding-YAML metadata must be dropped at parse time so they cannot
605    // propagate to JS-side consumers (cross-language prototype-pollution
606    // guard). Mirrors the TypeScript loader's `PROTO_DENY` and the Python
607    // loader's `_FORBIDDEN_METADATA_KEYS`.
608    #[test]
609    fn test_metadata_filters_proto_pollution_keys() {
610        let loader = BindingLoader::new();
611        let entry = json!({
612            "module_id": "x.y",
613            "target": "pkg:func",
614            "metadata": {
615                "__proto__": {"polluted": true},
616                "constructor": "evil",
617                "prototype": ["bad"],
618                "safe_key": "kept",
619            }
620        });
621        let modules = loader
622            .load_data(&json!({"bindings": [entry]}), false)
623            .unwrap();
624        assert_eq!(modules.len(), 1);
625        let metadata = &modules[0].metadata;
626        assert!(!metadata.contains_key("__proto__"));
627        assert!(!metadata.contains_key("constructor"));
628        assert!(!metadata.contains_key("prototype"));
629        assert_eq!(metadata.get("safe_key"), Some(&json!("kept")));
630    }
631
632    #[test]
633    fn test_strict_requires_input_schema() {
634        let loader = BindingLoader::new();
635        let err = loader
636            .load_data(&json!({"bindings": [minimal_entry()]}), true)
637            .unwrap_err();
638        match err {
639            BindingLoadError::MissingFields {
640                missing_fields,
641                module_id,
642                ..
643            } => {
644                assert!(missing_fields.contains(&"input_schema".to_string()));
645                assert!(missing_fields.contains(&"output_schema".to_string()));
646                assert_eq!(module_id.as_deref(), Some("x.y"));
647            }
648            _ => panic!("expected MissingFields, got {err:?}"),
649        }
650    }
651
652    #[test]
653    fn test_strict_accepts_when_schemas_present() {
654        let loader = BindingLoader::new();
655        let entry = json!({
656            "module_id": "x.y",
657            "target": "pkg:func",
658            "input_schema": {"type": "object"},
659            "output_schema": {"type": "object"}
660        });
661        let modules = loader
662            .load_data(&json!({"bindings": [entry]}), true)
663            .unwrap();
664        assert_eq!(modules.len(), 1);
665    }
666
667    #[test]
668    fn test_missing_module_id_always_fails() {
669        let loader = BindingLoader::new();
670        let err = loader
671            .load_data(&json!({"bindings": [{"target": "p:f"}]}), false)
672            .unwrap_err();
673        assert!(matches!(
674            err,
675            BindingLoadError::MissingFields { ref missing_fields, .. }
676                if missing_fields.contains(&"module_id".to_string())
677        ));
678    }
679
680    #[test]
681    fn test_missing_target_always_fails() {
682        let loader = BindingLoader::new();
683        let err = loader
684            .load_data(&json!({"bindings": [{"module_id": "x"}]}), false)
685            .unwrap_err();
686        assert!(matches!(
687            err,
688            BindingLoadError::MissingFields { ref missing_fields, .. }
689                if missing_fields.contains(&"target".to_string())
690        ));
691    }
692
693    #[test]
694    fn test_missing_bindings_key() {
695        let loader = BindingLoader::new();
696        let err = loader
697            .load_data(&json!({"spec_version": "1.0"}), false)
698            .unwrap_err();
699        assert!(matches!(
700            err,
701            BindingLoadError::InvalidStructure { ref reason, .. } if reason.contains("bindings")
702        ));
703    }
704
705    #[test]
706    fn test_top_level_not_mapping() {
707        let loader = BindingLoader::new();
708        let err = loader.load_data(&json!(["a", "b"]), false).unwrap_err();
709        assert!(matches!(
710            err,
711            BindingLoadError::InvalidStructure { ref reason, .. } if reason.contains("mapping")
712        ));
713    }
714
715    #[test]
716    fn test_entry_not_a_mapping() {
717        let loader = BindingLoader::new();
718        let err = loader
719            .load_data(&json!({"bindings": ["scalar"]}), false)
720            .unwrap_err();
721        assert!(matches!(
722            err,
723            BindingLoadError::InvalidStructure { ref reason, .. } if reason.contains("mapping")
724        ));
725    }
726
727    #[test]
728    fn test_annotations_parsed() {
729        let loader = BindingLoader::new();
730        let m = &loader
731            .load_data(&json!({"bindings": [full_entry()]}), false)
732            .unwrap()[0];
733        let ann = m.annotations.as_ref().expect("annotations should parse");
734        assert!(ann.readonly);
735        assert!(ann.cacheable);
736        assert_eq!(ann.cache_ttl, 60);
737    }
738
739    #[test]
740    fn test_annotations_wrong_type_treated_as_none() {
741        let loader = BindingLoader::new();
742        let m = &loader
743            .load_data(
744                &json!({"bindings": [{"module_id": "x", "target": "p:f", "annotations": "readonly"}]}),
745                false,
746            )
747            .unwrap()[0];
748        assert!(m.annotations.is_none());
749    }
750
751    #[test]
752    fn test_missing_fields_error_message_is_readable() {
753        let loader = BindingLoader::new();
754        let err = loader
755            .load_data(&json!({"bindings": [{"module_id": "x"}]}), false)
756            .unwrap_err();
757        let msg = err.to_string();
758        // No raw debug-format wrappers leak into the user-facing message.
759        assert!(!msg.contains("Some("), "got: {msg}");
760        assert!(!msg.contains("None"), "got: {msg}");
761        assert!(msg.contains("x"), "module_id missing from message: {msg}");
762        assert!(msg.contains("target"), "missing field not listed: {msg}");
763    }
764
765    #[test]
766    fn test_display_wrong_type_dropped() {
767        // Malformed display (non-object) is dropped. We can't easily capture
768        // tracing warnings without a subscriber, but we assert the drop occurs.
769        let loader = BindingLoader::new();
770        let m = &loader
771            .load_data(
772                &json!({"bindings": [{"module_id": "x", "target": "p:f", "display": "not-a-dict"}]}),
773                false,
774            )
775            .unwrap()[0];
776        assert!(m.display.is_none());
777    }
778
779    #[test]
780    fn test_display_null_dropped() {
781        let loader = BindingLoader::new();
782        let m = &loader
783            .load_data(
784                &json!({"bindings": [{"module_id": "x", "target": "p:f", "display": null}]}),
785                false,
786            )
787            .unwrap()[0];
788        assert!(m.display.is_none());
789    }
790
791    #[test]
792    fn test_display_preserved() {
793        let loader = BindingLoader::new();
794        let m = &loader
795            .load_data(&json!({"bindings": [full_entry()]}), false)
796            .unwrap()[0];
797        assert_eq!(
798            m.display.as_ref().unwrap(),
799            &json!({"mcp": {"alias": "users_get"}, "alias": "users.get"})
800        );
801    }
802
803    #[test]
804    fn test_examples_parsed() {
805        let loader = BindingLoader::new();
806        let m = &loader
807            .load_data(&json!({"bindings": [full_entry()]}), false)
808            .unwrap()[0];
809        assert_eq!(m.examples.len(), 1);
810        assert_eq!(m.examples[0].title, "happy");
811    }
812
/// Checks that `BindingLoadError::FileTooLarge` can be built by hand and
/// renders a message that names both the problem and the offending path.
///
/// This only exercises the error type: no file is written, so the real
/// size threshold is not triggered here.
#[test]
fn test_file_too_large_error_variant() {
    let msg = BindingLoadError::FileTooLarge {
        path: String::from("/bindings/huge.binding.yaml"),
        // One byte over the limit: the smallest size that would be rejected.
        size: MAX_BINDING_FILE_SIZE + 1,
        max: MAX_BINDING_FILE_SIZE,
    }
    .to_string();

    let mentions_size = msg.contains("too large");
    assert!(mentions_size, "message should mention size: {msg}");

    let mentions_path = msg.contains("huge.binding.yaml");
    assert!(mentions_path, "message should mention path: {msg}");
}
834
/// Loading a path that points at a single binding file returns the one
/// module described in that file.
#[test]
fn test_load_single_file() {
    let tmp = TempDir::new().unwrap();
    let binding_path = tmp.path().join("one.binding.yaml");

    let yaml =
        serde_yaml_ng::to_string(&json!({"spec_version": "1.0", "bindings": [full_entry()]}))
            .unwrap();
    fs::write(&binding_path, yaml).unwrap();

    let loaded = BindingLoader::new()
        .load(&binding_path, false, false)
        .unwrap();
    let ids: Vec<&str> = loaded.iter().map(|m| m.module_id.as_str()).collect();
    assert_eq!(ids, ["users.get_user"]);
}
845
/// Loading a directory returns the modules of its `*.binding.yaml` files in
/// `a`, `b`, `c` order and ignores a YAML file without the binding suffix.
#[test]
fn test_load_directory_sorted() {
    let tmp = TempDir::new().unwrap();

    let names = ["a", "b", "c"];
    for (i, name) in names.iter().enumerate() {
        let yaml = serde_yaml_ng::to_string(&json!({
            "spec_version": "1.0",
            "bindings": [{"module_id": name, "target": format!("pkg:f{i}")}]
        }))
        .unwrap();
        fs::write(tmp.path().join(format!("{name}.binding.yaml")), yaml).unwrap();
    }

    // Decoy: a YAML file that lacks the `.binding.yaml` suffix.
    let decoy = tmp.path().join("unrelated.yaml");
    fs::write(decoy, "irrelevant: true").unwrap();

    let loaded = BindingLoader::new()
        .load(tmp.path(), false, false)
        .unwrap();
    let ids: Vec<&str> = loaded.iter().map(|m| m.module_id.as_str()).collect();
    assert_eq!(ids, ["a", "b", "c"]);
}
863
/// Loading a path that does not exist fails with `PathNotFound`.
#[test]
fn test_nonexistent_path() {
    let tmp = TempDir::new().unwrap();
    let missing = tmp.path().join("nope");

    let outcome = BindingLoader::new().load(&missing, false, false);
    assert!(matches!(
        outcome,
        Err(BindingLoadError::PathNotFound { .. })
    ));
}
872
/// A binding file whose content is not valid YAML fails with `YamlParse`.
#[test]
fn test_malformed_yaml() {
    let tmp = TempDir::new().unwrap();
    let broken = tmp.path().join("bad.binding.yaml");
    fs::write(&broken, "::: not yaml :::\n  - [").unwrap();

    let outcome = BindingLoader::new().load(&broken, false, false);
    assert!(matches!(outcome, Err(BindingLoadError::YamlParse { .. })));
}
881
/// A zero-byte binding file loads successfully and contributes no modules.
#[test]
fn test_empty_file_skipped() {
    let tmp = TempDir::new().unwrap();
    let blank = tmp.path().join("empty.binding.yaml");
    fs::write(&blank, "").unwrap();

    let loaded = BindingLoader::new().load(&blank, false, false).unwrap();
    assert!(loaded.is_empty());
}
890
/// Writes a fully populated module with `YAMLWriter`, loads the output
/// directory back, and compares the loaded module with the original field
/// by field.
#[test]
fn test_round_trip_with_yaml_writer() {
    use crate::output::yaml_writer::YAMLWriter;

    let input_schema = json!({"type": "object", "properties": {"q": {"type": "string"}}});
    let output_schema = json!({"type": "object"});
    let mut original = ScannedModule::new(
        "round.trip".into(),
        "Round-trip test".into(),
        input_schema,
        output_schema,
        vec!["demo".into()],
        "demo.app:handler".into(),
    );

    // Populate the optional fields so the round trip covers them too.
    original.version = "1.2.3".into();
    original.documentation = Some("Docs here".into());
    original.display = Some(json!({"mcp": {"alias": "rt"}, "alias": "round-trip"}));
    original.metadata.insert("http_method".into(), json!("GET"));
    original.annotations = Some(ModuleAnnotations {
        readonly: true,
        streaming: true,
        cache_ttl: 30,
        ..Default::default()
    });

    let tmp = TempDir::new().unwrap();
    let out_dir = tmp.path().to_str().unwrap();
    YAMLWriter
        .write(&[original.clone()], out_dir, false, false, None)
        .unwrap();

    let loaded = BindingLoader::new()
        .load(tmp.path(), false, false)
        .unwrap();
    assert_eq!(loaded.len(), 1);

    let round_tripped = &loaded[0];
    assert_eq!(round_tripped.module_id, original.module_id);
    assert_eq!(round_tripped.target, original.target);
    assert_eq!(round_tripped.description, original.description);
    assert_eq!(round_tripped.documentation, original.documentation);
    assert_eq!(round_tripped.tags, original.tags);
    assert_eq!(round_tripped.version, original.version);
    assert_eq!(round_tripped.input_schema, original.input_schema);
    assert_eq!(round_tripped.output_schema, original.output_schema);
    assert_eq!(round_tripped.metadata, original.metadata);
    assert_eq!(round_tripped.display, original.display);

    // Only the annotation fields set above are checked.
    let ann = round_tripped.annotations.as_ref().unwrap();
    assert_eq!(
        (ann.readonly, ann.streaming, ann.cache_ttl),
        (true, true, 30)
    );
}
943
944    // ---- Wrong-type scalar rejection (D1-1 regression guard) ----
945
/// An integer `module_id` is rejected with `MissingFields` naming
/// `module_id`.
#[test]
fn test_wrong_type_module_id_integer_rejected() {
    let doc = json!({"bindings": [{"module_id": 42, "target": "p:f"}]});
    let err = BindingLoader::new().load_data(&doc, false).unwrap_err();

    let flags_module_id = match &err {
        BindingLoadError::MissingFields { missing_fields, .. } => {
            missing_fields.iter().any(|f| f == "module_id")
        }
        _ => false,
    };
    assert!(flags_module_id, "got: {err:?}");
}
964
/// A boolean `target` is rejected with `MissingFields` naming `target`.
#[test]
fn test_wrong_type_target_bool_rejected() {
    let doc = json!({"bindings": [{"module_id": "x", "target": true}]});
    let err = BindingLoader::new().load_data(&doc, false).unwrap_err();

    let flags_target = match &err {
        BindingLoadError::MissingFields { missing_fields, .. } => {
            missing_fields.iter().any(|f| f == "target")
        }
        _ => false,
    };
    assert!(flags_target, "got: {err:?}");
}
983
/// An empty-string `module_id` is rejected with `MissingFields` naming
/// `module_id`.
#[test]
fn test_empty_string_module_id_rejected() {
    let doc = json!({"bindings": [{"module_id": "", "target": "p:f"}]});
    let err = BindingLoader::new().load_data(&doc, false).unwrap_err();

    let flags_module_id = match &err {
        BindingLoadError::MissingFields { missing_fields, .. } => {
            missing_fields.iter().any(|f| f == "module_id")
        }
        _ => false,
    };
    assert!(flags_module_id, "got: {err:?}");
}
1002
/// With the second `load_data` argument set to `true`, an integer
/// `input_schema` is rejected with `MissingFields` naming `input_schema`.
#[test]
fn test_strict_wrong_type_input_schema_rejected() {
    let entry = json!({
        "module_id": "x",
        "target": "p:f",
        "input_schema": 42,
        "output_schema": {"type": "object"}
    });
    let doc = json!({"bindings": [entry]});
    let err = BindingLoader::new().load_data(&doc, true).unwrap_err();

    let flags_input_schema = match &err {
        BindingLoadError::MissingFields { missing_fields, .. } => {
            missing_fields.iter().any(|f| f == "input_schema")
        }
        _ => false,
    };
    assert!(flags_input_schema, "got: {err:?}");
}
1026
1027    // ---- Recursive WalkDir error propagation (D1-2 regression guard) ----
1028
/// A recursive load over a tree containing an unreadable directory must
/// return `BindingLoadError::FileRead` rather than succeed.
///
/// The unreadable directory is created with mode `0o000`. A drop guard
/// restores mode `0o755` on every exit path, including a panic inside the
/// loader, so `TempDir` can always delete the tree afterwards.
#[test]
#[cfg(unix)]
fn test_recursive_load_surfaces_walkdir_errors() {
    use std::os::unix::fs::PermissionsExt;

    /// Resets the wrapped directory to mode `0o755` when dropped.
    struct RestoreMode<'a>(&'a std::path::Path);

    impl Drop for RestoreMode<'_> {
        fn drop(&mut self) {
            // Best effort: there is nothing useful to do if this fails.
            fs::set_permissions(self.0, fs::Permissions::from_mode(0o755)).ok();
        }
    }

    // Running as root bypasses UNIX permissions and makes this test
    // a no-op. Skip in that case rather than produce a misleading pass.
    let is_root = libc_geteuid() == 0;
    if is_root {
        return;
    }

    let dir = TempDir::new().unwrap();
    let unreadable = dir.path().join("unreadable");
    fs::create_dir(&unreadable).unwrap();
    fs::set_permissions(&unreadable, fs::Permissions::from_mode(0o000)).unwrap();

    // Declared after `dir`, so on unwind it is dropped before `dir` and the
    // permissions are back in place when `TempDir` removes the tree.
    let restore = RestoreMode(&unreadable);

    let result = BindingLoader::new().load(dir.path(), false, true);

    // Restore permissions before asserting so TempDir::drop can clean up.
    drop(restore);

    assert!(
        matches!(result, Err(BindingLoadError::FileRead { .. })),
        "recursive load should propagate per-entry I/O errors, got: {result:?}",
    );
}
1056
/// Returns the effective user ID of the current process.
///
/// The C `geteuid` symbol is declared inline so the tests do not need a
/// `libc` dev-dependency for this single call.
#[cfg(unix)]
fn libc_geteuid() -> u32 {
    extern "C" {
        fn geteuid() -> u32;
    }

    // SAFETY: `geteuid` takes no arguments, touches no caller-provided
    // memory, and simply returns the effective UID.
    unsafe { geteuid() }
}
1067
/// A non-recursive load sees only the binding file in the top directory;
/// a recursive load also picks up the one in the `sub` directory.
#[test]
fn test_load_recursive_finds_nested_files() {
    let tmp = TempDir::new().unwrap();
    let nested = tmp.path().join("sub");
    fs::create_dir(&nested).unwrap();

    // Writes a one-module binding document to `dest`.
    let write_binding = |dest: std::path::PathBuf, module_id: &str, target: &str| {
        let doc = json!({
            "spec_version": "1.0",
            "bindings": [{"module_id": module_id, "target": target}]
        });
        fs::write(dest, serde_yaml_ng::to_string(&doc).unwrap()).unwrap();
    };

    write_binding(tmp.path().join("root.binding.yaml"), "root.mod", "pkg:f0");
    write_binding(nested.join("sub.binding.yaml"), "sub.mod", "pkg:f1");

    // Flat scan: only the top-level file is visible.
    let flat = BindingLoader::new()
        .load(tmp.path(), false, false)
        .unwrap();
    let flat_ids: Vec<&str> = flat.iter().map(|m| m.module_id.as_str()).collect();
    assert_eq!(flat_ids, ["root.mod"]);

    // Recursive scan: both files are found. Sort the ids so the assertion
    // does not depend on traversal order.
    let deep = BindingLoader::new()
        .load(tmp.path(), false, true)
        .unwrap();
    let mut deep_ids: Vec<&str> = deep.iter().map(|m| m.module_id.as_str()).collect();
    deep_ids.sort_unstable();
    assert_eq!(deep_ids, ["root.mod", "sub.mod"]);
}
1101}