Skip to main content

apcore_toolkit/
binding_loader.rs

1// BindingLoader — parse `.binding.yaml` files back into `ScannedModule`.
2//
3// Inverse of `output::yaml_writer::YAMLWriter`. Unlike apcore's own
4// `BindingLoader` (which imports the target and registers a runtime module),
5// this loader is pure data: it parses YAML into `ScannedModule` objects for
6// validation, merging, diffing, or round-trip workflows. No code is loaded.
7
8use std::collections::HashMap;
9use std::fs;
10use std::path::{Path, PathBuf};
11
12use walkdir::WalkDir;
13
14use apcore::module::{ModuleAnnotations, ModuleExample};
15use serde_json::Value;
16use thiserror::Error;
17use tracing::warn;
18
19use crate::types::ScannedModule;
20
/// Binding document `spec_version` values this loader recognises.
///
/// A document declaring any other value is still parsed; the loader only
/// emits a warning for it.
const SUPPORTED_SPEC_VERSIONS: &[&str] = &["1.0"];
22
/// Metadata keys that are never allowed through the loader.
///
/// These names are JS prototype-pollution sinks, so carrying them across
/// runtimes is unsafe. Entries of a binding's `metadata` mapping whose key
/// is `__proto__`, `constructor` or `prototype` are dropped while parsing,
/// which keeps a malicious or malformed binding YAML from handing an
/// attacker-controlled value to downstream consumers. This mirrors the
/// `PROTO_DENY` guard of the TypeScript loader (`src/binding-parser.ts`).
const FORBIDDEN_METADATA_KEYS: &[&str] = &[
    "__proto__",
    "constructor",
    "prototype",
];
30
/// Largest `.binding.yaml` file the loader will read, in bytes (16 MiB).
///
/// A binding file is a structured YAML document, not a data store, so a
/// file above this size is almost certainly pathological. The limit also
/// bounds memory use: a loaded file is materialised in several
/// representations (the raw text, a `serde_yaml_ng::Value`, and a
/// `serde_json::Value`).
const MAX_BINDING_FILE_SIZE: u64 = 16 << 20;
38
/// Largest number of `.binding.yaml` files a single `load` call accepts.
///
/// The check is applied to the complete list of files collected for the
/// call (which, for a recursive load, includes files found in
/// subdirectories). It keeps a maliciously large directory from causing
/// unbounded memory consumption while results are accumulated.
const MAX_BINDING_FILES_PER_DIR: usize = 10_000;
44
45// TODO(release-gate): deep-chain parity with Python/TypeScript BindingLoader — manual
46// cross-SDK review required before tagging 0.5.0. D11 audit was inconclusive due to
47// sub-agent file access limits. BindingLoader is the flagship 0.5.0 cross-SDK feature.
48
49/// Errors produced by [`BindingLoader`].
50#[derive(Debug, Error)]
51pub enum BindingLoadError {
52    /// The path does not exist or cannot be stat'd.
53    #[error("path does not exist: {path}")]
54    PathNotFound { path: String },
55
56    /// A binding file exceeds the maximum allowed size.
57    #[error("binding file {path} is too large ({size} bytes > {max} byte limit)")]
58    FileTooLarge { path: String, size: u64, max: u64 },
59
60    /// The directory contains more binding files than the per-directory limit.
61    #[error("directory {path} contains more than {max} binding files")]
62    TooManyFiles { path: String, max: usize },
63
64    /// Failure reading a binding file from disk.
65    #[error("failed to read {path}: {source}")]
66    FileRead {
67        path: String,
68        #[source]
69        source: std::io::Error,
70    },
71
72    /// The file content is not valid YAML.
73    #[error("failed to parse YAML in {path}: {source}")]
74    YamlParse {
75        path: String,
76        #[source]
77        source: serde_yaml_ng::Error,
78    },
79
80    /// A binding entry is missing or has an invalid value for one or more
81    /// required fields. Covers three cases: absent key, explicit `null`, and
82    /// wrong-type scalar (e.g. `module_id: 42`, `target: true`). All three
83    /// are treated as "required field not supplied" rather than silently
84    /// coerced to empty strings or zero values downstream.
85    #[error("missing or invalid required fields {missing_fields:?} (file={}, module_id={})",
86        .path.as_deref().unwrap_or("<inline>"),
87        .module_id.as_deref().unwrap_or("<unknown>"))]
88    MissingFields {
89        path: Option<String>,
90        module_id: Option<String>,
91        missing_fields: Vec<String>,
92    },
93
94    /// The document structure is invalid (e.g. top-level is not a mapping,
95    /// or `bindings` is not a list).
96    #[error("invalid binding structure in {}: {reason}", .path.as_deref().unwrap_or("<inline>"))]
97    InvalidStructure {
98        path: Option<String>,
99        reason: String,
100    },
101}
102
/// Turns `.binding.yaml` files (or pre-parsed binding data) into
/// [`ScannedModule`] values.
///
/// The loader holds no state; it is a unit struct.
///
/// # Usage
///
/// ```ignore
/// let loader = BindingLoader::new();
/// // Loose mode, immediate directory only.
/// let modules = loader.load(Path::new("bindings/"), false, false)?;
/// // Strict mode, single file.
/// let strict = loader.load(Path::new("foo.binding.yaml"), true, false)?;
/// ```
///
/// # Modes
///
/// * Loose (`strict = false`): only `module_id` and `target` are required;
///   optional fields that are missing fall back to defaults.
/// * Strict (`strict = true`): `input_schema` and `output_schema` are
///   required as well.
#[derive(Default, Debug)]
pub struct BindingLoader;
120
121impl BindingLoader {
122    /// Create a new BindingLoader.
123    pub fn new() -> Self {
124        Self
125    }
126
127    /// Load one file or every `*.binding.yaml` in a directory.
128    ///
129    /// When `recursive` is `true`, subdirectories are traversed depth-first using
130    /// `walkdir`. When `false` (default), only the immediate directory is scanned.
131    ///
132    /// # OS error handling vs. TypeScript
133    ///
134    /// Note: unlike the TypeScript implementation which warns and continues on OS
135    /// permission errors (EACCES/EPERM), this implementation propagates any IO error
136    /// as `BindingLoadError::FileRead` and aborts the entire load. This is the
137    /// fail-fast behavior.
138    pub fn load(
139        &self,
140        path: &Path,
141        strict: bool,
142        recursive: bool,
143    ) -> Result<Vec<ScannedModule>, BindingLoadError> {
144        let files: Vec<PathBuf> = if path.is_file() {
145            vec![path.to_path_buf()]
146        } else if path.is_dir() {
147            let mut entries: Vec<PathBuf> = if recursive {
148                // Surface per-entry traversal failures (permission denied,
149                // broken symlink, I/O errors) rather than silently dropping
150                // them — matches the non-recursive branch's policy so a
151                // caller switching `recursive=false` → `true` gets a
152                // consistent error contract.
153                let mut flat: Vec<PathBuf> = Vec::new();
154                for entry_result in WalkDir::new(path) {
155                    let entry = entry_result.map_err(|e| {
156                        let io_err = e
157                            .into_io_error()
158                            .unwrap_or_else(|| std::io::Error::other("walkdir traversal error"));
159                        BindingLoadError::FileRead {
160                            path: path.display().to_string(),
161                            source: io_err,
162                        }
163                    })?;
164                    if entry.file_type().is_file()
165                        && entry
166                            .file_name()
167                            .to_string_lossy()
168                            .ends_with(".binding.yaml")
169                    {
170                        flat.push(entry.into_path());
171                    }
172                }
173                flat
174            } else {
175                let read_dir = fs::read_dir(path).map_err(|e| BindingLoadError::FileRead {
176                    path: path.display().to_string(),
177                    source: e,
178                })?;
179                let mut flat: Vec<PathBuf> = Vec::new();
180                for entry_result in read_dir {
181                    match entry_result {
182                        Ok(entry) => {
183                            let p = entry.path();
184                            let is_binding = p
185                                .file_name()
186                                .and_then(|n| n.to_str())
187                                .is_some_and(|n| n.ends_with(".binding.yaml"));
188                            if is_binding {
189                                flat.push(p);
190                            }
191                        }
192                        Err(e) => {
193                            // Surface per-entry failures rather than silently
194                            // discarding them; a permission error on a single
195                            // file should not make the directory load partial.
196                            return Err(BindingLoadError::FileRead {
197                                path: path.display().to_string(),
198                                source: e,
199                            });
200                        }
201                    }
202                }
203                flat
204            };
205            entries.sort();
206            entries
207        } else {
208            return Err(BindingLoadError::PathNotFound {
209                path: path.display().to_string(),
210            });
211        };
212
213        if files.len() > MAX_BINDING_FILES_PER_DIR {
214            return Err(BindingLoadError::TooManyFiles {
215                path: path.display().to_string(),
216                max: MAX_BINDING_FILES_PER_DIR,
217            });
218        }
219
220        let mut modules: Vec<ScannedModule> = Vec::new();
221        for f in files {
222            let file_size = fs::metadata(&f)
223                .map_err(|e| BindingLoadError::FileRead {
224                    path: f.display().to_string(),
225                    source: e,
226                })?
227                .len();
228            if file_size > MAX_BINDING_FILE_SIZE {
229                return Err(BindingLoadError::FileTooLarge {
230                    path: f.display().to_string(),
231                    size: file_size,
232                    max: MAX_BINDING_FILE_SIZE,
233                });
234            }
235            let content = fs::read_to_string(&f).map_err(|e| BindingLoadError::FileRead {
236                path: f.display().to_string(),
237                source: e,
238            })?;
239            let raw: serde_yaml_ng::Value =
240                serde_yaml_ng::from_str(&content).map_err(|e| BindingLoadError::YamlParse {
241                    path: f.display().to_string(),
242                    source: e,
243                })?;
244            if raw.is_null() {
245                warn!("BindingLoader: {} is empty, skipping", f.display());
246                continue;
247            }
248            let json_value =
249                serde_json::to_value(raw).map_err(|e| BindingLoadError::InvalidStructure {
250                    path: Some(f.display().to_string()),
251                    reason: format!("YAML → JSON conversion failed: {e}"),
252                })?;
253            modules.extend(self.parse_document(
254                &json_value,
255                Some(&f.display().to_string()),
256                strict,
257            )?);
258        }
259        Ok(modules)
260    }
261
262    /// Parse a pre-loaded binding JSON value (`{"bindings": [...]}`).
263    pub fn load_data(
264        &self,
265        data: &Value,
266        strict: bool,
267    ) -> Result<Vec<ScannedModule>, BindingLoadError> {
268        self.parse_document(data, None, strict)
269    }
270
271    // ------------------------------------------------------------------
272    // Internal helpers
273    // ------------------------------------------------------------------
274
275    fn parse_document(
276        &self,
277        raw: &Value,
278        file_path: Option<&str>,
279        strict: bool,
280    ) -> Result<Vec<ScannedModule>, BindingLoadError> {
281        let obj = raw
282            .as_object()
283            .ok_or_else(|| BindingLoadError::InvalidStructure {
284                path: file_path.map(String::from),
285                reason: "top-level binding document must be a mapping".into(),
286            })?;
287
288        Self::check_spec_version(obj.get("spec_version"), file_path);
289
290        let bindings = obj
291            .get("bindings")
292            .and_then(|v| v.as_array())
293            .ok_or_else(|| BindingLoadError::InvalidStructure {
294                path: file_path.map(String::from),
295                reason: "'bindings' key missing or not a list".into(),
296            })?;
297
298        let mut modules: Vec<ScannedModule> = Vec::with_capacity(bindings.len());
299        for entry in bindings {
300            let entry_obj =
301                entry
302                    .as_object()
303                    .ok_or_else(|| BindingLoadError::InvalidStructure {
304                        path: file_path.map(String::from),
305                        reason: "binding entry must be a mapping".into(),
306                    })?;
307            modules.push(Self::parse_entry(entry_obj, file_path, strict)?);
308        }
309        Ok(modules)
310    }
311
312    fn check_spec_version(spec_version: Option<&Value>, file_path: Option<&str>) {
313        let where_str = file_path.unwrap_or("<inline>");
314        match spec_version {
315            None | Some(Value::Null) => {
316                warn!(
317                    "BindingLoader: {} missing 'spec_version'; defaulting to '1.0'.",
318                    where_str
319                );
320            }
321            Some(v) => {
322                let as_str = v.as_str();
323                if !as_str.is_some_and(|s| SUPPORTED_SPEC_VERSIONS.contains(&s)) {
324                    warn!(
325                        "BindingLoader: {} has spec_version={} newer than supported {:?}; proceeding best-effort.",
326                        where_str, v, SUPPORTED_SPEC_VERSIONS
327                    );
328                }
329            }
330        }
331    }
332
333    fn parse_entry(
334        entry: &serde_json::Map<String, Value>,
335        file_path: Option<&str>,
336        strict: bool,
337    ) -> Result<ScannedModule, BindingLoadError> {
338        let required: &[&str] = if strict {
339            &["module_id", "target", "input_schema", "output_schema"]
340        } else {
341            &["module_id", "target"]
342        };
343
344        // A required field is "missing or invalid" when absent, null, or of
345        // the wrong type. Previously only None/Null was rejected, so
346        // `module_id: 42` or `target: true` would silently coerce to an
347        // empty string downstream and corrupt the registered module.
348        let missing: Vec<String> = required
349            .iter()
350            .filter(|f| match entry.get(**f) {
351                None | Some(Value::Null) => true,
352                Some(v) => match **f {
353                    // Schemas must be objects.
354                    "input_schema" | "output_schema" => !v.is_object(),
355                    // Identifiers must be non-empty strings.
356                    _ => v.as_str().is_none_or(|s| s.is_empty()),
357                },
358            })
359            .map(|f| (*f).to_string())
360            .collect();
361        if !missing.is_empty() {
362            return Err(BindingLoadError::MissingFields {
363                path: file_path.map(String::from),
364                module_id: entry
365                    .get("module_id")
366                    .and_then(|v| v.as_str())
367                    .map(String::from),
368                missing_fields: missing,
369            });
370        }
371
372        let module_id = entry
373            .get("module_id")
374            .and_then(|v| v.as_str())
375            .unwrap_or_default()
376            .to_string();
377
378        let target = entry
379            .get("target")
380            .and_then(|v| v.as_str())
381            .unwrap_or_default()
382            .to_string();
383
384        let description = entry
385            .get("description")
386            .and_then(|v| v.as_str())
387            .unwrap_or("")
388            .to_string();
389
390        let version = entry
391            .get("version")
392            .and_then(|v| v.as_str())
393            .unwrap_or("1.0.0")
394            .to_string();
395
396        let documentation = entry
397            .get("documentation")
398            .and_then(|v| v.as_str())
399            .map(String::from);
400
401        let suggested_alias = entry
402            .get("suggested_alias")
403            .and_then(|v| v.as_str())
404            .map(String::from);
405
406        let input_schema = entry
407            .get("input_schema")
408            .filter(|v| !v.is_null())
409            .cloned()
410            .unwrap_or_else(|| Value::Object(serde_json::Map::new()));
411
412        let output_schema = entry
413            .get("output_schema")
414            .filter(|v| !v.is_null())
415            .cloned()
416            .unwrap_or_else(|| Value::Object(serde_json::Map::new()));
417
418        let tags: Vec<String> = entry
419            .get("tags")
420            .and_then(|v| v.as_array())
421            .map(|arr| {
422                arr.iter()
423                    .filter_map(|v| v.as_str().map(String::from))
424                    .collect()
425            })
426            .unwrap_or_default();
427
428        let warnings: Vec<String> = entry
429            .get("warnings")
430            .and_then(|v| v.as_array())
431            .map(|arr| {
432                arr.iter()
433                    .filter_map(|v| v.as_str().map(String::from))
434                    .collect()
435            })
436            .unwrap_or_default();
437
438        let metadata: HashMap<String, Value> = entry
439            .get("metadata")
440            .and_then(|v| v.as_object())
441            .map(|o| {
442                o.iter()
443                    .filter_map(|(k, v)| {
444                        if FORBIDDEN_METADATA_KEYS.contains(&k.as_str()) {
445                            warn!(
446                                module_id = %module_id,
447                                key = %k,
448                                "BindingLoader: dropping forbidden metadata key (prototype-pollution guard)"
449                            );
450                            None
451                        } else {
452                            Some((k.clone(), v.clone()))
453                        }
454                    })
455                    .collect()
456            })
457            .unwrap_or_default();
458
459        let display = Self::parse_display(entry.get("display"), &module_id);
460
461        let annotations = Self::parse_annotations(entry.get("annotations"), &module_id);
462        let examples = Self::parse_examples(entry.get("examples"), &module_id);
463
464        Ok(ScannedModule {
465            module_id,
466            description,
467            input_schema,
468            output_schema,
469            tags,
470            target,
471            version,
472            annotations,
473            documentation,
474            suggested_alias,
475            examples,
476            metadata,
477            display,
478            warnings,
479        })
480    }
481
482    fn parse_display(value: Option<&Value>, module_id: &str) -> Option<Value> {
483        let v = value?;
484        if v.is_null() {
485            return None;
486        }
487        if !v.is_object() {
488            warn!(
489                "BindingLoader: display for module {} is not an object; ignoring",
490                module_id
491            );
492            return None;
493        }
494        Some(v.clone())
495    }
496
497    fn parse_annotations(value: Option<&Value>, module_id: &str) -> Option<ModuleAnnotations> {
498        let v = value?;
499        if v.is_null() {
500            return None;
501        }
502        if !v.is_object() {
503            warn!(
504                "BindingLoader: annotations for module {} is not a dict; treating as None",
505                module_id
506            );
507            return None;
508        }
509        match serde_json::from_value::<ModuleAnnotations>(v.clone()) {
510            Ok(ann) => Some(ann),
511            Err(e) => {
512                warn!(
513                    "BindingLoader: failed to parse annotations for module {}: {}; treating as None",
514                    module_id, e
515                );
516                None
517            }
518        }
519    }
520
521    fn parse_examples(value: Option<&Value>, module_id: &str) -> Vec<ModuleExample> {
522        let Some(v) = value else {
523            return Vec::new();
524        };
525        if v.is_null() {
526            return Vec::new();
527        }
528        let Some(arr) = v.as_array() else {
529            warn!(
530                "BindingLoader: examples for module {} is not a list; ignoring",
531                module_id
532            );
533            return Vec::new();
534        };
535        let mut result = Vec::with_capacity(arr.len());
536        for (i, ex) in arr.iter().enumerate() {
537            if !ex.is_object() {
538                warn!(
539                    "BindingLoader: examples[{}] of module {} is not a dict; ignoring",
540                    i, module_id
541                );
542                continue;
543            }
544            match serde_json::from_value::<ModuleExample>(ex.clone()) {
545                Ok(parsed) => result.push(parsed),
546                Err(e) => warn!(
547                    "BindingLoader: examples[{}] of module {} malformed: {}; ignoring",
548                    i, module_id, e
549                ),
550            }
551        }
552        result
553    }
554}
555
556#[cfg(test)]
557mod tests {
558    use super::*;
559    use serde_json::json;
560    use std::fs;
561    use tempfile::TempDir;
562
563    fn minimal_entry() -> Value {
564        json!({"module_id": "x.y", "target": "pkg:func"})
565    }
566
567    fn full_entry() -> Value {
568        json!({
569            "module_id": "users.get_user",
570            "target": "myapp.views:get_user",
571            "description": "Get a user",
572            "documentation": "Returns a user by ID.",
573            "tags": ["users", "get"],
574            "version": "2.0.0",
575            "annotations": {"readonly": true, "cacheable": true, "cache_ttl": 60},
576            "examples": [
577                {"title": "happy", "inputs": {"id": 1}, "output": {"name": "alice"}}
578            ],
579            "metadata": {"http_method": "GET"},
580            "input_schema": {"type": "object"},
581            "output_schema": {"type": "object"},
582            "display": {"mcp": {"alias": "users_get"}, "alias": "users.get"},
583            "suggested_alias": "users.get.alt",
584            "warnings": ["stale"]
585        })
586    }
587
588    #[test]
589    fn test_loose_minimum_entry() {
590        let loader = BindingLoader::new();
591        let modules = loader
592            .load_data(&json!({"bindings": [minimal_entry()]}), false)
593            .unwrap();
594        assert_eq!(modules.len(), 1);
595        let m = &modules[0];
596        assert_eq!(m.module_id, "x.y");
597        assert_eq!(m.target, "pkg:func");
598        assert_eq!(m.description, "");
599        assert_eq!(m.version, "1.0.0");
600        assert!(m.annotations.is_none());
601        assert!(m.display.is_none());
602        assert!(m.tags.is_empty());
603        assert_eq!(m.input_schema, json!({}));
604        assert_eq!(m.output_schema, json!({}));
605    }
606
607    // D11-5b regression: `__proto__` / `constructor` / `prototype` keys in
608    // binding-YAML metadata must be dropped at parse time so they cannot
609    // propagate to JS-side consumers (cross-language prototype-pollution
610    // guard). Mirrors the TypeScript loader's `PROTO_DENY` and the Python
611    // loader's `_FORBIDDEN_METADATA_KEYS`.
612    #[test]
613    fn test_metadata_filters_proto_pollution_keys() {
614        let loader = BindingLoader::new();
615        let entry = json!({
616            "module_id": "x.y",
617            "target": "pkg:func",
618            "metadata": {
619                "__proto__": {"polluted": true},
620                "constructor": "evil",
621                "prototype": ["bad"],
622                "safe_key": "kept",
623            }
624        });
625        let modules = loader
626            .load_data(&json!({"bindings": [entry]}), false)
627            .unwrap();
628        assert_eq!(modules.len(), 1);
629        let metadata = &modules[0].metadata;
630        assert!(!metadata.contains_key("__proto__"));
631        assert!(!metadata.contains_key("constructor"));
632        assert!(!metadata.contains_key("prototype"));
633        assert_eq!(metadata.get("safe_key"), Some(&json!("kept")));
634    }
635
636    #[test]
637    fn test_strict_requires_input_schema() {
638        let loader = BindingLoader::new();
639        let err = loader
640            .load_data(&json!({"bindings": [minimal_entry()]}), true)
641            .unwrap_err();
642        match err {
643            BindingLoadError::MissingFields {
644                missing_fields,
645                module_id,
646                ..
647            } => {
648                assert!(missing_fields.contains(&"input_schema".to_string()));
649                assert!(missing_fields.contains(&"output_schema".to_string()));
650                assert_eq!(module_id.as_deref(), Some("x.y"));
651            }
652            _ => panic!("expected MissingFields, got {err:?}"),
653        }
654    }
655
656    #[test]
657    fn test_strict_accepts_when_schemas_present() {
658        let loader = BindingLoader::new();
659        let entry = json!({
660            "module_id": "x.y",
661            "target": "pkg:func",
662            "input_schema": {"type": "object"},
663            "output_schema": {"type": "object"}
664        });
665        let modules = loader
666            .load_data(&json!({"bindings": [entry]}), true)
667            .unwrap();
668        assert_eq!(modules.len(), 1);
669    }
670
671    #[test]
672    fn test_missing_module_id_always_fails() {
673        let loader = BindingLoader::new();
674        let err = loader
675            .load_data(&json!({"bindings": [{"target": "p:f"}]}), false)
676            .unwrap_err();
677        assert!(matches!(
678            err,
679            BindingLoadError::MissingFields { ref missing_fields, .. }
680                if missing_fields.contains(&"module_id".to_string())
681        ));
682    }
683
684    #[test]
685    fn test_missing_target_always_fails() {
686        let loader = BindingLoader::new();
687        let err = loader
688            .load_data(&json!({"bindings": [{"module_id": "x"}]}), false)
689            .unwrap_err();
690        assert!(matches!(
691            err,
692            BindingLoadError::MissingFields { ref missing_fields, .. }
693                if missing_fields.contains(&"target".to_string())
694        ));
695    }
696
697    #[test]
698    fn test_missing_bindings_key() {
699        let loader = BindingLoader::new();
700        let err = loader
701            .load_data(&json!({"spec_version": "1.0"}), false)
702            .unwrap_err();
703        assert!(matches!(
704            err,
705            BindingLoadError::InvalidStructure { ref reason, .. } if reason.contains("bindings")
706        ));
707    }
708
709    #[test]
710    fn test_top_level_not_mapping() {
711        let loader = BindingLoader::new();
712        let err = loader.load_data(&json!(["a", "b"]), false).unwrap_err();
713        assert!(matches!(
714            err,
715            BindingLoadError::InvalidStructure { ref reason, .. } if reason.contains("mapping")
716        ));
717    }
718
719    #[test]
720    fn test_entry_not_a_mapping() {
721        let loader = BindingLoader::new();
722        let err = loader
723            .load_data(&json!({"bindings": ["scalar"]}), false)
724            .unwrap_err();
725        assert!(matches!(
726            err,
727            BindingLoadError::InvalidStructure { ref reason, .. } if reason.contains("mapping")
728        ));
729    }
730
731    #[test]
732    fn test_annotations_parsed() {
733        let loader = BindingLoader::new();
734        let m = &loader
735            .load_data(&json!({"bindings": [full_entry()]}), false)
736            .unwrap()[0];
737        let ann = m.annotations.as_ref().expect("annotations should parse");
738        assert!(ann.readonly);
739        assert!(ann.cacheable);
740        assert_eq!(ann.cache_ttl, 60);
741    }
742
743    #[test]
744    fn test_annotations_wrong_type_treated_as_none() {
745        let loader = BindingLoader::new();
746        let m = &loader
747            .load_data(
748                &json!({"bindings": [{"module_id": "x", "target": "p:f", "annotations": "readonly"}]}),
749                false,
750            )
751            .unwrap()[0];
752        assert!(m.annotations.is_none());
753    }
754
755    #[test]
756    fn test_missing_fields_error_message_is_readable() {
757        let loader = BindingLoader::new();
758        let err = loader
759            .load_data(&json!({"bindings": [{"module_id": "x"}]}), false)
760            .unwrap_err();
761        let msg = err.to_string();
762        // No raw debug-format wrappers leak into the user-facing message.
763        assert!(!msg.contains("Some("), "got: {msg}");
764        assert!(!msg.contains("None"), "got: {msg}");
765        assert!(msg.contains("x"), "module_id missing from message: {msg}");
766        assert!(msg.contains("target"), "missing field not listed: {msg}");
767    }
768
769    #[test]
770    fn test_display_wrong_type_dropped() {
771        // Malformed display (non-object) is dropped. We can't easily capture
772        // tracing warnings without a subscriber, but we assert the drop occurs.
773        let loader = BindingLoader::new();
774        let m = &loader
775            .load_data(
776                &json!({"bindings": [{"module_id": "x", "target": "p:f", "display": "not-a-dict"}]}),
777                false,
778            )
779            .unwrap()[0];
780        assert!(m.display.is_none());
781    }
782
783    #[test]
784    fn test_display_null_dropped() {
785        let loader = BindingLoader::new();
786        let m = &loader
787            .load_data(
788                &json!({"bindings": [{"module_id": "x", "target": "p:f", "display": null}]}),
789                false,
790            )
791            .unwrap()[0];
792        assert!(m.display.is_none());
793    }
794
795    #[test]
796    fn test_display_preserved() {
797        let loader = BindingLoader::new();
798        let m = &loader
799            .load_data(&json!({"bindings": [full_entry()]}), false)
800            .unwrap()[0];
801        assert_eq!(
802            m.display.as_ref().unwrap(),
803            &json!({"mcp": {"alias": "users_get"}, "alias": "users.get"})
804        );
805    }
806
807    #[test]
808    fn test_examples_parsed() {
809        let loader = BindingLoader::new();
810        let m = &loader
811            .load_data(&json!({"bindings": [full_entry()]}), false)
812            .unwrap()[0];
813        assert_eq!(m.examples.len(), 1);
814        assert_eq!(m.examples[0].title, "happy");
815    }
816
817    #[test]
818    fn test_file_too_large_error_variant() {
819        // Verify the FileTooLarge variant can be constructed and displays correctly.
820        // The actual 16 MiB threshold is impractical to trigger in a unit test
821        // (we'd need to write a 16 MiB file), but this test confirms the error
822        // type is wired up and the display message is sensible.
823        let err = BindingLoadError::FileTooLarge {
824            path: "/bindings/huge.binding.yaml".to_string(),
825            size: MAX_BINDING_FILE_SIZE + 1,
826            max: MAX_BINDING_FILE_SIZE,
827        };
828        let msg = err.to_string();
829        assert!(
830            msg.contains("too large"),
831            "message should mention size: {msg}"
832        );
833        assert!(
834            msg.contains("huge.binding.yaml"),
835            "message should mention path: {msg}"
836        );
837    }
838
839    #[test]
840    fn test_load_single_file() {
841        let dir = TempDir::new().unwrap();
842        let file = dir.path().join("one.binding.yaml");
843        let doc = json!({"spec_version": "1.0", "bindings": [full_entry()]});
844        fs::write(&file, serde_yaml_ng::to_string(&doc).unwrap()).unwrap();
845        let modules = BindingLoader::new().load(&file, false, false).unwrap();
846        assert_eq!(modules.len(), 1);
847        assert_eq!(modules[0].module_id, "users.get_user");
848    }
849
850    #[test]
851    fn test_load_directory_sorted() {
852        let dir = TempDir::new().unwrap();
853        for (i, name) in ["a", "b", "c"].iter().enumerate() {
854            let f = dir.path().join(format!("{name}.binding.yaml"));
855            let doc = json!({
856                "spec_version": "1.0",
857                "bindings": [{"module_id": name, "target": format!("pkg:f{i}")}]
858            });
859            fs::write(&f, serde_yaml_ng::to_string(&doc).unwrap()).unwrap();
860        }
861        fs::write(dir.path().join("unrelated.yaml"), "irrelevant: true").unwrap();
862
863        let modules = BindingLoader::new().load(dir.path(), false, false).unwrap();
864        let ids: Vec<&str> = modules.iter().map(|m| m.module_id.as_str()).collect();
865        assert_eq!(ids, vec!["a", "b", "c"]);
866    }
867
868    #[test]
869    fn test_nonexistent_path() {
870        let dir = TempDir::new().unwrap();
871        let err = BindingLoader::new()
872            .load(&dir.path().join("nope"), false, false)
873            .unwrap_err();
874        assert!(matches!(err, BindingLoadError::PathNotFound { .. }));
875    }
876
877    #[test]
878    fn test_malformed_yaml() {
879        let dir = TempDir::new().unwrap();
880        let f = dir.path().join("bad.binding.yaml");
881        fs::write(&f, "::: not yaml :::\n  - [").unwrap();
882        let err = BindingLoader::new().load(&f, false, false).unwrap_err();
883        assert!(matches!(err, BindingLoadError::YamlParse { .. }));
884    }
885
886    #[test]
887    fn test_empty_file_skipped() {
888        let dir = TempDir::new().unwrap();
889        let f = dir.path().join("empty.binding.yaml");
890        fs::write(&f, "").unwrap();
891        let modules = BindingLoader::new().load(&f, false, false).unwrap();
892        assert!(modules.is_empty());
893    }
894
895    #[test]
896    fn test_round_trip_with_yaml_writer() {
897        use crate::output::yaml_writer::YAMLWriter;
898
899        let mut original = ScannedModule::new(
900            "round.trip".into(),
901            "Round-trip test".into(),
902            json!({"type": "object", "properties": {"q": {"type": "string"}}}),
903            json!({"type": "object"}),
904            vec!["demo".into()],
905            "demo.app:handler".into(),
906        );
907        original.version = "1.2.3".into();
908        original.annotations = Some(ModuleAnnotations {
909            readonly: true,
910            streaming: true,
911            cache_ttl: 30,
912            ..Default::default()
913        });
914        original.documentation = Some("Docs here".into());
915        original.metadata.insert("http_method".into(), json!("GET"));
916        original.display = Some(json!({"mcp": {"alias": "rt"}, "alias": "round-trip"}));
917
918        let dir = TempDir::new().unwrap();
919        YAMLWriter
920            .write(
921                &[original.clone()],
922                dir.path().to_str().unwrap(),
923                false,
924                false,
925                None,
926            )
927            .unwrap();
928
929        let loaded = BindingLoader::new().load(dir.path(), false, false).unwrap();
930        assert_eq!(loaded.len(), 1);
931        let m = &loaded[0];
932        assert_eq!(m.module_id, original.module_id);
933        assert_eq!(m.target, original.target);
934        assert_eq!(m.description, original.description);
935        assert_eq!(m.documentation, original.documentation);
936        assert_eq!(m.tags, original.tags);
937        assert_eq!(m.version, original.version);
938        assert_eq!(m.input_schema, original.input_schema);
939        assert_eq!(m.output_schema, original.output_schema);
940        assert_eq!(m.metadata, original.metadata);
941        assert_eq!(m.display, original.display);
942        let ann = m.annotations.as_ref().unwrap();
943        assert!(ann.readonly);
944        assert!(ann.streaming);
945        assert_eq!(ann.cache_ttl, 30);
946    }
947
948    // ---- Wrong-type scalar rejection (D1-1 regression guard) ----
949
950    #[test]
951    fn test_wrong_type_module_id_integer_rejected() {
952        let loader = BindingLoader::new();
953        let err = loader
954            .load_data(
955                &json!({"bindings": [{"module_id": 42, "target": "p:f"}]}),
956                false,
957            )
958            .unwrap_err();
959        assert!(
960            matches!(
961                &err,
962                BindingLoadError::MissingFields { missing_fields, .. }
963                    if missing_fields.iter().any(|f| f == "module_id")
964            ),
965            "got: {err:?}"
966        );
967    }
968
969    #[test]
970    fn test_wrong_type_target_bool_rejected() {
971        let loader = BindingLoader::new();
972        let err = loader
973            .load_data(
974                &json!({"bindings": [{"module_id": "x", "target": true}]}),
975                false,
976            )
977            .unwrap_err();
978        assert!(
979            matches!(
980                &err,
981                BindingLoadError::MissingFields { missing_fields, .. }
982                    if missing_fields.iter().any(|f| f == "target")
983            ),
984            "got: {err:?}"
985        );
986    }
987
988    #[test]
989    fn test_empty_string_module_id_rejected() {
990        let loader = BindingLoader::new();
991        let err = loader
992            .load_data(
993                &json!({"bindings": [{"module_id": "", "target": "p:f"}]}),
994                false,
995            )
996            .unwrap_err();
997        assert!(
998            matches!(
999                &err,
1000                BindingLoadError::MissingFields { missing_fields, .. }
1001                    if missing_fields.iter().any(|f| f == "module_id")
1002            ),
1003            "got: {err:?}"
1004        );
1005    }
1006
1007    #[test]
1008    fn test_strict_wrong_type_input_schema_rejected() {
1009        let loader = BindingLoader::new();
1010        let err = loader
1011            .load_data(
1012                &json!({"bindings": [{
1013                    "module_id": "x",
1014                    "target": "p:f",
1015                    "input_schema": 42,
1016                    "output_schema": {"type": "object"}
1017                }]}),
1018                true,
1019            )
1020            .unwrap_err();
1021        assert!(
1022            matches!(
1023                &err,
1024                BindingLoadError::MissingFields { missing_fields, .. }
1025                    if missing_fields.iter().any(|f| f == "input_schema")
1026            ),
1027            "got: {err:?}"
1028        );
1029    }
1030
1031    // ---- Recursive WalkDir error propagation (D1-2 regression guard) ----
1032
1033    #[test]
1034    #[cfg(unix)]
1035    fn test_recursive_load_surfaces_walkdir_errors() {
1036        use std::os::unix::fs::PermissionsExt;
1037
1038        // Running as root bypasses UNIX permissions and makes this test
1039        // a no-op. Skip in that case rather than produce a misleading pass.
1040        let is_root = libc_geteuid() == 0;
1041        if is_root {
1042            return;
1043        }
1044
1045        let dir = TempDir::new().unwrap();
1046        let unreadable = dir.path().join("unreadable");
1047        fs::create_dir(&unreadable).unwrap();
1048        fs::set_permissions(&unreadable, fs::Permissions::from_mode(0o000)).unwrap();
1049
1050        let result = BindingLoader::new().load(dir.path(), false, true);
1051
1052        // Restore permissions so TempDir::drop can clean up.
1053        fs::set_permissions(&unreadable, fs::Permissions::from_mode(0o755)).ok();
1054
1055        assert!(
1056            matches!(result, Err(BindingLoadError::FileRead { .. })),
1057            "recursive load should propagate per-entry I/O errors, got: {result:?}",
1058        );
1059    }
1060
1061    #[cfg(unix)]
1062    fn libc_geteuid() -> u32 {
1063        // Avoid a libc dev-dep solely for this test — inline the syscall.
1064        extern "C" {
1065            fn geteuid() -> u32;
1066        }
1067        // SAFETY: `geteuid` is a stateless C function that takes no args
1068        // and returns the effective UID.
1069        unsafe { geteuid() }
1070    }
1071
1072    #[test]
1073    fn test_load_recursive_finds_nested_files() {
1074        let dir = TempDir::new().unwrap();
1075        let subdir = dir.path().join("sub");
1076        fs::create_dir(&subdir).unwrap();
1077
1078        // File in root dir
1079        let doc_root = json!({"spec_version": "1.0", "bindings": [{"module_id": "root.mod", "target": "pkg:f0"}]});
1080        fs::write(
1081            dir.path().join("root.binding.yaml"),
1082            serde_yaml_ng::to_string(&doc_root).unwrap(),
1083        )
1084        .unwrap();
1085
1086        // File in subdir
1087        let doc_sub = json!({"spec_version": "1.0", "bindings": [{"module_id": "sub.mod", "target": "pkg:f1"}]});
1088        fs::write(
1089            subdir.join("sub.binding.yaml"),
1090            serde_yaml_ng::to_string(&doc_sub).unwrap(),
1091        )
1092        .unwrap();
1093
1094        // Non-recursive: only root
1095        let flat = BindingLoader::new().load(dir.path(), false, false).unwrap();
1096        let flat_ids: Vec<&str> = flat.iter().map(|m| m.module_id.as_str()).collect();
1097        assert_eq!(flat_ids, vec!["root.mod"]);
1098
1099        // Recursive: both
1100        let recursive = BindingLoader::new().load(dir.path(), false, true).unwrap();
1101        let mut rec_ids: Vec<&str> = recursive.iter().map(|m| m.module_id.as_str()).collect();
1102        rec_ids.sort();
1103        assert_eq!(rec_ids, vec!["root.mod", "sub.mod"]);
1104    }
1105}