Skip to main content

apcore_toolkit/output/
yaml_writer.rs

1// YAML binding file generator.
2//
3// Writes ScannedModule instances as .binding.yaml files compatible with
4// apcore::BindingLoader.
5
6use std::fs;
7use std::io::Write;
8use std::path::Path;
9use std::sync::LazyLock;
10
11use chrono::Utc;
12use regex::Regex;
13use tracing::{debug, warn};
14
15use crate::output::errors::WriteError;
16use crate::output::types::{Verifier, WriteResult};
17use crate::output::verifiers::{run_verifier_chain, YAMLVerifier};
18use crate::serializers::annotations_to_dict;
19use crate::types::ScannedModule;
20
21/// Generates `.binding.yaml` files from ScannedModule instances.
22pub struct YAMLWriter;
23
24impl YAMLWriter {
25    /// Write YAML binding files for each ScannedModule.
26    ///
27    /// - `output_dir`: Directory path to write files to.
28    /// - `dry_run`: If true, return results without writing to disk.
29    /// - `verify`: If true, verify written files are valid YAML with required fields.
30    /// - `verifiers`: Optional custom verifiers run after the built-in check.
31    ///
32    /// # Error handling vs. Python/TypeScript
33    ///
34    /// Unlike the Python and TypeScript implementations which raise/throw on I/O
35    /// failures, this method returns `Err(WriteError)` for any I/O error (e.g.
36    /// permission denied, disk full). Callers expecting the Python/TypeScript error
37    /// contract should propagate errors with `?` or handle them via `match`.
38    pub fn write(
39        &self,
40        modules: &[ScannedModule],
41        output_dir: &str,
42        dry_run: bool,
43        verify: bool,
44        verifiers: Option<&[&dyn Verifier]>,
45    ) -> Result<Vec<WriteResult>, WriteError> {
46        if modules.is_empty() {
47            return Ok(vec![]);
48        }
49
50        if !dry_run {
51            fs::create_dir_all(output_dir).map_err(|e| WriteError::io(output_dir.into(), e))?;
52        }
53
54        let output_path = if dry_run {
55            Path::new(output_dir).to_path_buf()
56        } else {
57            Path::new(output_dir)
58                .canonicalize()
59                .map_err(|e| WriteError::io(output_dir.into(), e))?
60        };
61
62        let mut results: Vec<WriteResult> = Vec::new();
63        let timestamp = Utc::now().to_rfc3339();
64        // Track filenames written in this batch to detect collisions within a single
65        // write() call. When two module_ids sanitize to the same filename, the second
66        // and subsequent modules receive a numeric suffix (e.g. `foo_1.binding.yaml`).
67        // This matches the TypeScript YAMLWriter collision-avoidance behaviour.
68        let mut written_names: std::collections::HashMap<String, String> =
69            std::collections::HashMap::new();
70
71        for module in modules {
72            let binding_data = build_binding(module);
73
74            if dry_run {
75                results.push(WriteResult::new(module.module_id.clone()));
76                continue;
77            }
78
79            // sanitize_filename removes all unsafe chars and collapses consecutive dots,
80            // ensuring the resulting filename cannot escape output_path.
81            let safe_id = sanitize_filename(&module.module_id);
82            let base_filename = format!("{safe_id}.binding.yaml");
83
84            // Resolve filename collision within this batch.
85            let mut final_filename = base_filename.clone();
86            let mut counter = 0u32;
87            while written_names.contains_key(&final_filename) {
88                counter += 1;
89                final_filename = format!("{safe_id}_{counter}.binding.yaml");
90            }
91            written_names.insert(final_filename.clone(), module.module_id.clone());
92
93            let file_path = output_path.join(&final_filename);
94
95            if file_path.exists() {
96                warn!(file_path = %file_path.display(), "Overwriting existing file");
97            }
98
99            let header = format!(
100                "# Auto-generated by apcore-toolkit scanner\n\
101                 # Generated: {timestamp}\n\
102                 # Do not edit manually unless you intend to customize schemas.\n\n"
103            );
104            let yaml_content = serde_yaml_ng::to_string(&binding_data)
105                .map_err(|e| WriteError::new(file_path.display().to_string(), e.to_string()))?;
106            let full_content = format!("{header}{yaml_content}");
107
108            // Atomic write: write bytes to a sibling .yaml.tmp file, call sync_all()
109            // to flush OS page cache to durable storage, then rename atomically.
110            // fs::rename on the same filesystem is atomic on POSIX; on Windows it
111            // replaces any existing target atomically on NTFS.
112            // On Unix we also fsync the parent directory after rename to make the
113            // new directory entry durable.
114            // The tmp file is removed on any failure so no stale `.yaml.tmp` is left.
115            let tmp_path = file_path.with_extension("yaml.tmp");
116            let write_res = (|| -> std::io::Result<()> {
117                let mut tmp_file = fs::File::create(&tmp_path)?;
118                tmp_file.write_all(full_content.as_bytes())?;
119                tmp_file.flush()?;
120                tmp_file.sync_all()
121            })();
122            if let Err(e) = write_res {
123                let _ = fs::remove_file(&tmp_path);
124                return Err(WriteError::io(tmp_path.display().to_string(), e));
125            }
126            if let Err(e) = fs::rename(&tmp_path, &file_path) {
127                let _ = fs::remove_file(&tmp_path);
128                return Err(WriteError::io(file_path.display().to_string(), e));
129            }
130            #[cfg(unix)]
131            {
132                if let Some(parent) = file_path.parent() {
133                    if let Ok(dir) = fs::File::open(parent) {
134                        let _ = dir.sync_all();
135                    }
136                }
137            }
138            debug!(file_path = %file_path.display(), "Written");
139
140            let mut result =
141                WriteResult::with_path(module.module_id.clone(), file_path.display().to_string());
142
143            if verify {
144                result = verify_yaml(&result, &file_path);
145            }
146            if result.verified {
147                if let Some(vs) = verifiers {
148                    let chain_result =
149                        run_verifier_chain(vs, &file_path.display().to_string(), &module.module_id);
150                    if !chain_result.ok {
151                        result = WriteResult::failed(
152                            result.module_id,
153                            result.path,
154                            chain_result.error.unwrap_or_default(),
155                        );
156                    }
157                }
158            }
159            results.push(result);
160        }
161
162        Ok(results)
163    }
164}
165
166/// Regex matching characters unsafe for filenames.
167static UNSAFE_CHARS_RE: LazyLock<Regex> =
168    LazyLock::new(|| Regex::new(r"[^a-zA-Z0-9._-]").expect("static regex"));
169
170/// Regex matching consecutive dots (path traversal prevention).
171static CONSECUTIVE_DOTS_RE: LazyLock<Regex> =
172    LazyLock::new(|| Regex::new(r"\.{2,}").expect("static regex"));
173
174/// Sanitize module_id for safe filename construction.
175fn sanitize_filename(module_id: &str) -> String {
176    let safe = UNSAFE_CHARS_RE.replace_all(module_id, "_");
177    // Collapse consecutive dots to prevent path traversal
178    CONSECUTIVE_DOTS_RE.replace_all(&safe, "_").to_string()
179}
180
181/// Build the YAML-serializable value for a ScannedModule.
182fn build_binding(module: &ScannedModule) -> serde_json::Value {
183    let mut binding = serde_json::Map::new();
184    binding.insert(
185        "module_id".into(),
186        serde_json::Value::from(module.module_id.clone()),
187    );
188    binding.insert(
189        "target".into(),
190        serde_json::Value::from(module.target.clone()),
191    );
192    binding.insert(
193        "description".into(),
194        serde_json::Value::from(module.description.clone()),
195    );
196    binding.insert(
197        "documentation".into(),
198        serde_json::to_value(&module.documentation).unwrap_or(serde_json::Value::Null),
199    );
200    binding.insert(
201        "tags".into(),
202        serde_json::to_value(&module.tags).unwrap_or(serde_json::json!([])),
203    );
204    binding.insert(
205        "version".into(),
206        serde_json::Value::from(module.version.clone()),
207    );
208    binding.insert(
209        "annotations".into(),
210        annotations_to_dict(module.annotations.as_ref()),
211    );
212    binding.insert(
213        "examples".into(),
214        serde_json::to_value(&module.examples).unwrap_or(serde_json::json!([])),
215    );
216    binding.insert(
217        "metadata".into(),
218        serde_json::to_value(&module.metadata).unwrap_or(serde_json::json!({})),
219    );
220    if let Some(alias) = &module.suggested_alias {
221        binding.insert(
222            "suggested_alias".into(),
223            serde_json::Value::from(alias.clone()),
224        );
225    }
226    binding.insert("input_schema".into(), module.input_schema.clone());
227    binding.insert("output_schema".into(), module.output_schema.clone());
228    if let Some(display) = &module.display {
229        binding.insert("display".into(), display.clone());
230    }
231
232    serde_json::json!({
233        "spec_version": "1.0",
234        "bindings": [serde_json::Value::Object(binding)]
235    })
236}
237
238/// Verify that a written YAML file is well-formed and contains required fields.
239fn verify_yaml(result: &WriteResult, file_path: &Path) -> WriteResult {
240    let vr = YAMLVerifier.verify(&file_path.display().to_string(), &result.module_id);
241    if vr.ok {
242        result.clone()
243    } else {
244        WriteResult::failed(
245            result.module_id.clone(),
246            result.path.clone(),
247            vr.error.unwrap_or_default(),
248        )
249    }
250}
251
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;
    use tempfile::TempDir;

    /// Module with one tag and a non-trivial input schema, shared by most tests.
    fn sample_module() -> ScannedModule {
        ScannedModule::new(
            "users.get_user".into(),
            "Get a user".into(),
            json!({"type": "object", "properties": {"user_id": {"type": "integer"}}}),
            json!({"type": "object"}),
            vec!["users".into()],
            "myapp.views:get_user".into(),
        )
    }

    /// Module with empty-object schemas and no tags.
    fn bare_module(id: &str, description: &str, target: &str) -> ScannedModule {
        ScannedModule::new(
            id.into(),
            description.into(),
            json!({"type": "object"}),
            json!({"type": "object"}),
            vec![],
            target.into(),
        )
    }

    /// Write `modules` into `dir` for real, with all verification disabled.
    fn write_plain(dir: &Path, modules: &[ScannedModule]) -> Vec<WriteResult> {
        YAMLWriter
            .write(modules, dir.to_str().unwrap(), false, false, None)
            .unwrap()
    }

    /// Read back the file a result points at.
    fn read_output(result: &WriteResult) -> String {
        let path = result.path.as_ref().unwrap();
        fs::read_to_string(path).unwrap()
    }

    /// Parse the written file and return the first entry of its `bindings` list.
    fn first_binding(result: &WriteResult) -> serde_yaml_ng::Value {
        let parsed: serde_yaml_ng::Value = serde_yaml_ng::from_str(&read_output(result)).unwrap();
        parsed["bindings"].as_sequence().unwrap()[0].clone()
    }

    #[test]
    fn test_sanitize_filename_basic() {
        assert_eq!(sanitize_filename("users.get_user"), "users.get_user");
    }

    #[test]
    fn test_sanitize_filename_special_chars() {
        assert_eq!(sanitize_filename("a/b\\c d"), "a_b_c_d");
    }

    #[test]
    fn test_sanitize_filename_path_traversal() {
        let sanitized = sanitize_filename("../../etc/passwd");
        assert!(!sanitized.contains(".."));
    }

    #[test]
    fn test_write_empty_modules() {
        // An empty batch returns before the output directory is touched.
        let results = YAMLWriter
            .write(&[], "/tmp/test", false, false, None)
            .unwrap();
        assert!(results.is_empty());
    }

    #[test]
    fn test_write_dry_run() {
        let results = YAMLWriter
            .write(&[sample_module()], "/tmp/nonexistent", true, false, None)
            .unwrap();
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].module_id, "users.get_user");
        assert!(results[0].path.is_none());
    }

    #[test]
    fn test_write_creates_file() {
        let dir = TempDir::new().unwrap();
        let results = write_plain(dir.path(), &[sample_module()]);
        assert_eq!(results.len(), 1);
        assert!(results[0].path.is_some());

        let file_path = results[0].path.as_ref().unwrap();
        assert!(Path::new(file_path).exists());
        let content = read_output(&results[0]);
        assert!(content.contains("Auto-generated"));
        assert!(content.contains("users.get_user"));
    }

    #[test]
    fn test_write_with_verify() {
        let dir = TempDir::new().unwrap();
        let results = YAMLWriter
            .write(
                &[sample_module()],
                dir.path().to_str().unwrap(),
                false,
                true,
                None,
            )
            .unwrap();
        assert_eq!(results.len(), 1);
        assert!(results[0].verified);
    }

    #[test]
    fn test_write_multiple_modules() {
        let dir = TempDir::new().unwrap();
        let modules = vec![
            bare_module("mod_a", "Module A", "app:a"),
            bare_module("mod_b", "Module B", "app:b"),
            bare_module("mod_c", "Module C", "app:c"),
        ];
        let results = write_plain(dir.path(), &modules);
        assert_eq!(results.len(), 3);
        // Each result should have a file path and the file should exist
        for result in &results {
            let path = result.path.as_ref().expect("path should be set");
            assert!(Path::new(path).exists(), "file should exist: {path}");
        }
    }

    #[test]
    fn test_binding_contains_all_fields() {
        let dir = TempDir::new().unwrap();
        let mut module = sample_module();
        module.documentation = Some("Full docs here".into());
        module.version = "2.0.0".into();

        let results = write_plain(dir.path(), &[module]);
        let content = read_output(&results[0]);

        // Verify all expected fields are present in the YAML content
        let expected_fields = [
            "spec_version",
            "module_id",
            "target",
            "description",
            "documentation",
            "tags",
            "version",
            "annotations",
            "examples",
            "metadata",
            "input_schema",
            "output_schema",
        ];
        for field in &expected_fields {
            assert!(
                content.contains(field),
                "YAML should contain field '{field}'"
            );
        }
        assert!(content.contains("users.get_user"));
        assert!(content.contains("Full docs here"));
        assert!(content.contains("2.0.0"));
    }

    #[test]
    fn test_creates_nested_output_dir() {
        let dir = TempDir::new().unwrap();
        let nested = dir.path().join("a").join("b").join("c");
        // The nested directory does not exist yet
        assert!(!nested.exists());

        let results = write_plain(&nested, &[sample_module()]);
        assert_eq!(results.len(), 1);
        assert!(nested.exists(), "nested directory should have been created");
        let file_path = results[0].path.as_ref().unwrap();
        assert!(Path::new(file_path).exists());
    }

    #[test]
    fn test_filename_sanitization_dots() {
        for input in ["foo..bar", "a...b....c"] {
            let result = sanitize_filename(input);
            assert!(
                !result.contains(".."),
                "consecutive dots should be collapsed: got '{result}'"
            );
        }
    }

    #[test]
    fn test_display_omitted_when_none() {
        let dir = TempDir::new().unwrap();
        let results = write_plain(dir.path(), &[sample_module()]);
        let binding = first_binding(&results[0]);
        assert!(
            binding.get("display").is_none(),
            "display should be absent when module.display is None"
        );
    }

    #[test]
    fn test_display_emitted_when_set() {
        let dir = TempDir::new().unwrap();
        let mut module = sample_module();
        module.display = Some(json!({"mcp": {"alias": "users_get"}, "alias": "users.get"}));

        let results = write_plain(dir.path(), &[module]);
        let binding = first_binding(&results[0]);
        let display = binding.get("display").expect("display should be present");
        assert_eq!(
            display["alias"],
            serde_yaml_ng::Value::String("users.get".into())
        );
        assert_eq!(
            display["mcp"]["alias"],
            serde_yaml_ng::Value::String("users_get".into())
        );
    }

    #[test]
    fn test_none_annotations_in_binding() {
        let dir = TempDir::new().unwrap();
        let mut module = sample_module();
        module.annotations = None;

        let results = write_plain(dir.path(), &[module]);
        let content = read_output(&results[0]);
        // The file should still be valid YAML and contain the annotations key
        let parsed: serde_yaml_ng::Value = serde_yaml_ng::from_str(&content).unwrap();
        let bindings = parsed["bindings"].as_sequence().unwrap();
        assert_eq!(bindings.len(), 1);
        // The annotations key should be present even though the module has none
        assert!(bindings[0].get("annotations").is_some());
    }

    #[test]
    fn test_overwrite_existing_file() {
        let dir = TempDir::new().unwrap();

        // Write the first version
        let first = write_plain(
            dir.path(),
            &[bare_module("overwrite_test", "Version 1", "app:v1")],
        );
        let content_v1 = read_output(&first[0]);
        assert!(content_v1.contains("Version 1"));

        // Write the second version with the same module_id
        let second = write_plain(
            dir.path(),
            &[bare_module("overwrite_test", "Version 2", "app:v2")],
        );
        let content_v2 = read_output(&second[0]);
        assert!(content_v2.contains("Version 2"));
        assert!(!content_v2.contains("Version 1"));
    }

    #[test]
    fn test_suggested_alias_round_trip() {
        let dir = TempDir::new().unwrap();
        let mut module = sample_module();
        module.suggested_alias = Some("users.get".into());

        let results = write_plain(dir.path(), &[module]);
        let binding = first_binding(&results[0]);
        assert_eq!(
            binding["suggested_alias"]
                .as_str()
                .expect("suggested_alias should be a string"),
            "users.get"
        );
    }

    #[test]
    fn test_suggested_alias_absent_when_none() {
        let dir = TempDir::new().unwrap();
        let results = write_plain(dir.path(), &[sample_module()]);
        let binding = first_binding(&results[0]);
        assert!(
            binding.get("suggested_alias").is_none(),
            "suggested_alias should be absent when module.suggested_alias is None"
        );
    }

    #[test]
    fn test_filename_collision_produces_distinct_files() {
        // D11-010: two modules whose module_ids both sanitize to the same filename
        // must produce two distinct files in a single write() call.
        // The second module receives a numeric suffix (e.g. `foo_1.binding.yaml`).
        let dir = TempDir::new().unwrap();

        // Both module_ids sanitize to "a_b" (slash → underscore)
        let colliding = [
            bare_module("a/b", "Module slash", "app:slash"),
            bare_module("a_b", "Module underscore", "app:underscore"),
        ];

        let results = write_plain(dir.path(), &colliding);
        assert_eq!(results.len(), 2, "should produce two results");

        let path1 = results[0]
            .path
            .as_ref()
            .expect("first result must have path");
        let path2 = results[1]
            .path
            .as_ref()
            .expect("second result must have path");
        assert_ne!(path1, path2, "collision must produce distinct file paths");
        assert!(Path::new(path1).exists(), "first file must exist: {path1}");
        assert!(Path::new(path2).exists(), "second file must exist: {path2}");
    }

    #[test]
    fn test_custom_verifier_failure_produces_failed_result() {
        use crate::output::types::{Verifier, VerifyResult};

        /// Verifier that rejects every file it is shown.
        struct AlwaysFail;
        impl Verifier for AlwaysFail {
            fn verify(&self, _path: &str, _module_id: &str) -> VerifyResult {
                VerifyResult::fail("intentional failure".into())
            }
        }

        let dir = TempDir::new().unwrap();
        let rejecting = AlwaysFail;
        let chain: &[&dyn Verifier] = &[&rejecting];
        let results = YAMLWriter
            .write(
                &[sample_module()],
                dir.path().to_str().unwrap(),
                false,
                true,
                Some(chain),
            )
            .unwrap();

        assert!(!results[0].verified, "result should be marked not verified");
        let message = results[0].verification_error.as_deref().unwrap_or("");
        assert!(message.contains("intentional failure"));
    }
}