Skip to main content

apcore_toolkit/output/
yaml_writer.rs

1// YAML binding file generator.
2//
3// Writes ScannedModule instances as .binding.yaml files compatible with
4// apcore::BindingLoader.
5
6use std::fs;
7use std::io::Write;
8use std::path::Path;
9use std::sync::LazyLock;
10
11use chrono::Utc;
12use regex::Regex;
13use tracing::{debug, warn};
14
15use crate::output::errors::WriteError;
16use crate::output::types::{Verifier, WriteResult};
17use crate::output::verifiers::{run_verifier_chain, YAMLVerifier};
18use crate::serializers::annotations_to_dict;
19use crate::types::ScannedModule;
20
21/// Generates `.binding.yaml` files from ScannedModule instances.
22pub struct YAMLWriter;
23
24impl YAMLWriter {
25    /// Write YAML binding files for each ScannedModule.
26    ///
27    /// - `output_dir`: Directory path to write files to.
28    /// - `dry_run`: If true, return results without writing to disk.
29    /// - `verify`: If true, verify written files are valid YAML with required fields.
30    /// - `verifiers`: Optional custom verifiers run after the built-in check.
31    pub fn write(
32        &self,
33        modules: &[ScannedModule],
34        output_dir: &str,
35        dry_run: bool,
36        verify: bool,
37        verifiers: Option<&[&dyn Verifier]>,
38    ) -> Result<Vec<WriteResult>, WriteError> {
39        if modules.is_empty() {
40            return Ok(vec![]);
41        }
42
43        if !dry_run {
44            fs::create_dir_all(output_dir).map_err(|e| WriteError::io(output_dir.into(), e))?;
45        }
46
47        let output_path = if dry_run {
48            Path::new(output_dir).to_path_buf()
49        } else {
50            Path::new(output_dir)
51                .canonicalize()
52                .map_err(|e| WriteError::io(output_dir.into(), e))?
53        };
54
55        let mut results: Vec<WriteResult> = Vec::new();
56        let timestamp = Utc::now().to_rfc3339();
57
58        for module in modules {
59            let binding_data = build_binding(module);
60
61            if dry_run {
62                results.push(WriteResult::new(module.module_id.clone()));
63                continue;
64            }
65
66            // sanitize_filename removes all unsafe chars and collapses consecutive dots,
67            // ensuring the resulting filename cannot escape output_path.
68            let safe_id = sanitize_filename(&module.module_id);
69            let filename = format!("{safe_id}.binding.yaml");
70            let file_path = output_path.join(&filename);
71
72            if file_path.exists() {
73                warn!(file_path = %file_path.display(), "Overwriting existing file");
74            }
75
76            let header = format!(
77                "# Auto-generated by apcore-toolkit scanner\n\
78                 # Generated: {timestamp}\n\
79                 # Do not edit manually unless you intend to customize schemas.\n\n"
80            );
81            let yaml_content = serde_yaml_ng::to_string(&binding_data)
82                .map_err(|e| WriteError::new(file_path.display().to_string(), e.to_string()))?;
83            let full_content = format!("{header}{yaml_content}");
84
85            // Atomic write: write bytes to a sibling .yaml.tmp file, call sync_all()
86            // to flush OS page cache to durable storage, then rename atomically.
87            // fs::rename on the same filesystem is atomic on POSIX; on Windows it
88            // replaces any existing target atomically on NTFS.
89            // On Unix we also fsync the parent directory after rename to make the
90            // new directory entry durable.
91            // The tmp file is removed on any failure so no stale `.yaml.tmp` is left.
92            let tmp_path = file_path.with_extension("yaml.tmp");
93            let write_res = (|| -> std::io::Result<()> {
94                let mut tmp_file = fs::File::create(&tmp_path)?;
95                tmp_file.write_all(full_content.as_bytes())?;
96                tmp_file.flush()?;
97                tmp_file.sync_all()
98            })();
99            if let Err(e) = write_res {
100                let _ = fs::remove_file(&tmp_path);
101                return Err(WriteError::io(tmp_path.display().to_string(), e));
102            }
103            if let Err(e) = fs::rename(&tmp_path, &file_path) {
104                let _ = fs::remove_file(&tmp_path);
105                return Err(WriteError::io(file_path.display().to_string(), e));
106            }
107            #[cfg(unix)]
108            {
109                if let Some(parent) = file_path.parent() {
110                    if let Ok(dir) = fs::File::open(parent) {
111                        let _ = dir.sync_all();
112                    }
113                }
114            }
115            debug!(file_path = %file_path.display(), "Written");
116
117            let mut result =
118                WriteResult::with_path(module.module_id.clone(), file_path.display().to_string());
119
120            if verify {
121                result = verify_yaml(&result, &file_path);
122            }
123            if result.verified {
124                if let Some(vs) = verifiers {
125                    let chain_result =
126                        run_verifier_chain(vs, &file_path.display().to_string(), &module.module_id);
127                    if !chain_result.ok {
128                        result = WriteResult::failed(
129                            result.module_id,
130                            result.path,
131                            chain_result.error.unwrap_or_default(),
132                        );
133                    }
134                }
135            }
136            results.push(result);
137        }
138
139        Ok(results)
140    }
141}
142
143/// Regex matching characters unsafe for filenames.
144static UNSAFE_CHARS_RE: LazyLock<Regex> =
145    LazyLock::new(|| Regex::new(r"[^a-zA-Z0-9._-]").expect("static regex"));
146
147/// Regex matching consecutive dots (path traversal prevention).
148static CONSECUTIVE_DOTS_RE: LazyLock<Regex> =
149    LazyLock::new(|| Regex::new(r"\.{2,}").expect("static regex"));
150
151/// Sanitize module_id for safe filename construction.
152fn sanitize_filename(module_id: &str) -> String {
153    let safe = UNSAFE_CHARS_RE.replace_all(module_id, "_");
154    // Collapse consecutive dots to prevent path traversal
155    CONSECUTIVE_DOTS_RE.replace_all(&safe, "_").to_string()
156}
157
158/// Build the YAML-serializable value for a ScannedModule.
159fn build_binding(module: &ScannedModule) -> serde_json::Value {
160    let mut binding = serde_json::Map::new();
161    binding.insert(
162        "module_id".into(),
163        serde_json::Value::from(module.module_id.clone()),
164    );
165    binding.insert(
166        "target".into(),
167        serde_json::Value::from(module.target.clone()),
168    );
169    binding.insert(
170        "description".into(),
171        serde_json::Value::from(module.description.clone()),
172    );
173    binding.insert(
174        "documentation".into(),
175        serde_json::to_value(&module.documentation).unwrap_or(serde_json::Value::Null),
176    );
177    binding.insert(
178        "tags".into(),
179        serde_json::to_value(&module.tags).unwrap_or(serde_json::json!([])),
180    );
181    binding.insert(
182        "version".into(),
183        serde_json::Value::from(module.version.clone()),
184    );
185    binding.insert(
186        "annotations".into(),
187        annotations_to_dict(module.annotations.as_ref()),
188    );
189    binding.insert(
190        "examples".into(),
191        serde_json::to_value(&module.examples).unwrap_or(serde_json::json!([])),
192    );
193    binding.insert(
194        "metadata".into(),
195        serde_json::to_value(&module.metadata).unwrap_or(serde_json::json!({})),
196    );
197    if let Some(alias) = &module.suggested_alias {
198        binding.insert(
199            "suggested_alias".into(),
200            serde_json::Value::from(alias.clone()),
201        );
202    }
203    binding.insert("input_schema".into(), module.input_schema.clone());
204    binding.insert("output_schema".into(), module.output_schema.clone());
205    if let Some(display) = &module.display {
206        binding.insert("display".into(), display.clone());
207    }
208
209    serde_json::json!({
210        "spec_version": "1.0",
211        "bindings": [serde_json::Value::Object(binding)]
212    })
213}
214
215/// Verify that a written YAML file is well-formed and contains required fields.
216fn verify_yaml(result: &WriteResult, file_path: &Path) -> WriteResult {
217    let vr = YAMLVerifier.verify(&file_path.display().to_string(), &result.module_id);
218    if vr.ok {
219        result.clone()
220    } else {
221        WriteResult::failed(
222            result.module_id.clone(),
223            result.path.clone(),
224            vr.error.unwrap_or_default(),
225        )
226    }
227}
228
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;
    use tempfile::TempDir;

    /// Canonical fixture: a module with one tag and a typed input schema.
    fn sample_module() -> ScannedModule {
        ScannedModule::new(
            "users.get_user".into(),
            "Get a user".into(),
            json!({"type": "object", "properties": {"user_id": {"type": "integer"}}}),
            json!({"type": "object"}),
            vec!["users".into()],
            "myapp.views:get_user".into(),
        )
    }

    /// Minimal fixture: empty object schemas and no tags.
    fn bare_module(module_id: &str, description: &str, target: &str) -> ScannedModule {
        ScannedModule::new(
            module_id.into(),
            description.into(),
            json!({"type": "object"}),
            json!({"type": "object"}),
            vec![],
            target.into(),
        )
    }

    /// Write `modules` into `dir` for real, with verification disabled.
    fn write_plain(dir: &Path, modules: &[ScannedModule]) -> Vec<WriteResult> {
        YAMLWriter
            .write(modules, dir.to_str().unwrap(), false, false, None)
            .unwrap()
    }

    /// Read back the file recorded in `result`.
    fn read_written(result: &WriteResult) -> String {
        let path = result.path.as_ref().expect("path should be set");
        fs::read_to_string(path).unwrap()
    }

    /// Write a single module to a fresh temp dir and return the parsed YAML document.
    fn write_and_parse(module: ScannedModule) -> serde_yaml_ng::Value {
        let dir = TempDir::new().unwrap();
        let results = write_plain(dir.path(), &[module]);
        serde_yaml_ng::from_str(&read_written(&results[0])).unwrap()
    }

    #[test]
    fn test_sanitize_filename_basic() {
        assert_eq!(sanitize_filename("users.get_user"), "users.get_user");
    }

    #[test]
    fn test_sanitize_filename_special_chars() {
        assert_eq!(sanitize_filename("a/b\\c d"), "a_b_c_d");
    }

    #[test]
    fn test_sanitize_filename_path_traversal() {
        let result = sanitize_filename("../../etc/passwd");
        assert!(!result.contains(".."));
    }

    #[test]
    fn test_write_empty_modules() {
        let writer = YAMLWriter;
        let result = writer.write(&[], "/tmp/test", false, false, None).unwrap();
        assert!(result.is_empty());
    }

    #[test]
    fn test_write_dry_run() {
        // Use a path under a temp dir so the test is hermetic and can check
        // that a dry run really leaves the filesystem untouched.
        let dir = TempDir::new().unwrap();
        let target = dir.path().join("nonexistent");
        let result = YAMLWriter
            .write(
                &[sample_module()],
                target.to_str().unwrap(),
                true,
                false,
                None,
            )
            .unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].module_id, "users.get_user");
        assert!(result[0].path.is_none());
        assert!(
            !target.exists(),
            "dry run must not create the output directory"
        );
    }

    #[test]
    fn test_write_creates_file() {
        let dir = TempDir::new().unwrap();
        let result = write_plain(dir.path(), &[sample_module()]);
        assert_eq!(result.len(), 1);
        assert!(result[0].path.is_some());

        let file_path = result[0].path.as_ref().unwrap();
        assert!(Path::new(file_path).exists());
        let content = read_written(&result[0]);
        assert!(content.contains("Auto-generated"));
        assert!(content.contains("users.get_user"));
    }

    #[test]
    fn test_no_tmp_file_left_behind() {
        let dir = TempDir::new().unwrap();
        write_plain(dir.path(), &[sample_module()]);
        let names: Vec<String> = fs::read_dir(dir.path())
            .unwrap()
            .map(|entry| entry.unwrap().file_name().to_string_lossy().into_owned())
            .collect();
        assert_eq!(names, vec!["users.get_user.binding.yaml".to_string()]);
    }

    #[test]
    fn test_write_with_verify() {
        let dir = TempDir::new().unwrap();
        let result = YAMLWriter
            .write(
                &[sample_module()],
                dir.path().to_str().unwrap(),
                false,
                true,
                None,
            )
            .unwrap();
        assert_eq!(result.len(), 1);
        assert!(result[0].verified);
    }

    #[test]
    fn test_write_multiple_modules() {
        let dir = TempDir::new().unwrap();
        let modules = vec![
            bare_module("mod_a", "Module A", "app:a"),
            bare_module("mod_b", "Module B", "app:b"),
            bare_module("mod_c", "Module C", "app:c"),
        ];
        let results = write_plain(dir.path(), &modules);
        assert_eq!(results.len(), 3);
        // Each result should have a file path and the file should exist
        for result in &results {
            let path = result.path.as_ref().expect("path should be set");
            assert!(Path::new(path).exists(), "file should exist: {path}");
        }
    }

    #[test]
    fn test_binding_contains_all_fields() {
        let dir = TempDir::new().unwrap();
        let mut module = sample_module();
        module.documentation = Some("Full docs here".into());
        module.version = "2.0.0".into();
        let results = write_plain(dir.path(), &[module]);
        let content = read_written(&results[0]);
        // Verify all expected fields are present in the YAML content
        for field in &[
            "spec_version",
            "module_id",
            "target",
            "description",
            "documentation",
            "tags",
            "version",
            "annotations",
            "examples",
            "metadata",
            "input_schema",
            "output_schema",
        ] {
            assert!(
                content.contains(field),
                "YAML should contain field '{field}'"
            );
        }
        assert!(content.contains("users.get_user"));
        assert!(content.contains("Full docs here"));
        assert!(content.contains("2.0.0"));
    }

    #[test]
    fn test_creates_nested_output_dir() {
        let dir = TempDir::new().unwrap();
        let nested = dir.path().join("a").join("b").join("c");
        // The nested directory does not exist yet
        assert!(!nested.exists());
        let results = write_plain(&nested, &[sample_module()]);
        assert_eq!(results.len(), 1);
        assert!(nested.exists(), "nested directory should have been created");
        let file_path = results[0].path.as_ref().unwrap();
        assert!(Path::new(file_path).exists());
    }

    #[test]
    fn test_filename_sanitization_dots() {
        let result = sanitize_filename("foo..bar");
        assert!(
            !result.contains(".."),
            "consecutive dots should be collapsed: got '{result}'"
        );
        let result2 = sanitize_filename("a...b....c");
        assert!(
            !result2.contains(".."),
            "consecutive dots should be collapsed: got '{result2}'"
        );
    }

    #[test]
    fn test_display_omitted_when_none() {
        let parsed = write_and_parse(sample_module());
        let bindings = parsed["bindings"].as_sequence().unwrap();
        assert!(
            bindings[0].get("display").is_none(),
            "display should be absent when module.display is None"
        );
    }

    #[test]
    fn test_display_emitted_when_set() {
        let mut module = sample_module();
        module.display = Some(json!({"mcp": {"alias": "users_get"}, "alias": "users.get"}));
        let parsed = write_and_parse(module);
        let bindings = parsed["bindings"].as_sequence().unwrap();
        let display = bindings[0]
            .get("display")
            .expect("display should be present");
        assert_eq!(
            display["alias"],
            serde_yaml_ng::Value::String("users.get".into())
        );
        assert_eq!(
            display["mcp"]["alias"],
            serde_yaml_ng::Value::String("users_get".into())
        );
    }

    #[test]
    fn test_none_annotations_in_binding() {
        let mut module = sample_module();
        module.annotations = None;
        // The file should still be valid YAML and contain the annotations key
        let parsed = write_and_parse(module);
        let bindings = parsed["bindings"].as_sequence().unwrap();
        assert_eq!(bindings.len(), 1);
        // The key must exist even though the module carried no annotations
        assert!(bindings[0].get("annotations").is_some());
    }

    #[test]
    fn test_overwrite_existing_file() {
        let dir = TempDir::new().unwrap();

        // Write the first version
        let results_v1 = write_plain(
            dir.path(),
            &[bare_module("overwrite_test", "Version 1", "app:v1")],
        );
        let content_v1 = read_written(&results_v1[0]);
        assert!(content_v1.contains("Version 1"));

        // Write the second version with the same module_id
        let results_v2 = write_plain(
            dir.path(),
            &[bare_module("overwrite_test", "Version 2", "app:v2")],
        );
        let content_v2 = read_written(&results_v2[0]);
        assert!(content_v2.contains("Version 2"));
        assert!(!content_v2.contains("Version 1"));
    }

    #[test]
    fn test_suggested_alias_round_trip() {
        let mut module = sample_module();
        module.suggested_alias = Some("users.get".into());
        let parsed = write_and_parse(module);
        let bindings = parsed["bindings"].as_sequence().unwrap();
        assert_eq!(
            bindings[0]["suggested_alias"]
                .as_str()
                .expect("suggested_alias should be a string"),
            "users.get"
        );
    }

    #[test]
    fn test_suggested_alias_absent_when_none() {
        let parsed = write_and_parse(sample_module());
        let bindings = parsed["bindings"].as_sequence().unwrap();
        assert!(
            bindings[0].get("suggested_alias").is_none(),
            "suggested_alias should be absent when module.suggested_alias is None"
        );
    }

    #[test]
    fn test_custom_verifier_failure_produces_failed_result() {
        use crate::output::types::{Verifier, VerifyResult};

        struct AlwaysFail;
        impl Verifier for AlwaysFail {
            fn verify(&self, _path: &str, _module_id: &str) -> VerifyResult {
                VerifyResult::fail("intentional failure".into())
            }
        }

        let dir = TempDir::new().unwrap();
        let writer = YAMLWriter;
        let module = sample_module();
        let verifier = AlwaysFail;
        let verifiers: &[&dyn Verifier] = &[&verifier];
        let results = writer
            .write(
                &[module],
                dir.path().to_str().unwrap(),
                false,
                true,
                Some(verifiers),
            )
            .unwrap();
        assert!(!results[0].verified, "result should be marked not verified");
        assert!(results[0]
            .verification_error
            .as_deref()
            .unwrap_or("")
            .contains("intentional failure"));
    }
}