Skip to main content

apcore_toolkit/output/
yaml_writer.rs

1// YAML binding file generator.
2//
3// Writes ScannedModule instances as .binding.yaml files compatible with
4// apcore::BindingLoader.
5
6use std::fs;
7use std::path::Path;
8use std::sync::LazyLock;
9
10use chrono::Utc;
11use regex::Regex;
12use tracing::{debug, warn};
13
14use crate::output::errors::WriteError;
15use crate::output::types::{Verifier, WriteResult};
16use crate::output::verifiers::{run_verifier_chain, YAMLVerifier};
17use crate::serializers::annotations_to_value;
18use crate::types::ScannedModule;
19
20/// Generates `.binding.yaml` files from ScannedModule instances.
21pub struct YAMLWriter;
22
23impl YAMLWriter {
24    /// Write YAML binding files for each ScannedModule.
25    ///
26    /// - `output_dir`: Directory path to write files to.
27    /// - `dry_run`: If true, return results without writing to disk.
28    /// - `verify`: If true, verify written files are valid YAML with required fields.
29    /// - `verifiers`: Optional custom verifiers run after the built-in check.
30    pub fn write(
31        &self,
32        modules: &[ScannedModule],
33        output_dir: &str,
34        dry_run: bool,
35        verify: bool,
36        verifiers: Option<&[&dyn Verifier]>,
37    ) -> Result<Vec<WriteResult>, WriteError> {
38        if modules.is_empty() {
39            return Ok(vec![]);
40        }
41
42        if !dry_run {
43            fs::create_dir_all(output_dir)
44                .map_err(|e| WriteError::new(output_dir.into(), e.to_string()))?;
45        }
46
47        let output_path = Path::new(output_dir)
48            .canonicalize()
49            .unwrap_or_else(|_| Path::new(output_dir).to_path_buf());
50
51        let mut results: Vec<WriteResult> = Vec::new();
52        let timestamp = Utc::now().to_rfc3339();
53
54        for module in modules {
55            let binding_data = build_binding(module);
56
57            if dry_run {
58                results.push(WriteResult::new(module.module_id.clone()));
59                continue;
60            }
61
62            let safe_id = sanitize_filename(&module.module_id);
63            let filename = format!("{safe_id}.binding.yaml");
64            let file_path = output_path.join(&filename);
65
66            // Path traversal protection
67            if !file_path.starts_with(&output_path) {
68                warn!(
69                    "Skipping file outside output directory: {}",
70                    file_path.display()
71                );
72                continue;
73            }
74
75            if file_path.exists() {
76                warn!("Overwriting existing file: {}", file_path.display());
77            }
78
79            let header = format!(
80                "# Auto-generated by apcore-toolkit scanner\n\
81                 # Generated: {timestamp}\n\
82                 # Do not edit manually unless you intend to customize schemas.\n\n"
83            );
84            let yaml_content = serde_yaml::to_string(&binding_data)
85                .map_err(|e| WriteError::new(file_path.display().to_string(), e.to_string()))?;
86
87            fs::write(&file_path, format!("{header}{yaml_content}"))
88                .map_err(|e| WriteError::new(file_path.display().to_string(), e.to_string()))?;
89            debug!("Written: {}", file_path.display());
90
91            let mut result =
92                WriteResult::with_path(module.module_id.clone(), file_path.display().to_string());
93
94            if verify {
95                result = verify_yaml(&result, &file_path);
96            }
97            if result.verified {
98                if let Some(vs) = verifiers {
99                    let chain_result =
100                        run_verifier_chain(vs, &file_path.display().to_string(), &module.module_id);
101                    if !chain_result.ok {
102                        result = WriteResult::failed(
103                            result.module_id,
104                            result.path,
105                            chain_result.error.unwrap_or_default(),
106                        );
107                    }
108                }
109            }
110            results.push(result);
111        }
112
113        Ok(results)
114    }
115}
116
117/// Regex matching characters unsafe for filenames.
118static UNSAFE_CHARS_RE: LazyLock<Regex> =
119    LazyLock::new(|| Regex::new(r"[^a-zA-Z0-9._-]").expect("static regex"));
120
121/// Regex matching consecutive dots (path traversal prevention).
122static CONSECUTIVE_DOTS_RE: LazyLock<Regex> =
123    LazyLock::new(|| Regex::new(r"\.{2,}").expect("static regex"));
124
125/// Sanitize module_id for safe filename construction.
126fn sanitize_filename(module_id: &str) -> String {
127    let safe = UNSAFE_CHARS_RE.replace_all(module_id, "_");
128    // Collapse consecutive dots to prevent path traversal
129    CONSECUTIVE_DOTS_RE.replace_all(&safe, "_").to_string()
130}
131
132/// Build the YAML-serializable value for a ScannedModule.
133fn build_binding(module: &ScannedModule) -> serde_json::Value {
134    serde_json::json!({
135        "bindings": [{
136            "module_id": module.module_id,
137            "target": module.target,
138            "description": module.description,
139            "documentation": module.documentation,
140            "tags": module.tags,
141            "version": module.version,
142            "annotations": annotations_to_value(module.annotations.as_ref()),
143            "examples": serde_json::to_value(&module.examples).unwrap_or(serde_json::json!([])),
144            "metadata": module.metadata,
145            "input_schema": module.input_schema,
146            "output_schema": module.output_schema,
147        }]
148    })
149}
150
151/// Verify that a written YAML file is well-formed and contains required fields.
152fn verify_yaml(result: &WriteResult, file_path: &Path) -> WriteResult {
153    let vr = YAMLVerifier.verify(&file_path.display().to_string(), &result.module_id);
154    if vr.ok {
155        result.clone()
156    } else {
157        WriteResult::failed(
158            result.module_id.clone(),
159            result.path.clone(),
160            vr.error.unwrap_or_default(),
161        )
162    }
163}
164
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;
    use tempfile::TempDir;

    /// Module with a populated input schema and one tag, shared by most tests.
    fn sample_module() -> ScannedModule {
        let input_schema = json!({
            "type": "object",
            "properties": {"user_id": {"type": "integer"}}
        });
        let output_schema = json!({"type": "object"});
        ScannedModule::new(
            "users.get_user".into(),
            "Get a user".into(),
            input_schema,
            output_schema,
            vec!["users".into()],
            "myapp.views:get_user".into(),
        )
    }

    /// Module with bare object schemas and no tags.
    fn plain_module(id: &str, description: &str, target: &str) -> ScannedModule {
        ScannedModule::new(
            id.into(),
            description.into(),
            json!({"type": "object"}),
            json!({"type": "object"}),
            vec![],
            target.into(),
        )
    }

    /// Performs a real (non-dry-run) write into `dir` without custom verifiers.
    fn write_files(dir: &Path, modules: &[ScannedModule], verify: bool) -> Vec<WriteResult> {
        YAMLWriter
            .write(modules, dir.to_str().unwrap(), false, verify, None)
            .unwrap()
    }

    /// Reads back the file a result points at.
    fn read_written(result: &WriteResult) -> String {
        let path = result.path.as_ref().unwrap();
        fs::read_to_string(path).unwrap()
    }

    #[test]
    fn test_sanitize_filename_basic() {
        let sanitized = sanitize_filename("users.get_user");
        assert_eq!(sanitized, "users.get_user");
    }

    #[test]
    fn test_sanitize_filename_special_chars() {
        let sanitized = sanitize_filename("a/b\\c d");
        assert_eq!(sanitized, "a_b_c_d");
    }

    #[test]
    fn test_sanitize_filename_path_traversal() {
        let sanitized = sanitize_filename("../../etc/passwd");
        assert!(!sanitized.contains(".."));
    }

    #[test]
    fn test_write_empty_modules() {
        let results = YAMLWriter
            .write(&[], "/tmp/test", false, false, None)
            .unwrap();
        assert!(results.is_empty());
    }

    #[test]
    fn test_write_dry_run() {
        let modules = vec![sample_module()];
        let results = YAMLWriter
            .write(&modules, "/tmp/nonexistent", true, false, None)
            .unwrap();
        assert_eq!(results.len(), 1);
        let only = &results[0];
        assert_eq!(only.module_id, "users.get_user");
        assert!(only.path.is_none());
    }

    #[test]
    fn test_write_creates_file() {
        let dir = TempDir::new().unwrap();
        let results = write_files(dir.path(), &[sample_module()], false);
        assert_eq!(results.len(), 1);
        assert!(results[0].path.is_some());

        let written = results[0].path.as_ref().unwrap();
        assert!(Path::new(written).exists());
        let content = fs::read_to_string(written).unwrap();
        assert!(content.contains("Auto-generated"));
        assert!(content.contains("users.get_user"));
    }

    #[test]
    fn test_write_with_verify() {
        let dir = TempDir::new().unwrap();
        let results = write_files(dir.path(), &[sample_module()], true);
        assert_eq!(results.len(), 1);
        assert!(results[0].verified);
    }

    #[test]
    fn test_write_multiple_modules() {
        let dir = TempDir::new().unwrap();
        let modules = vec![
            plain_module("mod_a", "Module A", "app:a"),
            plain_module("mod_b", "Module B", "app:b"),
            plain_module("mod_c", "Module C", "app:c"),
        ];
        let results = write_files(dir.path(), &modules, false);
        assert_eq!(results.len(), 3);
        // Every result must point at a file that is actually on disk.
        for result in &results {
            let path = result.path.as_ref().expect("path should be set");
            assert!(Path::new(path).exists(), "file should exist: {path}");
        }
    }

    #[test]
    fn test_binding_contains_all_fields() {
        let dir = TempDir::new().unwrap();
        let mut module = sample_module();
        module.documentation = Some("Full docs here".into());
        module.version = "2.0.0".into();

        let results = write_files(dir.path(), &[module], false);
        let content = read_written(&results[0]);

        // Every binding key has to show up somewhere in the emitted YAML.
        let expected_fields = [
            "module_id",
            "target",
            "description",
            "documentation",
            "tags",
            "version",
            "annotations",
            "examples",
            "metadata",
            "input_schema",
            "output_schema",
        ];
        for field in &expected_fields {
            assert!(
                content.contains(field),
                "YAML should contain field '{field}'"
            );
        }
        // The concrete values set above must be present as well.
        assert!(content.contains("users.get_user"));
        assert!(content.contains("Full docs here"));
        assert!(content.contains("2.0.0"));
    }

    #[test]
    fn test_creates_nested_output_dir() {
        let dir = TempDir::new().unwrap();
        let nested = dir.path().join("a").join("b").join("c");
        // Precondition: the nested directory is not there yet.
        assert!(!nested.exists());

        let results = write_files(&nested, &[sample_module()], false);
        assert_eq!(results.len(), 1);
        assert!(nested.exists(), "nested directory should have been created");
        let written = results[0].path.as_ref().unwrap();
        assert!(Path::new(written).exists());
    }

    #[test]
    fn test_filename_sanitization_dots() {
        let result = sanitize_filename("foo..bar");
        assert!(
            !result.contains(".."),
            "consecutive dots should be collapsed: got '{result}'"
        );
        let result2 = sanitize_filename("a...b....c");
        assert!(
            !result2.contains(".."),
            "consecutive dots should be collapsed: got '{result2}'"
        );
    }

    #[test]
    fn test_none_annotations_in_binding() {
        let dir = TempDir::new().unwrap();
        let mut module = sample_module();
        module.annotations = None;

        let results = write_files(dir.path(), &[module], false);
        let content = read_written(&results[0]);

        // The output must still parse as YAML and keep the annotations key.
        let parsed: serde_yaml::Value = serde_yaml::from_str(&content).unwrap();
        let bindings = parsed["bindings"].as_sequence().unwrap();
        assert_eq!(bindings.len(), 1);
        // annotations should be present (as null)
        assert!(bindings[0].get("annotations").is_some());
    }

    #[test]
    fn test_overwrite_existing_file() {
        let dir = TempDir::new().unwrap();

        // First write establishes the file.
        let first = write_files(
            dir.path(),
            &[plain_module("overwrite_test", "Version 1", "app:v1")],
            false,
        );
        let content_v1 = read_written(&first[0]);
        assert!(content_v1.contains("Version 1"));

        // Second write reuses the module_id and must replace the content.
        let second = write_files(
            dir.path(),
            &[plain_module("overwrite_test", "Version 2", "app:v2")],
            false,
        );
        let content_v2 = read_written(&second[0]);
        assert!(content_v2.contains("Version 2"));
        assert!(!content_v2.contains("Version 1"));
    }
}