use std::path::Path;
use serde_json::Value;
use crate::error::{ScanError, ScanErrorKind, ValidationError};
use crate::format::json::walk_json_value;
/// Splits a YAML stream into the source text of its individual documents.
///
/// A line is treated as a document start marker only when it begins with
/// `---` in the very first column and the marker is followed by end of line,
/// a space, or a tab. Indented dashes (for example a `---` line inside a block
/// scalar) are ordinary content and stay in the current document.
///
/// Anything after the marker on the same line is handled as follows:
/// * nothing, or only a `#` comment: dropped together with the marker;
/// * other content (such as a block scalar header in `--- |`): kept as the
///   first line of the document that the marker opens.
///
/// Documents that are empty or whitespace-only are skipped. The returned
/// strings are the document lines joined with `\n`, without the marker lines.
fn split_yaml_documents(content: &str) -> Vec<String> {
    /// Returns the text that follows a document start marker on `line`,
    /// or `None` when `line` is not a marker line.
    fn marker_remainder(line: &str) -> Option<&str> {
        // No trimming of the left side: a marker must sit at column 0.
        let rest = line.strip_prefix("---")?;
        // `----` or `---foo` are plain content, not markers.
        if !rest.is_empty() && !rest.starts_with(|c: char| c == ' ' || c == '\t') {
            return None;
        }
        let rest = rest.trim();
        // A comment after the marker carries no document content.
        Some(if rest.starts_with('#') { "" } else { rest })
    }

    /// Moves the buffered lines into `documents` (unless they are blank)
    /// and empties the buffer for the next document.
    fn flush(buffered: &mut Vec<&str>, documents: &mut Vec<String>) {
        let doc = buffered.join("\n");
        if !doc.trim().is_empty() {
            documents.push(doc);
        }
        buffered.clear();
    }

    let mut documents = Vec::new();
    let mut current_doc: Vec<&str> = Vec::new();

    for line in content.lines() {
        match marker_remainder(line) {
            Some(rest) => {
                flush(&mut current_doc, &mut documents);
                if !rest.is_empty() {
                    current_doc.push(rest);
                }
            }
            None => current_doc.push(line),
        }
    }
    flush(&mut current_doc, &mut documents);

    documents
}
/// Scans YAML text and returns the validation errors and scan errors found.
///
/// The whole input is first parsed as a multi-document stream with
/// `serde_saphyr::from_multiple`. When that succeeds, every document is handed
/// to `walk_json_value` and no scan errors are reported.
///
/// When the stream as a whole fails to parse, the input is cut into segments
/// with `split_yaml_documents` and each segment is parsed on its own:
/// * segments that parse are walked exactly like in the success path;
/// * segments that fail each contribute one `YamlParseError` scan error that
///   names the 1-based position of the segment;
/// * if no segment parses at all, the per-segment errors are discarded and a
///   single `YamlParseError` carrying the original stream error is returned.
///
/// # Parameters
/// * `content` - the YAML source text.
/// * `path` - recorded as `file` in every produced `ScanError` and forwarded
///   to `walk_json_value`.
/// * `vendor`, `scan_keys` - forwarded unchanged to `walk_json_value`.
///
/// # Returns
/// `(validation_errors, scan_errors)`.
pub fn scan_yaml_content(
    content: &str,
    path: &Path,
    vendor: Option<&str>,
    scan_keys: bool,
) -> (Vec<ValidationError>, Vec<ScanError>) {
    let mut validation_errors = Vec::new();

    // Fast path: the stream parses as a whole, so there is nothing to recover.
    let whole_stream: Result<Vec<Value>, _> = serde_saphyr::from_multiple(content);
    let stream_err = match whole_stream {
        Ok(docs) => {
            for doc in &docs {
                walk_json_value(doc, path, vendor, &mut validation_errors, "$", scan_keys);
            }
            return (validation_errors, Vec::new());
        }
        Err(err) => err,
    };

    // Recovery path: parse each segment independently so that one broken
    // document does not hide findings in its siblings.
    let segments = split_yaml_documents(content);
    let mut segment_errors = Vec::new();
    let mut parsed_any = false;

    for (position, segment) in segments.iter().enumerate() {
        match serde_saphyr::from_str::<Value>(segment) {
            Ok(doc) => {
                parsed_any = true;
                walk_json_value(&doc, path, vendor, &mut validation_errors, "$", scan_keys);
            }
            Err(doc_err) => segment_errors.push(ScanError {
                file: path.to_owned(),
                kind: ScanErrorKind::YamlParseError,
                message: format!(
                    "YAML parse error in document {} of multi-document stream: {doc_err}",
                    position + 1
                ),
            }),
        }
    }

    // Nothing was recoverable: report the original stream-level error alone
    // instead of a list of per-segment errors.
    let scan_errors = if parsed_any {
        segment_errors
    } else {
        vec![ScanError {
            file: path.to_owned(),
            kind: ScanErrorKind::YamlParseError,
            message: format!("YAML parse error: {stream_err}"),
        }]
    };

    (validation_errors, scan_errors)
}
/// Test-only helper: reads the file at `path` and scans it as YAML.
///
/// The file is loaded through `read_file_bounded` with `max_file_size`; a
/// read failure is returned as-is. The text is then passed to
/// `scan_yaml_content` together with `vendor` and `scan_keys`.
///
/// # Errors
/// Returns the read error, or the first scan error reported by
/// `scan_yaml_content`. In the latter case any validation errors collected
/// alongside it are dropped.
#[cfg(test)]
pub fn scan_yaml_file(
    path: &Path,
    vendor: Option<&str>,
    max_file_size: u64,
    scan_keys: bool,
) -> Result<Vec<ValidationError>, ScanError> {
    use crate::strategy::fs::{ScanResult, read_file_bounded};

    let content = match read_file_bounded(path, max_file_size) {
        ScanResult::Err(read_err) => return Err(read_err),
        ScanResult::Ok(text) => text,
    };

    let (validation_errors, scan_errors) = scan_yaml_content(&content, path, vendor, scan_keys);

    // Only the first scan error is surfaced through the `Result`.
    match scan_errors.into_iter().next() {
        Some(first_scan_err) => Err(first_scan_err),
        None => Ok(validation_errors),
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Size limit passed to `scan_yaml_file` by every file-based test.
    const MAX_FILE_SIZE: u64 = 10_485_760;

    /// Writes `content` into a fresh temporary file and returns its handle.
    fn create_temp_yaml(content: &str) -> NamedTempFile {
        let mut file = NamedTempFile::new().unwrap();
        file.write_all(content.as_bytes()).unwrap();
        file
    }

    /// Stores `yaml` in a temporary file and scans it with no vendor and
    /// with key scanning turned off.
    fn scan_temp_yaml(yaml: &str) -> Result<Vec<ValidationError>, ScanError> {
        let file = create_temp_yaml(yaml);
        scan_yaml_file(file.path(), None, MAX_FILE_SIZE, false)
    }

    #[test]
    fn test_scan_yaml_valid_id() {
        let yaml = r"
$id: gts://gts.x.core.events.type.v1~
";
        let errors = scan_temp_yaml(yaml).unwrap();
        assert!(errors.is_empty(), "Unexpected errors: {errors:?}");
    }

    #[test]
    fn test_scan_yaml_invalid_id() {
        let yaml = r"
$id: gts.invalid
";
        let errors = scan_temp_yaml(yaml).unwrap();
        assert!(!errors.is_empty());
    }

    #[test]
    fn test_scan_yaml_xgts_ref_wildcard() {
        let yaml = r"
x-gts-ref: gts.x.core.*
";
        let errors = scan_temp_yaml(yaml).unwrap();
        assert!(
            errors.is_empty(),
            "Wildcards in x-gts-ref should be allowed"
        );
    }

    #[test]
    fn test_scan_yaml_xgts_ref_bare_wildcard() {
        let yaml = r#"
x-gts-ref: "*"
"#;
        let errors = scan_temp_yaml(yaml).unwrap();
        assert!(
            errors.is_empty(),
            "Bare wildcard in x-gts-ref should be skipped"
        );
    }

    #[test]
    fn test_scan_yaml_nested_values() {
        let yaml = r"
properties:
type:
x-gts-ref: gts.x.core.events.type.v1~
";
        let errors = scan_temp_yaml(yaml).unwrap();
        assert!(
            errors.is_empty(),
            "Nested values should be found and validated"
        );
    }

    #[test]
    fn test_scan_yaml_array_values() {
        let yaml = r"
capabilities:
- gts.x.core.events.type.v1~
- gts.x.core.events.topic.v1~
";
        let errors = scan_temp_yaml(yaml).unwrap();
        assert!(
            errors.is_empty(),
            "Array values should be found and validated"
        );
    }

    #[test]
    fn test_scan_yaml_invalid_yaml_is_scan_error() {
        let outcome = scan_temp_yaml(": : :\n - [unclosed\n");
        assert!(
            outcome.is_err(),
            "Completely invalid YAML must produce a ScanError, not silent success"
        );
        let err = outcome.unwrap_err();
        assert_eq!(err.kind, ScanErrorKind::YamlParseError);
    }

    #[test]
    fn test_scan_yaml_multi_document_all_validated() {
        let stream = "\
$id: gts.x.core.events.type.v1~
---
$id: gts.invalid
";
        let (val_errs, scan_errs) =
            scan_yaml_content(stream, Path::new("multi.yaml"), None, false);
        assert!(
            scan_errs.is_empty(),
            "No scan errors expected for well-formed stream: {scan_errs:?}"
        );
        assert!(
            !val_errs.is_empty(),
            "Multi-document YAML: second document with invalid ID should be caught, got no errors"
        );
    }

    #[test]
    fn test_scan_yaml_multi_document_malformed_doc_does_not_suppress_valid_doc() {
        let stream = "\
$id: gts.y.core.pkg.mytype.v1~
---
invalid: yaml: syntax:
---
$id: gts.y.core.pkg.mytype.v1~
";
        let (val_errs, scan_errs) =
            scan_yaml_content(stream, Path::new("multi.yaml"), Some("x"), false);
        assert!(
            !val_errs.is_empty(),
            "Valid documents must be validated even when a sibling document is malformed, got no errors"
        );
        assert!(
            !scan_errs.is_empty(),
            "Malformed document must produce a ScanError, got none"
        );
        assert_eq!(
            scan_errs[0].kind,
            ScanErrorKind::YamlParseError,
            "Malformed doc scan error must have YamlParseError kind"
        );
    }
}