use std::borrow::Cow;
use std::collections::HashSet;
use saphyr_parser::{Event, Parser, ScanError};
/// Resource limits that [`check_yaml_budget`] enforces while walking a YAML
/// event stream.
///
/// Every `max_*` field is an inclusive ceiling: a breach is reported only when
/// the corresponding counter becomes strictly greater than the limit.
#[derive(Clone, Debug)]
pub struct Budget {
    /// Ceiling on the total number of parser events of any kind.
    pub max_events: usize,
    /// Ceiling on the number of alias events.
    pub max_aliases: usize,
    /// Ceiling on the number of distinct anchors (non-zero anchor ids).
    pub max_anchors: usize,
    /// Ceiling on how deeply sequences and mappings may be nested.
    pub max_depth: usize,
    /// Ceiling on the number of documents (document-start events).
    pub max_documents: usize,
    /// Ceiling on the number of nodes; each scalar, sequence and mapping
    /// counts as one node.
    pub max_nodes: usize,
    /// Ceiling on the combined byte length of all scalar values.
    pub max_total_scalar_bytes: usize,
    /// Enables the alias-to-anchor ratio check that runs once the whole
    /// stream has been scanned.
    pub enforce_alias_anchor_ratio: bool,
    /// The ratio check is applied only when at least this many aliases were
    /// counted.
    pub alias_anchor_min_aliases: usize,
    /// The ratio check trips when the alias count exceeds this multiplier
    /// times the number of distinct anchors.
    pub alias_anchor_ratio_multiplier: usize,
}

impl Default for Budget {
    /// Builds the stock limits: one million events, 50 000 aliases and
    /// anchors, depth 2 000, 1 024 documents, 250 000 nodes, 64 MiB of scalar
    /// text, and an enabled ratio check (from 100 aliases on, multiplier 10).
    fn default() -> Self {
        const MIB: usize = 1 << 20;

        Budget {
            max_events: 1_000_000,
            max_aliases: 50_000,
            max_anchors: 50_000,
            max_depth: 2_000,
            max_documents: 1_024,
            max_nodes: 250_000,
            max_total_scalar_bytes: 64 * MIB,
            enforce_alias_anchor_ratio: true,
            alias_anchor_min_aliases: 100,
            alias_anchor_ratio_multiplier: 10,
        }
    }
}
/// The first limit that a YAML stream was found to violate.
///
/// Each data-carrying variant records the counter value observed at the
/// moment the breach was detected.
#[derive(Clone, Debug)]
pub enum BudgetBreach {
    /// The number of parser events exceeded `Budget::max_events`.
    Events { events: usize },
    /// The number of alias events exceeded `Budget::max_aliases`.
    Aliases { aliases: usize },
    /// The number of distinct anchors exceeded `Budget::max_anchors`.
    Anchors { anchors: usize },
    /// Sequence/mapping nesting exceeded `Budget::max_depth`.
    Depth { depth: usize },
    /// The number of documents exceeded `Budget::max_documents`.
    Documents { documents: usize },
    /// The number of nodes exceeded `Budget::max_nodes`.
    Nodes { nodes: usize },
    /// The combined scalar byte length exceeded
    /// `Budget::max_total_scalar_bytes`.
    ScalarBytes { total_scalar_bytes: usize },
    /// The alias count was too high relative to the number of distinct
    /// anchors.
    AliasAnchorRatio { aliases: usize, anchors: usize },
    /// A container end event arrived while no container was open. The checker
    /// reports this for unmatched mapping ends as well as sequence ends.
    SequenceUnbalanced,
}
/// Counters gathered by the budget checker, plus the breach (if any) that
/// stopped the scan.
///
/// The `Default` value has every counter at zero and no breach.
#[derive(Clone, Debug, Default)]
pub struct BudgetReport {
    /// The limit that was violated, or `None` when no limit tripped.
    pub breached: Option<BudgetBreach>,
    /// Parser events counted.
    pub events: usize,
    /// Alias events counted.
    pub aliases: usize,
    /// Distinct anchors counted.
    pub anchors: usize,
    /// Documents (document-start events) counted.
    pub documents: usize,
    /// Nodes counted: scalars, sequences and mappings.
    pub nodes: usize,
    /// Deepest sequence/mapping nesting level observed.
    pub max_depth: usize,
    /// Combined byte length of the scalar values counted.
    pub total_scalar_bytes: usize,
}
pub fn check_yaml_budget(input: &str, budget: &Budget) -> Result<BudgetReport, ScanError> {
let mut parser = Parser::new_from_str(input);
let mut report = BudgetReport::default();
let mut depth: usize = 0;
let mut defined_anchors: HashSet<usize> = HashSet::with_capacity(256);
macro_rules! breach {
($kind:expr) => {{
report.breached = Some($kind);
return Ok(report);
}};
}
while let Some(item) = parser.next() {
let (ev, _span) = item?;
report.events += 1;
if report.events > budget.max_events {
breach!(BudgetBreach::Events {
events: report.events
});
}
match ev {
Event::StreamStart => {}
Event::StreamEnd => {}
Event::DocumentStart(_explicit) => {
report.documents += 1;
if report.documents > budget.max_documents {
breach!(BudgetBreach::Documents {
documents: report.documents
});
}
}
Event::DocumentEnd => {}
Event::Alias(anchor_id) => {
report.aliases += 1;
if report.aliases > budget.max_aliases {
breach!(BudgetBreach::Aliases {
aliases: report.aliases
});
}
let _ = anchor_id; }
Event::Scalar(value, _style, anchor_id, _tag_opt) => {
report.nodes += 1;
if report.nodes > budget.max_nodes {
breach!(BudgetBreach::Nodes {
nodes: report.nodes
});
}
let len = match value {
Cow::Borrowed(s) => s.len(),
Cow::Owned(s) => s.len(),
};
report.total_scalar_bytes = report.total_scalar_bytes.saturating_add(len);
if report.total_scalar_bytes > budget.max_total_scalar_bytes {
breach!(BudgetBreach::ScalarBytes {
total_scalar_bytes: report.total_scalar_bytes
});
}
if anchor_id != 0 {
if defined_anchors.insert(anchor_id) {
if defined_anchors.len() > budget.max_anchors {
breach!(BudgetBreach::Anchors {
anchors: defined_anchors.len()
});
}
}
}
}
Event::SequenceStart(anchor_id, _tag_opt) => {
report.nodes += 1;
if report.nodes > budget.max_nodes {
breach!(BudgetBreach::Nodes {
nodes: report.nodes
});
}
depth += 1;
if depth > report.max_depth {
report.max_depth = depth;
}
if report.max_depth > budget.max_depth {
breach!(BudgetBreach::Depth {
depth: report.max_depth
});
}
if anchor_id != 0 {
if defined_anchors.insert(anchor_id) {
if defined_anchors.len() > budget.max_anchors {
breach!(BudgetBreach::Anchors {
anchors: defined_anchors.len()
});
}
}
}
}
Event::SequenceEnd => {
if let Some(new_depth) = depth.checked_sub(1) {
depth = new_depth;
} else {
breach!(BudgetBreach::SequenceUnbalanced);
}
}
Event::MappingStart(anchor_id, _tag_opt) => {
report.nodes += 1;
if report.nodes > budget.max_nodes {
breach!(BudgetBreach::Nodes {
nodes: report.nodes
});
}
depth += 1;
if depth > report.max_depth {
report.max_depth = depth;
}
if report.max_depth > budget.max_depth {
breach!(BudgetBreach::Depth {
depth: report.max_depth
});
}
if anchor_id != 0 {
if defined_anchors.insert(anchor_id) {
if defined_anchors.len() > budget.max_anchors {
breach!(BudgetBreach::Anchors {
anchors: defined_anchors.len()
});
}
}
}
}
Event::MappingEnd => {
if let Some(new_depth) = depth.checked_sub(1) {
depth = new_depth;
} else {
breach!(BudgetBreach::SequenceUnbalanced);
}
}
Event::Nothing => {}
}
}
report.anchors = defined_anchors.len();
if budget.enforce_alias_anchor_ratio && report.aliases >= budget.alias_anchor_min_aliases {
if report.anchors == 0
|| report.aliases > budget.alias_anchor_ratio_multiplier * report.anchors
{
breach!(BudgetBreach::AliasAnchorRatio {
aliases: report.aliases,
anchors: report.anchors,
});
}
}
Ok(report)
}
/// Convenience wrapper around [`check_yaml_budget`] that only answers whether
/// any limit was violated.
///
/// # Errors
///
/// Propagates the [`ScanError`] returned by [`check_yaml_budget`].
pub fn exceeds_yaml_budget(input: &str, budget: &Budget) -> Result<bool, ScanError> {
    check_yaml_budget(input, budget).map(|outcome| outcome.breached.is_some())
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A small, ordinary document passes under the default budget.
    #[test]
    fn tiny_yaml_ok() {
        let budget = Budget::default();
        let report = check_yaml_budget("a: [1, 2, 3]\n", &budget).unwrap();
        assert!(report.breached.is_none());
        assert_eq!(report.documents, 1);
        assert!(report.nodes > 0);
    }

    /// Five aliases against a limit of three must report an alias breach.
    #[test]
    fn alias_bomb_trips_alias_limit() {
        // The raw literal stays flush-left: indenting it would change the YAML.
        let yaml = r#"root: &A [1, 2]
a: *A
b: *A
c: *A
d: *A
e: *A
"#;
        let budget = Budget {
            max_aliases: 3,
            ..Budget::default()
        };
        let report = check_yaml_budget(yaml, &budget).unwrap();
        assert!(matches!(
            report.breached,
            Some(BudgetBreach::Aliases { .. })
        ));
    }

    /// 200 nested flow sequences against a depth limit of 150 must report a
    /// depth breach.
    #[test]
    fn deep_nesting_trips_depth() {
        let yaml = format!("{}{}", "[".repeat(200), "]".repeat(200));
        let budget = Budget {
            max_depth: 150,
            ..Budget::default()
        };
        let report = check_yaml_budget(&yaml, &budget).unwrap();
        assert!(matches!(report.breached, Some(BudgetBreach::Depth { .. })));
    }

    /// The third distinct anchor against a limit of two must report an anchor
    /// breach carrying the count 3.
    #[test]
    fn anchors_limit_trips() {
        let yaml = "a: &A 1\nb: &B 2\nc: &C 3\n";
        let budget = Budget {
            max_anchors: 2,
            ..Budget::default()
        };
        let report = check_yaml_budget(yaml, &budget).unwrap();
        assert!(matches!(
            report.breached,
            Some(BudgetBreach::Anchors { anchors: 3 })
        ));
    }
}