use crate::error::{ParseError, ParseErrorKind};
use crate::types::Document;
pub fn parse(input: &str) -> Result<Document, ParseError> {
if input.trim().is_empty() {
return Err(ParseError {
kind: ParseErrorKind::Syntax,
message: "empty input".to_string(),
path: None,
line: None,
column: None,
});
}
const MAX_INPUT_SIZE: usize = 10 * 1024 * 1024; if input.len() > MAX_INPUT_SIZE {
return Err(ParseError {
kind: ParseErrorKind::Syntax,
message: format!(
"input size {} bytes exceeds maximum of {} bytes",
input.len(),
MAX_INPUT_SIZE
),
path: None,
line: None,
column: None,
});
}
check_yaml_anchors_aliases(input)?;
check_multi_document(input)?;
let value: serde_json::Value = serde_saphyr::from_str(input).map_err(|e| {
let msg = e.to_string();
ParseError {
kind: classify_saphyr_error(&msg),
message: msg,
path: None,
line: None,
column: None,
}
})?;
if !value.is_object() {
return Err(ParseError {
kind: ParseErrorKind::TypeMismatch,
message: "document root must be a YAML mapping".to_string(),
path: None,
line: None,
column: None,
});
}
let oatf_is_first_key = if let Some(obj) = value.as_object() {
for key in obj.keys() {
match key.as_str() {
"oatf" | "$schema" | "attack" => {}
other => {
return Err(ParseError {
kind: ParseErrorKind::TypeMismatch,
message: format!("unknown top-level field: {}", other),
path: Some(other.to_string()),
line: None,
column: None,
});
}
}
}
obj.keys().next().map(|k| k == "oatf").unwrap_or(false)
} else {
false
};
let mut doc: Document = serde_json::from_value(value).map_err(|e| {
let msg = e.to_string();
ParseError {
kind: classify_json_error(&msg),
message: msg,
path: None,
line: None,
column: None,
}
})?;
doc.oatf_is_first_key = oatf_is_first_key;
validate_extension_keys(&doc)?;
Ok(doc)
}
/// Checks every extension map reachable from `doc.attack`.
///
/// Maps are visited in this order: the attack itself, its execution block,
/// each actor and that actor's phases, the execution-level phases, and finally
/// the indicators. The first offending key aborts the walk.
///
/// # Errors
///
/// Propagates the error produced by `check_extensions` for the first key that
/// does not start with `x-`.
fn validate_extension_keys(doc: &Document) -> Result<(), ParseError> {
    let attack = &doc.attack;
    let execution = &attack.execution;

    check_extensions(&attack.extensions, "attack")?;
    check_extensions(&execution.extensions, "attack.execution")?;

    if let Some(actors) = &execution.actors {
        for (actor_idx, actor) in actors.iter().enumerate() {
            let actor_path = format!("attack.execution.actors[{}]", actor_idx);
            check_extensions(&actor.extensions, &actor_path)?;

            for (phase_idx, phase) in actor.phases.iter().enumerate() {
                let phase_path = format!("{}.phases[{}]", actor_path, phase_idx);
                check_extensions(&phase.extensions, &phase_path)?;
            }
        }
    }

    if let Some(phases) = &execution.phases {
        for (phase_idx, phase) in phases.iter().enumerate() {
            let phase_path = format!("attack.execution.phases[{}]", phase_idx);
            check_extensions(&phase.extensions, &phase_path)?;
        }
    }

    if let Some(indicators) = &attack.indicators {
        for (indicator_idx, indicator) in indicators.iter().enumerate() {
            let indicator_path = format!("attack.indicators[{}]", indicator_idx);
            check_extensions(&indicator.extensions, &indicator_path)?;
        }
    }

    Ok(())
}
/// Ensures every key in `extensions` carries the `x-` prefix.
///
/// `path` is the dotted location of the object that owns the map; it is used
/// in the error message and as the prefix of the reported error path.
///
/// # Errors
///
/// Returns a `TypeMismatch` error for the first key (in map order) that does
/// not start with `x-`.
fn check_extensions(
    extensions: &indexmap::IndexMap<String, serde_json::Value>,
    path: &str,
) -> Result<(), ParseError> {
    match extensions.keys().find(|key| !key.starts_with("x-")) {
        None => Ok(()),
        Some(key) => Err(ParseError {
            kind: ParseErrorKind::TypeMismatch,
            message: format!(
                "unknown field '{}' at {} (non-extension fields must not use reserved names; extension fields must start with 'x-')",
                key, path
            ),
            path: Some(format!("{}.{}", path, key)),
            line: None,
            column: None,
        }),
    }
}
/// Scans the raw YAML text line by line and rejects merge keys (`<<`),
/// anchors (`&name`), aliases (`*name`) and tags (`!tag`).
///
/// Blank lines and whole-line comments are ignored. The content lines of a
/// block scalar are skipped, but the line that introduces the block scalar is
/// still scanned, so an anchor or alias placed on the key of such a line
/// (for example `&a key: |`) is reported rather than silently skipped.
///
/// # Errors
///
/// Returns a `Syntax` error for the first forbidden construct found. `line`
/// is 1-based. `column` is a 1-based byte offset into the original line,
/// including its leading indentation. Each quoted scalar is collapsed to a
/// single character by `strip_yaml_string_literals` before scanning, so the
/// column is approximate when a quoted scalar precedes the match. Merge-key
/// errors carry no column.
fn check_yaml_anchors_aliases(input: &str) -> Result<(), ParseError> {
    /// Builds the `Syntax` error for a forbidden construct on line `line_idx` (0-based).
    fn rejected(message: &str, line_idx: usize, column: Option<usize>) -> ParseError {
        ParseError {
            kind: ParseErrorKind::Syntax,
            message: message.to_string(),
            path: None,
            line: Some(line_idx + 1),
            column,
        }
    }

    let lines: Vec<&str> = input.lines().collect();
    let mut i = 0;
    while i < lines.len() {
        let line = lines[i];
        let trimmed = line.trim();
        if trimmed.is_empty() || trimmed.starts_with('#') {
            i += 1;
            continue;
        }

        // Match offsets below are relative to `trimmed`; add the indentation
        // back so the reported column refers to the original line.
        let indent = line.len() - line.trim_start().len();

        let no_comment = strip_trailing_comment(trimmed);
        let in_content = strip_yaml_string_literals(no_comment);
        if in_content.contains("<<:") || in_content.contains("<< :") {
            return Err(rejected(
                "YAML merge keys (<<) are not allowed in OATF documents",
                i,
                None,
            ));
        }

        let scannable = mask_plain_scalar_values(&in_content);
        if let Some(pos) = find_yaml_anchor(&scannable) {
            return Err(rejected(
                "YAML anchors (&) are not allowed in OATF documents",
                i,
                Some(indent + pos + 1),
            ));
        }
        if let Some(pos) = find_yaml_alias(&scannable) {
            return Err(rejected(
                "YAML aliases (*) are not allowed in OATF documents",
                i,
                Some(indent + pos + 1),
            ));
        }
        if let Some(pos) = find_yaml_tag(&in_content) {
            return Err(rejected(
                "custom YAML tags are not allowed in OATF documents",
                i,
                Some(indent + pos + 1),
            ));
        }

        // Only after the header line itself has been checked may the block
        // scalar's content be skipped.
        i = if line_introduces_block_scalar(trimmed) {
            skip_block_scalar(&lines, i)
        } else {
            i + 1
        };
    }
    Ok(())
}
/// Blanks out the value part of a `key: value` or `- value` line.
///
/// The value starts at the first character that is neither a space nor a tab
/// after the key's colon, or, when the line has no such colon, after a leading
/// `- `. From there to the end of the line every byte is replaced by a space,
/// so the result has the same byte length as `line`. A value whose first byte
/// is `&`, `*`, `[` or `{` is left untouched, as is a line with no value.
fn mask_plain_scalar_values(line: &str) -> String {
    // Byte offset from which to look for the start of the value.
    let search_from = match find_colon_in_yaml(line) {
        Some(colon_pos) => Some(colon_pos + 1),
        None => {
            let unindented = line.trim_start();
            if unindented.starts_with("- ") {
                Some(line.len() - unindented.len() + 2)
            } else {
                None
            }
        }
    };

    let value_pos = search_from.and_then(|from| {
        line[from..]
            .find(|c: char| c != ' ' && c != '\t')
            .map(|offset| from + offset)
    });

    match value_pos {
        Some(pos) if !matches!(line.as_bytes()[pos], b'&' | b'*' | b'[' | b'{') => {
            let mut masked = String::with_capacity(line.len());
            masked.push_str(&line[..pos]);
            masked.push_str(&" ".repeat(line.len() - pos));
            masked
        }
        _ => line.to_string(),
    }
}
/// Reports whether `trimmed` ends in a block scalar header.
///
/// The value is taken after the key's colon or, failing that, after a leading
/// `- `. Once any trailing comment is removed it must be exactly one of `|`,
/// `>`, `|-`, `|+`, `>-` or `>+`.
///
/// NOTE(review): headers with an explicit indentation indicator (such as
/// `|2`) are not in this list and are therefore not recognised.
fn line_introduces_block_scalar(trimmed: &str) -> bool {
    let value_part = match find_colon_in_yaml(trimmed) {
        Some(colon_pos) => &trimmed[colon_pos + 1..],
        None => match trimmed.strip_prefix("- ") {
            Some(rest) => rest,
            None => return false,
        },
    };
    let header = strip_trailing_comment(value_part.trim()).trim();
    ["|", ">", "|-", "|+", ">-", ">+"].contains(&header)
}
/// Skips a double-quoted scalar that opens at `bytes[start]`.
///
/// A backslash escapes the byte that follows it, so `\"` does not close the
/// scalar.
///
/// Returns the index just past the closing quote, or `bytes.len()` when the
/// scalar is not terminated. The result never exceeds `bytes.len()`, even
/// when the input ends with a lone backslash, so it is always safe to use as
/// a slice bound.
///
/// Callers are expected to pass `start < bytes.len()`.
fn skip_double_quoted(bytes: &[u8], start: usize) -> usize {
    let mut i = start + 1;
    while let Some(&byte) = bytes.get(i) {
        match byte {
            // Skip the escape and the escaped byte; this may step past the end.
            b'\\' => i += 2,
            b'"' => return i + 1,
            _ => i += 1,
        }
    }
    // Unterminated: clamp to the end instead of returning an overshot index.
    bytes.len()
}
/// Skips a single-quoted scalar that opens at `bytes[start]`.
///
/// A doubled quote (`''`) is an escaped quote and does not close the scalar.
/// Returns the index just past the closing quote, or the position where the
/// scan ran out of input when the scalar is not terminated.
fn skip_single_quoted(bytes: &[u8], start: usize) -> usize {
    let mut pos = start + 1;
    loop {
        match bytes.get(pos).copied() {
            None => return pos,
            Some(b'\'') => {
                if bytes.get(pos + 1).copied() == Some(b'\'') {
                    // Escaped quote: step over both bytes and keep going.
                    pos += 2;
                } else {
                    return pos + 1;
                }
            }
            Some(_) => pos += 1,
        }
    }
}
/// Finds the first key/value separator colon in `line`.
///
/// A colon counts only when it is the last byte of the line or is followed by
/// a space or a tab. Colons inside single- or double-quoted scalars are
/// ignored. Returns the byte offset of the colon, or `None`.
fn find_colon_in_yaml(line: &str) -> Option<usize> {
    let bytes = line.as_bytes();
    let mut cursor = 0;
    while let Some(&byte) = bytes.get(cursor) {
        cursor = match byte {
            b'"' => skip_double_quoted(bytes, cursor),
            b'\'' => skip_single_quoted(bytes, cursor),
            b':' if matches!(
                bytes.get(cursor + 1).copied(),
                None | Some(b' ') | Some(b'\t')
            ) =>
            {
                return Some(cursor);
            }
            _ => cursor + 1,
        };
    }
    None
}
/// Returns `value` with a trailing `#` comment removed.
///
/// A comment starts at a space that is immediately followed by `#`; the
/// returned slice ends before that space. A `#` in the very first position
/// makes the whole value a comment and yields `""`. Quoted scalars are
/// skipped, so a `#` inside quotes is not treated as a comment.
fn strip_trailing_comment(value: &str) -> &str {
    let bytes = value.as_bytes();
    let mut cursor = 0;
    while cursor < bytes.len() {
        let byte = bytes[cursor];
        if byte == b'"' {
            cursor = skip_double_quoted(bytes, cursor);
            continue;
        }
        if byte == b'\'' {
            cursor = skip_single_quoted(bytes, cursor);
            continue;
        }
        if byte == b' ' && bytes.get(cursor + 1) == Some(&b'#') {
            return &value[..cursor];
        }
        if byte == b'#' && cursor == 0 {
            return "";
        }
        cursor += 1;
    }
    value
}
/// Returns the index of the first line after the block scalar whose header is
/// `lines[start_idx]`.
///
/// The content indentation is taken from the first non-blank line after the
/// header. Content continues while lines are blank or indented at least that
/// much. If the first non-blank line is not indented deeper than the node that
/// owns the scalar, the scalar is empty and `start_idx + 1` is returned so the
/// following line is examined normally.
///
/// The owning node's indentation accounts for compact sequence entries. In
/// `- key: |` the key sits two columns to the right of the dash, so a
/// following line aligned with `key` is a sibling mapping entry, not scalar
/// content, and must not be skipped. For a bare `- |` entry the dash itself
/// is the owner.
fn skip_block_scalar(lines: &[&str], start_idx: usize) -> usize {
    /// Number of leading whitespace bytes.
    fn leading_indent(line: &str) -> usize {
        line.len() - line.trim_start().len()
    }

    /// Column of the node that owns the block scalar introduced by `header`.
    fn parent_indent(header: &str) -> usize {
        let mut rest = header.trim_start();
        let mut indent = header.len() - rest.len();
        while let Some(after_dash) = rest.strip_prefix("- ") {
            let entry = after_dash.trim_start();
            // `- |`: the scalar is the sequence entry itself.
            if entry.starts_with('|') || entry.starts_with('>') {
                break;
            }
            indent += rest.len() - entry.len();
            rest = entry;
        }
        indent
    }

    let header = match lines.get(start_idx) {
        Some(line) => *line,
        None => return start_idx + 1,
    };

    let first_content = lines
        .iter()
        .enumerate()
        .skip(start_idx + 1)
        .find(|(_, line)| !line.trim().is_empty());
    let (first_idx, content_indent) = match first_content {
        Some((idx, line)) => (idx, leading_indent(line)),
        // Only blank lines (or nothing) follow the header.
        None => return lines.len(),
    };

    if content_indent <= parent_indent(header) {
        return start_idx + 1;
    }

    lines[first_idx..]
        .iter()
        .position(|line| !line.trim().is_empty() && leading_indent(line) < content_indent)
        .map_or(lines.len(), |offset| first_idx + offset)
}
/// Returns the byte offset of the first anchor (`&name`) in `line`, if any.
///
/// See `find_node_indicator` for the exact matching rule.
fn find_yaml_anchor(line: &str) -> Option<usize> {
    find_node_indicator(line, b'&')
}

/// Returns the byte offset of the first alias (`*name`) in `line`, if any.
///
/// See `find_node_indicator` for the exact matching rule.
fn find_yaml_alias(line: &str) -> Option<usize> {
    find_node_indicator(line, b'*')
}

/// Shared scanner for anchors and aliases.
///
/// `indicator` matches at offset `i` when the next byte is an anchor-name
/// byte (see `is_yaml_anchor_char`) and the indicator is either the first
/// byte of the line or follows a byte accepted by
/// `may_precede_node_indicator`.
fn find_node_indicator(line: &str, indicator: u8) -> Option<usize> {
    let bytes = line.as_bytes();
    (0..bytes.len()).find(|&i| {
        bytes[i] == indicator
            && bytes
                .get(i + 1)
                .map_or(false, |&next| is_yaml_anchor_char(next))
            && (i == 0 || may_precede_node_indicator(bytes[i - 1]))
    })
}

/// Bytes after which an anchor or alias indicator is recognised.
///
/// Besides space, `:` and `-`, this includes tab and the flow indicators
/// `[`, `{` and `,`, so that `[&a 1,*a]` or `key:\t&a` are caught.
fn may_precede_node_indicator(b: u8) -> bool {
    matches!(b, b' ' | b'\t' | b':' | b'-' | b'[' | b'{' | b',')
}

/// Returns the byte offset of the first tag-like `!` in `line`, if any.
///
/// A `!` matches when it is the first byte of the line or follows a space,
/// `:` or `-`, and is followed by a byte other than a space. A `!` at the end
/// of the line does not match.
fn find_yaml_tag(line: &str) -> Option<usize> {
    let bytes = line.as_bytes();
    (0..bytes.len()).find(|&i| {
        bytes[i] == b'!'
            && (i == 0 || matches!(bytes[i - 1], b' ' | b':' | b'-'))
            && bytes.get(i + 1).map_or(false, |&next| next != b' ')
    })
}

/// Reports whether `b` may start an anchor or alias name for the purposes of
/// this scanner: ASCII letters and digits, `_` and `-`.
///
/// NOTE(review): the YAML grammar appears to permit a wider set of name
/// characters than this; confirm whether names starting with other
/// characters need to be rejected as well.
fn is_yaml_anchor_char(b: u8) -> bool {
    b.is_ascii_alphanumeric() || matches!(b, b'_' | b'-')
}
/// Replaces every quoted scalar in `line` with a single space.
///
/// Double-quoted scalars end at the first `"` that is not preceded by a
/// backslash escape; single-quoted scalars end at the first `'` that is not
/// doubled. An unterminated scalar swallows the rest of the line. Everything
/// outside quotes is copied unchanged.
fn strip_yaml_string_literals(line: &str) -> String {
    /// Where the scanner currently is relative to quoted scalars.
    #[derive(Clone, Copy)]
    enum Scan {
        Plain,
        Double,
        Single,
    }

    let mut out = String::with_capacity(line.len());
    let mut state = Scan::Plain;
    let mut rest = line.chars().peekable();

    while let Some(ch) = rest.next() {
        state = match (state, ch) {
            (Scan::Plain, '"') => {
                out.push(' ');
                Scan::Double
            }
            (Scan::Plain, '\'') => {
                out.push(' ');
                Scan::Single
            }
            (Scan::Plain, other) => {
                out.push(other);
                Scan::Plain
            }
            (Scan::Double, '\\') => {
                // Drop the escaped character along with the backslash.
                rest.next();
                Scan::Double
            }
            (Scan::Double, '"') => Scan::Plain,
            (Scan::Double, _) => Scan::Double,
            (Scan::Single, '\'') => {
                if rest.peek() == Some(&'\'') {
                    // `''` is an escaped quote inside the scalar.
                    rest.next();
                    Scan::Single
                } else {
                    Scan::Plain
                }
            }
            (Scan::Single, _) => Scan::Single,
        };
    }
    out
}
/// Rejects input that contains more than one YAML document.
///
/// A document-start marker is a line that begins with `---` followed by end
/// of line, a space or a tab, which covers `--- # comment`. A marker is
/// allowed only as the opening of the single document: once a document has
/// started, either through an earlier marker or through content that
/// precedes the first marker, any further marker is an error. Blank lines,
/// comment lines and `%` directive lines ahead of the first marker do not
/// count as content.
///
/// # Errors
///
/// Returns a `Syntax` error without position information when a second
/// document is detected.
fn check_multi_document(input: &str) -> Result<(), ParseError> {
    /// Reports whether `line` is a document-start marker.
    fn is_document_start(line: &str) -> bool {
        match line.strip_prefix("---") {
            Some(rest) => rest.is_empty() || rest.starts_with(' ') || rest.starts_with('\t'),
            None => false,
        }
    }

    let mut document_open = false;
    for line in input.lines() {
        if is_document_start(line) {
            if document_open {
                return Err(ParseError {
                    kind: ParseErrorKind::Syntax,
                    message: "multi-document YAML is not supported".to_string(),
                    path: None,
                    line: None,
                    column: None,
                });
            }
            document_open = true;
        } else if !document_open {
            let trimmed = line.trim();
            let ignorable =
                trimmed.is_empty() || trimmed.starts_with('#') || line.starts_with('%');
            // Content before any marker opens an implicit first document.
            document_open = !ignorable;
        }
    }
    Ok(())
}
/// Maps a YAML deserializer error message to a [`ParseErrorKind`] by keyword.
///
/// The message is lower-cased first. `unknown` or `variant` yields
/// `UnknownVariant`; otherwise `type`, `invalid` or `expected` yields
/// `TypeMismatch`; anything else is `Syntax`.
fn classify_saphyr_error(msg: &str) -> ParseErrorKind {
    let lowered = msg.to_lowercase();
    let mentions_any = |needles: &[&str]| needles.iter().any(|needle| lowered.contains(*needle));

    if mentions_any(&["unknown", "variant"]) {
        return ParseErrorKind::UnknownVariant;
    }
    if mentions_any(&["type", "invalid", "expected"]) {
        return ParseErrorKind::TypeMismatch;
    }
    ParseErrorKind::Syntax
}
/// Maps a `serde_json` conversion error message to a [`ParseErrorKind`] by
/// keyword.
///
/// The message is lower-cased first. `unknown variant` or `unknown field`
/// yields `UnknownVariant`; otherwise `missing field` or `invalid type` yields
/// `TypeMismatch`; anything else is `Syntax`.
fn classify_json_error(msg: &str) -> ParseErrorKind {
    let lowered = msg.to_lowercase();
    let mentions_any = |needles: &[&str]| needles.iter().any(|needle| lowered.contains(*needle));

    if mentions_any(&["unknown variant", "unknown field"]) {
        return ParseErrorKind::UnknownVariant;
    }
    if mentions_any(&["missing field", "invalid type"]) {
        return ParseErrorKind::TypeMismatch;
    }
    ParseErrorKind::Syntax
}