use crate::mermaid_engine::error::ParseError;
pub fn validate(input: &str) -> Result<(), ParseError> {
let lines: Vec<&str> = input.lines().collect();
check_init_directive(&lines)?;
check_subgraph_balance(&lines)?;
check_leading_arrow(&lines)?;
check_click_quotes(&lines)?;
check_sequence_participants(&lines)?;
Ok(())
}
/// Validates single-line `%%{name: body}%%` directives.
///
/// A line is treated as a directive when, after leading whitespace, it starts
/// with `%%{`. Every such line must end (ignoring trailing whitespace) with
/// `}%%` and contain a `:` separating the directive name from its body.
/// Only directives named exactly `init` have their body inspected: it must be
/// non-empty and must parse with `json5`. Other directive names are accepted
/// as-is.
///
/// # Errors
///
/// Returns `ParseError::InvalidDirective` carrying the 1-based line, the column
/// of the first non-whitespace character, the directive name (`"unknown"` when
/// the name could not be determined) and a human-readable reason.
fn check_init_directive(lines: &[&str]) -> Result<(), ParseError> {
    for (idx, raw) in lines.iter().enumerate() {
        let Some(after_open) = raw.trim_start().strip_prefix("%%{") else {
            continue;
        };

        let line = u32_from_index(idx);
        let col = col_of_first_nonws(raw);
        // Every failure on this line shares the same position; only the
        // directive name and the reason vary.
        let invalid = |directive: &str, reason: String| ParseError::InvalidDirective {
            line,
            col,
            directive: directive.to_string(),
            reason,
        };

        let Some(inside) = after_open.trim_end().strip_suffix("}%%") else {
            return Err(invalid(
                "unknown",
                "missing closing '}%%' fence".to_string(),
            ));
        };
        let Some((name, body)) = inside.split_once(':') else {
            return Err(invalid(
                "unknown",
                "missing ':' between directive name and body".to_string(),
            ));
        };
        let (name, body) = (name.trim(), body.trim());

        // Directives other than `init` are tolerated without inspecting the body.
        if name != "init" {
            continue;
        }
        if body.is_empty() {
            return Err(invalid(name, "empty body".to_string()));
        }
        json5::from_str::<serde_json::Value>(body)
            .map_err(|e| invalid(name, format!("JSON parse error: {e}")))?;
    }
    Ok(())
}
fn check_subgraph_balance(lines: &[&str]) -> Result<(), ParseError> {
let mut open_stack: Vec<u32> = Vec::new();
for (idx, raw) in lines.iter().enumerate() {
let line_no = u32_from_index(idx);
let trimmed = raw.trim();
if trimmed.is_empty() || trimmed.starts_with("%%") {
continue;
}
if is_subgraph_open(trimmed) {
open_stack.push(line_no);
} else if is_subgraph_close(trimmed) && open_stack.pop().is_none() {
let col = col_of_first_nonws(raw);
return Err(ParseError::UnexpectedToken {
line: line_no,
col,
found: "end".to_string(),
expected: "matching subgraph".to_string(),
});
}
}
if let Some(opened_at) = open_stack.first() {
return Err(ParseError::UnclosedSubgraph {
opened_at: *opened_at,
});
}
Ok(())
}
/// Rejects the first line whose first token is an edge operator, i.e. an arrow
/// with no source node in front of it.
///
/// Blank lines and lines starting with `%%` are ignored. Arrow detection is
/// delegated to [`starts_with_arrow`].
///
/// # Errors
///
/// Returns `ParseError::UnexpectedToken` with the offending first token as
/// `found` and `"node identifier"` as `expected`.
fn check_leading_arrow(lines: &[&str]) -> Result<(), ParseError> {
    let offender = lines.iter().enumerate().find(|(_, raw)| {
        let body = raw.trim_start();
        !body.is_empty() && !body.starts_with("%%") && starts_with_arrow(body)
    });

    match offender {
        None => Ok(()),
        Some((idx, raw)) => {
            // The reported token is everything up to the first whitespace.
            let first_token = raw
                .trim_start()
                .split(char::is_whitespace)
                .next()
                .unwrap_or("");
            Err(ParseError::UnexpectedToken {
                line: u32_from_index(idx),
                col: col_of_first_nonws(raw),
                found: first_token.to_string(),
                expected: "node identifier".to_string(),
            })
        }
    }
}
/// Checks that `click` statements contain an even number of double quotes.
///
/// A line is a `click` statement when, after leading whitespace, it starts
/// with `click` followed by a space or a tab. All other lines are ignored.
///
/// # Errors
///
/// Returns `ParseError::UnexpectedToken` (found `"`, expected
/// `matching double quote`) positioned at the first double quote on the line.
fn check_click_quotes(lines: &[&str]) -> Result<(), ParseError> {
    for (idx, raw) in lines.iter().enumerate() {
        let statement = raw.trim_start();
        let separator = statement
            .strip_prefix("click")
            .and_then(|after| after.bytes().next());
        if !matches!(separator, Some(b' ' | b'\t')) {
            continue;
        }

        if statement.matches('"').count() % 2 != 0 {
            // Leading whitespace contains no quotes, so searching the raw line
            // yields the same byte offset as searching the trimmed statement.
            let col = raw
                .find('"')
                .and_then(|offset| col_of_char_offset(raw, offset))
                .unwrap_or_else(|| col_of_first_nonws(raw));
            return Err(ParseError::UnexpectedToken {
                line: u32_from_index(idx),
                col,
                found: "\"".to_string(),
                expected: "matching double quote".to_string(),
            });
        }
    }
    Ok(())
}
/// In sequence diagrams that declare participants, checks that every message
/// endpoint refers to a declared name or alias.
///
/// The check is a no-op when the input is not a sequence diagram or when it
/// declares no participants at all. Blank lines and `%%` lines are skipped.
/// For each message line the sender is checked before the receiver, and empty
/// endpoints are ignored.
///
/// # Errors
///
/// Returns `ParseError::UnknownParticipant` for the first undeclared endpoint,
/// including its 1-based line and the candidates from [`nearest_candidates`].
fn check_sequence_participants(lines: &[&str]) -> Result<(), ParseError> {
    if !looks_like_sequence_diagram(lines) {
        return Ok(());
    }
    let known = collect_declared_participants(lines);
    if known.is_empty() {
        return Ok(());
    }

    for (idx, raw) in lines.iter().enumerate() {
        let statement = raw.trim();
        if statement.is_empty() || statement.starts_with("%%") {
            continue;
        }
        let Some((sender, receiver)) = split_sequence_arrow(statement) else {
            continue;
        };

        let stranger = [sender.trim(), receiver.trim()]
            .iter()
            .copied()
            .find(|&who| !who.is_empty() && !known.iter().any(|k| k.as_str() == who));
        if let Some(who) = stranger {
            return Err(ParseError::UnknownParticipant {
                name: who.to_string(),
                line: u32_from_index(idx),
                candidates: nearest_candidates(who, &known),
            });
        }
    }
    Ok(())
}
/// Converts a 0-based line index into a 1-based `u32` line number.
///
/// The result saturates at `u32::MAX` when the line number does not fit.
/// The increment itself saturates as well, so `usize::MAX` cannot overflow.
fn u32_from_index(idx: usize) -> u32 {
    u32::try_from(idx.saturating_add(1)).unwrap_or(u32::MAX)
}
/// Returns the 1-based column, counted in characters, of the first
/// non-whitespace character in `raw`.
///
/// Lines that are empty or consist only of whitespace yield column 1.
/// The result saturates at `u32::MAX`.
fn col_of_first_nonws(raw: &str) -> u32 {
    // `position` counts characters, not bytes, so multi-byte whitespace is
    // measured as a single column.
    let leading_chars = raw
        .chars()
        .position(|c| !c.is_whitespace())
        .unwrap_or(0);
    u32::try_from(leading_chars + 1).unwrap_or(u32::MAX)
}
/// Translates a byte offset into `raw` to a 1-based column counted in characters.
///
/// Returns `None` when `byte_offset` is past the end of `raw` or does not fall
/// on a character boundary. The column saturates at `u32::MAX`.
fn col_of_char_offset(raw: &str, byte_offset: usize) -> Option<u32> {
    // `get` rejects both out-of-range offsets and offsets inside a character.
    let prefix = raw.get(..byte_offset)?;
    let column = prefix.chars().count() + 1;
    Some(u32::try_from(column).unwrap_or(u32::MAX))
}
/// Reports whether an already-trimmed line opens a subgraph.
///
/// The line must be the keyword `subgraph` (ASCII case-insensitive), either on
/// its own or followed by a space or a tab.
fn is_subgraph_open(trimmed: &str) -> bool {
    const KEYWORD: &[u8] = b"subgraph";

    let bytes = trimmed.as_bytes();
    let Some(head) = bytes.get(..KEYWORD.len()) else {
        return false;
    };
    // The keyword is pure ASCII, so a byte-wise comparison is exact.
    head.eq_ignore_ascii_case(KEYWORD)
        && matches!(bytes.get(KEYWORD.len()), None | Some(b' ' | b'\t'))
}
/// Reports whether an already-trimmed line closes a subgraph.
///
/// The line must start with the keyword `end` (ASCII case-insensitive) and
/// then either stop, continue with a space or a tab, or continue directly
/// with a `%%` comment.
fn is_subgraph_close(trimmed: &str) -> bool {
    const KEYWORD: &[u8] = b"end";

    let bytes = trimmed.as_bytes();
    if bytes.len() < KEYWORD.len() {
        return false;
    }
    let (head, tail) = bytes.split_at(KEYWORD.len());
    head.eq_ignore_ascii_case(KEYWORD)
        && matches!(tail, [] | [b' ' | b'\t', ..] | [b'%', b'%', ..])
}
/// Reports whether the first whitespace-delimited token of `trimmed` looks
/// like an edge operator with no source node in front of it (for example
/// `-->`, `==>`, `-.->`, `<|--`, `--o`).
///
/// Rules, in order:
///
/// * Tokens starting with `<<` are never arrows, so class-diagram annotations
///   such as `<<trait>>` pass.
/// * The token must start with `-`, `=`, `~`, `.` or `<`.
/// * The leading run of operator punctuation (`- = ~ . < > |`) must contain a
///   stem character (`- = . ~`) and must either contain a head character
///   (`> < |`), be at least two characters long, or be followed by a lone
///   `o` / `x` head that ends the token (`-x`, `-o`).
///
/// The letters `o` and `x` are deliberately not treated as operator characters
/// in the middle of a token. Otherwise class members whose visibility marker is
/// followed by a name starting with one of those letters (`-offset: u32`,
/// `-x: i32`, `~open()`) would be misreported as leading arrows.
fn starts_with_arrow(trimmed: &str) -> bool {
    let token = trimmed.split_whitespace().next().unwrap_or("");
    if token.starts_with("<<") {
        return false;
    }

    let bytes = token.as_bytes();
    if !matches!(bytes.first(), Some(b'-' | b'=' | b'~' | b'.' | b'<')) {
        return false;
    }

    let punct_len = bytes
        .iter()
        .take_while(|&&b| matches!(b, b'-' | b'=' | b'~' | b'.' | b'<' | b'>' | b'|'))
        .count();
    let (operator, rest) = bytes.split_at(punct_len);

    let has_edge_stem = operator
        .iter()
        .any(|&b| matches!(b, b'-' | b'=' | b'.' | b'~'));
    let has_punct_head = operator.iter().any(|&b| matches!(b, b'>' | b'<' | b'|'));
    // A circle/cross head only counts when nothing follows it in the token.
    let has_letter_head = matches!(rest, [b'o' | b'x']);

    has_edge_stem && (has_punct_head || has_letter_head || punct_len >= 2)
}
/// Reports whether the first significant line declares a sequence diagram.
///
/// Blank lines and lines starting with `%%` are skipped. The first remaining
/// line decides the result: it must start with `sequenceDiagram`, compared
/// ASCII case-insensitively. Input with no significant line yields `false`.
fn looks_like_sequence_diagram(lines: &[&str]) -> bool {
    const HEADER: &[u8] = b"sequencediagram";

    lines
        .iter()
        .map(|raw| raw.trim())
        .find(|line| !line.is_empty() && !line.starts_with("%%"))
        .and_then(|line| line.as_bytes().get(..HEADER.len()))
        .map_or(false, |head| head.eq_ignore_ascii_case(HEADER))
}
/// Collects every name a sequence diagram declares via `participant` / `actor`.
///
/// Recognised forms (after trimming the line), where the keyword may be
/// followed by any whitespace, not only a single space:
///
/// * `participant Name` / `actor Name`
/// * `participant Name as Alias` — both `Name` and `Alias` are recorded
/// * `create participant Name` / `create actor Name`
///
/// Names are returned in declaration order; duplicates are not removed.
fn collect_declared_participants(lines: &[&str]) -> Vec<String> {
    let mut declared: Vec<String> = Vec::new();
    for raw in lines {
        let line = raw.trim();
        // An optional `create` prefix introduces a participant mid-diagram.
        let line = strip_declaration_keyword(line, "create").unwrap_or(line);
        let Some(rest) = strip_declaration_keyword(line, "participant")
            .or_else(|| strip_declaration_keyword(line, "actor"))
        else {
            continue;
        };

        let rest = rest.trim();
        match rest.split_once(" as ") {
            Some((name, alias)) => declared.extend(
                [name.trim(), alias.trim()]
                    .iter()
                    .filter(|part| !part.is_empty())
                    .map(|part| part.to_string()),
            ),
            None if !rest.is_empty() => declared.push(rest.to_string()),
            None => {}
        }
    }
    declared
}

/// Strips `keyword` plus the whitespace after it from the start of `line`.
///
/// Returns `None` unless the keyword is followed by at least one whitespace
/// character, so `participantX` or a bare `actor` do not match.
fn strip_declaration_keyword<'a>(line: &'a str, keyword: &str) -> Option<&'a str> {
    let rest = line.strip_prefix(keyword)?;
    rest.starts_with(char::is_whitespace)
        .then(|| rest.trim_start())
}
/// Splits a sequence-diagram message line into its sender and receiver.
///
/// Only the part before the first `:` (the message text separator) is
/// searched. Arrow patterns are tried in the listed order, with longer arrows
/// before their shorter prefixes, and the line is split at the first
/// occurrence of the first pattern that matches. Returns `None` when no
/// arrow is present.
///
/// The receiver is returned without leading whitespace and without a leading
/// activation marker: `Alice->>+John` and `John-->>-Alice` yield `John` and
/// `Alice`, not `+John` and `-Alice`. The sender is returned as written.
fn split_sequence_arrow(trimmed: &str) -> Option<(&str, &str)> {
    // Bidirectional arrows come first so their `<<` does not stay on the sender.
    const PATTERNS: &[&str] = &[
        "<<-->>", "<<->>", "-->>", "--x", "--)", "-->", "->>", "->", "-x", "-)",
    ];

    let before_colon = trimmed.split_once(':').map_or(trimmed, |(head, _)| head);
    let (lhs, rhs) = PATTERNS
        .iter()
        .find_map(|pat| before_colon.split_once(*pat))?;

    // `+` / `-` directly after the arrow toggles activation; it is not part of
    // the participant name.
    let rhs = rhs.trim_start();
    let rhs = rhs.strip_prefix(['+', '-']).unwrap_or(rhs);
    Some((lhs, rhs))
}
/// Suggests up to three declared names that resemble `target`.
///
/// A declared name is a candidate when, compared ASCII case-insensitively, it
/// is a prefix of `target` or `target` is a prefix of it (equality included).
/// Candidates keep their original spelling and declaration order.
fn nearest_candidates(target: &str, declared: &[String]) -> Vec<String> {
    const MAX_SUGGESTIONS: usize = 3;

    let needle = target.to_ascii_lowercase();
    let mut suggestions = Vec::new();
    for candidate in declared {
        if suggestions.len() == MAX_SUGGESTIONS {
            break;
        }
        let folded = candidate.to_ascii_lowercase();
        // Equal strings are prefixes of each other, so no separate equality test.
        if folded.starts_with(&needle) || needle.starts_with(&folded) {
            suggestions.push(candidate.clone());
        }
    }
    suggestions
}
// Unit tests for the validation pass. They are compiled only for test builds
// that also enable the `mermaid_engine_internal_tests` feature, and they all
// go through the public `validate` entry point.
#[cfg(all(test, feature = "mermaid_engine_internal_tests"))]
mod tests {
use super::*;

// --- `%%{...}%%` directives -------------------------------------------------

// A well-formed `init` directive with a JSON body is accepted.
#[test]
fn init_directive_valid_json_passes() {
let input = r#"%%{init: {"theme": "dark"}}%%
flowchart LR
A-->B"#;
assert!(validate(input).is_ok());
}
// An `init` body that fails to parse is reported against the `init` directive.
#[test]
fn init_directive_invalid_json_is_reported() {
let input = r#"%%{init: {theme dark}}%%
flowchart LR"#;
let err = validate(input).unwrap_err();
assert!(
matches!(err, ParseError::InvalidDirective { directive, .. } if directive == "init")
);
}
// A directive without a `:` separator is rejected.
#[test]
fn init_directive_missing_colon_is_reported() {
let input = r#"%%{init}%%
flowchart LR"#;
let err = validate(input).unwrap_err();
assert!(matches!(err, ParseError::InvalidDirective { .. }));
}
// Directives with a name other than `init` are not inspected.
#[test]
fn init_directive_unknown_name_is_tolerated() {
let input = r#"%%{customdirective: {"x": 1}}%%
flowchart LR"#;
assert!(validate(input).is_ok());
}

// --- subgraph / end balance -------------------------------------------------

// The reported line is the 1-based line of the unclosed `subgraph`.
#[test]
fn subgraph_unclosed_is_reported() {
let input = "flowchart LR\nsubgraph S\n A --> B\n";
let err = validate(input).unwrap_err();
assert!(
matches!(err, ParseError::UnclosedSubgraph { opened_at: 2 }),
"got {err:?}"
);
}
#[test]
fn subgraph_balanced_passes() {
let input = "flowchart LR\nsubgraph S\n A --> B\nend\n";
assert!(validate(input).is_ok());
}
#[test]
fn nested_subgraphs_balanced_pass() {
let input = "flowchart LR\nsubgraph O\n subgraph I\n A --> B\n end\nend\n";
assert!(validate(input).is_ok());
}
// Two openers but a single `end`: one subgraph remains open.
#[test]
fn nested_subgraphs_inner_unclosed_is_reported() {
let input = "flowchart LR\nsubgraph O\n subgraph I\n A --> B\nend\n";
let err = validate(input).unwrap_err();
assert!(matches!(err, ParseError::UnclosedSubgraph { .. }));
}
// An `end` with no open subgraph is an unexpected token.
#[test]
fn stray_end_without_open_is_reported() {
let input = "flowchart LR\nA --> B\nend\n";
let err = validate(input).unwrap_err();
assert!(matches!(
err,
ParseError::UnexpectedToken { found, expected, .. }
if found == "end" && expected == "matching subgraph"
));
}

// --- leading arrows ---------------------------------------------------------

// A line that begins with an edge operator has no source node.
#[test]
fn leading_arrow_is_reported() {
let input = "flowchart LR\n--> B\n";
let err = validate(input).unwrap_err();
assert!(matches!(
err,
ParseError::UnexpectedToken { expected, .. }
if expected == "node identifier"
));
}
#[test]
fn leading_thick_arrow_is_reported() {
let input = "flowchart LR\n==> B\n";
let err = validate(input).unwrap_err();
assert!(matches!(err, ParseError::UnexpectedToken { .. }));
}
#[test]
fn regular_edge_passes() {
let input = "flowchart LR\nA --> B\n";
assert!(validate(input).is_ok());
}
// `<<trait>>` inside a class body must not be mistaken for an arrow.
#[test]
fn class_stereotype_body_line_passes() {
let input = "classDiagram\nclass Backend {\n <<trait>>\n +run()\n}\n";
assert!(validate(input).is_ok());
}
// A `-` visibility marker on a class member must not be mistaken for an arrow.
#[test]
fn class_private_member_body_line_passes() {
let input = "classDiagram\nclass Backend {\n -markdown: String\n}\n";
assert!(validate(input).is_ok());
}
#[test]
fn leading_class_arrow_is_reported() {
let input = "classDiagram\n<|-- Backend\n";
let err = validate(input).unwrap_err();
assert!(matches!(err, ParseError::UnexpectedToken { .. }));
}

// --- click statements -------------------------------------------------------

// An odd number of double quotes on a `click` line is rejected.
#[test]
fn click_unbalanced_quote_is_reported() {
let input = "flowchart LR\nA --> B\nclick A \"https://example.com\n";
let err = validate(input).unwrap_err();
assert!(matches!(
err,
ParseError::UnexpectedToken { expected, .. }
if expected == "matching double quote"
));
}
#[test]
fn click_balanced_passes() {
let input = "flowchart LR\nA --> B\nclick A \"https://example.com\"\n";
assert!(validate(input).is_ok());
}

// --- sequence diagram participants ------------------------------------------

// Without any declaration the participant check is skipped entirely.
#[test]
fn sequence_without_declarations_passes() {
let input = "sequenceDiagram\nAlice->>Bob: hi\n";
assert!(validate(input).is_ok());
}
#[test]
fn sequence_declared_participants_match_passes() {
let input = "sequenceDiagram\nparticipant Alice\nparticipant Bob\nAlice->>Bob: hi\n";
assert!(validate(input).is_ok());
}
// The error carries the unknown name and the 1-based line of the message.
#[test]
fn sequence_unknown_participant_on_right_is_reported() {
let input = "sequenceDiagram\nparticipant Alice\nparticipant Bob\nAlice->>Carol: hi\n";
let err = validate(input).unwrap_err();
assert!(
matches!(err, ParseError::UnknownParticipant { ref name, line: 4, .. }
if name == "Carol"),
"got {err:?}"
);
}
#[test]
fn sequence_unknown_participant_on_left_is_reported() {
let input = "sequenceDiagram\nparticipant Alice\nparticipant Bob\nCarol->>Bob: hi\n";
let err = validate(input).unwrap_err();
assert!(matches!(err, ParseError::UnknownParticipant { .. }));
}
// `participant A as Alice` makes `A` usable as a message endpoint.
#[test]
fn sequence_participant_as_alias_is_honored() {
let input = "sequenceDiagram\nparticipant A as Alice\nA->>A: hi\n";
assert!(validate(input).is_ok());
}

// --- degenerate input -------------------------------------------------------

#[test]
fn empty_input_passes() {
assert!(validate("").is_ok());
}
#[test]
fn comment_only_input_passes() {
assert!(validate("%% just a comment\n%% and another\n").is_ok());
}
}