#![allow(
clippy::module_name_repetitions,
clippy::too_many_lines,
clippy::too_many_arguments,
clippy::map_unwrap_or,
clippy::option_if_let_else,
clippy::elidable_lifetime_names,
clippy::items_after_statements,
clippy::needless_pass_by_value,
clippy::single_match_else,
clippy::manual_let_else,
clippy::match_same_arms,
clippy::missing_const_for_fn,
clippy::single_char_pattern,
clippy::naive_bytecount,
clippy::expect_used,
clippy::redundant_pub_crate,
clippy::used_underscore_binding,
clippy::redundant_field_names,
clippy::struct_field_names,
clippy::redundant_else,
clippy::similar_names
)]
mod complement;
mod cursor;
mod grammar;
mod helpers;
mod layout;
mod review;
mod unify;
pub(crate) use crate::error::ParseError;
pub(crate) use panproto_schema::{Edge, Schema};
pub(crate) use serde::Deserialize;
pub(crate) use std::collections::BTreeMap;
pub(crate) use complement::*;
pub(crate) use cursor::*;
pub(crate) use grammar::*;
pub(crate) use helpers::*;
pub(crate) use layout::*;
pub(crate) use review::*;
pub(crate) use unify::*;
pub use grammar::{Grammar, Production, TokenRole};
pub use layout::FormatPolicy;
/// Pretty-prints `schema` as bytes for `protocol`, driven by `grammar` and `policy`.
///
/// Every root returned by `collect_roots` is emitted in order through
/// `emit_vertex`, with `Output::newline` called between consecutive roots.
/// If the first root carries a `doc-prefix` constraint, its bytes are
/// prepended to the emitted body.
///
/// When at least one root carries an `end-byte` constraint that parses as a
/// `usize`, the largest such value is compared against the emitted body: if
/// the body is exactly one byte longer than that value and ends in `\n`, the
/// final newline is dropped.
///
/// # Errors
///
/// Returns [`ParseError::EmitFailed`] when the schema has no entry vertices,
/// and propagates any error produced by `emit_vertex`.
pub fn emit_pretty(
    protocol: &str,
    schema: &Schema,
    grammar: &Grammar,
    policy: &FormatPolicy,
    cassette: Option<&dyn crate::languages::cassettes::GrammarCassette>,
) -> Result<Vec<u8>, ParseError> {
    let roots = collect_roots(schema);
    if roots.is_empty() {
        return Err(ParseError::EmitFailed {
            protocol: protocol.to_owned(),
            reason: "schema has no entry vertices".to_owned(),
        });
    }

    // Only the first root is consulted for a document prefix.
    let doc_prefix: Vec<u8> = schema
        .constraints
        .get(roots[0])
        .and_then(|cs| cs.iter().find(|c| c.sort.as_ref() == "doc-prefix"))
        .map(|c| c.value.as_bytes().to_vec())
        .unwrap_or_default();

    // Largest parseable `end-byte` constraint across all roots, if any.
    let root_span_end: Option<usize> = roots
        .iter()
        .filter_map(|&r| {
            schema.constraints.get(r).and_then(|cs| {
                cs.iter()
                    .find(|c| c.sort.as_ref() == "end-byte")
                    .and_then(|c| c.value.parse::<usize>().ok())
            })
        })
        .max();

    let mut out = Output::new(policy, grammar, cassette);
    for (i, root) in roots.iter().enumerate() {
        if i > 0 {
            out.newline();
        }
        emit_vertex(protocol, schema, grammar, root, &mut out)?;
    }

    let mut body = out.finish();
    if let Some(span_end) = root_span_end {
        // `checked_sub` rather than `span_end + 1`: the constraint value is
        // parsed from schema data and may be `usize::MAX`, in which case the
        // addition would overflow (a panic in overflow-checked builds).
        if body.len().checked_sub(1) == Some(span_end) && body.last() == Some(&b'\n') {
            body.pop();
        }
    }

    if doc_prefix.is_empty() {
        Ok(body)
    } else {
        let mut result = doc_prefix;
        result.extend_from_slice(&body);
        Ok(result)
    }
}
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
use super::*;
/// Builds the empty grammar named `test` shared by the `Output` tests.
///
/// `Grammar::from_bytes` is tried first; if it fails, the same JSON is
/// deserialised directly through `serde_json`.
fn test_grammar() -> Grammar {
    match Grammar::from_bytes("test", b"{\"name\":\"test\",\"rules\":{}}") {
        Ok(parsed) => parsed,
        Err(_) => serde_json::from_str::<Grammar>(r#"{"name":"test","rules":{}}"#).unwrap(),
    }
}
/// A minimal grammar with a single `program` SEQ rule loads and exposes that rule.
#[test]
fn parses_simple_grammar_json() {
    let fixture = br#"{
"name": "tiny",
"rules": {
"program": {
"type": "SEQ",
"members": [
{"type": "STRING", "value": "hello"},
{"type": "STRING", "value": ";"}
]
}
}
}"#;
    let parsed = Grammar::from_bytes("tiny", fixture).expect("valid tiny grammar");
    assert!(parsed.rules.contains_key("program"));
}
/// Two adjacent `Terminal` tokens are joined by exactly one space, whether the
/// space comes from the first token, the second token, or neither.
#[test]
fn marker_literal_with_trailing_space_is_not_doubled() {
    let policy = FormatPolicy::default();
    let grammar = test_grammar();

    // (tokens fed in order, expected output ignoring trailing whitespace)
    let cases = [
        (["> ", "quoted"], "> quoted"),
        (["foo", " bar"], "foo bar"),
        (["*", "Heading"], "* Heading"),
    ];

    for (tokens, expected) in cases {
        // A fresh `Output` per case so earlier tokens cannot leak state.
        let mut out = Output::new(&policy, &grammar, None);
        for token in tokens {
            out.token_with_role(token, Some(TokenRole::Terminal));
        }
        let bytes = out.finish();
        let rendered = std::str::from_utf8(&bytes).expect("ascii output");
        assert_eq!(rendered.trim_end(), expected, "got {rendered:?}");
    }
}
/// Brackets and a separator following a word are emitted flush against it.
#[test]
fn output_emits_punctuation_without_leading_space() {
    let policy = FormatPolicy::default();
    let grammar = test_grammar();
    let mut out = Output::new(&policy, &grammar, None);

    let tokens = [
        ("foo", TokenRole::Terminal),
        ("(", TokenRole::BracketOpen),
        (")", TokenRole::BracketClose),
        (";", TokenRole::Separator),
    ];
    for (text, role) in tokens {
        out.token_with_role(text, Some(role));
    }

    let bytes = out.finish();
    let rendered = std::str::from_utf8(&bytes).expect("ascii output");
    assert!(rendered.starts_with("foo();"), "got {rendered:?}");
}
/// `is_rest_of_line_pattern` accepts the first group of patterns and rejects
/// the second.
#[test]
fn rest_of_line_pattern_detects_unbounded_tail_only() {
    let accepted = [
        "#!.*",
        "#![\\r\\f\\t\\v ]*([^\\[\\n].*)?\\n",
        "(;|#!|# ).*",
        "\\\\[^\\n]*",
        "//[^\\n]*",
        "#[^\\r\\n]*",
    ];
    for pattern in accepted {
        assert!(
            is_rest_of_line_pattern(pattern),
            "expected rest-of-line pattern: {pattern:?}"
        );
    }

    let rejected = [
        "@\\[.*\\]",
        "[^\"\\\\\\r\\n]+",
        "[^\\\\\"\\n]+",
        "[a-z]+",
        "foo\\.*bar",
        "\"[^\"\\n]*\"",
    ];
    for pattern in rejected {
        assert!(
            !is_rest_of_line_pattern(pattern),
            "expected non-rest-of-line pattern: {pattern:?}"
        );
    }
}
/// Of the three PATTERN rules in the fixture, only `hash_bang_line` lands in
/// `Grammar::line_rest_kinds`.
#[test]
fn line_rest_kinds_classifies_hash_bang() {
    let fixture = br##"{
"name": "tiny",
"rules": {
"hash_bang_line": {"type": "PATTERN", "value": "#!.*"},
"info": {"type": "PATTERN", "value": "@\\[.*\\]"},
"ident": {"type": "PATTERN", "value": "[a-z]+"}
}
}"##;
    let parsed = Grammar::from_bytes("tiny", fixture).expect("valid grammar");
    let kinds = &parsed.line_rest_kinds;
    assert!(kinds.contains("hash_bang_line"));
    assert!(!kinds.contains("info"));
    assert!(!kinds.contains("ident"));
}
/// A `(marker | whitespace) newline` sequence populates the trailing-break
/// fields of `Grammar`; a plain `literal newline` sequence leaves them empty.
#[test]
fn trailing_break_markers_detect_hard_line_break_idiom() {
    // Positive case: backslash-or-whitespace followed by a newline rule.
    let hard_break_fixture = br#"{
"name": "tiny",
"rules": {
"doc": {"type": "SYMBOL", "name": "hard_line_break"},
"hard_line_break": {"type": "SEQ", "members": [
{"type": "CHOICE", "members": [
{"type": "STRING", "value": "\\"},
{"type": "SYMBOL", "name": "_ws"}
]},
{"type": "SYMBOL", "name": "_nl"}
]},
"_ws": {"type": "PATTERN", "value": "\\t| [ \\t]+"},
"_nl": {"type": "PATTERN", "value": "\\n|\\r\\n?"}
}
}"#;
    let with_break = Grammar::from_bytes("tiny", hard_break_fixture).expect("valid grammar");
    assert!(with_break.trailing_break_markers.iter().any(|m| m == "\\"));
    assert!(with_break.trailing_break_on_whitespace);

    // Negative case: a keyword followed directly by a newline pattern.
    let directive_fixture = br#"{
"name": "t2",
"rules": {
"doc": {"type": "SYMBOL", "name": "directive"},
"directive": {"type": "SEQ", "members": [
{"type": "STRING", "value": "go"},
{"type": "PATTERN", "value": "\\n"}
]}
}
}"#;
    let without_break = Grammar::from_bytes("t2", directive_fixture).expect("valid grammar");
    assert!(without_break.trailing_break_markers.is_empty());
    assert!(!without_break.trailing_break_on_whitespace);
}
/// `top_level_text_admits_newline` is set for the template-style fixture and
/// clear for the statement-style fixture; both report their first rule as the
/// start symbol.
#[test]
fn top_level_text_admits_newline_detects_template_content() {
    let template_fixture = br#"{
"name": "tmpl",
"rules": {
"program": {"type": "REPEAT", "content":
{"type": "SYMBOL", "name": "_node"}},
"_node": {"type": "CHOICE", "members": [
{"type": "SYMBOL", "name": "tag"},
{"type": "SYMBOL", "name": "template_content"}
]},
"tag": {"type": "STRING", "value": "{%%}"},
"template_content": {"type": "REPEAT1", "content":
{"type": "PATTERN", "value": "[^{]+"}}
}
}"#;
    let template = Grammar::from_bytes("tmpl", template_fixture).expect("valid grammar");
    assert!(template.start_symbol == "program");
    assert!(template.top_level_text_admits_newline);

    let program_fixture = br#"{
"name": "prog",
"rules": {
"source_file": {"type": "REPEAT", "content":
{"type": "SYMBOL", "name": "statement"}},
"statement": {"type": "STRING", "value": "x"},
"comment": {"type": "SEQ", "members": [
{"type": "STRING", "value": "/*"},
{"type": "PATTERN", "value": "[^*]+"},
{"type": "STRING", "value": "*/"}
]}
}
}"#;
    let program = Grammar::from_bytes("prog", program_fixture).expect("valid grammar");
    assert!(program.start_symbol == "source_file");
    assert!(!program.top_level_text_admits_newline);
}
/// Of the four aliases in the fixture, only `character` is recorded in
/// `Grammar::immediate_token_alias_kinds`; `number`, `ident` and `identifier`
/// are not.
#[test]
fn immediate_token_alias_kinds_classifies_char_body() {
    let fixture = br#"{
"name": "tiny",
"rules": {
"char_literal": {
"type": "SEQ",
"members": [
{"type": "STRING", "value": "'"},
{
"type": "REPEAT1",
"content": {
"type": "ALIAS",
"named": true,
"value": "character",
"content": {
"type": "IMMEDIATE_TOKEN",
"content": {"type": "PATTERN", "value": "[^\\n']"}
}
}
},
{"type": "STRING", "value": "'"}
]
},
"brace_expression": {
"type": "SEQ",
"members": [
{"type": "STRING", "value": "{"},
{
"type": "ALIAS",
"named": true,
"value": "number",
"content": {
"type": "IMMEDIATE_TOKEN",
"content": {"type": "PATTERN", "value": "\\d+"}
}
},
{"type": "STRING", "value": "}"}
]
},
"plain_alias": {
"type": "ALIAS",
"named": true,
"value": "ident",
"content": {"type": "SYMBOL", "name": "x"}
},
"kw_literal": {
"type": "SEQ",
"members": [
{"type": "STRING", "value": "'"},
{
"type": "ALIAS",
"named": true,
"value": "identifier",
"content": {
"type": "IMMEDIATE_TOKEN",
"content": {"type": "STRING", "value": "module"}
}
},
{"type": "STRING", "value": "'"}
]
}
}
}"#;
    let parsed = Grammar::from_bytes("tiny", fixture).expect("valid grammar");
    let kinds = &parsed.immediate_token_alias_kinds;

    assert!(kinds.contains("character"));
    for absent in ["number", "ident", "identifier"] {
        assert!(!kinds.contains(absent), "unexpected alias kind: {absent:?}");
    }
}
/// Non-JSON input makes `Grammar::from_bytes` fail with an error whose text
/// contains the protocol name passed in.
#[test]
fn grammar_from_bytes_rejects_malformed_input() {
    let msg = Grammar::from_bytes("malformed", b"not json")
        .expect_err("malformed bytes must yield Err")
        .to_string();
    assert!(
        msg.contains("malformed"),
        "error message should name the protocol: {msg:?}"
    );
}
/// Emitting `fn foo ( ) { body }` puts a newline after `{`, keeps `body` in
/// the output, and ends the output with `}` followed by a newline.
#[test]
fn output_indents_after_open_brace() {
    let policy = FormatPolicy::default();
    let grammar = test_grammar();
    let mut out = Output::new(&policy, &grammar, None);

    let tokens = [
        ("fn", TokenRole::Keyword),
        ("foo", TokenRole::Terminal),
        ("(", TokenRole::BracketOpen),
        (")", TokenRole::BracketClose),
        ("{", TokenRole::BracketOpen),
        ("body", TokenRole::Terminal),
        ("}", TokenRole::BracketClose),
    ];
    for (text, role) in tokens {
        out.token_with_role(text, Some(role));
    }

    let bytes = out.finish();
    let rendered = std::str::from_utf8(&bytes).expect("ascii output");
    assert!(
        rendered.contains("{\n"),
        "newline after opening brace: {rendered:?}"
    );
    assert!(rendered.contains("body"), "body inside block: {rendered:?}");
    assert!(
        rendered.ends_with("}\n"),
        "newline after closing brace: {rendered:?}"
    );
}
/// Emits `foo`, `.` (as an `Operator`) and `bar`, then checks that both
/// identifiers appear in the output.
///
/// NOTE(review): despite the test name, nothing here asserts the spacing
/// around `.` — the check passes for `foo.bar`, `foo . bar`, or any other
/// layout containing both words. Confirm the intended spacing and tighten
/// the assertion (e.g. `s.contains("foo.bar")`) if that is the contract.
#[test]
fn output_no_space_between_word_and_dot() {
let policy = FormatPolicy::default();
let g = test_grammar();
let mut out = Output::new(&policy, &g, None);
out.token_with_role("foo", Some(TokenRole::Terminal));
out.token_with_role(".", Some(TokenRole::Operator));
out.token_with_role("bar", Some(TokenRole::Terminal));
let bytes = out.finish();
let s = std::str::from_utf8(&bytes).expect("ascii output");
assert!(
s.contains("foo") && s.contains("bar"),
"both identifiers present: {s:?}"
);
}
/// Tokens emitted between `snapshot` and `restore` disappear from the final
/// output, while tokens emitted before the snapshot and after the restore stay.
#[test]
fn output_snapshot_restore_truncates_bytes() {
    let policy = FormatPolicy::default();
    let grammar = test_grammar();
    let mut out = Output::new(&policy, &grammar, None);

    out.token("keep");
    let checkpoint = out.snapshot();
    out.token("drop");
    out.token("more");
    out.restore(checkpoint);
    out.token("after");

    let bytes = out.finish();
    let rendered = std::str::from_utf8(&bytes).expect("ascii output");
    assert!(
        rendered.contains("keep"),
        "kept token survives: {rendered:?}"
    );
    assert!(
        rendered.contains("after"),
        "post-restore token visible: {rendered:?}"
    );
    for rolled_back in ["drop", "more"] {
        assert!(
            !rendered.contains(rolled_back),
            "rolled-back token removed: {rendered:?}"
        );
    }
}
/// `ChildCursor::take_field` hands out a matching edge once; asking for the
/// same field again yields `None`.
#[test]
fn child_cursor_take_field_consumes_once() {
    let owned: Vec<Edge> = vec![Edge {
        src: panproto_gat::Name::from("p"),
        tgt: panproto_gat::Name::from("c"),
        kind: panproto_gat::Name::from("name"),
        name: None,
    }];
    let borrowed: Vec<&Edge> = owned.iter().collect();
    let mut cursor = ChildCursor::new(&borrowed);

    let first_take = cursor.take_field("name");
    let second_take = cursor.take_field("name");

    assert!(first_take.is_some(), "first take returns the edge");
    assert!(
        second_take.is_none(),
        "second take returns None (already consumed)"
    );
}
/// `take_matching` consumes only the edge selected by the predicate: the
/// `key` edge stops matching afterwards while the `child_of` edge still does.
#[test]
fn child_cursor_take_matching_predicate() {
    let owned: Vec<Edge> = vec![
        Edge {
            src: "p".into(),
            tgt: "c1".into(),
            kind: "child_of".into(),
            name: None,
        },
        Edge {
            src: "p".into(),
            tgt: "c2".into(),
            kind: "key".into(),
            name: None,
        },
    ];
    let borrowed: Vec<&Edge> = owned.iter().collect();
    let mut cursor = ChildCursor::new(&borrowed);

    assert!(cursor.has_matching(|e| e.kind.as_ref() == "key"));

    let consumed = cursor.take_matching(|e| e.kind.as_ref() == "key");
    assert!(consumed.is_some());

    assert!(
        !cursor.has_matching(|e| e.kind.as_ref() == "key"),
        "consumed edge no longer matches"
    );
    assert!(
        cursor.has_matching(|e| e.kind.as_ref() == "child_of"),
        "the other edge is still available"
    );
}
/// A kind satisfies a symbol of the same name; a different kind or a missing
/// kind does not.
#[test]
fn kind_satisfies_symbol_direct_match() {
    let fixture = br#"{
"name": "tiny",
"rules": {
"x": {"type": "STRING", "value": "x"}
}
}"#;
    let parsed = Grammar::from_bytes("tiny", fixture).expect("valid grammar");

    assert!(kind_satisfies_symbol(&parsed, Some("x"), "x"));
    assert!(!kind_satisfies_symbol(&parsed, Some("y"), "x"));
    assert!(!kind_satisfies_symbol(&parsed, None, "x"));
}
/// Kinds listed as CHOICE alternatives of the hidden `_value` rule satisfy
/// `_value`; a kind outside that list does not.
#[test]
fn kind_satisfies_symbol_through_hidden_rule() {
    let fixture = br#"{
"name": "tiny",
"rules": {
"_value": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "object"},
{"type": "SYMBOL", "name": "number"}
]
},
"object": {"type": "STRING", "value": "{}"},
"number": {"type": "PATTERN", "value": "[0-9]+"}
}
}"#;
    let parsed = Grammar::from_bytes("tiny", fixture).expect("valid grammar");

    assert!(
        kind_satisfies_symbol(&parsed, Some("number"), "_value"),
        "number is reachable from _value via CHOICE"
    );
    assert!(
        kind_satisfies_symbol(&parsed, Some("object"), "_value"),
        "object is reachable from _value via CHOICE"
    );
    assert!(
        !kind_satisfies_symbol(&parsed, Some("string"), "_value"),
        "string is NOT among the alternatives"
    );
}
/// For a `"{" body "}"` sequence, `first_symbol` reports `body`, not the
/// leading STRING literal.
#[test]
fn first_symbol_skips_string_terminals() {
    let braced = serde_json::from_str::<Production>(
        r#"{
"type": "SEQ",
"members": [
{"type": "STRING", "value": "{"},
{"type": "SYMBOL", "name": "body"},
{"type": "STRING", "value": "}"}
]
}"#,
    )
    .expect("valid SEQ");

    assert_eq!(first_symbol(&braced), Some("body"));
}
/// `is_newline_like_pattern` accepts the newline-only patterns in the first
/// list and rejects everything in the second, including the empty pattern.
#[test]
fn is_newline_like_pattern_handles_alternations_and_classes() {
    let newline_like = [
        "\\n",
        "\\r\\n",
        "\\r?\\n",
        "\\r|\\r\\n|\\n",
        "[\\r\\n]|\\r\\n",
        "[\\r\\n]",
    ];
    for pattern in newline_like {
        assert!(
            is_newline_like_pattern(pattern),
            "expected newline-like: {pattern:?}"
        );
    }

    let not_newline_like = [".+", "\\n|.", "[a\\n]", ""];
    for pattern in not_newline_like {
        assert!(
            !is_newline_like_pattern(pattern),
            "expected not newline-like: {pattern:?}"
        );
    }
}
/// `is_blank_line_rule` accepts a newline PATTERN, bare or wrapped in a FIELD,
/// and rejects a STRING, a SEQ ending in a newline, and a `.+` PATTERN.
#[test]
fn is_blank_line_rule_matches_only_a_newline_field() {
    use super::helpers::is_blank_line_rule;

    // Shorthand for decoding a production from its JSON form.
    let parse = |json: &str| -> Production { serde_json::from_str(json).unwrap() };

    let field_wrapped = parse(
        r#"{"type":"FIELD","name":"blank","content":{"type":"PATTERN","value":"\\n"}}"#,
    );
    assert!(is_blank_line_rule(&field_wrapped));

    let bare_pattern = parse(r#"{"type":"PATTERN","value":"\\r?\\n"}"#);
    assert!(is_blank_line_rule(&bare_pattern));

    let semicolon = parse(r#"{"type":"STRING","value":";"}"#);
    assert!(!is_blank_line_rule(&semicolon));

    let sequence = parse(
        r#"{"type":"SEQ","members":[{"type":"SYMBOL","name":"x"},{"type":"PATTERN","value":"\\n"}]}"#,
    );
    assert!(!is_blank_line_rule(&sequence));

    let any_text = parse(r#"{"type":"PATTERN","value":".+"}"#);
    assert!(!is_blank_line_rule(&any_text));
}
/// `is_whitespace_only_pattern` accepts `\p{Zs}` with and without a
/// quantifier, `\s+` and `[ \t]+`, and rejects `\p{L}+` and `.+`.
#[test]
fn is_whitespace_only_pattern_recognizes_unicode_space_separator() {
    use super::helpers::is_whitespace_only_pattern;

    let whitespace_only = ["\\p{Zs}+", "\\p{Zs}*", "\\p{Zs}", "\\s+", "[ \\t]+"];
    for pattern in whitespace_only {
        assert!(
            is_whitespace_only_pattern(pattern),
            "expected whitespace-only: {pattern:?}"
        );
    }

    let other = ["\\p{L}+", ".+"];
    for pattern in other {
        assert!(
            !is_whitespace_only_pattern(pattern),
            "expected not whitespace-only: {pattern:?}"
        );
    }
}
/// `decode_whitespace_padded_literal` strips optional-whitespace padding on
/// either side of a literal and returns `None` when no plain literal remains.
#[test]
fn decode_whitespace_padded_literal_handles_trailing_only_padding() {
    use super::helpers::decode_whitespace_padded_literal;

    // (pattern, decoded literal or None)
    let cases = [
        ("#\\s*", Some("#")),
        ("//\\s*", Some("//")),
        ("[ \\t]*:[ \\t]*", Some(":")),
        ("[a-z]+\\s*", None),
        ("\\s*", None),
    ];
    for (pattern, expected) in cases {
        assert_eq!(
            decode_whitespace_padded_literal(pattern).as_deref(),
            expected,
            "pattern {pattern:?}"
        );
    }
}
/// `pattern_absorbs_leading_space` is true for the patterns in the first list
/// and false for those in the second, including the empty pattern.
#[test]
fn pattern_absorbs_leading_space_detects_space_admitting_terminals() {
    let absorbing = [".+", ".*", "^.+", "[^;#]+"];
    for pattern in absorbing {
        assert!(
            pattern_absorbs_leading_space(pattern),
            "expected to absorb leading space: {pattern:?}"
        );
    }

    let non_absorbing = [
        "[^;#=\\s\\[]+",
        "[^ \\t]+",
        "[a-zA-Z_]\\w*",
        "[0-9]+",
        "\\w+",
        "",
    ];
    for pattern in non_absorbing {
        assert!(
            !pattern_absorbs_leading_space(pattern),
            "expected not to absorb leading space: {pattern:?}"
        );
    }
}
/// `placeholder_for_pattern` picks `0` for numeric patterns, `_x` for an
/// identifier pattern and `""` for a quoted-string pattern.
#[test]
fn placeholder_for_pattern_routes_by_regex_class() {
    // (pattern, expected placeholder)
    let cases = [
        ("[0-9]+", "0"),
        ("[a-zA-Z_]\\w*", "_x"),
        ("\"[^\"]*\"", "\"\""),
        ("\\d+\\.\\d+", "0"),
    ];
    for (pattern, expected) in cases {
        assert_eq!(
            placeholder_for_pattern(pattern),
            expected,
            "pattern {pattern:?}"
        );
    }
}
/// The default `FormatPolicy` breaks lines after `;`, indents on `{`,
/// dedents on `}`, and uses an indent width of 2.
#[test]
fn format_policy_default_breaks_after_semicolon() {
    let defaults = FormatPolicy::default();

    let breaks_after_semicolon = defaults.line_break_after.iter().any(|t| t == ";");
    let opens_on_brace = defaults.indent_open.iter().any(|t| t == "{");
    let closes_on_brace = defaults.indent_close.iter().any(|t| t == "}");

    assert!(breaks_after_semicolon);
    assert!(opens_on_brace);
    assert!(closes_on_brace);
    assert_eq!(defaults.indent_width, 2);
}
/// Patterns that are plain escaped literals decode to the literal itself;
/// a digit class yields `0` and an alternation yields `_`.
#[test]
fn placeholder_decodes_literal_pattern_separators() {
    // (pattern, expected placeholder)
    let cases = [
        ("\\n", "\n"),
        ("\\r\\n", "\r\n"),
        (";", ";"),
        ("[0-9]+", "0"),
        ("a|b", "_"),
    ];
    for (pattern, expected) in cases {
        assert_eq!(
            placeholder_for_pattern(pattern),
            expected,
            "pattern {pattern:?}"
        );
    }
}
/// A literal surrounded by optional-whitespace padding decodes to the bare
/// literal; padding made of a non-whitespace class falls back to `_`.
#[test]
fn placeholder_decodes_whitespace_padded_literal() {
    // (pattern, expected placeholder)
    let cases = [
        ("[ \\t]*:[ \\t]*", ":"),
        ("\\s*=\\s*", "="),
        ("[ \\t]*->", "->"),
        ("[a-z]*:[ \\t]*", "_"),
    ];
    for (pattern, expected) in cases {
        assert_eq!(
            placeholder_for_pattern(pattern),
            expected,
            "pattern {pattern:?}"
        );
    }
}
/// A `supertypes` array of plain strings is decoded, and the supertype is
/// satisfied by one of its CHOICE members but not by an unrelated kind.
#[test]
fn supertypes_decode_from_grammar_json_strings() {
    let fixture = br#"{
"name": "tiny",
"supertypes": ["expression"],
"rules": {
"expression": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "binary_expression"},
{"type": "SYMBOL", "name": "identifier"}
]
},
"binary_expression": {"type": "STRING", "value": "x"},
"identifier": {"type": "PATTERN", "value": "[a-z]+"}
}
}"#;
    let parsed = Grammar::from_bytes("tiny", fixture).expect("parse");

    assert!(parsed.supertypes.contains("expression"));
    assert!(kind_satisfies_symbol(
        &parsed,
        Some("identifier"),
        "expression"
    ));
    assert!(!kind_satisfies_symbol(&parsed, Some("string"), "expression"));
}
/// A `supertypes` array of `SYMBOL` objects is decoded the same way as the
/// plain-string form.
#[test]
fn supertypes_decode_from_grammar_json_objects() {
    let fixture = br#"{
"name": "tiny",
"supertypes": [{"type": "SYMBOL", "name": "stmt"}],
"rules": {
"stmt": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "while_stmt"},
{"type": "SYMBOL", "name": "if_stmt"}
]
},
"while_stmt": {"type": "STRING", "value": "while"},
"if_stmt": {"type": "STRING", "value": "if"}
}
}"#;
    let parsed = Grammar::from_bytes("tiny", fixture).expect("parse");

    assert!(parsed.supertypes.contains("stmt"));
    assert!(kind_satisfies_symbol(&parsed, Some("while_stmt"), "stmt"));
}
/// The `value` of a named ALIAS satisfies the hidden rule that wraps it.
#[test]
fn alias_value_matches_kind() {
    let fixture = br#"{
"name": "tiny",
"rules": {
"_package_identifier": {
"type": "ALIAS",
"named": true,
"value": "package_identifier",
"content": {"type": "SYMBOL", "name": "identifier"}
},
"identifier": {"type": "PATTERN", "value": "[a-z]+"}
}
}"#;
    let parsed = Grammar::from_bytes("tiny", fixture).expect("parse");

    let satisfied = kind_satisfies_symbol(
        &parsed,
        Some("package_identifier"),
        "_package_identifier",
    );
    assert!(satisfied);
}
/// `referenced_symbols` finds symbols nested inside CHOICE and REPEAT members
/// of a SEQ.
#[test]
fn referenced_symbols_walks_nested_seq() {
    let params = serde_json::from_str::<Production>(
        r#"{
"type": "SEQ",
"members": [
{"type": "CHOICE", "members": [
{"type": "SYMBOL", "name": "attribute_item"},
{"type": "BLANK"}
]},
{"type": "SYMBOL", "name": "parameter"},
{"type": "REPEAT", "content": {
"type": "SEQ",
"members": [
{"type": "STRING", "value": ","},
{"type": "SYMBOL", "name": "parameter"}
]
}}
]
}"#,
    )
    .expect("seq");

    let found = referenced_symbols(&params);
    for expected in ["attribute_item", "parameter"] {
        assert!(found.contains(&expected), "missing symbol: {expected:?}");
    }
}
/// `literal_strings` returns the STRING members of a CHOICE in declaration order.
#[test]
fn literal_strings_collects_choice_members() {
    let operators = serde_json::from_str::<Production>(
        r#"{
"type": "CHOICE",
"members": [
{"type": "STRING", "value": "+"},
{"type": "STRING", "value": "-"},
{"type": "STRING", "value": "*"}
]
}"#,
    )
    .expect("choice");

    let collected = literal_strings(&operators);
    assert_eq!(collected, vec!["+", "-", "*"]);
}
/// A `RESERVED` node deserialises to `Production::Reserved` whose content is
/// the wrapped `SYMBOL`.
#[test]
fn reserved_variant_deserialises() {
    let parsed = serde_json::from_str::<Production>(
        r#"{
"type": "RESERVED",
"content": {"type": "SYMBOL", "name": "_lowercase_identifier"},
"context_name": "attribute_id"
}"#,
    )
    .expect("RESERVED parses");

    // Peel the outer variant first, then inspect what it wraps.
    let inner = match parsed {
        Production::Reserved { content, .. } => *content,
        other => panic!("expected RESERVED, got {other:?}"),
    };
    match inner {
        Production::Symbol { name } => assert_eq!(name, "_lowercase_identifier"),
        other => panic!("expected inner SYMBOL, got {other:?}"),
    }
}
/// A grammar whose root rule is a `RESERVED` node loads through
/// `Grammar::from_bytes` and keeps that rule.
#[test]
fn reserved_grammar_loads_end_to_end() {
    let fixture = br#"{
"name": "tiny_reserved",
"rules": {
"program": {
"type": "RESERVED",
"content": {"type": "SYMBOL", "name": "ident"},
"context_name": "keywords"
},
"ident": {"type": "PATTERN", "value": "[a-z]+"}
}
}"#;
    let parsed =
        Grammar::from_bytes("tiny_reserved", fixture).expect("RESERVED-using grammar loads");
    assert!(parsed.rules.contains_key("program"));
}
/// `first_symbol`, `has_field_in` and `referenced_symbols` all look through a
/// `RESERVED` wrapper into the FIELD and SYMBOL it contains.
#[test]
fn reserved_walker_helpers_recurse_into_content() {
    let wrapped = serde_json::from_str::<Production>(
        r#"{
"type": "RESERVED",
"content": {
"type": "FIELD",
"name": "lhs",
"content": {"type": "SYMBOL", "name": "expr"}
},
"context_name": "ctx"
}"#,
    )
    .expect("nested RESERVED parses");

    assert_eq!(first_symbol(&wrapped), Some("expr"));
    assert!(has_field_in(&wrapped, &["lhs"]));

    let found = referenced_symbols(&wrapped);
    assert!(found.contains(&"expr"));
}
/// Test helper: runs `yield_of_production` on `prod` with an empty visited set
/// and a cache seeded from a clone of `grammar.yield_sets`.
fn yield_of(grammar: &Grammar, prod: &Production) -> std::collections::HashSet<String> {
    // Clone so the grammar's own cache is left untouched by the call.
    let mut memo = grammar.yield_sets.clone();
    let mut seen = std::collections::HashSet::new();
    yield_of_production(grammar, prod, &mut seen, &mut memo)
}
/// The yield set of a SEQ contains its first member and not a later symbol.
#[test]
fn yield_set_seq_only_first_member() {
    let sequence = serde_json::from_str::<Production>(
        r#"{
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "identifier"},
{"type": "STRING", "value": "as"},
{"type": "SYMBOL", "name": "target"}
]
}"#,
    )
    .expect("valid SEQ");

    // Falls back to an empty grammar decoded directly if `from_bytes` rejects `{}`.
    let grammar = match Grammar::from_bytes("test", b"{}") {
        Ok(parsed) => parsed,
        Err(_) => serde_json::from_str::<Grammar>(r#"{"name":"t","rules":{}}"#).unwrap(),
    };

    let yields = yield_of(&grammar, &sequence);
    assert!(yields.contains("identifier"), "SEQ yields first member");
    assert!(
        !yields.contains("target"),
        "SEQ must NOT yield non-first members"
    );
}
/// The yield set of a CHOICE is exactly the union of its members' yields.
#[test]
fn yield_set_choice_union() {
    let choice = serde_json::from_str::<Production>(
        r#"{
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "a"},
{"type": "SYMBOL", "name": "b"}
]
}"#,
    )
    .expect("valid CHOICE");
    let grammar = serde_json::from_str::<Grammar>(r#"{"name":"t","rules":{}}"#).unwrap();

    let yields = yield_of(&grammar, &choice);
    assert_eq!(yields.len(), 2);
    assert!(yields.contains("a"));
    assert!(yields.contains("b"));
}
/// A SYMBOL referring to the hidden `_value` rule yields that rule's CHOICE
/// members rather than the hidden name itself.
#[test]
fn yield_set_hidden_expansion() {
    let mut grammar = serde_json::from_str::<Grammar>(
        r#"{"name":"t","rules":{
"_value": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "number"},
{"type": "SYMBOL", "name": "object"}
]
}
}}"#,
    )
    .unwrap();
    // Plain deserialisation is followed by an explicit fill of the derived tables.
    grammar.subtypes = compute_subtype_closure(&grammar);
    grammar.yield_sets = compute_yield_sets(&grammar);

    let hidden_ref =
        serde_json::from_str::<Production>(r#"{"type": "SYMBOL", "name": "_value"}"#).unwrap();
    let yields = yield_of(&grammar, &hidden_ref);

    assert!(
        yields.contains("number"),
        "hidden rule expands into its CHOICE members"
    );
    assert!(yields.contains("object"));
    assert!(
        !yields.contains("_value"),
        "hidden rule name is not in yield set"
    );
}
/// The yield set of an OPTIONAL contains the wrapped symbol plus the empty
/// string standing for epsilon.
#[test]
fn yield_set_optional_includes_epsilon() {
    let optional = serde_json::from_str::<Production>(
        r#"{"type": "OPTIONAL", "content": {"type": "SYMBOL", "name": "x"}}"#,
    )
    .unwrap();
    let grammar = serde_json::from_str::<Grammar>(r#"{"name":"t","rules":{}}"#).unwrap();

    let yields = yield_of(&grammar, &optional);
    assert!(yields.contains("x"));
    assert!(yields.contains(""), "OPTIONAL includes epsilon");
}
/// A named ALIAS yields only its `value`, not the symbol it wraps.
#[test]
fn yield_set_alias_uses_value() {
    let alias = serde_json::from_str::<Production>(
        r#"{"type": "ALIAS", "content": {"type": "SYMBOL", "name": "real"},
"named": true, "value": "alias_name"}"#,
    )
    .unwrap();
    let grammar = serde_json::from_str::<Grammar>(r#"{"name":"t","rules":{}}"#).unwrap();

    let yields = yield_of(&grammar, &alias);
    assert_eq!(yields.len(), 1);
    assert!(yields.contains("alias_name"), "named ALIAS yields its value");
}
}