1#![allow(
2 clippy::module_name_repetitions,
3 clippy::too_many_lines,
4 clippy::too_many_arguments,
5 clippy::map_unwrap_or,
6 clippy::option_if_let_else,
7 clippy::elidable_lifetime_names,
8 clippy::items_after_statements,
9 clippy::needless_pass_by_value,
10 clippy::single_match_else,
11 clippy::manual_let_else,
12 clippy::match_same_arms,
13 clippy::missing_const_for_fn,
14 clippy::single_char_pattern,
15 clippy::naive_bytecount,
16 clippy::expect_used,
17 clippy::redundant_pub_crate,
18 clippy::used_underscore_binding,
19 clippy::redundant_field_names,
20 clippy::struct_field_names,
21 clippy::redundant_else,
22 clippy::similar_names
23)]
24
25use std::collections::BTreeMap;
71
72use panproto_schema::{Edge, Schema};
73use serde::Deserialize;
74
75use crate::error::ParseError;
76
77#[derive(Debug, Clone, Deserialize)]
90#[serde(tag = "type")]
91pub enum Production {
92 #[serde(rename = "SEQ")]
94 Seq {
95 members: Vec<Self>,
97 },
98 #[serde(rename = "CHOICE")]
100 Choice {
101 members: Vec<Self>,
104 },
105 #[serde(rename = "REPEAT")]
107 Repeat {
108 content: Box<Self>,
110 },
111 #[serde(rename = "REPEAT1")]
113 Repeat1 {
114 content: Box<Self>,
116 },
117 #[serde(rename = "OPTIONAL")]
123 Optional {
124 content: Box<Self>,
126 },
127 #[serde(rename = "SYMBOL")]
129 Symbol {
130 name: String,
133 },
134 #[serde(rename = "STRING")]
136 String {
137 value: String,
139 },
140 #[serde(rename = "PATTERN")]
146 Pattern {
147 value: String,
149 },
150 #[serde(rename = "BLANK")]
152 Blank,
153 #[serde(rename = "FIELD")]
159 Field {
160 name: String,
162 content: Box<Self>,
164 },
165 #[serde(rename = "ALIAS")]
170 Alias {
171 content: Box<Self>,
173 #[serde(default)]
175 named: bool,
176 #[serde(default)]
178 value: String,
179 },
180 #[serde(rename = "TOKEN")]
185 Token {
186 content: Box<Self>,
188 },
189 #[serde(rename = "IMMEDIATE_TOKEN")]
193 ImmediateToken {
194 content: Box<Self>,
196 },
197 #[serde(rename = "PREC")]
199 Prec {
200 #[allow(dead_code)]
202 value: serde_json::Value,
203 content: Box<Self>,
205 },
206 #[serde(rename = "PREC_LEFT")]
208 PrecLeft {
209 #[allow(dead_code)]
211 value: serde_json::Value,
212 content: Box<Self>,
214 },
215 #[serde(rename = "PREC_RIGHT")]
217 PrecRight {
218 #[allow(dead_code)]
220 value: serde_json::Value,
221 content: Box<Self>,
223 },
224 #[serde(rename = "PREC_DYNAMIC")]
226 PrecDynamic {
227 #[allow(dead_code)]
229 value: serde_json::Value,
230 content: Box<Self>,
232 },
233}
234
235#[derive(Debug, Clone, Deserialize)]
240pub struct Grammar {
241 #[allow(dead_code)]
243 pub name: String,
244 pub rules: BTreeMap<String, Production>,
248 #[serde(default, deserialize_with = "deserialize_supertypes")]
256 pub supertypes: std::collections::HashSet<String>,
257 #[serde(skip)]
271 pub subtypes: std::collections::HashMap<String, std::collections::HashSet<String>>,
272}
273
274fn deserialize_supertypes<'de, D>(
275 deserializer: D,
276) -> Result<std::collections::HashSet<String>, D::Error>
277where
278 D: serde::Deserializer<'de>,
279{
280 let entries: Vec<serde_json::Value> = Vec::deserialize(deserializer)?;
281 let mut out = std::collections::HashSet::new();
282 for entry in entries {
283 match entry {
284 serde_json::Value::String(s) => {
285 out.insert(s);
286 }
287 serde_json::Value::Object(map) => {
288 if let Some(serde_json::Value::String(name)) = map.get("name") {
289 out.insert(name.clone());
290 }
291 }
292 _ => {}
293 }
294 }
295 Ok(out)
296}
297
298impl Grammar {
299 pub fn from_bytes(protocol: &str, bytes: &[u8]) -> Result<Self, ParseError> {
319 let mut grammar: Self =
320 serde_json::from_slice(bytes).map_err(|e| ParseError::EmitFailed {
321 protocol: protocol.to_owned(),
322 reason: format!("grammar.json deserialization failed: {e}"),
323 })?;
324 grammar.subtypes = compute_subtype_closure(&grammar);
325 Ok(grammar)
326 }
327}
328
329fn compute_subtype_closure(
332 grammar: &Grammar,
333) -> std::collections::HashMap<String, std::collections::HashSet<String>> {
334 use std::collections::{HashMap, HashSet};
335 let mut subtypes: HashMap<String, HashSet<String>> = HashMap::new();
340 for name in grammar.rules.keys() {
341 subtypes
342 .entry(name.clone())
343 .or_default()
344 .insert(name.clone());
345 }
346
347 fn walk<'g>(
351 grammar: &'g Grammar,
352 production: &'g Production,
353 visited: &mut HashSet<&'g str>,
354 out: &mut HashSet<String>,
355 ) {
356 match production {
357 Production::Symbol { name } => {
358 out.insert(name.clone());
360 let expand = name.starts_with('_') || grammar.supertypes.contains(name.as_str());
363 if expand && visited.insert(name.as_str()) {
364 if let Some(rule) = grammar.rules.get(name) {
365 walk(grammar, rule, visited, out);
366 }
367 }
368 }
369 Production::Choice { members } | Production::Seq { members } => {
370 for m in members {
371 walk(grammar, m, visited, out);
372 }
373 }
374 Production::Alias {
375 content,
376 named,
377 value,
378 } => {
379 if *named && !value.is_empty() {
380 out.insert(value.clone());
381 }
382 walk(grammar, content, visited, out);
383 }
384 Production::Repeat { content }
385 | Production::Repeat1 { content }
386 | Production::Optional { content }
387 | Production::Field { content, .. }
388 | Production::Token { content }
389 | Production::ImmediateToken { content }
390 | Production::Prec { content, .. }
391 | Production::PrecLeft { content, .. }
392 | Production::PrecRight { content, .. }
393 | Production::PrecDynamic { content, .. } => {
394 walk(grammar, content, visited, out);
395 }
396 _ => {}
397 }
398 }
399
400 for (name, rule) in &grammar.rules {
401 let expand = name.starts_with('_') || grammar.supertypes.contains(name.as_str());
402 if !expand {
403 continue;
404 }
405 let mut visited: HashSet<&str> = HashSet::new();
406 visited.insert(name.as_str());
407 let mut reachable: HashSet<String> = HashSet::new();
408 walk(grammar, rule, &mut visited, &mut reachable);
409 for kind in &reachable {
410 subtypes
411 .entry(kind.clone())
412 .or_default()
413 .insert(name.clone());
414 }
415 }
416
417 fn collect_aliases<'g>(production: &'g Production, out: &mut Vec<(String, &'g Production)>) {
423 match production {
424 Production::Alias {
425 content,
426 named,
427 value,
428 } => {
429 if *named && !value.is_empty() {
430 out.push((value.clone(), content.as_ref()));
431 }
432 collect_aliases(content, out);
433 }
434 Production::Choice { members } | Production::Seq { members } => {
435 for m in members {
436 collect_aliases(m, out);
437 }
438 }
439 Production::Repeat { content }
440 | Production::Repeat1 { content }
441 | Production::Optional { content }
442 | Production::Field { content, .. }
443 | Production::Token { content }
444 | Production::ImmediateToken { content }
445 | Production::Prec { content, .. }
446 | Production::PrecLeft { content, .. }
447 | Production::PrecRight { content, .. }
448 | Production::PrecDynamic { content, .. } => {
449 collect_aliases(content, out);
450 }
451 _ => {}
452 }
453 }
454 let mut aliases: Vec<(String, &Production)> = Vec::new();
455 for rule in grammar.rules.values() {
456 collect_aliases(rule, &mut aliases);
457 }
458 for (alias_value, content) in aliases {
459 let mut visited: HashSet<&str> = HashSet::new();
460 let mut reachable: HashSet<String> = HashSet::new();
461 walk(grammar, content, &mut visited, &mut reachable);
462 subtypes
465 .entry(alias_value.clone())
466 .or_default()
467 .insert(alias_value.clone());
468 for kind in reachable {
469 subtypes
470 .entry(kind)
471 .or_default()
472 .insert(alias_value.clone());
473 }
474 }
475
476 for _ in 0..8 {
480 let snapshot = subtypes.clone();
481 let mut changed = false;
482 for (kind, supers) in &snapshot {
483 let extra: HashSet<String> = supers
484 .iter()
485 .flat_map(|s| snapshot.get(s).cloned().unwrap_or_default())
486 .collect();
487 let entry = subtypes.entry(kind.clone()).or_default();
488 for s in extra {
489 if entry.insert(s) {
490 changed = true;
491 }
492 }
493 }
494 if !changed {
495 break;
496 }
497 }
498
499 subtypes
500}
501
502#[derive(Debug, Clone)]
515pub struct FormatPolicy {
516 pub indent_width: usize,
518 pub line_break_after: Vec<String>,
520 pub indent_open: Vec<String>,
522 pub indent_close: Vec<String>,
524}
525
526impl Default for FormatPolicy {
527 fn default() -> Self {
528 Self {
529 indent_width: 2,
530 line_break_after: vec![";".into(), "{".into(), "}".into()],
531 indent_open: vec!["{".into()],
532 indent_close: vec!["}".into()],
533 }
534 }
535}
536
537pub fn emit_pretty(
561 protocol: &str,
562 schema: &Schema,
563 grammar: &Grammar,
564 policy: &FormatPolicy,
565) -> Result<Vec<u8>, ParseError> {
566 let roots = collect_roots(schema);
567 if roots.is_empty() {
568 return Err(ParseError::EmitFailed {
569 protocol: protocol.to_owned(),
570 reason: "schema has no entry vertices".to_owned(),
571 });
572 }
573
574 let mut out = Output::new(policy);
575 for (i, root) in roots.iter().enumerate() {
576 if i > 0 {
577 out.newline();
578 }
579 emit_vertex(protocol, schema, grammar, root, &mut out)?;
580 }
581 Ok(out.finish())
582}
583
584fn collect_roots(schema: &Schema) -> Vec<&panproto_gat::Name> {
585 if !schema.entries.is_empty() {
586 return schema
587 .entries
588 .iter()
589 .filter(|name| schema.vertices.contains_key(*name))
590 .collect();
591 }
592
593 let mut targets: std::collections::HashSet<&panproto_gat::Name> =
596 std::collections::HashSet::new();
597 for edge in schema.edges.keys() {
598 targets.insert(&edge.tgt);
599 }
600 let mut roots: Vec<&panproto_gat::Name> = schema
601 .vertices
602 .keys()
603 .filter(|name| !targets.contains(name))
604 .collect();
605 roots.sort();
606 roots
607}
608
609fn emit_vertex(
610 protocol: &str,
611 schema: &Schema,
612 grammar: &Grammar,
613 vertex_id: &panproto_gat::Name,
614 out: &mut Output<'_>,
615) -> Result<(), ParseError> {
616 let vertex = schema
617 .vertices
618 .get(vertex_id)
619 .ok_or_else(|| ParseError::EmitFailed {
620 protocol: protocol.to_owned(),
621 reason: format!("vertex '{vertex_id}' not found"),
622 })?;
623
624 if let Some(literal) = literal_value(schema, vertex_id) {
630 if children_for(schema, vertex_id).is_empty() {
631 out.token(literal);
632 return Ok(());
633 }
634 }
635
636 let kind = vertex.kind.as_ref();
637 let edges = children_for(schema, vertex_id);
638 if let Some(rule) = grammar.rules.get(kind) {
639 let mut cursor = ChildCursor::new(&edges);
640 return emit_production(protocol, schema, grammar, vertex_id, rule, &mut cursor, out);
641 }
642
643 for edge in &edges {
650 emit_vertex(protocol, schema, grammar, &edge.tgt, out)?;
651 }
652 Ok(())
653}
654
655struct ChildCursor<'a> {
658 edges: &'a [&'a Edge],
659 consumed: Vec<bool>,
660}
661
662impl<'a> ChildCursor<'a> {
663 fn new(edges: &'a [&'a Edge]) -> Self {
664 Self {
665 edges,
666 consumed: vec![false; edges.len()],
667 }
668 }
669
670 fn take_field(&mut self, field_name: &str) -> Option<&'a Edge> {
672 for (i, edge) in self.edges.iter().enumerate() {
673 if !self.consumed[i] && edge.kind.as_ref() == field_name {
674 self.consumed[i] = true;
675 return Some(edge);
676 }
677 }
678 None
679 }
680
681 fn take_matching(&mut self, predicate: impl Fn(&Edge) -> bool) -> Option<&'a Edge> {
685 for (i, edge) in self.edges.iter().enumerate() {
686 if !self.consumed[i] && predicate(edge) {
687 self.consumed[i] = true;
688 return Some(edge);
689 }
690 }
691 None
692 }
693
694 fn has_matching(&self, predicate: impl Fn(&Edge) -> bool) -> bool {
696 self.edges
697 .iter()
698 .enumerate()
699 .any(|(i, edge)| !self.consumed[i] && predicate(edge))
700 }
701}
702
703thread_local! {
704 static EMIT_DEPTH: std::cell::Cell<usize> = const { std::cell::Cell::new(0) };
705 static EMIT_MU_FRAMES: std::cell::RefCell<std::collections::HashSet<(String, String)>> =
715 std::cell::RefCell::new(std::collections::HashSet::new());
716}
717
718fn walk_in_mu_frame(
723 protocol: &str,
724 schema: &Schema,
725 grammar: &Grammar,
726 vertex_id: &panproto_gat::Name,
727 rule_name: &str,
728 rule: &Production,
729 cursor: &mut ChildCursor<'_>,
730 out: &mut Output<'_>,
731) -> Result<(), ParseError> {
732 let key = (vertex_id.to_string(), rule_name.to_owned());
733 let inserted = EMIT_MU_FRAMES.with(|frames| frames.borrow_mut().insert(key.clone()));
734 if !inserted {
735 return Ok(());
740 }
741 let result = emit_production(protocol, schema, grammar, vertex_id, rule, cursor, out);
742 EMIT_MU_FRAMES.with(|frames| {
743 frames.borrow_mut().remove(&key);
744 });
745 result
746}
747
748fn emit_production(
749 protocol: &str,
750 schema: &Schema,
751 grammar: &Grammar,
752 vertex_id: &panproto_gat::Name,
753 production: &Production,
754 cursor: &mut ChildCursor<'_>,
755 out: &mut Output<'_>,
756) -> Result<(), ParseError> {
757 let depth = EMIT_DEPTH.with(|d| {
758 let v = d.get() + 1;
759 d.set(v);
760 v
761 });
762 if depth > 500 {
763 EMIT_DEPTH.with(|d| d.set(d.get() - 1));
764 return Err(ParseError::EmitFailed {
765 protocol: protocol.to_owned(),
766 reason: format!(
767 "emit_production recursion >500 (likely a cyclic grammar; \
768 vertex='{vertex_id}')"
769 ),
770 });
771 }
772 let result = emit_production_inner(
773 protocol, schema, grammar, vertex_id, production, cursor, out,
774 );
775 EMIT_DEPTH.with(|d| d.set(d.get() - 1));
776 result
777}
778
779fn emit_production_inner(
780 protocol: &str,
781 schema: &Schema,
782 grammar: &Grammar,
783 vertex_id: &panproto_gat::Name,
784 production: &Production,
785 cursor: &mut ChildCursor<'_>,
786 out: &mut Output<'_>,
787) -> Result<(), ParseError> {
788 match production {
789 Production::String { value } => {
790 out.token(value);
791 Ok(())
792 }
793 Production::Pattern { value } => {
794 if let Some(literal) = literal_value(schema, vertex_id) {
795 out.token(literal);
796 } else {
797 out.token(&placeholder_for_pattern(value));
798 }
799 Ok(())
800 }
801 Production::Blank => Ok(()),
802 Production::Symbol { name } => {
803 if name.starts_with('_') {
804 if let Some(rule) = grammar.rules.get(name) {
818 walk_in_mu_frame(
819 protocol, schema, grammar, vertex_id, name, rule, cursor, out,
820 )
821 } else {
822 if name.contains("line_ending")
832 || name.contains("newline")
833 || name.ends_with("_or_eof")
834 {
835 out.newline();
836 }
837 Ok(())
838 }
839 } else if let Some(edge) = take_symbol_match(grammar, schema, cursor, name) {
840 emit_vertex(protocol, schema, grammar, &edge.tgt, out)
851 } else if vertex_id_kind(schema, vertex_id) == Some(name.as_str()) {
852 let rule = grammar
853 .rules
854 .get(name)
855 .ok_or_else(|| ParseError::EmitFailed {
856 protocol: protocol.to_owned(),
857 reason: format!("no production for SYMBOL '{name}'"),
858 })?;
859 walk_in_mu_frame(
862 protocol, schema, grammar, vertex_id, name, rule, cursor, out,
863 )
864 } else {
865 Ok(())
869 }
870 }
871 Production::Seq { members } => {
872 for member in members {
873 emit_production(protocol, schema, grammar, vertex_id, member, cursor, out)?;
874 }
875 Ok(())
876 }
877 Production::Choice { members } => {
878 if let Some(matched) =
879 pick_choice_with_cursor(schema, grammar, vertex_id, cursor, members)
880 {
881 emit_production(protocol, schema, grammar, vertex_id, matched, cursor, out)
882 } else {
883 Ok(())
884 }
885 }
886 Production::Repeat { content } | Production::Repeat1 { content } => {
887 let mut emitted_any = false;
888 loop {
889 let cursor_snap = cursor.consumed.clone();
890 let out_snap = out.snapshot();
891 let consumed_before = cursor.consumed.iter().filter(|&&c| c).count();
892 let result =
893 emit_production(protocol, schema, grammar, vertex_id, content, cursor, out);
894 let consumed_after = cursor.consumed.iter().filter(|&&c| c).count();
895 if result.is_err() || consumed_after == consumed_before {
896 cursor.consumed = cursor_snap;
897 out.restore(out_snap);
898 break;
899 }
900 emitted_any = true;
901 }
902 if matches!(production, Production::Repeat1 { .. }) && !emitted_any {
903 emit_production(protocol, schema, grammar, vertex_id, content, cursor, out)?;
904 }
905 Ok(())
906 }
907 Production::Optional { content } => {
908 let cursor_snap = cursor.consumed.clone();
909 let out_snap = out.snapshot();
910 let consumed_before = cursor.consumed.iter().filter(|&&c| c).count();
911 let result =
912 emit_production(protocol, schema, grammar, vertex_id, content, cursor, out);
913 if result.is_err() {
918 cursor.consumed = cursor_snap;
919 out.restore(out_snap);
920 return result;
921 }
922 let consumed_after = cursor.consumed.iter().filter(|&&c| c).count();
923 if consumed_after == consumed_before
924 && !has_relevant_constraint(content, schema, vertex_id)
925 {
926 cursor.consumed = cursor_snap;
927 out.restore(out_snap);
928 }
929 Ok(())
930 }
931 Production::Field { name, content } => {
932 if let Some(edge) = cursor.take_field(name) {
933 emit_in_child_context(protocol, schema, grammar, &edge.tgt, content, out)
934 } else if first_symbol(content).is_none() {
935 emit_production(protocol, schema, grammar, vertex_id, content, cursor, out)
943 } else {
944 Ok(())
945 }
946 }
947 Production::Alias {
948 content,
949 named,
950 value,
951 } => {
952 if *named && !value.is_empty() {
967 if let Some(edge) = cursor.take_matching(|edge| {
968 schema
969 .vertices
970 .get(&edge.tgt)
971 .map(|v| v.kind.as_ref() == value.as_str())
972 .unwrap_or(false)
973 }) {
974 return emit_aliased_child(protocol, schema, grammar, &edge.tgt, content, out);
975 }
976 }
977 emit_production(protocol, schema, grammar, vertex_id, content, cursor, out)
978 }
979 Production::Token { content }
980 | Production::ImmediateToken { content }
981 | Production::Prec { content, .. }
982 | Production::PrecLeft { content, .. }
983 | Production::PrecRight { content, .. }
984 | Production::PrecDynamic { content, .. } => {
985 emit_production(protocol, schema, grammar, vertex_id, content, cursor, out)
986 }
987 }
988}
989
990fn take_symbol_match<'a>(
993 grammar: &Grammar,
994 schema: &Schema,
995 cursor: &mut ChildCursor<'a>,
996 name: &str,
997) -> Option<&'a Edge> {
998 cursor.take_matching(|edge| {
999 let target_kind = schema.vertices.get(&edge.tgt).map(|v| v.kind.as_ref());
1000 kind_satisfies_symbol(grammar, target_kind, name)
1001 })
1002}
1003
1004fn kind_satisfies_symbol(grammar: &Grammar, target_kind: Option<&str>, name: &str) -> bool {
1014 let Some(target) = target_kind else {
1015 return false;
1016 };
1017 if target == name {
1018 return true;
1019 }
1020 grammar
1021 .subtypes
1022 .get(target)
1023 .is_some_and(|set| set.contains(name))
1024}
1025
1026fn emit_aliased_child(
1060 protocol: &str,
1061 schema: &Schema,
1062 grammar: &Grammar,
1063 child_id: &panproto_gat::Name,
1064 content: &Production,
1065 out: &mut Output<'_>,
1066) -> Result<(), ParseError> {
1067 if let Some(literal) = literal_value(schema, child_id) {
1072 if children_for(schema, child_id).is_empty() {
1073 out.token(literal);
1074 return Ok(());
1075 }
1076 }
1077
1078 if let Production::Symbol { name } = content {
1081 if let Some(rule) = grammar.rules.get(name) {
1082 let edges = children_for(schema, child_id);
1083 let mut cursor = ChildCursor::new(&edges);
1084 return emit_production(protocol, schema, grammar, child_id, rule, &mut cursor, out);
1085 }
1086 }
1087
1088 let edges = children_for(schema, child_id);
1090 let mut cursor = ChildCursor::new(&edges);
1091 emit_production(
1092 protocol,
1093 schema,
1094 grammar,
1095 child_id,
1096 content,
1097 &mut cursor,
1098 out,
1099 )
1100}
1101
1102fn emit_in_child_context(
1103 protocol: &str,
1104 schema: &Schema,
1105 grammar: &Grammar,
1106 child_id: &panproto_gat::Name,
1107 production: &Production,
1108 out: &mut Output<'_>,
1109) -> Result<(), ParseError> {
1110 if !matches!(production, Production::Symbol { .. }) {
1119 let child_kind = schema.vertices.get(child_id).map(|v| v.kind.as_ref());
1120 let symbols = referenced_symbols(production);
1121 if symbols
1122 .iter()
1123 .any(|s| kind_satisfies_symbol(grammar, child_kind, s) || child_kind == Some(s))
1124 {
1125 return emit_vertex(protocol, schema, grammar, child_id, out);
1126 }
1127 }
1128 match production {
1129 Production::Symbol { .. } => emit_vertex(protocol, schema, grammar, child_id, out),
1130 _ => {
1131 let edges = children_for(schema, child_id);
1132 let mut cursor = ChildCursor::new(&edges);
1133 emit_production(
1134 protocol,
1135 schema,
1136 grammar,
1137 child_id,
1138 production,
1139 &mut cursor,
1140 out,
1141 )
1142 }
1143 }
1144}
1145
1146fn pick_choice_with_cursor<'a>(
1147 schema: &Schema,
1148 grammar: &Grammar,
1149 vertex_id: &panproto_gat::Name,
1150 cursor: &ChildCursor<'_>,
1151 alternatives: &'a [Production],
1152) -> Option<&'a Production> {
1153 let constraint_blob = schema
1166 .constraints
1167 .get(vertex_id)
1168 .map(|cs| {
1169 let fingerprint: Option<&str> = cs
1170 .iter()
1171 .find(|c| c.sort.as_ref() == "chose-alt-fingerprint")
1172 .map(|c| c.value.as_str());
1173 if let Some(fp) = fingerprint {
1174 fp.to_owned()
1175 } else {
1176 cs.iter()
1177 .filter(|c| {
1178 let s = c.sort.as_ref();
1179 s.starts_with("interstitial-") && !s.ends_with("-start-byte")
1180 })
1181 .map(|c| c.value.as_str())
1182 .collect::<Vec<&str>>()
1183 .join(" ")
1184 }
1185 })
1186 .unwrap_or_default();
1187 let child_kinds: Vec<&str> = schema
1188 .constraints
1189 .get(vertex_id)
1190 .and_then(|cs| {
1191 cs.iter()
1192 .find(|c| c.sort.as_ref() == "chose-alt-child-kinds")
1193 .map(|c| c.value.split_whitespace().collect())
1194 })
1195 .unwrap_or_default();
1196 if !constraint_blob.is_empty() {
1197 let mut best_literal: usize = 0;
1208 let mut best_symbols: usize = 0;
1209 let mut best_alt: Option<&Production> = None;
1210 let mut tied = false;
1211 for alt in alternatives {
1212 let strings = literal_strings(alt);
1213 if strings.is_empty() {
1214 continue;
1215 }
1216 let literal_score = strings
1217 .iter()
1218 .filter(|s| constraint_blob.contains(s.as_str()))
1219 .map(String::len)
1220 .sum::<usize>();
1221 if literal_score == 0 {
1222 continue;
1223 }
1224 let symbol_score = if literal_score >= best_literal && !child_kinds.is_empty() {
1231 let symbols = referenced_symbols(alt);
1232 symbols
1233 .iter()
1234 .filter(|sym| {
1235 let sym_str: &str = sym;
1236 if child_kinds.contains(&sym_str) {
1237 return true;
1238 }
1239 grammar.subtypes.get(sym_str).is_some_and(|sub_set| {
1240 sub_set
1241 .iter()
1242 .any(|sub| child_kinds.contains(&sub.as_str()))
1243 })
1244 })
1245 .count()
1246 } else {
1247 0
1248 };
1249 let better = literal_score > best_literal
1250 || (literal_score == best_literal && symbol_score > best_symbols);
1251 let same = literal_score == best_literal && symbol_score == best_symbols;
1252 if better {
1253 best_literal = literal_score;
1254 best_symbols = symbol_score;
1255 best_alt = Some(alt);
1256 tied = false;
1257 } else if same && best_alt.is_some() {
1258 tied = true;
1259 }
1260 }
1261 if let Some(alt) = best_alt {
1268 if !tied {
1269 return Some(alt);
1270 }
1271 }
1272 }
1273
1274 for alt in alternatives {
1282 let symbols = referenced_symbols(alt);
1283 if !symbols.is_empty()
1284 && cursor.has_matching(|edge| {
1285 let tk = schema.vertices.get(&edge.tgt).map(|v| v.kind.as_ref());
1286 symbols
1287 .iter()
1288 .any(|s| kind_satisfies_symbol(grammar, tk, s))
1289 })
1290 {
1291 return Some(alt);
1292 }
1293 }
1294
1295 let edge_kinds: Vec<&str> = cursor
1298 .edges
1299 .iter()
1300 .enumerate()
1301 .filter(|(i, _)| !cursor.consumed[*i])
1302 .map(|(_, e)| e.kind.as_ref())
1303 .collect();
1304 for alt in alternatives {
1305 if has_field_in(alt, &edge_kinds) {
1306 return Some(alt);
1307 }
1308 }
1309
1310 let _ = (schema, vertex_id);
1323 if alternatives.iter().any(|a| matches!(a, Production::Blank)) {
1324 return alternatives.iter().find(|a| matches!(a, Production::Blank));
1325 }
1326 alternatives
1327 .iter()
1328 .find(|alt| !matches!(alt, Production::Blank))
1329}
1330
1331fn literal_strings(production: &Production) -> Vec<String> {
1337 let mut out = Vec::new();
1338 fn walk(p: &Production, out: &mut Vec<String>) {
1339 match p {
1340 Production::String { value } if !value.is_empty() => {
1341 out.push(value.clone());
1342 }
1343 Production::Choice { members } | Production::Seq { members } => {
1344 for m in members {
1345 walk(m, out);
1346 }
1347 }
1348 Production::Repeat { content }
1349 | Production::Repeat1 { content }
1350 | Production::Optional { content }
1351 | Production::Field { content, .. }
1352 | Production::Alias { content, .. }
1353 | Production::Token { content }
1354 | Production::ImmediateToken { content }
1355 | Production::Prec { content, .. }
1356 | Production::PrecLeft { content, .. }
1357 | Production::PrecRight { content, .. }
1358 | Production::PrecDynamic { content, .. } => walk(content, out),
1359 _ => {}
1360 }
1361 }
1362 walk(production, &mut out);
1363 out
1364}
1365
1366fn referenced_symbols(production: &Production) -> Vec<&str> {
1373 let mut out = Vec::new();
1374 fn walk<'a>(p: &'a Production, out: &mut Vec<&'a str>) {
1375 match p {
1376 Production::Symbol { name } => out.push(name.as_str()),
1377 Production::Choice { members } | Production::Seq { members } => {
1378 for m in members {
1379 walk(m, out);
1380 }
1381 }
1382 Production::Repeat { content }
1383 | Production::Repeat1 { content }
1384 | Production::Optional { content }
1385 | Production::Field { content, .. }
1386 | Production::Alias { content, .. }
1387 | Production::Token { content }
1388 | Production::ImmediateToken { content }
1389 | Production::Prec { content, .. }
1390 | Production::PrecLeft { content, .. }
1391 | Production::PrecRight { content, .. }
1392 | Production::PrecDynamic { content, .. } => walk(content, out),
1393 _ => {}
1394 }
1395 }
1396 walk(production, &mut out);
1397 out
1398}
1399
1400fn first_symbol(production: &Production) -> Option<&str> {
1401 match production {
1402 Production::Symbol { name } => Some(name),
1403 Production::Seq { members } => members.iter().find_map(first_symbol),
1404 Production::Choice { members } => members.iter().find_map(first_symbol),
1405 Production::Repeat { content }
1406 | Production::Repeat1 { content }
1407 | Production::Optional { content }
1408 | Production::Field { content, .. }
1409 | Production::Alias { content, .. }
1410 | Production::Token { content }
1411 | Production::ImmediateToken { content }
1412 | Production::Prec { content, .. }
1413 | Production::PrecLeft { content, .. }
1414 | Production::PrecRight { content, .. }
1415 | Production::PrecDynamic { content, .. } => first_symbol(content),
1416 _ => None,
1417 }
1418}
1419
1420fn has_field_in(production: &Production, edge_kinds: &[&str]) -> bool {
1421 match production {
1422 Production::Field { name, .. } => edge_kinds.contains(&name.as_str()),
1423 Production::Seq { members } | Production::Choice { members } => {
1424 members.iter().any(|m| has_field_in(m, edge_kinds))
1425 }
1426 Production::Repeat { content }
1427 | Production::Repeat1 { content }
1428 | Production::Optional { content }
1429 | Production::Alias { content, .. }
1430 | Production::Token { content }
1431 | Production::ImmediateToken { content }
1432 | Production::Prec { content, .. }
1433 | Production::PrecLeft { content, .. }
1434 | Production::PrecRight { content, .. }
1435 | Production::PrecDynamic { content, .. } => has_field_in(content, edge_kinds),
1436 _ => false,
1437 }
1438}
1439
1440fn has_relevant_constraint(
1441 production: &Production,
1442 schema: &Schema,
1443 vertex_id: &panproto_gat::Name,
1444) -> bool {
1445 let constraints = match schema.constraints.get(vertex_id) {
1446 Some(c) => c,
1447 None => return false,
1448 };
1449 fn walk(production: &Production, constraints: &[panproto_schema::Constraint]) -> bool {
1450 match production {
1451 Production::String { value } => constraints
1452 .iter()
1453 .any(|c| c.value == *value || c.sort.as_ref() == value),
1454 Production::Field { name, content } => {
1455 constraints.iter().any(|c| c.sort.as_ref() == name) || walk(content, constraints)
1456 }
1457 Production::Seq { members } | Production::Choice { members } => {
1458 members.iter().any(|m| walk(m, constraints))
1459 }
1460 Production::Repeat { content }
1461 | Production::Repeat1 { content }
1462 | Production::Optional { content }
1463 | Production::Alias { content, .. }
1464 | Production::Token { content }
1465 | Production::ImmediateToken { content }
1466 | Production::Prec { content, .. }
1467 | Production::PrecLeft { content, .. }
1468 | Production::PrecRight { content, .. }
1469 | Production::PrecDynamic { content, .. } => walk(content, constraints),
1470 _ => false,
1471 }
1472 }
1473 walk(production, constraints)
1474}
1475
1476fn children_for<'a>(schema: &'a Schema, vertex_id: &panproto_gat::Name) -> Vec<&'a Edge> {
1477 let mut edges: Vec<&Edge> = schema
1478 .edges
1479 .keys()
1480 .filter(|e| &e.src == vertex_id)
1481 .collect();
1482 edges.sort_by_key(|e| {
1483 let pos = schema.orderings.get(*e).copied().unwrap_or(u32::MAX);
1487 (pos, e.kind.clone(), e.tgt.clone())
1488 });
1489 edges
1490}
1491
1492fn vertex_id_kind<'a>(schema: &'a Schema, vertex_id: &panproto_gat::Name) -> Option<&'a str> {
1493 schema.vertices.get(vertex_id).map(|v| v.kind.as_ref())
1494}
1495
1496fn literal_value<'a>(schema: &'a Schema, vertex_id: &panproto_gat::Name) -> Option<&'a str> {
1497 schema
1498 .constraints
1499 .get(vertex_id)?
1500 .iter()
1501 .find(|c| c.sort.as_ref() == "literal-value")
1502 .map(|c| c.value.as_str())
1503}
1504
1505fn placeholder_for_pattern(pattern: &str) -> String {
1506 let simple_lit = decode_simple_pattern_literal(pattern);
1514 if let Some(lit) = simple_lit {
1515 return lit;
1516 }
1517
1518 if pattern.contains("[0-9]") || pattern.contains("\\d") {
1519 "0".into()
1520 } else if pattern.contains("[a-zA-Z_]") || pattern.contains("\\w") {
1521 "_x".into()
1522 } else if pattern.contains('"') || pattern.contains('\'') {
1523 "\"\"".into()
1524 } else {
1525 "_".into()
1526 }
1527}
1528
1529fn decode_simple_pattern_literal(pattern: &str) -> Option<String> {
1534 if pattern
1537 .chars()
1538 .any(|c| matches!(c, '[' | ']' | '(' | ')' | '*' | '+' | '?' | '|' | '{' | '}'))
1539 {
1540 return None;
1541 }
1542 let mut out = String::new();
1543 let mut chars = pattern.chars();
1544 while let Some(c) = chars.next() {
1545 if c == '\\' {
1546 match chars.next() {
1547 Some('n') => out.push('\n'),
1548 Some('r') => out.push('\r'),
1549 Some('t') => out.push('\t'),
1550 Some('\\') => out.push('\\'),
1551 Some('/') => out.push('/'),
1552 Some(other) => out.push(other),
1553 None => return None,
1554 }
1555 } else {
1556 out.push(c);
1557 }
1558 }
1559 Some(out)
1560}
1561
1562#[derive(Clone)]
1574enum Token {
1575 Lit(String),
1577 IndentOpen,
1581 IndentClose,
1583 LineBreak,
1586}
1587
1588struct Output<'a> {
1589 tokens: Vec<Token>,
1590 policy: &'a FormatPolicy,
1591}
1592
1593#[derive(Clone)]
1594struct OutputSnapshot {
1595 tokens_len: usize,
1596}
1597
1598impl<'a> Output<'a> {
1599 fn new(policy: &'a FormatPolicy) -> Self {
1600 Self {
1601 tokens: Vec::new(),
1602 policy,
1603 }
1604 }
1605
1606 fn token(&mut self, value: &str) {
1607 if value.is_empty() {
1608 return;
1609 }
1610
1611 if self.policy.indent_close.iter().any(|t| t == value) {
1612 self.tokens.push(Token::IndentClose);
1613 }
1614
1615 self.tokens.push(Token::Lit(value.to_owned()));
1616
1617 if self.policy.indent_open.iter().any(|t| t == value) {
1618 self.tokens.push(Token::IndentOpen);
1619 self.tokens.push(Token::LineBreak);
1620 } else if self.policy.line_break_after.iter().any(|t| t == value) {
1621 self.tokens.push(Token::LineBreak);
1622 }
1623 }
1624
1625 fn newline(&mut self) {
1626 self.tokens.push(Token::LineBreak);
1627 }
1628
1629 fn snapshot(&self) -> OutputSnapshot {
1630 OutputSnapshot {
1631 tokens_len: self.tokens.len(),
1632 }
1633 }
1634
1635 fn restore(&mut self, snap: OutputSnapshot) {
1636 self.tokens.truncate(snap.tokens_len);
1637 }
1638
1639 fn finish(self) -> Vec<u8> {
1640 layout(&self.tokens, self.policy)
1641 }
1642}
1643
1644fn layout(tokens: &[Token], policy: &FormatPolicy) -> Vec<u8> {
1650 let mut bytes = Vec::new();
1651 let mut indent: usize = 0;
1652 let mut at_line_start = true;
1653 let mut last_lit: Option<&str> = None;
1654
1655 for tok in tokens {
1656 match tok {
1657 Token::IndentOpen => indent += 1,
1658 Token::IndentClose => {
1659 indent = indent.saturating_sub(1);
1660 if !at_line_start {
1661 bytes.push(b'\n');
1662 at_line_start = true;
1663 }
1664 }
1665 Token::LineBreak => {
1666 if !at_line_start {
1667 bytes.push(b'\n');
1668 at_line_start = true;
1669 }
1670 }
1671 Token::Lit(value) => {
1672 if at_line_start {
1673 bytes.extend(std::iter::repeat_n(b' ', indent * policy.indent_width));
1674 } else if let Some(prev) = last_lit {
1675 if needs_space_between(prev, value) {
1676 bytes.push(b' ');
1677 }
1678 }
1679 bytes.extend_from_slice(value.as_bytes());
1680 at_line_start = false;
1681 last_lit = Some(value.as_str());
1682 }
1683 }
1684 }
1685
1686 if !at_line_start {
1687 bytes.push(b'\n');
1688 }
1689 bytes
1690}
1691
1692fn needs_space_between(last: &str, next: &str) -> bool {
1693 if last.is_empty() || next.is_empty() {
1694 return false;
1695 }
1696 if is_punct_open(last) || is_punct_open(next) {
1697 return false;
1698 }
1699 if is_punct_close(next) {
1700 return false;
1701 }
1702 if is_punct_close(last) && is_punct_punctuation(next) {
1703 return false;
1704 }
1705 if last == "." || next == "." {
1706 return false;
1707 }
1708 if last_is_word_like(last) && first_is_word_like(next) {
1709 return true;
1710 }
1711 if last_ends_with_alnum(last) && first_is_alnum_or_underscore(next) {
1712 return true;
1713 }
1714 true
1717}
1718
1719fn is_punct_open(s: &str) -> bool {
1720 matches!(s, "(" | "[" | "{" | "\"" | "'" | "`")
1721}
1722
1723fn is_punct_close(s: &str) -> bool {
1724 matches!(s, ")" | "]" | "}" | "," | ";" | ":" | "\"" | "'" | "`")
1725}
1726
1727fn is_punct_punctuation(s: &str) -> bool {
1728 matches!(s, "," | ";" | ":" | "." | ")" | "]" | "}")
1729}
1730
1731fn last_is_word_like(s: &str) -> bool {
1732 s.chars()
1733 .next_back()
1734 .map(|c| c.is_alphanumeric() || c == '_')
1735 .unwrap_or(false)
1736}
1737
1738fn first_is_word_like(s: &str) -> bool {
1739 s.chars()
1740 .next()
1741 .map(|c| c.is_alphanumeric() || c == '_')
1742 .unwrap_or(false)
1743}
1744
1745fn last_ends_with_alnum(s: &str) -> bool {
1746 s.chars()
1747 .next_back()
1748 .map(char::is_alphanumeric)
1749 .unwrap_or(false)
1750}
1751
1752fn first_is_alnum_or_underscore(s: &str) -> bool {
1753 s.chars()
1754 .next()
1755 .map(|c| c.is_alphanumeric() || c == '_')
1756 .unwrap_or(false)
1757}
1758
1759#[cfg(test)]
1760mod tests {
1761 use super::*;
1762
1763 #[test]
1764 fn parses_simple_grammar_json() {
1765 let bytes = br#"{
1766 "name": "tiny",
1767 "rules": {
1768 "program": {
1769 "type": "SEQ",
1770 "members": [
1771 {"type": "STRING", "value": "hello"},
1772 {"type": "STRING", "value": ";"}
1773 ]
1774 }
1775 }
1776 }"#;
1777 let g = Grammar::from_bytes("tiny", bytes).expect("valid tiny grammar");
1778 assert!(g.rules.contains_key("program"));
1779 }
1780
1781 #[test]
1782 fn output_emits_punctuation_without_leading_space() {
1783 let policy = FormatPolicy::default();
1784 let mut out = Output::new(&policy);
1785 out.token("foo");
1786 out.token("(");
1787 out.token(")");
1788 out.token(";");
1789 let bytes = out.finish();
1790 let s = std::str::from_utf8(&bytes).expect("ascii output");
1791 assert!(s.starts_with("foo();"), "got {s:?}");
1792 }
1793
1794 #[test]
1795 fn grammar_from_bytes_rejects_malformed_input() {
1796 let result = Grammar::from_bytes("malformed", b"not json");
1797 let err = result.expect_err("malformed bytes must yield Err");
1798 let msg = err.to_string();
1799 assert!(
1800 msg.contains("malformed"),
1801 "error message should name the protocol: {msg:?}"
1802 );
1803 }
1804
1805 #[test]
1806 fn output_indents_after_open_brace() {
1807 let policy = FormatPolicy::default();
1808 let mut out = Output::new(&policy);
1809 out.token("fn");
1810 out.token("foo");
1811 out.token("(");
1812 out.token(")");
1813 out.token("{");
1814 out.token("body");
1815 out.token("}");
1816 let bytes = out.finish();
1817 let s = std::str::from_utf8(&bytes).expect("ascii output");
1818 assert!(s.contains("{\n"), "newline after opening brace: {s:?}");
1819 assert!(s.contains("body"), "body inside block: {s:?}");
1820 assert!(s.ends_with("}\n"), "newline after closing brace: {s:?}");
1821 }
1822
1823 #[test]
1824 fn output_no_space_between_word_and_dot() {
1825 let policy = FormatPolicy::default();
1826 let mut out = Output::new(&policy);
1827 out.token("foo");
1828 out.token(".");
1829 out.token("bar");
1830 let bytes = out.finish();
1831 let s = std::str::from_utf8(&bytes).expect("ascii output");
1832 assert!(s.starts_with("foo.bar"), "no space around dot: {s:?}");
1833 }
1834
1835 #[test]
1836 fn output_snapshot_restore_truncates_bytes() {
1837 let policy = FormatPolicy::default();
1838 let mut out = Output::new(&policy);
1839 out.token("keep");
1840 let snap = out.snapshot();
1841 out.token("drop");
1842 out.token("more");
1843 out.restore(snap);
1844 out.token("after");
1845 let bytes = out.finish();
1846 let s = std::str::from_utf8(&bytes).expect("ascii output");
1847 assert!(s.contains("keep"), "kept token survives: {s:?}");
1848 assert!(s.contains("after"), "post-restore token visible: {s:?}");
1849 assert!(!s.contains("drop"), "rolled-back token removed: {s:?}");
1850 assert!(!s.contains("more"), "rolled-back token removed: {s:?}");
1851 }
1852
1853 #[test]
1854 fn child_cursor_take_field_consumes_once() {
1855 let edges_owned: Vec<Edge> = vec![Edge {
1856 src: panproto_gat::Name::from("p"),
1857 tgt: panproto_gat::Name::from("c"),
1858 kind: panproto_gat::Name::from("name"),
1859 name: None,
1860 }];
1861 let edges: Vec<&Edge> = edges_owned.iter().collect();
1862 let mut cursor = ChildCursor::new(&edges);
1863 let first = cursor.take_field("name");
1864 let second = cursor.take_field("name");
1865 assert!(first.is_some(), "first take returns the edge");
1866 assert!(
1867 second.is_none(),
1868 "second take returns None (already consumed)"
1869 );
1870 }
1871
1872 #[test]
1873 fn child_cursor_take_matching_predicate() {
1874 let edges_owned: Vec<Edge> = vec![
1875 Edge {
1876 src: "p".into(),
1877 tgt: "c1".into(),
1878 kind: "child_of".into(),
1879 name: None,
1880 },
1881 Edge {
1882 src: "p".into(),
1883 tgt: "c2".into(),
1884 kind: "key".into(),
1885 name: None,
1886 },
1887 ];
1888 let edges: Vec<&Edge> = edges_owned.iter().collect();
1889 let mut cursor = ChildCursor::new(&edges);
1890 assert!(cursor.has_matching(|e| e.kind.as_ref() == "key"));
1891 let taken = cursor.take_matching(|e| e.kind.as_ref() == "key");
1892 assert!(taken.is_some());
1893 assert!(
1894 !cursor.has_matching(|e| e.kind.as_ref() == "key"),
1895 "consumed edge no longer matches"
1896 );
1897 assert!(
1898 cursor.has_matching(|e| e.kind.as_ref() == "child_of"),
1899 "the other edge is still available"
1900 );
1901 }
1902
1903 #[test]
1904 fn kind_satisfies_symbol_direct_match() {
1905 let bytes = br#"{
1906 "name": "tiny",
1907 "rules": {
1908 "x": {"type": "STRING", "value": "x"}
1909 }
1910 }"#;
1911 let g = Grammar::from_bytes("tiny", bytes).expect("valid grammar");
1912 assert!(kind_satisfies_symbol(&g, Some("x"), "x"));
1913 assert!(!kind_satisfies_symbol(&g, Some("y"), "x"));
1914 assert!(!kind_satisfies_symbol(&g, None, "x"));
1915 }
1916
1917 #[test]
1918 fn kind_satisfies_symbol_through_hidden_rule() {
1919 let bytes = br#"{
1920 "name": "tiny",
1921 "rules": {
1922 "_value": {
1923 "type": "CHOICE",
1924 "members": [
1925 {"type": "SYMBOL", "name": "object"},
1926 {"type": "SYMBOL", "name": "number"}
1927 ]
1928 },
1929 "object": {"type": "STRING", "value": "{}"},
1930 "number": {"type": "PATTERN", "value": "[0-9]+"}
1931 }
1932 }"#;
1933 let g = Grammar::from_bytes("tiny", bytes).expect("valid grammar");
1934 assert!(
1935 kind_satisfies_symbol(&g, Some("number"), "_value"),
1936 "number is reachable from _value via CHOICE"
1937 );
1938 assert!(
1939 kind_satisfies_symbol(&g, Some("object"), "_value"),
1940 "object is reachable from _value via CHOICE"
1941 );
1942 assert!(
1943 !kind_satisfies_symbol(&g, Some("string"), "_value"),
1944 "string is NOT among the alternatives"
1945 );
1946 }
1947
1948 #[test]
1949 fn first_symbol_skips_string_terminals() {
1950 let prod: Production = serde_json::from_str(
1951 r#"{
1952 "type": "SEQ",
1953 "members": [
1954 {"type": "STRING", "value": "{"},
1955 {"type": "SYMBOL", "name": "body"},
1956 {"type": "STRING", "value": "}"}
1957 ]
1958 }"#,
1959 )
1960 .expect("valid SEQ");
1961 assert_eq!(first_symbol(&prod), Some("body"));
1962 }
1963
1964 #[test]
1965 fn placeholder_for_pattern_routes_by_regex_class() {
1966 assert_eq!(placeholder_for_pattern("[0-9]+"), "0");
1967 assert_eq!(placeholder_for_pattern("[a-zA-Z_]\\w*"), "_x");
1968 assert_eq!(placeholder_for_pattern("\"[^\"]*\""), "\"\"");
1969 assert_eq!(placeholder_for_pattern("\\d+\\.\\d+"), "0");
1970 }
1971
1972 #[test]
1973 fn format_policy_default_breaks_after_semicolon() {
1974 let policy = FormatPolicy::default();
1975 assert!(policy.line_break_after.iter().any(|t| t == ";"));
1976 assert!(policy.indent_open.iter().any(|t| t == "{"));
1977 assert!(policy.indent_close.iter().any(|t| t == "}"));
1978 assert_eq!(policy.indent_width, 2);
1979 }
1980
1981 #[test]
1982 fn placeholder_decodes_literal_pattern_separators() {
1983 assert_eq!(placeholder_for_pattern("\\n"), "\n");
1987 assert_eq!(placeholder_for_pattern("\\r\\n"), "\r\n");
1988 assert_eq!(placeholder_for_pattern(";"), ";");
1989 assert_eq!(placeholder_for_pattern("[0-9]+"), "0");
1992 assert_eq!(placeholder_for_pattern("a|b"), "_");
1993 }
1994
1995 #[test]
1996 fn supertypes_decode_from_grammar_json_strings() {
1997 let bytes = br#"{
1999 "name": "tiny",
2000 "supertypes": ["expression"],
2001 "rules": {
2002 "expression": {
2003 "type": "CHOICE",
2004 "members": [
2005 {"type": "SYMBOL", "name": "binary_expression"},
2006 {"type": "SYMBOL", "name": "identifier"}
2007 ]
2008 },
2009 "binary_expression": {"type": "STRING", "value": "x"},
2010 "identifier": {"type": "PATTERN", "value": "[a-z]+"}
2011 }
2012 }"#;
2013 let g = Grammar::from_bytes("tiny", bytes).expect("parse");
2014 assert!(g.supertypes.contains("expression"));
2015 assert!(kind_satisfies_symbol(&g, Some("identifier"), "expression"));
2017 assert!(!kind_satisfies_symbol(&g, Some("string"), "expression"));
2019 }
2020
2021 #[test]
2022 fn supertypes_decode_from_grammar_json_objects() {
2023 let bytes = br#"{
2026 "name": "tiny",
2027 "supertypes": [{"type": "SYMBOL", "name": "stmt"}],
2028 "rules": {
2029 "stmt": {
2030 "type": "CHOICE",
2031 "members": [
2032 {"type": "SYMBOL", "name": "while_stmt"},
2033 {"type": "SYMBOL", "name": "if_stmt"}
2034 ]
2035 },
2036 "while_stmt": {"type": "STRING", "value": "while"},
2037 "if_stmt": {"type": "STRING", "value": "if"}
2038 }
2039 }"#;
2040 let g = Grammar::from_bytes("tiny", bytes).expect("parse");
2041 assert!(g.supertypes.contains("stmt"));
2042 assert!(kind_satisfies_symbol(&g, Some("while_stmt"), "stmt"));
2043 }
2044
2045 #[test]
2046 fn alias_value_matches_kind() {
2047 let bytes = br#"{
2051 "name": "tiny",
2052 "rules": {
2053 "_package_identifier": {
2054 "type": "ALIAS",
2055 "named": true,
2056 "value": "package_identifier",
2057 "content": {"type": "SYMBOL", "name": "identifier"}
2058 },
2059 "identifier": {"type": "PATTERN", "value": "[a-z]+"}
2060 }
2061 }"#;
2062 let g = Grammar::from_bytes("tiny", bytes).expect("parse");
2063 assert!(kind_satisfies_symbol(
2064 &g,
2065 Some("package_identifier"),
2066 "_package_identifier"
2067 ));
2068 }
2069
2070 #[test]
2071 fn referenced_symbols_walks_nested_seq() {
2072 let prod: Production = serde_json::from_str(
2073 r#"{
2074 "type": "SEQ",
2075 "members": [
2076 {"type": "CHOICE", "members": [
2077 {"type": "SYMBOL", "name": "attribute_item"},
2078 {"type": "BLANK"}
2079 ]},
2080 {"type": "SYMBOL", "name": "parameter"},
2081 {"type": "REPEAT", "content": {
2082 "type": "SEQ",
2083 "members": [
2084 {"type": "STRING", "value": ","},
2085 {"type": "SYMBOL", "name": "parameter"}
2086 ]
2087 }}
2088 ]
2089 }"#,
2090 )
2091 .expect("seq");
2092 let symbols = referenced_symbols(&prod);
2093 assert!(symbols.contains(&"attribute_item"));
2094 assert!(symbols.contains(&"parameter"));
2095 }
2096
2097 #[test]
2098 fn literal_strings_collects_choice_members() {
2099 let prod: Production = serde_json::from_str(
2100 r#"{
2101 "type": "CHOICE",
2102 "members": [
2103 {"type": "STRING", "value": "+"},
2104 {"type": "STRING", "value": "-"},
2105 {"type": "STRING", "value": "*"}
2106 ]
2107 }"#,
2108 )
2109 .expect("choice");
2110 let strings = literal_strings(&prod);
2111 assert_eq!(strings, vec!["+", "-", "*"]);
2112 }
2113}