1#![allow(
2 clippy::module_name_repetitions,
3 clippy::too_many_lines,
4 clippy::too_many_arguments,
5 clippy::map_unwrap_or,
6 clippy::option_if_let_else,
7 clippy::elidable_lifetime_names,
8 clippy::items_after_statements,
9 clippy::needless_pass_by_value,
10 clippy::single_match_else,
11 clippy::manual_let_else,
12 clippy::match_same_arms,
13 clippy::missing_const_for_fn,
14 clippy::single_char_pattern,
15 clippy::naive_bytecount,
16 clippy::expect_used,
17 clippy::redundant_pub_crate,
18 clippy::used_underscore_binding,
19 clippy::redundant_field_names,
20 clippy::struct_field_names,
21 clippy::redundant_else,
22 clippy::similar_names
23)]
24
25use std::collections::BTreeMap;
71
72use panproto_schema::{Edge, Schema};
73use serde::Deserialize;
74
75use crate::error::ParseError;
76
77#[derive(Debug, Clone, Deserialize)]
90#[serde(tag = "type")]
91#[non_exhaustive]
92pub enum Production {
93 #[serde(rename = "SEQ")]
95 Seq {
96 members: Vec<Self>,
98 },
99 #[serde(rename = "CHOICE")]
101 Choice {
102 members: Vec<Self>,
105 },
106 #[serde(rename = "REPEAT")]
108 Repeat {
109 content: Box<Self>,
111 },
112 #[serde(rename = "REPEAT1")]
114 Repeat1 {
115 content: Box<Self>,
117 },
118 #[serde(rename = "OPTIONAL")]
124 Optional {
125 content: Box<Self>,
127 },
128 #[serde(rename = "SYMBOL")]
130 Symbol {
131 name: String,
134 },
135 #[serde(rename = "STRING")]
137 String {
138 value: String,
140 },
141 #[serde(rename = "PATTERN")]
147 Pattern {
148 value: String,
150 },
151 #[serde(rename = "BLANK")]
153 Blank,
154 #[serde(rename = "FIELD")]
160 Field {
161 name: String,
163 content: Box<Self>,
165 },
166 #[serde(rename = "ALIAS")]
171 Alias {
172 content: Box<Self>,
174 #[serde(default)]
176 named: bool,
177 #[serde(default)]
179 value: String,
180 },
181 #[serde(rename = "TOKEN")]
186 Token {
187 content: Box<Self>,
189 },
190 #[serde(rename = "IMMEDIATE_TOKEN")]
194 ImmediateToken {
195 content: Box<Self>,
197 },
198 #[serde(rename = "PREC")]
200 Prec {
201 #[allow(dead_code)]
203 value: serde_json::Value,
204 content: Box<Self>,
206 },
207 #[serde(rename = "PREC_LEFT")]
209 PrecLeft {
210 #[allow(dead_code)]
212 value: serde_json::Value,
213 content: Box<Self>,
215 },
216 #[serde(rename = "PREC_RIGHT")]
218 PrecRight {
219 #[allow(dead_code)]
221 value: serde_json::Value,
222 content: Box<Self>,
224 },
225 #[serde(rename = "PREC_DYNAMIC")]
227 PrecDynamic {
228 #[allow(dead_code)]
230 value: serde_json::Value,
231 content: Box<Self>,
233 },
234 #[serde(rename = "RESERVED")]
244 Reserved {
245 content: Box<Self>,
247 #[allow(dead_code)]
249 #[serde(default)]
250 context_name: String,
251 },
252}
253
254#[derive(Debug, Clone, Deserialize)]
259pub struct Grammar {
260 #[allow(dead_code)]
262 pub name: String,
263 pub rules: BTreeMap<String, Production>,
267 #[serde(default, deserialize_with = "deserialize_supertypes")]
275 pub supertypes: std::collections::HashSet<String>,
276 #[serde(skip)]
290 pub subtypes: std::collections::HashMap<String, std::collections::HashSet<String>>,
291}
292
293fn deserialize_supertypes<'de, D>(
294 deserializer: D,
295) -> Result<std::collections::HashSet<String>, D::Error>
296where
297 D: serde::Deserializer<'de>,
298{
299 let entries: Vec<serde_json::Value> = Vec::deserialize(deserializer)?;
300 let mut out = std::collections::HashSet::new();
301 for entry in entries {
302 match entry {
303 serde_json::Value::String(s) => {
304 out.insert(s);
305 }
306 serde_json::Value::Object(map) => {
307 if let Some(serde_json::Value::String(name)) = map.get("name") {
308 out.insert(name.clone());
309 }
310 }
311 _ => {}
312 }
313 }
314 Ok(out)
315}
316
317impl Grammar {
318 pub fn from_bytes(protocol: &str, bytes: &[u8]) -> Result<Self, ParseError> {
338 let mut grammar: Self =
339 serde_json::from_slice(bytes).map_err(|e| ParseError::EmitFailed {
340 protocol: protocol.to_owned(),
341 reason: format!("grammar.json deserialization failed: {e}"),
342 })?;
343 grammar.subtypes = compute_subtype_closure(&grammar);
344 Ok(grammar)
345 }
346}
347
348fn compute_subtype_closure(
351 grammar: &Grammar,
352) -> std::collections::HashMap<String, std::collections::HashSet<String>> {
353 use std::collections::{HashMap, HashSet};
354 let mut subtypes: HashMap<String, HashSet<String>> = HashMap::new();
359 for name in grammar.rules.keys() {
360 subtypes
361 .entry(name.clone())
362 .or_default()
363 .insert(name.clone());
364 }
365
366 fn walk<'g>(
370 grammar: &'g Grammar,
371 production: &'g Production,
372 visited: &mut HashSet<&'g str>,
373 out: &mut HashSet<String>,
374 ) {
375 match production {
376 Production::Symbol { name } => {
377 out.insert(name.clone());
379 let expand = name.starts_with('_') || grammar.supertypes.contains(name.as_str());
382 if expand && visited.insert(name.as_str()) {
383 if let Some(rule) = grammar.rules.get(name) {
384 walk(grammar, rule, visited, out);
385 }
386 }
387 }
388 Production::Choice { members } | Production::Seq { members } => {
389 for m in members {
390 walk(grammar, m, visited, out);
391 }
392 }
393 Production::Alias {
394 content,
395 named,
396 value,
397 } => {
398 if *named && !value.is_empty() {
399 out.insert(value.clone());
400 }
401 walk(grammar, content, visited, out);
402 }
403 Production::Repeat { content }
404 | Production::Repeat1 { content }
405 | Production::Optional { content }
406 | Production::Field { content, .. }
407 | Production::Token { content }
408 | Production::ImmediateToken { content }
409 | Production::Prec { content, .. }
410 | Production::PrecLeft { content, .. }
411 | Production::PrecRight { content, .. }
412 | Production::PrecDynamic { content, .. }
413 | Production::Reserved { content, .. } => {
414 walk(grammar, content, visited, out);
415 }
416 _ => {}
417 }
418 }
419
420 for (name, rule) in &grammar.rules {
421 let expand = name.starts_with('_') || grammar.supertypes.contains(name.as_str());
422 if !expand {
423 continue;
424 }
425 let mut visited: HashSet<&str> = HashSet::new();
426 visited.insert(name.as_str());
427 let mut reachable: HashSet<String> = HashSet::new();
428 walk(grammar, rule, &mut visited, &mut reachable);
429 for kind in &reachable {
430 subtypes
431 .entry(kind.clone())
432 .or_default()
433 .insert(name.clone());
434 }
435 }
436
437 fn collect_aliases<'g>(production: &'g Production, out: &mut Vec<(String, &'g Production)>) {
443 match production {
444 Production::Alias {
445 content,
446 named,
447 value,
448 } => {
449 if *named && !value.is_empty() {
450 out.push((value.clone(), content.as_ref()));
451 }
452 collect_aliases(content, out);
453 }
454 Production::Choice { members } | Production::Seq { members } => {
455 for m in members {
456 collect_aliases(m, out);
457 }
458 }
459 Production::Repeat { content }
460 | Production::Repeat1 { content }
461 | Production::Optional { content }
462 | Production::Field { content, .. }
463 | Production::Token { content }
464 | Production::ImmediateToken { content }
465 | Production::Prec { content, .. }
466 | Production::PrecLeft { content, .. }
467 | Production::PrecRight { content, .. }
468 | Production::PrecDynamic { content, .. }
469 | Production::Reserved { content, .. } => {
470 collect_aliases(content, out);
471 }
472 _ => {}
473 }
474 }
475 let mut aliases: Vec<(String, &Production)> = Vec::new();
476 for rule in grammar.rules.values() {
477 collect_aliases(rule, &mut aliases);
478 }
479 for (alias_value, content) in aliases {
480 let mut visited: HashSet<&str> = HashSet::new();
481 let mut reachable: HashSet<String> = HashSet::new();
482 walk(grammar, content, &mut visited, &mut reachable);
483 subtypes
486 .entry(alias_value.clone())
487 .or_default()
488 .insert(alias_value.clone());
489 for kind in reachable {
490 subtypes
491 .entry(kind)
492 .or_default()
493 .insert(alias_value.clone());
494 }
495 }
496
497 for _ in 0..8 {
501 let snapshot = subtypes.clone();
502 let mut changed = false;
503 for (kind, supers) in &snapshot {
504 let extra: HashSet<String> = supers
505 .iter()
506 .flat_map(|s| snapshot.get(s).cloned().unwrap_or_default())
507 .collect();
508 let entry = subtypes.entry(kind.clone()).or_default();
509 for s in extra {
510 if entry.insert(s) {
511 changed = true;
512 }
513 }
514 }
515 if !changed {
516 break;
517 }
518 }
519
520 subtypes
521}
522
523#[derive(Debug, Clone)]
536pub struct FormatPolicy {
537 pub indent_width: usize,
539 pub line_break_after: Vec<String>,
541 pub indent_open: Vec<String>,
543 pub indent_close: Vec<String>,
545}
546
547impl Default for FormatPolicy {
548 fn default() -> Self {
549 Self {
550 indent_width: 2,
551 line_break_after: vec![";".into(), "{".into(), "}".into()],
552 indent_open: vec!["{".into()],
553 indent_close: vec!["}".into()],
554 }
555 }
556}
557
558pub fn emit_pretty(
582 protocol: &str,
583 schema: &Schema,
584 grammar: &Grammar,
585 policy: &FormatPolicy,
586) -> Result<Vec<u8>, ParseError> {
587 let roots = collect_roots(schema);
588 if roots.is_empty() {
589 return Err(ParseError::EmitFailed {
590 protocol: protocol.to_owned(),
591 reason: "schema has no entry vertices".to_owned(),
592 });
593 }
594
595 let mut out = Output::new(policy);
596 for (i, root) in roots.iter().enumerate() {
597 if i > 0 {
598 out.newline();
599 }
600 emit_vertex(protocol, schema, grammar, root, &mut out)?;
601 }
602 Ok(out.finish())
603}
604
605fn collect_roots(schema: &Schema) -> Vec<&panproto_gat::Name> {
606 if !schema.entries.is_empty() {
607 return schema
608 .entries
609 .iter()
610 .filter(|name| schema.vertices.contains_key(*name))
611 .collect();
612 }
613
614 let mut targets: std::collections::HashSet<&panproto_gat::Name> =
617 std::collections::HashSet::new();
618 for edge in schema.edges.keys() {
619 targets.insert(&edge.tgt);
620 }
621 let mut roots: Vec<&panproto_gat::Name> = schema
622 .vertices
623 .keys()
624 .filter(|name| !targets.contains(name))
625 .collect();
626 roots.sort();
627 roots
628}
629
630fn emit_vertex(
631 protocol: &str,
632 schema: &Schema,
633 grammar: &Grammar,
634 vertex_id: &panproto_gat::Name,
635 out: &mut Output<'_>,
636) -> Result<(), ParseError> {
637 let vertex = schema
638 .vertices
639 .get(vertex_id)
640 .ok_or_else(|| ParseError::EmitFailed {
641 protocol: protocol.to_owned(),
642 reason: format!("vertex '{vertex_id}' not found"),
643 })?;
644
645 if let Some(literal) = literal_value(schema, vertex_id) {
651 if children_for(schema, vertex_id).is_empty() {
652 out.token(literal);
653 return Ok(());
654 }
655 }
656
657 let kind = vertex.kind.as_ref();
658 let edges = children_for(schema, vertex_id);
659 if let Some(rule) = grammar.rules.get(kind) {
660 let mut cursor = ChildCursor::new(&edges);
661 return emit_production(protocol, schema, grammar, vertex_id, rule, &mut cursor, out);
662 }
663
664 for edge in &edges {
671 emit_vertex(protocol, schema, grammar, &edge.tgt, out)?;
672 }
673 Ok(())
674}
675
676struct ChildCursor<'a> {
679 edges: &'a [&'a Edge],
680 consumed: Vec<bool>,
681}
682
683impl<'a> ChildCursor<'a> {
684 fn new(edges: &'a [&'a Edge]) -> Self {
685 Self {
686 edges,
687 consumed: vec![false; edges.len()],
688 }
689 }
690
691 fn take_field(&mut self, field_name: &str) -> Option<&'a Edge> {
693 for (i, edge) in self.edges.iter().enumerate() {
694 if !self.consumed[i] && edge.kind.as_ref() == field_name {
695 self.consumed[i] = true;
696 return Some(edge);
697 }
698 }
699 None
700 }
701
702 fn take_matching(&mut self, predicate: impl Fn(&Edge) -> bool) -> Option<&'a Edge> {
706 for (i, edge) in self.edges.iter().enumerate() {
707 if !self.consumed[i] && predicate(edge) {
708 self.consumed[i] = true;
709 return Some(edge);
710 }
711 }
712 None
713 }
714
715 fn has_matching(&self, predicate: impl Fn(&Edge) -> bool) -> bool {
717 self.edges
718 .iter()
719 .enumerate()
720 .any(|(i, edge)| !self.consumed[i] && predicate(edge))
721 }
722}
723
724thread_local! {
725 static EMIT_DEPTH: std::cell::Cell<usize> = const { std::cell::Cell::new(0) };
726 static EMIT_MU_FRAMES: std::cell::RefCell<std::collections::HashSet<(String, String)>> =
736 std::cell::RefCell::new(std::collections::HashSet::new());
737}
738
739fn walk_in_mu_frame(
744 protocol: &str,
745 schema: &Schema,
746 grammar: &Grammar,
747 vertex_id: &panproto_gat::Name,
748 rule_name: &str,
749 rule: &Production,
750 cursor: &mut ChildCursor<'_>,
751 out: &mut Output<'_>,
752) -> Result<(), ParseError> {
753 let key = (vertex_id.to_string(), rule_name.to_owned());
754 let inserted = EMIT_MU_FRAMES.with(|frames| frames.borrow_mut().insert(key.clone()));
755 if !inserted {
756 return Ok(());
761 }
762 let result = emit_production(protocol, schema, grammar, vertex_id, rule, cursor, out);
763 EMIT_MU_FRAMES.with(|frames| {
764 frames.borrow_mut().remove(&key);
765 });
766 result
767}
768
769fn emit_production(
770 protocol: &str,
771 schema: &Schema,
772 grammar: &Grammar,
773 vertex_id: &panproto_gat::Name,
774 production: &Production,
775 cursor: &mut ChildCursor<'_>,
776 out: &mut Output<'_>,
777) -> Result<(), ParseError> {
778 let depth = EMIT_DEPTH.with(|d| {
779 let v = d.get() + 1;
780 d.set(v);
781 v
782 });
783 if depth > 500 {
784 EMIT_DEPTH.with(|d| d.set(d.get() - 1));
785 return Err(ParseError::EmitFailed {
786 protocol: protocol.to_owned(),
787 reason: format!(
788 "emit_production recursion >500 (likely a cyclic grammar; \
789 vertex='{vertex_id}')"
790 ),
791 });
792 }
793 let result = emit_production_inner(
794 protocol, schema, grammar, vertex_id, production, cursor, out,
795 );
796 EMIT_DEPTH.with(|d| d.set(d.get() - 1));
797 result
798}
799
800fn emit_production_inner(
801 protocol: &str,
802 schema: &Schema,
803 grammar: &Grammar,
804 vertex_id: &panproto_gat::Name,
805 production: &Production,
806 cursor: &mut ChildCursor<'_>,
807 out: &mut Output<'_>,
808) -> Result<(), ParseError> {
809 match production {
810 Production::String { value } => {
811 out.token(value);
812 Ok(())
813 }
814 Production::Pattern { value } => {
815 if let Some(literal) = literal_value(schema, vertex_id) {
816 out.token(literal);
817 } else {
818 out.token(&placeholder_for_pattern(value));
819 }
820 Ok(())
821 }
822 Production::Blank => Ok(()),
823 Production::Symbol { name } => {
824 if name.starts_with('_') {
825 if let Some(rule) = grammar.rules.get(name) {
839 walk_in_mu_frame(
840 protocol, schema, grammar, vertex_id, name, rule, cursor, out,
841 )
842 } else {
843 if name.contains("line_ending")
853 || name.contains("newline")
854 || name.ends_with("_or_eof")
855 {
856 out.newline();
857 }
858 Ok(())
859 }
860 } else if let Some(edge) = take_symbol_match(grammar, schema, cursor, name) {
861 emit_vertex(protocol, schema, grammar, &edge.tgt, out)
872 } else if vertex_id_kind(schema, vertex_id) == Some(name.as_str()) {
873 let rule = grammar
874 .rules
875 .get(name)
876 .ok_or_else(|| ParseError::EmitFailed {
877 protocol: protocol.to_owned(),
878 reason: format!("no production for SYMBOL '{name}'"),
879 })?;
880 walk_in_mu_frame(
883 protocol, schema, grammar, vertex_id, name, rule, cursor, out,
884 )
885 } else {
886 Ok(())
890 }
891 }
892 Production::Seq { members } => {
893 for member in members {
894 emit_production(protocol, schema, grammar, vertex_id, member, cursor, out)?;
895 }
896 Ok(())
897 }
898 Production::Choice { members } => {
899 if let Some(matched) =
900 pick_choice_with_cursor(schema, grammar, vertex_id, cursor, members)
901 {
902 emit_production(protocol, schema, grammar, vertex_id, matched, cursor, out)
903 } else {
904 Ok(())
905 }
906 }
907 Production::Repeat { content } | Production::Repeat1 { content } => {
908 let mut emitted_any = false;
909 loop {
910 let cursor_snap = cursor.consumed.clone();
911 let out_snap = out.snapshot();
912 let consumed_before = cursor.consumed.iter().filter(|&&c| c).count();
913 let result =
914 emit_production(protocol, schema, grammar, vertex_id, content, cursor, out);
915 let consumed_after = cursor.consumed.iter().filter(|&&c| c).count();
916 if result.is_err() || consumed_after == consumed_before {
917 cursor.consumed = cursor_snap;
918 out.restore(out_snap);
919 break;
920 }
921 emitted_any = true;
922 }
923 if matches!(production, Production::Repeat1 { .. }) && !emitted_any {
924 emit_production(protocol, schema, grammar, vertex_id, content, cursor, out)?;
925 }
926 Ok(())
927 }
928 Production::Optional { content } => {
929 let cursor_snap = cursor.consumed.clone();
930 let out_snap = out.snapshot();
931 let consumed_before = cursor.consumed.iter().filter(|&&c| c).count();
932 let result =
933 emit_production(protocol, schema, grammar, vertex_id, content, cursor, out);
934 if result.is_err() {
939 cursor.consumed = cursor_snap;
940 out.restore(out_snap);
941 return result;
942 }
943 let consumed_after = cursor.consumed.iter().filter(|&&c| c).count();
944 if consumed_after == consumed_before
945 && !has_relevant_constraint(content, schema, vertex_id)
946 {
947 cursor.consumed = cursor_snap;
948 out.restore(out_snap);
949 }
950 Ok(())
951 }
952 Production::Field { name, content } => {
953 if let Some(edge) = cursor.take_field(name) {
954 emit_in_child_context(protocol, schema, grammar, &edge.tgt, content, out)
955 } else if first_symbol(content).is_none() {
956 emit_production(protocol, schema, grammar, vertex_id, content, cursor, out)
964 } else {
965 Ok(())
966 }
967 }
968 Production::Alias {
969 content,
970 named,
971 value,
972 } => {
973 if *named && !value.is_empty() {
988 if let Some(edge) = cursor.take_matching(|edge| {
989 schema
990 .vertices
991 .get(&edge.tgt)
992 .map(|v| v.kind.as_ref() == value.as_str())
993 .unwrap_or(false)
994 }) {
995 return emit_aliased_child(protocol, schema, grammar, &edge.tgt, content, out);
996 }
997 }
998 emit_production(protocol, schema, grammar, vertex_id, content, cursor, out)
999 }
1000 Production::Token { content }
1001 | Production::ImmediateToken { content }
1002 | Production::Prec { content, .. }
1003 | Production::PrecLeft { content, .. }
1004 | Production::PrecRight { content, .. }
1005 | Production::PrecDynamic { content, .. }
1006 | Production::Reserved { content, .. } => {
1007 emit_production(protocol, schema, grammar, vertex_id, content, cursor, out)
1008 }
1009 }
1010}
1011
1012fn take_symbol_match<'a>(
1015 grammar: &Grammar,
1016 schema: &Schema,
1017 cursor: &mut ChildCursor<'a>,
1018 name: &str,
1019) -> Option<&'a Edge> {
1020 cursor.take_matching(|edge| {
1021 let target_kind = schema.vertices.get(&edge.tgt).map(|v| v.kind.as_ref());
1022 kind_satisfies_symbol(grammar, target_kind, name)
1023 })
1024}
1025
1026fn kind_satisfies_symbol(grammar: &Grammar, target_kind: Option<&str>, name: &str) -> bool {
1036 let Some(target) = target_kind else {
1037 return false;
1038 };
1039 if target == name {
1040 return true;
1041 }
1042 grammar
1043 .subtypes
1044 .get(target)
1045 .is_some_and(|set| set.contains(name))
1046}
1047
1048fn emit_aliased_child(
1082 protocol: &str,
1083 schema: &Schema,
1084 grammar: &Grammar,
1085 child_id: &panproto_gat::Name,
1086 content: &Production,
1087 out: &mut Output<'_>,
1088) -> Result<(), ParseError> {
1089 if let Some(literal) = literal_value(schema, child_id) {
1094 if children_for(schema, child_id).is_empty() {
1095 out.token(literal);
1096 return Ok(());
1097 }
1098 }
1099
1100 if let Production::Symbol { name } = content {
1103 if let Some(rule) = grammar.rules.get(name) {
1104 let edges = children_for(schema, child_id);
1105 let mut cursor = ChildCursor::new(&edges);
1106 return emit_production(protocol, schema, grammar, child_id, rule, &mut cursor, out);
1107 }
1108 }
1109
1110 let edges = children_for(schema, child_id);
1112 let mut cursor = ChildCursor::new(&edges);
1113 emit_production(
1114 protocol,
1115 schema,
1116 grammar,
1117 child_id,
1118 content,
1119 &mut cursor,
1120 out,
1121 )
1122}
1123
1124fn emit_in_child_context(
1125 protocol: &str,
1126 schema: &Schema,
1127 grammar: &Grammar,
1128 child_id: &panproto_gat::Name,
1129 production: &Production,
1130 out: &mut Output<'_>,
1131) -> Result<(), ParseError> {
1132 if !matches!(production, Production::Symbol { .. }) {
1141 let child_kind = schema.vertices.get(child_id).map(|v| v.kind.as_ref());
1142 let symbols = referenced_symbols(production);
1143 if symbols
1144 .iter()
1145 .any(|s| kind_satisfies_symbol(grammar, child_kind, s) || child_kind == Some(s))
1146 {
1147 return emit_vertex(protocol, schema, grammar, child_id, out);
1148 }
1149 }
1150 match production {
1151 Production::Symbol { .. } => emit_vertex(protocol, schema, grammar, child_id, out),
1152 _ => {
1153 let edges = children_for(schema, child_id);
1154 let mut cursor = ChildCursor::new(&edges);
1155 emit_production(
1156 protocol,
1157 schema,
1158 grammar,
1159 child_id,
1160 production,
1161 &mut cursor,
1162 out,
1163 )
1164 }
1165 }
1166}
1167
1168fn pick_choice_with_cursor<'a>(
1169 schema: &Schema,
1170 grammar: &Grammar,
1171 vertex_id: &panproto_gat::Name,
1172 cursor: &ChildCursor<'_>,
1173 alternatives: &'a [Production],
1174) -> Option<&'a Production> {
1175 let constraint_blob = schema
1188 .constraints
1189 .get(vertex_id)
1190 .map(|cs| {
1191 let fingerprint: Option<&str> = cs
1192 .iter()
1193 .find(|c| c.sort.as_ref() == "chose-alt-fingerprint")
1194 .map(|c| c.value.as_str());
1195 if let Some(fp) = fingerprint {
1196 fp.to_owned()
1197 } else {
1198 cs.iter()
1199 .filter(|c| {
1200 let s = c.sort.as_ref();
1201 s.starts_with("interstitial-") && !s.ends_with("-start-byte")
1202 })
1203 .map(|c| c.value.as_str())
1204 .collect::<Vec<&str>>()
1205 .join(" ")
1206 }
1207 })
1208 .unwrap_or_default();
1209 let child_kinds: Vec<&str> = schema
1210 .constraints
1211 .get(vertex_id)
1212 .and_then(|cs| {
1213 cs.iter()
1214 .find(|c| c.sort.as_ref() == "chose-alt-child-kinds")
1215 .map(|c| c.value.split_whitespace().collect())
1216 })
1217 .unwrap_or_default();
1218 if !constraint_blob.is_empty() {
1219 let mut best_literal: usize = 0;
1230 let mut best_symbols: usize = 0;
1231 let mut best_alt: Option<&Production> = None;
1232 let mut tied = false;
1233 for alt in alternatives {
1234 let strings = literal_strings(alt);
1235 if strings.is_empty() {
1236 continue;
1237 }
1238 let literal_score = strings
1239 .iter()
1240 .filter(|s| constraint_blob.contains(s.as_str()))
1241 .map(String::len)
1242 .sum::<usize>();
1243 if literal_score == 0 {
1244 continue;
1245 }
1246 let symbol_score = if literal_score >= best_literal && !child_kinds.is_empty() {
1253 let symbols = referenced_symbols(alt);
1254 symbols
1255 .iter()
1256 .filter(|sym| {
1257 let sym_str: &str = sym;
1258 if child_kinds.contains(&sym_str) {
1259 return true;
1260 }
1261 grammar.subtypes.get(sym_str).is_some_and(|sub_set| {
1262 sub_set
1263 .iter()
1264 .any(|sub| child_kinds.contains(&sub.as_str()))
1265 })
1266 })
1267 .count()
1268 } else {
1269 0
1270 };
1271 let better = literal_score > best_literal
1272 || (literal_score == best_literal && symbol_score > best_symbols);
1273 let same = literal_score == best_literal && symbol_score == best_symbols;
1274 if better {
1275 best_literal = literal_score;
1276 best_symbols = symbol_score;
1277 best_alt = Some(alt);
1278 tied = false;
1279 } else if same && best_alt.is_some() {
1280 tied = true;
1281 }
1282 }
1283 if let Some(alt) = best_alt {
1290 if !tied {
1291 return Some(alt);
1292 }
1293 }
1294 }
1295
1296 for alt in alternatives {
1304 let symbols = referenced_symbols(alt);
1305 if !symbols.is_empty()
1306 && cursor.has_matching(|edge| {
1307 let tk = schema.vertices.get(&edge.tgt).map(|v| v.kind.as_ref());
1308 symbols
1309 .iter()
1310 .any(|s| kind_satisfies_symbol(grammar, tk, s))
1311 })
1312 {
1313 return Some(alt);
1314 }
1315 }
1316
1317 let edge_kinds: Vec<&str> = cursor
1320 .edges
1321 .iter()
1322 .enumerate()
1323 .filter(|(i, _)| !cursor.consumed[*i])
1324 .map(|(_, e)| e.kind.as_ref())
1325 .collect();
1326 for alt in alternatives {
1327 if has_field_in(alt, &edge_kinds) {
1328 return Some(alt);
1329 }
1330 }
1331
1332 let _ = (schema, vertex_id);
1345 if alternatives.iter().any(|a| matches!(a, Production::Blank)) {
1346 return alternatives.iter().find(|a| matches!(a, Production::Blank));
1347 }
1348 alternatives
1349 .iter()
1350 .find(|alt| !matches!(alt, Production::Blank))
1351}
1352
1353fn literal_strings(production: &Production) -> Vec<String> {
1359 let mut out = Vec::new();
1360 fn walk(p: &Production, out: &mut Vec<String>) {
1361 match p {
1362 Production::String { value } if !value.is_empty() => {
1363 out.push(value.clone());
1364 }
1365 Production::Choice { members } | Production::Seq { members } => {
1366 for m in members {
1367 walk(m, out);
1368 }
1369 }
1370 Production::Repeat { content }
1371 | Production::Repeat1 { content }
1372 | Production::Optional { content }
1373 | Production::Field { content, .. }
1374 | Production::Alias { content, .. }
1375 | Production::Token { content }
1376 | Production::ImmediateToken { content }
1377 | Production::Prec { content, .. }
1378 | Production::PrecLeft { content, .. }
1379 | Production::PrecRight { content, .. }
1380 | Production::PrecDynamic { content, .. }
1381 | Production::Reserved { content, .. } => walk(content, out),
1382 _ => {}
1383 }
1384 }
1385 walk(production, &mut out);
1386 out
1387}
1388
1389fn referenced_symbols(production: &Production) -> Vec<&str> {
1396 let mut out = Vec::new();
1397 fn walk<'a>(p: &'a Production, out: &mut Vec<&'a str>) {
1398 match p {
1399 Production::Symbol { name } => out.push(name.as_str()),
1400 Production::Choice { members } | Production::Seq { members } => {
1401 for m in members {
1402 walk(m, out);
1403 }
1404 }
1405 Production::Repeat { content }
1406 | Production::Repeat1 { content }
1407 | Production::Optional { content }
1408 | Production::Field { content, .. }
1409 | Production::Alias { content, .. }
1410 | Production::Token { content }
1411 | Production::ImmediateToken { content }
1412 | Production::Prec { content, .. }
1413 | Production::PrecLeft { content, .. }
1414 | Production::PrecRight { content, .. }
1415 | Production::PrecDynamic { content, .. }
1416 | Production::Reserved { content, .. } => walk(content, out),
1417 _ => {}
1418 }
1419 }
1420 walk(production, &mut out);
1421 out
1422}
1423
1424fn first_symbol(production: &Production) -> Option<&str> {
1425 match production {
1426 Production::Symbol { name } => Some(name),
1427 Production::Seq { members } => members.iter().find_map(first_symbol),
1428 Production::Choice { members } => members.iter().find_map(first_symbol),
1429 Production::Repeat { content }
1430 | Production::Repeat1 { content }
1431 | Production::Optional { content }
1432 | Production::Field { content, .. }
1433 | Production::Alias { content, .. }
1434 | Production::Token { content }
1435 | Production::ImmediateToken { content }
1436 | Production::Prec { content, .. }
1437 | Production::PrecLeft { content, .. }
1438 | Production::PrecRight { content, .. }
1439 | Production::PrecDynamic { content, .. }
1440 | Production::Reserved { content, .. } => first_symbol(content),
1441 _ => None,
1442 }
1443}
1444
1445fn has_field_in(production: &Production, edge_kinds: &[&str]) -> bool {
1446 match production {
1447 Production::Field { name, .. } => edge_kinds.contains(&name.as_str()),
1448 Production::Seq { members } | Production::Choice { members } => {
1449 members.iter().any(|m| has_field_in(m, edge_kinds))
1450 }
1451 Production::Repeat { content }
1452 | Production::Repeat1 { content }
1453 | Production::Optional { content }
1454 | Production::Alias { content, .. }
1455 | Production::Token { content }
1456 | Production::ImmediateToken { content }
1457 | Production::Prec { content, .. }
1458 | Production::PrecLeft { content, .. }
1459 | Production::PrecRight { content, .. }
1460 | Production::PrecDynamic { content, .. }
1461 | Production::Reserved { content, .. } => has_field_in(content, edge_kinds),
1462 _ => false,
1463 }
1464}
1465
1466fn has_relevant_constraint(
1467 production: &Production,
1468 schema: &Schema,
1469 vertex_id: &panproto_gat::Name,
1470) -> bool {
1471 let constraints = match schema.constraints.get(vertex_id) {
1472 Some(c) => c,
1473 None => return false,
1474 };
1475 fn walk(production: &Production, constraints: &[panproto_schema::Constraint]) -> bool {
1476 match production {
1477 Production::String { value } => constraints
1478 .iter()
1479 .any(|c| c.value == *value || c.sort.as_ref() == value),
1480 Production::Field { name, content } => {
1481 constraints.iter().any(|c| c.sort.as_ref() == name) || walk(content, constraints)
1482 }
1483 Production::Seq { members } | Production::Choice { members } => {
1484 members.iter().any(|m| walk(m, constraints))
1485 }
1486 Production::Repeat { content }
1487 | Production::Repeat1 { content }
1488 | Production::Optional { content }
1489 | Production::Alias { content, .. }
1490 | Production::Token { content }
1491 | Production::ImmediateToken { content }
1492 | Production::Prec { content, .. }
1493 | Production::PrecLeft { content, .. }
1494 | Production::PrecRight { content, .. }
1495 | Production::PrecDynamic { content, .. }
1496 | Production::Reserved { content, .. } => walk(content, constraints),
1497 _ => false,
1498 }
1499 }
1500 walk(production, constraints)
1501}
1502
1503fn children_for<'a>(schema: &'a Schema, vertex_id: &panproto_gat::Name) -> Vec<&'a Edge> {
1504 let mut edges: Vec<&Edge> = schema
1505 .edges
1506 .keys()
1507 .filter(|e| &e.src == vertex_id)
1508 .collect();
1509 edges.sort_by_key(|e| {
1510 let pos = schema.orderings.get(*e).copied().unwrap_or(u32::MAX);
1514 (pos, e.kind.clone(), e.tgt.clone())
1515 });
1516 edges
1517}
1518
1519fn vertex_id_kind<'a>(schema: &'a Schema, vertex_id: &panproto_gat::Name) -> Option<&'a str> {
1520 schema.vertices.get(vertex_id).map(|v| v.kind.as_ref())
1521}
1522
1523fn literal_value<'a>(schema: &'a Schema, vertex_id: &panproto_gat::Name) -> Option<&'a str> {
1524 schema
1525 .constraints
1526 .get(vertex_id)?
1527 .iter()
1528 .find(|c| c.sort.as_ref() == "literal-value")
1529 .map(|c| c.value.as_str())
1530}
1531
1532fn placeholder_for_pattern(pattern: &str) -> String {
1533 let simple_lit = decode_simple_pattern_literal(pattern);
1541 if let Some(lit) = simple_lit {
1542 return lit;
1543 }
1544
1545 if pattern.contains("[0-9]") || pattern.contains("\\d") {
1546 "0".into()
1547 } else if pattern.contains("[a-zA-Z_]") || pattern.contains("\\w") {
1548 "_x".into()
1549 } else if pattern.contains('"') || pattern.contains('\'') {
1550 "\"\"".into()
1551 } else {
1552 "_".into()
1553 }
1554}
1555
1556fn decode_simple_pattern_literal(pattern: &str) -> Option<String> {
1561 if pattern
1564 .chars()
1565 .any(|c| matches!(c, '[' | ']' | '(' | ')' | '*' | '+' | '?' | '|' | '{' | '}'))
1566 {
1567 return None;
1568 }
1569 let mut out = String::new();
1570 let mut chars = pattern.chars();
1571 while let Some(c) = chars.next() {
1572 if c == '\\' {
1573 match chars.next() {
1574 Some('n') => out.push('\n'),
1575 Some('r') => out.push('\r'),
1576 Some('t') => out.push('\t'),
1577 Some('\\') => out.push('\\'),
1578 Some('/') => out.push('/'),
1579 Some(other) => out.push(other),
1580 None => return None,
1581 }
1582 } else {
1583 out.push(c);
1584 }
1585 }
1586 Some(out)
1587}
1588
1589#[derive(Clone)]
1601enum Token {
1602 Lit(String),
1604 IndentOpen,
1608 IndentClose,
1610 LineBreak,
1613}
1614
1615struct Output<'a> {
1616 tokens: Vec<Token>,
1617 policy: &'a FormatPolicy,
1618}
1619
1620#[derive(Clone)]
1621struct OutputSnapshot {
1622 tokens_len: usize,
1623}
1624
1625impl<'a> Output<'a> {
1626 fn new(policy: &'a FormatPolicy) -> Self {
1627 Self {
1628 tokens: Vec::new(),
1629 policy,
1630 }
1631 }
1632
1633 fn token(&mut self, value: &str) {
1634 if value.is_empty() {
1635 return;
1636 }
1637
1638 if self.policy.indent_close.iter().any(|t| t == value) {
1639 self.tokens.push(Token::IndentClose);
1640 }
1641
1642 self.tokens.push(Token::Lit(value.to_owned()));
1643
1644 if self.policy.indent_open.iter().any(|t| t == value) {
1645 self.tokens.push(Token::IndentOpen);
1646 self.tokens.push(Token::LineBreak);
1647 } else if self.policy.line_break_after.iter().any(|t| t == value) {
1648 self.tokens.push(Token::LineBreak);
1649 }
1650 }
1651
1652 fn newline(&mut self) {
1653 self.tokens.push(Token::LineBreak);
1654 }
1655
1656 fn snapshot(&self) -> OutputSnapshot {
1657 OutputSnapshot {
1658 tokens_len: self.tokens.len(),
1659 }
1660 }
1661
1662 fn restore(&mut self, snap: OutputSnapshot) {
1663 self.tokens.truncate(snap.tokens_len);
1664 }
1665
1666 fn finish(self) -> Vec<u8> {
1667 layout(&self.tokens, self.policy)
1668 }
1669}
1670
1671fn layout(tokens: &[Token], policy: &FormatPolicy) -> Vec<u8> {
1677 let mut bytes = Vec::new();
1678 let mut indent: usize = 0;
1679 let mut at_line_start = true;
1680 let mut last_lit: Option<&str> = None;
1681
1682 for tok in tokens {
1683 match tok {
1684 Token::IndentOpen => indent += 1,
1685 Token::IndentClose => {
1686 indent = indent.saturating_sub(1);
1687 if !at_line_start {
1688 bytes.push(b'\n');
1689 at_line_start = true;
1690 }
1691 }
1692 Token::LineBreak => {
1693 if !at_line_start {
1694 bytes.push(b'\n');
1695 at_line_start = true;
1696 }
1697 }
1698 Token::Lit(value) => {
1699 if at_line_start {
1700 bytes.extend(std::iter::repeat_n(b' ', indent * policy.indent_width));
1701 } else if let Some(prev) = last_lit {
1702 if needs_space_between(prev, value) {
1703 bytes.push(b' ');
1704 }
1705 }
1706 bytes.extend_from_slice(value.as_bytes());
1707 at_line_start = false;
1708 last_lit = Some(value.as_str());
1709 }
1710 }
1711 }
1712
1713 if !at_line_start {
1714 bytes.push(b'\n');
1715 }
1716 bytes
1717}
1718
1719fn needs_space_between(last: &str, next: &str) -> bool {
1720 if last.is_empty() || next.is_empty() {
1721 return false;
1722 }
1723 if is_punct_open(last) || is_punct_open(next) {
1724 return false;
1725 }
1726 if is_punct_close(next) {
1727 return false;
1728 }
1729 if is_punct_close(last) && is_punct_punctuation(next) {
1730 return false;
1731 }
1732 if last == "." || next == "." {
1733 return false;
1734 }
1735 if last_is_word_like(last) && first_is_word_like(next) {
1736 return true;
1737 }
1738 if last_ends_with_alnum(last) && first_is_alnum_or_underscore(next) {
1739 return true;
1740 }
1741 true
1744}
1745
1746fn is_punct_open(s: &str) -> bool {
1747 matches!(s, "(" | "[" | "{" | "\"" | "'" | "`")
1748}
1749
1750fn is_punct_close(s: &str) -> bool {
1751 matches!(s, ")" | "]" | "}" | "," | ";" | ":" | "\"" | "'" | "`")
1752}
1753
1754fn is_punct_punctuation(s: &str) -> bool {
1755 matches!(s, "," | ";" | ":" | "." | ")" | "]" | "}")
1756}
1757
1758fn last_is_word_like(s: &str) -> bool {
1759 s.chars()
1760 .next_back()
1761 .map(|c| c.is_alphanumeric() || c == '_')
1762 .unwrap_or(false)
1763}
1764
1765fn first_is_word_like(s: &str) -> bool {
1766 s.chars()
1767 .next()
1768 .map(|c| c.is_alphanumeric() || c == '_')
1769 .unwrap_or(false)
1770}
1771
1772fn last_ends_with_alnum(s: &str) -> bool {
1773 s.chars()
1774 .next_back()
1775 .map(char::is_alphanumeric)
1776 .unwrap_or(false)
1777}
1778
1779fn first_is_alnum_or_underscore(s: &str) -> bool {
1780 s.chars()
1781 .next()
1782 .map(|c| c.is_alphanumeric() || c == '_')
1783 .unwrap_or(false)
1784}
1785
1786#[cfg(test)]
1787mod tests {
1788 use super::*;
1789
1790 #[test]
1791 fn parses_simple_grammar_json() {
1792 let bytes = br#"{
1793 "name": "tiny",
1794 "rules": {
1795 "program": {
1796 "type": "SEQ",
1797 "members": [
1798 {"type": "STRING", "value": "hello"},
1799 {"type": "STRING", "value": ";"}
1800 ]
1801 }
1802 }
1803 }"#;
1804 let g = Grammar::from_bytes("tiny", bytes).expect("valid tiny grammar");
1805 assert!(g.rules.contains_key("program"));
1806 }
1807
1808 #[test]
1809 fn output_emits_punctuation_without_leading_space() {
1810 let policy = FormatPolicy::default();
1811 let mut out = Output::new(&policy);
1812 out.token("foo");
1813 out.token("(");
1814 out.token(")");
1815 out.token(";");
1816 let bytes = out.finish();
1817 let s = std::str::from_utf8(&bytes).expect("ascii output");
1818 assert!(s.starts_with("foo();"), "got {s:?}");
1819 }
1820
1821 #[test]
1822 fn grammar_from_bytes_rejects_malformed_input() {
1823 let result = Grammar::from_bytes("malformed", b"not json");
1824 let err = result.expect_err("malformed bytes must yield Err");
1825 let msg = err.to_string();
1826 assert!(
1827 msg.contains("malformed"),
1828 "error message should name the protocol: {msg:?}"
1829 );
1830 }
1831
1832 #[test]
1833 fn output_indents_after_open_brace() {
1834 let policy = FormatPolicy::default();
1835 let mut out = Output::new(&policy);
1836 out.token("fn");
1837 out.token("foo");
1838 out.token("(");
1839 out.token(")");
1840 out.token("{");
1841 out.token("body");
1842 out.token("}");
1843 let bytes = out.finish();
1844 let s = std::str::from_utf8(&bytes).expect("ascii output");
1845 assert!(s.contains("{\n"), "newline after opening brace: {s:?}");
1846 assert!(s.contains("body"), "body inside block: {s:?}");
1847 assert!(s.ends_with("}\n"), "newline after closing brace: {s:?}");
1848 }
1849
1850 #[test]
1851 fn output_no_space_between_word_and_dot() {
1852 let policy = FormatPolicy::default();
1853 let mut out = Output::new(&policy);
1854 out.token("foo");
1855 out.token(".");
1856 out.token("bar");
1857 let bytes = out.finish();
1858 let s = std::str::from_utf8(&bytes).expect("ascii output");
1859 assert!(s.starts_with("foo.bar"), "no space around dot: {s:?}");
1860 }
1861
1862 #[test]
1863 fn output_snapshot_restore_truncates_bytes() {
1864 let policy = FormatPolicy::default();
1865 let mut out = Output::new(&policy);
1866 out.token("keep");
1867 let snap = out.snapshot();
1868 out.token("drop");
1869 out.token("more");
1870 out.restore(snap);
1871 out.token("after");
1872 let bytes = out.finish();
1873 let s = std::str::from_utf8(&bytes).expect("ascii output");
1874 assert!(s.contains("keep"), "kept token survives: {s:?}");
1875 assert!(s.contains("after"), "post-restore token visible: {s:?}");
1876 assert!(!s.contains("drop"), "rolled-back token removed: {s:?}");
1877 assert!(!s.contains("more"), "rolled-back token removed: {s:?}");
1878 }
1879
1880 #[test]
1881 fn child_cursor_take_field_consumes_once() {
1882 let edges_owned: Vec<Edge> = vec![Edge {
1883 src: panproto_gat::Name::from("p"),
1884 tgt: panproto_gat::Name::from("c"),
1885 kind: panproto_gat::Name::from("name"),
1886 name: None,
1887 }];
1888 let edges: Vec<&Edge> = edges_owned.iter().collect();
1889 let mut cursor = ChildCursor::new(&edges);
1890 let first = cursor.take_field("name");
1891 let second = cursor.take_field("name");
1892 assert!(first.is_some(), "first take returns the edge");
1893 assert!(
1894 second.is_none(),
1895 "second take returns None (already consumed)"
1896 );
1897 }
1898
1899 #[test]
1900 fn child_cursor_take_matching_predicate() {
1901 let edges_owned: Vec<Edge> = vec![
1902 Edge {
1903 src: "p".into(),
1904 tgt: "c1".into(),
1905 kind: "child_of".into(),
1906 name: None,
1907 },
1908 Edge {
1909 src: "p".into(),
1910 tgt: "c2".into(),
1911 kind: "key".into(),
1912 name: None,
1913 },
1914 ];
1915 let edges: Vec<&Edge> = edges_owned.iter().collect();
1916 let mut cursor = ChildCursor::new(&edges);
1917 assert!(cursor.has_matching(|e| e.kind.as_ref() == "key"));
1918 let taken = cursor.take_matching(|e| e.kind.as_ref() == "key");
1919 assert!(taken.is_some());
1920 assert!(
1921 !cursor.has_matching(|e| e.kind.as_ref() == "key"),
1922 "consumed edge no longer matches"
1923 );
1924 assert!(
1925 cursor.has_matching(|e| e.kind.as_ref() == "child_of"),
1926 "the other edge is still available"
1927 );
1928 }
1929
1930 #[test]
1931 fn kind_satisfies_symbol_direct_match() {
1932 let bytes = br#"{
1933 "name": "tiny",
1934 "rules": {
1935 "x": {"type": "STRING", "value": "x"}
1936 }
1937 }"#;
1938 let g = Grammar::from_bytes("tiny", bytes).expect("valid grammar");
1939 assert!(kind_satisfies_symbol(&g, Some("x"), "x"));
1940 assert!(!kind_satisfies_symbol(&g, Some("y"), "x"));
1941 assert!(!kind_satisfies_symbol(&g, None, "x"));
1942 }
1943
1944 #[test]
1945 fn kind_satisfies_symbol_through_hidden_rule() {
1946 let bytes = br#"{
1947 "name": "tiny",
1948 "rules": {
1949 "_value": {
1950 "type": "CHOICE",
1951 "members": [
1952 {"type": "SYMBOL", "name": "object"},
1953 {"type": "SYMBOL", "name": "number"}
1954 ]
1955 },
1956 "object": {"type": "STRING", "value": "{}"},
1957 "number": {"type": "PATTERN", "value": "[0-9]+"}
1958 }
1959 }"#;
1960 let g = Grammar::from_bytes("tiny", bytes).expect("valid grammar");
1961 assert!(
1962 kind_satisfies_symbol(&g, Some("number"), "_value"),
1963 "number is reachable from _value via CHOICE"
1964 );
1965 assert!(
1966 kind_satisfies_symbol(&g, Some("object"), "_value"),
1967 "object is reachable from _value via CHOICE"
1968 );
1969 assert!(
1970 !kind_satisfies_symbol(&g, Some("string"), "_value"),
1971 "string is NOT among the alternatives"
1972 );
1973 }
1974
1975 #[test]
1976 fn first_symbol_skips_string_terminals() {
1977 let prod: Production = serde_json::from_str(
1978 r#"{
1979 "type": "SEQ",
1980 "members": [
1981 {"type": "STRING", "value": "{"},
1982 {"type": "SYMBOL", "name": "body"},
1983 {"type": "STRING", "value": "}"}
1984 ]
1985 }"#,
1986 )
1987 .expect("valid SEQ");
1988 assert_eq!(first_symbol(&prod), Some("body"));
1989 }
1990
1991 #[test]
1992 fn placeholder_for_pattern_routes_by_regex_class() {
1993 assert_eq!(placeholder_for_pattern("[0-9]+"), "0");
1994 assert_eq!(placeholder_for_pattern("[a-zA-Z_]\\w*"), "_x");
1995 assert_eq!(placeholder_for_pattern("\"[^\"]*\""), "\"\"");
1996 assert_eq!(placeholder_for_pattern("\\d+\\.\\d+"), "0");
1997 }
1998
1999 #[test]
2000 fn format_policy_default_breaks_after_semicolon() {
2001 let policy = FormatPolicy::default();
2002 assert!(policy.line_break_after.iter().any(|t| t == ";"));
2003 assert!(policy.indent_open.iter().any(|t| t == "{"));
2004 assert!(policy.indent_close.iter().any(|t| t == "}"));
2005 assert_eq!(policy.indent_width, 2);
2006 }
2007
2008 #[test]
2009 fn placeholder_decodes_literal_pattern_separators() {
2010 assert_eq!(placeholder_for_pattern("\\n"), "\n");
2014 assert_eq!(placeholder_for_pattern("\\r\\n"), "\r\n");
2015 assert_eq!(placeholder_for_pattern(";"), ";");
2016 assert_eq!(placeholder_for_pattern("[0-9]+"), "0");
2019 assert_eq!(placeholder_for_pattern("a|b"), "_");
2020 }
2021
2022 #[test]
2023 fn supertypes_decode_from_grammar_json_strings() {
2024 let bytes = br#"{
2026 "name": "tiny",
2027 "supertypes": ["expression"],
2028 "rules": {
2029 "expression": {
2030 "type": "CHOICE",
2031 "members": [
2032 {"type": "SYMBOL", "name": "binary_expression"},
2033 {"type": "SYMBOL", "name": "identifier"}
2034 ]
2035 },
2036 "binary_expression": {"type": "STRING", "value": "x"},
2037 "identifier": {"type": "PATTERN", "value": "[a-z]+"}
2038 }
2039 }"#;
2040 let g = Grammar::from_bytes("tiny", bytes).expect("parse");
2041 assert!(g.supertypes.contains("expression"));
2042 assert!(kind_satisfies_symbol(&g, Some("identifier"), "expression"));
2044 assert!(!kind_satisfies_symbol(&g, Some("string"), "expression"));
2046 }
2047
2048 #[test]
2049 fn supertypes_decode_from_grammar_json_objects() {
2050 let bytes = br#"{
2053 "name": "tiny",
2054 "supertypes": [{"type": "SYMBOL", "name": "stmt"}],
2055 "rules": {
2056 "stmt": {
2057 "type": "CHOICE",
2058 "members": [
2059 {"type": "SYMBOL", "name": "while_stmt"},
2060 {"type": "SYMBOL", "name": "if_stmt"}
2061 ]
2062 },
2063 "while_stmt": {"type": "STRING", "value": "while"},
2064 "if_stmt": {"type": "STRING", "value": "if"}
2065 }
2066 }"#;
2067 let g = Grammar::from_bytes("tiny", bytes).expect("parse");
2068 assert!(g.supertypes.contains("stmt"));
2069 assert!(kind_satisfies_symbol(&g, Some("while_stmt"), "stmt"));
2070 }
2071
2072 #[test]
2073 fn alias_value_matches_kind() {
2074 let bytes = br#"{
2078 "name": "tiny",
2079 "rules": {
2080 "_package_identifier": {
2081 "type": "ALIAS",
2082 "named": true,
2083 "value": "package_identifier",
2084 "content": {"type": "SYMBOL", "name": "identifier"}
2085 },
2086 "identifier": {"type": "PATTERN", "value": "[a-z]+"}
2087 }
2088 }"#;
2089 let g = Grammar::from_bytes("tiny", bytes).expect("parse");
2090 assert!(kind_satisfies_symbol(
2091 &g,
2092 Some("package_identifier"),
2093 "_package_identifier"
2094 ));
2095 }
2096
2097 #[test]
2098 fn referenced_symbols_walks_nested_seq() {
2099 let prod: Production = serde_json::from_str(
2100 r#"{
2101 "type": "SEQ",
2102 "members": [
2103 {"type": "CHOICE", "members": [
2104 {"type": "SYMBOL", "name": "attribute_item"},
2105 {"type": "BLANK"}
2106 ]},
2107 {"type": "SYMBOL", "name": "parameter"},
2108 {"type": "REPEAT", "content": {
2109 "type": "SEQ",
2110 "members": [
2111 {"type": "STRING", "value": ","},
2112 {"type": "SYMBOL", "name": "parameter"}
2113 ]
2114 }}
2115 ]
2116 }"#,
2117 )
2118 .expect("seq");
2119 let symbols = referenced_symbols(&prod);
2120 assert!(symbols.contains(&"attribute_item"));
2121 assert!(symbols.contains(&"parameter"));
2122 }
2123
2124 #[test]
2125 fn literal_strings_collects_choice_members() {
2126 let prod: Production = serde_json::from_str(
2127 r#"{
2128 "type": "CHOICE",
2129 "members": [
2130 {"type": "STRING", "value": "+"},
2131 {"type": "STRING", "value": "-"},
2132 {"type": "STRING", "value": "*"}
2133 ]
2134 }"#,
2135 )
2136 .expect("choice");
2137 let strings = literal_strings(&prod);
2138 assert_eq!(strings, vec!["+", "-", "*"]);
2139 }
2140
2141 #[test]
2147 fn reserved_variant_deserialises() {
2148 let prod: Production = serde_json::from_str(
2149 r#"{
2150 "type": "RESERVED",
2151 "content": {"type": "SYMBOL", "name": "_lowercase_identifier"},
2152 "context_name": "attribute_id"
2153 }"#,
2154 )
2155 .expect("RESERVED parses");
2156 match prod {
2157 Production::Reserved { content, .. } => match *content {
2158 Production::Symbol { name } => assert_eq!(name, "_lowercase_identifier"),
2159 other => panic!("expected inner SYMBOL, got {other:?}"),
2160 },
2161 other => panic!("expected RESERVED, got {other:?}"),
2162 }
2163 }
2164
2165 #[test]
2166 fn reserved_grammar_loads_end_to_end() {
2167 let bytes = br#"{
2168 "name": "tiny_reserved",
2169 "rules": {
2170 "program": {
2171 "type": "RESERVED",
2172 "content": {"type": "SYMBOL", "name": "ident"},
2173 "context_name": "keywords"
2174 },
2175 "ident": {"type": "PATTERN", "value": "[a-z]+"}
2176 }
2177 }"#;
2178 let g = Grammar::from_bytes("tiny_reserved", bytes).expect("RESERVED-using grammar loads");
2179 assert!(g.rules.contains_key("program"));
2180 }
2181
2182 #[test]
2183 fn reserved_walker_helpers_recurse_into_content() {
2184 let prod: Production = serde_json::from_str(
2191 r#"{
2192 "type": "RESERVED",
2193 "content": {
2194 "type": "FIELD",
2195 "name": "lhs",
2196 "content": {"type": "SYMBOL", "name": "expr"}
2197 },
2198 "context_name": "ctx"
2199 }"#,
2200 )
2201 .expect("nested RESERVED parses");
2202 assert_eq!(first_symbol(&prod), Some("expr"));
2203 assert!(has_field_in(&prod, &["lhs"]));
2204 let symbols = referenced_symbols(&prod);
2205 assert!(symbols.contains(&"expr"));
2206 }
2207}