1#![allow(
2 clippy::module_name_repetitions,
3 clippy::too_many_lines,
4 clippy::too_many_arguments,
5 clippy::map_unwrap_or,
6 clippy::option_if_let_else,
7 clippy::elidable_lifetime_names,
8 clippy::items_after_statements,
9 clippy::needless_pass_by_value,
10 clippy::single_match_else,
11 clippy::manual_let_else,
12 clippy::match_same_arms,
13 clippy::missing_const_for_fn,
14 clippy::single_char_pattern,
15 clippy::naive_bytecount,
16 clippy::expect_used,
17 clippy::redundant_pub_crate,
18 clippy::used_underscore_binding,
19 clippy::redundant_field_names,
20 clippy::struct_field_names,
21 clippy::redundant_else,
22 clippy::similar_names
23)]
24
25use std::collections::BTreeMap;
71
72use panproto_schema::{Edge, Schema};
73use serde::Deserialize;
74
75use crate::error::ParseError;
76
77#[derive(Debug, Clone, Deserialize)]
90#[serde(tag = "type")]
91#[non_exhaustive]
92pub enum Production {
93 #[serde(rename = "SEQ")]
95 Seq {
96 members: Vec<Self>,
98 },
99 #[serde(rename = "CHOICE")]
101 Choice {
102 members: Vec<Self>,
105 },
106 #[serde(rename = "REPEAT")]
108 Repeat {
109 content: Box<Self>,
111 },
112 #[serde(rename = "REPEAT1")]
114 Repeat1 {
115 content: Box<Self>,
117 },
118 #[serde(rename = "OPTIONAL")]
124 Optional {
125 content: Box<Self>,
127 },
128 #[serde(rename = "SYMBOL")]
130 Symbol {
131 name: String,
134 },
135 #[serde(rename = "STRING")]
137 String {
138 value: String,
140 },
141 #[serde(rename = "PATTERN")]
147 Pattern {
148 value: String,
150 },
151 #[serde(rename = "BLANK")]
153 Blank,
154 #[serde(rename = "FIELD")]
160 Field {
161 name: String,
163 content: Box<Self>,
165 },
166 #[serde(rename = "ALIAS")]
171 Alias {
172 content: Box<Self>,
174 #[serde(default)]
176 named: bool,
177 #[serde(default)]
179 value: String,
180 },
181 #[serde(rename = "TOKEN")]
186 Token {
187 content: Box<Self>,
189 },
190 #[serde(rename = "IMMEDIATE_TOKEN")]
194 ImmediateToken {
195 content: Box<Self>,
197 },
198 #[serde(rename = "PREC")]
200 Prec {
201 #[allow(dead_code)]
203 value: serde_json::Value,
204 content: Box<Self>,
206 },
207 #[serde(rename = "PREC_LEFT")]
209 PrecLeft {
210 #[allow(dead_code)]
212 value: serde_json::Value,
213 content: Box<Self>,
215 },
216 #[serde(rename = "PREC_RIGHT")]
218 PrecRight {
219 #[allow(dead_code)]
221 value: serde_json::Value,
222 content: Box<Self>,
224 },
225 #[serde(rename = "PREC_DYNAMIC")]
227 PrecDynamic {
228 #[allow(dead_code)]
230 value: serde_json::Value,
231 content: Box<Self>,
233 },
234 #[serde(rename = "RESERVED")]
244 Reserved {
245 content: Box<Self>,
247 #[allow(dead_code)]
249 #[serde(default)]
250 context_name: String,
251 },
252}
253
254#[derive(Debug, Clone, Deserialize)]
259pub struct Grammar {
260 #[allow(dead_code)]
262 pub name: String,
263 pub rules: BTreeMap<String, Production>,
267 #[serde(default, deserialize_with = "deserialize_supertypes")]
275 pub supertypes: std::collections::HashSet<String>,
276 #[serde(skip)]
290 pub subtypes: std::collections::HashMap<String, std::collections::HashSet<String>>,
291}
292
293fn deserialize_supertypes<'de, D>(
294 deserializer: D,
295) -> Result<std::collections::HashSet<String>, D::Error>
296where
297 D: serde::Deserializer<'de>,
298{
299 let entries: Vec<serde_json::Value> = Vec::deserialize(deserializer)?;
300 let mut out = std::collections::HashSet::new();
301 for entry in entries {
302 match entry {
303 serde_json::Value::String(s) => {
304 out.insert(s);
305 }
306 serde_json::Value::Object(map) => {
307 if let Some(serde_json::Value::String(name)) = map.get("name") {
308 out.insert(name.clone());
309 }
310 }
311 _ => {}
312 }
313 }
314 Ok(out)
315}
316
317impl Grammar {
318 pub fn from_bytes(protocol: &str, bytes: &[u8]) -> Result<Self, ParseError> {
338 let mut grammar: Self =
339 serde_json::from_slice(bytes).map_err(|e| ParseError::EmitFailed {
340 protocol: protocol.to_owned(),
341 reason: format!("grammar.json deserialization failed: {e}"),
342 })?;
343 grammar.subtypes = compute_subtype_closure(&grammar);
344 Ok(grammar)
345 }
346}
347
348fn compute_subtype_closure(
351 grammar: &Grammar,
352) -> std::collections::HashMap<String, std::collections::HashSet<String>> {
353 use std::collections::{HashMap, HashSet};
354 let mut subtypes: HashMap<String, HashSet<String>> = HashMap::new();
359 for name in grammar.rules.keys() {
360 subtypes
361 .entry(name.clone())
362 .or_default()
363 .insert(name.clone());
364 }
365
366 fn walk<'g>(
370 grammar: &'g Grammar,
371 production: &'g Production,
372 visited: &mut HashSet<&'g str>,
373 out: &mut HashSet<String>,
374 ) {
375 match production {
376 Production::Symbol { name } => {
377 out.insert(name.clone());
379 let expand = name.starts_with('_') || grammar.supertypes.contains(name.as_str());
382 if expand && visited.insert(name.as_str()) {
383 if let Some(rule) = grammar.rules.get(name) {
384 walk(grammar, rule, visited, out);
385 }
386 }
387 }
388 Production::Choice { members } | Production::Seq { members } => {
389 for m in members {
390 walk(grammar, m, visited, out);
391 }
392 }
393 Production::Alias {
394 content,
395 named,
396 value,
397 } => {
398 if *named && !value.is_empty() {
399 out.insert(value.clone());
400 }
401 walk(grammar, content, visited, out);
402 }
403 Production::Repeat { content }
404 | Production::Repeat1 { content }
405 | Production::Optional { content }
406 | Production::Field { content, .. }
407 | Production::Token { content }
408 | Production::ImmediateToken { content }
409 | Production::Prec { content, .. }
410 | Production::PrecLeft { content, .. }
411 | Production::PrecRight { content, .. }
412 | Production::PrecDynamic { content, .. }
413 | Production::Reserved { content, .. } => {
414 walk(grammar, content, visited, out);
415 }
416 _ => {}
417 }
418 }
419
420 for (name, rule) in &grammar.rules {
421 let expand = name.starts_with('_') || grammar.supertypes.contains(name.as_str());
422 if !expand {
423 continue;
424 }
425 let mut visited: HashSet<&str> = HashSet::new();
426 visited.insert(name.as_str());
427 let mut reachable: HashSet<String> = HashSet::new();
428 walk(grammar, rule, &mut visited, &mut reachable);
429 for kind in &reachable {
430 subtypes
431 .entry(kind.clone())
432 .or_default()
433 .insert(name.clone());
434 }
435 }
436
437 fn collect_aliases<'g>(production: &'g Production, out: &mut Vec<(String, &'g Production)>) {
443 match production {
444 Production::Alias {
445 content,
446 named,
447 value,
448 } => {
449 if *named && !value.is_empty() {
450 out.push((value.clone(), content.as_ref()));
451 }
452 collect_aliases(content, out);
453 }
454 Production::Choice { members } | Production::Seq { members } => {
455 for m in members {
456 collect_aliases(m, out);
457 }
458 }
459 Production::Repeat { content }
460 | Production::Repeat1 { content }
461 | Production::Optional { content }
462 | Production::Field { content, .. }
463 | Production::Token { content }
464 | Production::ImmediateToken { content }
465 | Production::Prec { content, .. }
466 | Production::PrecLeft { content, .. }
467 | Production::PrecRight { content, .. }
468 | Production::PrecDynamic { content, .. }
469 | Production::Reserved { content, .. } => {
470 collect_aliases(content, out);
471 }
472 _ => {}
473 }
474 }
475 let mut aliases: Vec<(String, &Production)> = Vec::new();
476 for rule in grammar.rules.values() {
477 collect_aliases(rule, &mut aliases);
478 }
479 for (alias_value, content) in aliases {
480 let mut visited: HashSet<&str> = HashSet::new();
481 let mut reachable: HashSet<String> = HashSet::new();
482 walk(grammar, content, &mut visited, &mut reachable);
483 subtypes
486 .entry(alias_value.clone())
487 .or_default()
488 .insert(alias_value.clone());
489 for kind in reachable {
490 subtypes
491 .entry(kind)
492 .or_default()
493 .insert(alias_value.clone());
494 }
495 }
496
497 for _ in 0..8 {
501 let snapshot = subtypes.clone();
502 let mut changed = false;
503 for (kind, supers) in &snapshot {
504 let extra: HashSet<String> = supers
505 .iter()
506 .flat_map(|s| snapshot.get(s).cloned().unwrap_or_default())
507 .collect();
508 let entry = subtypes.entry(kind.clone()).or_default();
509 for s in extra {
510 if entry.insert(s) {
511 changed = true;
512 }
513 }
514 }
515 if !changed {
516 break;
517 }
518 }
519
520 subtypes
521}
522
523#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
536pub struct FormatPolicy {
537 pub indent_width: usize,
539 pub separator: String,
543 pub newline: String,
546 pub line_break_after: Vec<String>,
548 pub indent_open: Vec<String>,
550 pub indent_close: Vec<String>,
552}
553
554impl Default for FormatPolicy {
555 fn default() -> Self {
556 Self {
557 indent_width: 2,
558 separator: " ".to_owned(),
559 newline: "\n".to_owned(),
560 line_break_after: vec![";".into(), "{".into(), "}".into()],
561 indent_open: vec!["{".into()],
562 indent_close: vec!["}".into()],
563 }
564 }
565}
566
567pub fn emit_pretty(
591 protocol: &str,
592 schema: &Schema,
593 grammar: &Grammar,
594 policy: &FormatPolicy,
595) -> Result<Vec<u8>, ParseError> {
596 let roots = collect_roots(schema);
597 if roots.is_empty() {
598 return Err(ParseError::EmitFailed {
599 protocol: protocol.to_owned(),
600 reason: "schema has no entry vertices".to_owned(),
601 });
602 }
603
604 let mut out = Output::new(policy);
605 for (i, root) in roots.iter().enumerate() {
606 if i > 0 {
607 out.newline();
608 }
609 emit_vertex(protocol, schema, grammar, root, &mut out)?;
610 }
611 Ok(out.finish())
612}
613
614fn collect_roots(schema: &Schema) -> Vec<&panproto_gat::Name> {
615 if !schema.entries.is_empty() {
616 return schema
617 .entries
618 .iter()
619 .filter(|name| schema.vertices.contains_key(*name))
620 .collect();
621 }
622
623 let mut targets: std::collections::HashSet<&panproto_gat::Name> =
626 std::collections::HashSet::new();
627 for edge in schema.edges.keys() {
628 targets.insert(&edge.tgt);
629 }
630 let mut roots: Vec<&panproto_gat::Name> = schema
631 .vertices
632 .keys()
633 .filter(|name| !targets.contains(name))
634 .collect();
635 roots.sort();
636 roots
637}
638
639fn emit_vertex(
640 protocol: &str,
641 schema: &Schema,
642 grammar: &Grammar,
643 vertex_id: &panproto_gat::Name,
644 out: &mut Output<'_>,
645) -> Result<(), ParseError> {
646 let vertex = schema
647 .vertices
648 .get(vertex_id)
649 .ok_or_else(|| ParseError::EmitFailed {
650 protocol: protocol.to_owned(),
651 reason: format!("vertex '{vertex_id}' not found"),
652 })?;
653
654 if let Some(literal) = literal_value(schema, vertex_id) {
660 if children_for(schema, vertex_id).is_empty() {
661 out.token(literal);
662 return Ok(());
663 }
664 }
665
666 let kind = vertex.kind.as_ref();
667 let edges = children_for(schema, vertex_id);
668 if let Some(rule) = grammar.rules.get(kind) {
669 let mut cursor = ChildCursor::new(&edges);
670 return emit_production(protocol, schema, grammar, vertex_id, rule, &mut cursor, out);
671 }
672
673 for edge in &edges {
680 emit_vertex(protocol, schema, grammar, &edge.tgt, out)?;
681 }
682 Ok(())
683}
684
685struct ChildCursor<'a> {
688 edges: &'a [&'a Edge],
689 consumed: Vec<bool>,
690}
691
692impl<'a> ChildCursor<'a> {
693 fn new(edges: &'a [&'a Edge]) -> Self {
694 Self {
695 edges,
696 consumed: vec![false; edges.len()],
697 }
698 }
699
700 fn take_field(&mut self, field_name: &str) -> Option<&'a Edge> {
702 for (i, edge) in self.edges.iter().enumerate() {
703 if !self.consumed[i] && edge.kind.as_ref() == field_name {
704 self.consumed[i] = true;
705 return Some(edge);
706 }
707 }
708 None
709 }
710
711 #[cfg(test)]
716 fn has_matching(&self, predicate: impl Fn(&Edge) -> bool) -> bool {
717 self.edges
718 .iter()
719 .enumerate()
720 .any(|(i, edge)| !self.consumed[i] && predicate(edge))
721 }
722
723 fn take_matching(&mut self, predicate: impl Fn(&Edge) -> bool) -> Option<&'a Edge> {
727 for (i, edge) in self.edges.iter().enumerate() {
728 if !self.consumed[i] && predicate(edge) {
729 self.consumed[i] = true;
730 return Some(edge);
731 }
732 }
733 None
734 }
735}
736
thread_local! {
    /// Current nesting depth of `emit_production` calls on this thread.
    static EMIT_DEPTH: std::cell::Cell<usize> = const { std::cell::Cell::new(0) };
    /// `(vertex id, rule name)` pairs whose rule body is currently being
    /// walked on this thread; used to stop re-entering the same expansion.
    static EMIT_MU_FRAMES: std::cell::RefCell<std::collections::HashSet<(String, String)>> =
        std::cell::RefCell::default();
}
751
752fn walk_in_mu_frame(
757 protocol: &str,
758 schema: &Schema,
759 grammar: &Grammar,
760 vertex_id: &panproto_gat::Name,
761 rule_name: &str,
762 rule: &Production,
763 cursor: &mut ChildCursor<'_>,
764 out: &mut Output<'_>,
765) -> Result<(), ParseError> {
766 let key = (vertex_id.to_string(), rule_name.to_owned());
767 let inserted = EMIT_MU_FRAMES.with(|frames| frames.borrow_mut().insert(key.clone()));
768 if !inserted {
769 return Ok(());
774 }
775 let result = emit_production(protocol, schema, grammar, vertex_id, rule, cursor, out);
776 EMIT_MU_FRAMES.with(|frames| {
777 frames.borrow_mut().remove(&key);
778 });
779 result
780}
781
782fn emit_production(
783 protocol: &str,
784 schema: &Schema,
785 grammar: &Grammar,
786 vertex_id: &panproto_gat::Name,
787 production: &Production,
788 cursor: &mut ChildCursor<'_>,
789 out: &mut Output<'_>,
790) -> Result<(), ParseError> {
791 let depth = EMIT_DEPTH.with(|d| {
792 let v = d.get() + 1;
793 d.set(v);
794 v
795 });
796 if depth > 500 {
797 EMIT_DEPTH.with(|d| d.set(d.get() - 1));
798 return Err(ParseError::EmitFailed {
799 protocol: protocol.to_owned(),
800 reason: format!(
801 "emit_production recursion >500 (likely a cyclic grammar; \
802 vertex='{vertex_id}')"
803 ),
804 });
805 }
806 let result = emit_production_inner(
807 protocol, schema, grammar, vertex_id, production, cursor, out,
808 );
809 EMIT_DEPTH.with(|d| d.set(d.get() - 1));
810 result
811}
812
813fn emit_production_inner(
814 protocol: &str,
815 schema: &Schema,
816 grammar: &Grammar,
817 vertex_id: &panproto_gat::Name,
818 production: &Production,
819 cursor: &mut ChildCursor<'_>,
820 out: &mut Output<'_>,
821) -> Result<(), ParseError> {
822 match production {
823 Production::String { value } => {
824 out.token(value);
825 Ok(())
826 }
827 Production::Pattern { value } => {
828 if let Some(literal) = literal_value(schema, vertex_id) {
829 out.token(literal);
830 } else {
831 out.token(&placeholder_for_pattern(value));
832 }
833 Ok(())
834 }
835 Production::Blank => Ok(()),
836 Production::Symbol { name } => {
837 if name.starts_with('_') {
838 if let Some(rule) = grammar.rules.get(name) {
852 walk_in_mu_frame(
853 protocol, schema, grammar, vertex_id, name, rule, cursor, out,
854 )
855 } else {
856 if name.contains("line_ending")
866 || name.contains("newline")
867 || name.ends_with("_or_eof")
868 {
869 out.newline();
870 }
871 Ok(())
872 }
873 } else if let Some(edge) = take_symbol_match(grammar, schema, cursor, name) {
874 emit_vertex(protocol, schema, grammar, &edge.tgt, out)
885 } else if vertex_id_kind(schema, vertex_id) == Some(name.as_str()) {
886 let rule = grammar
887 .rules
888 .get(name)
889 .ok_or_else(|| ParseError::EmitFailed {
890 protocol: protocol.to_owned(),
891 reason: format!("no production for SYMBOL '{name}'"),
892 })?;
893 walk_in_mu_frame(
896 protocol, schema, grammar, vertex_id, name, rule, cursor, out,
897 )
898 } else {
899 Ok(())
903 }
904 }
905 Production::Seq { members } => {
906 for member in members {
907 emit_production(protocol, schema, grammar, vertex_id, member, cursor, out)?;
908 }
909 Ok(())
910 }
911 Production::Choice { members } => {
912 if let Some(matched) =
913 pick_choice_with_cursor(schema, grammar, vertex_id, cursor, members)
914 {
915 emit_production(protocol, schema, grammar, vertex_id, matched, cursor, out)
916 } else {
917 Ok(())
918 }
919 }
920 Production::Repeat { content } | Production::Repeat1 { content } => {
921 let mut emitted_any = false;
922 loop {
923 let cursor_snap = cursor.consumed.clone();
924 let out_snap = out.snapshot();
925 let consumed_before = cursor.consumed.iter().filter(|&&c| c).count();
926 let result =
927 emit_production(protocol, schema, grammar, vertex_id, content, cursor, out);
928 let consumed_after = cursor.consumed.iter().filter(|&&c| c).count();
929 if result.is_err() || consumed_after == consumed_before {
930 cursor.consumed = cursor_snap;
931 out.restore(out_snap);
932 break;
933 }
934 emitted_any = true;
935 }
936 if matches!(production, Production::Repeat1 { .. }) && !emitted_any {
937 emit_production(protocol, schema, grammar, vertex_id, content, cursor, out)?;
938 }
939 Ok(())
940 }
941 Production::Optional { content } => {
942 let cursor_snap = cursor.consumed.clone();
943 let out_snap = out.snapshot();
944 let consumed_before = cursor.consumed.iter().filter(|&&c| c).count();
945 let result =
946 emit_production(protocol, schema, grammar, vertex_id, content, cursor, out);
947 if result.is_err() {
952 cursor.consumed = cursor_snap;
953 out.restore(out_snap);
954 return result;
955 }
956 let consumed_after = cursor.consumed.iter().filter(|&&c| c).count();
957 if consumed_after == consumed_before
958 && !has_relevant_constraint(content, schema, vertex_id)
959 {
960 cursor.consumed = cursor_snap;
961 out.restore(out_snap);
962 }
963 Ok(())
964 }
965 Production::Field { name, content } => {
966 let (repeat_inner, at_least_one): (Option<&Production>, bool) = match content.as_ref() {
978 Production::Repeat { content: inner } => (Some(inner.as_ref()), false),
979 Production::Repeat1 { content: inner } => (Some(inner.as_ref()), true),
980 _ => (None, false),
981 };
982 if let Some(inner) = repeat_inner {
983 let mut emitted_any = false;
984 while let Some(edge) = cursor.take_field(name) {
985 emit_in_child_context(protocol, schema, grammar, &edge.tgt, inner, out)?;
986 emitted_any = true;
987 }
988 if at_least_one && !emitted_any && first_symbol(inner).is_none() {
993 emit_production(protocol, schema, grammar, vertex_id, inner, cursor, out)?;
994 }
995 Ok(())
996 } else if let Some(edge) = cursor.take_field(name) {
997 emit_in_child_context(protocol, schema, grammar, &edge.tgt, content, out)
998 } else if first_symbol(content).is_none() {
999 emit_production(protocol, schema, grammar, vertex_id, content, cursor, out)
1007 } else {
1008 Ok(())
1009 }
1010 }
1011 Production::Alias {
1012 content,
1013 named,
1014 value,
1015 } => {
1016 if *named && !value.is_empty() {
1031 if let Some(edge) = cursor.take_matching(|edge| {
1032 schema
1033 .vertices
1034 .get(&edge.tgt)
1035 .map(|v| v.kind.as_ref() == value.as_str())
1036 .unwrap_or(false)
1037 }) {
1038 return emit_aliased_child(protocol, schema, grammar, &edge.tgt, content, out);
1039 }
1040 }
1041 emit_production(protocol, schema, grammar, vertex_id, content, cursor, out)
1042 }
1043 Production::Token { content }
1044 | Production::ImmediateToken { content }
1045 | Production::Prec { content, .. }
1046 | Production::PrecLeft { content, .. }
1047 | Production::PrecRight { content, .. }
1048 | Production::PrecDynamic { content, .. }
1049 | Production::Reserved { content, .. } => {
1050 emit_production(protocol, schema, grammar, vertex_id, content, cursor, out)
1051 }
1052 }
1053}
1054
1055fn take_symbol_match<'a>(
1058 grammar: &Grammar,
1059 schema: &Schema,
1060 cursor: &mut ChildCursor<'a>,
1061 name: &str,
1062) -> Option<&'a Edge> {
1063 cursor.take_matching(|edge| {
1064 let target_kind = schema.vertices.get(&edge.tgt).map(|v| v.kind.as_ref());
1065 kind_satisfies_symbol(grammar, target_kind, name)
1066 })
1067}
1068
1069fn kind_satisfies_symbol(grammar: &Grammar, target_kind: Option<&str>, name: &str) -> bool {
1079 let Some(target) = target_kind else {
1080 return false;
1081 };
1082 if target == name {
1083 return true;
1084 }
1085 grammar
1086 .subtypes
1087 .get(target)
1088 .is_some_and(|set| set.contains(name))
1089}
1090
1091fn emit_aliased_child(
1125 protocol: &str,
1126 schema: &Schema,
1127 grammar: &Grammar,
1128 child_id: &panproto_gat::Name,
1129 content: &Production,
1130 out: &mut Output<'_>,
1131) -> Result<(), ParseError> {
1132 if let Some(literal) = literal_value(schema, child_id) {
1137 if children_for(schema, child_id).is_empty() {
1138 out.token(literal);
1139 return Ok(());
1140 }
1141 }
1142
1143 if let Production::Symbol { name } = content {
1146 if let Some(rule) = grammar.rules.get(name) {
1147 let edges = children_for(schema, child_id);
1148 let mut cursor = ChildCursor::new(&edges);
1149 return emit_production(protocol, schema, grammar, child_id, rule, &mut cursor, out);
1150 }
1151 }
1152
1153 let edges = children_for(schema, child_id);
1155 let mut cursor = ChildCursor::new(&edges);
1156 emit_production(
1157 protocol,
1158 schema,
1159 grammar,
1160 child_id,
1161 content,
1162 &mut cursor,
1163 out,
1164 )
1165}
1166
1167fn emit_in_child_context(
1168 protocol: &str,
1169 schema: &Schema,
1170 grammar: &Grammar,
1171 child_id: &panproto_gat::Name,
1172 production: &Production,
1173 out: &mut Output<'_>,
1174) -> Result<(), ParseError> {
1175 if !matches!(production, Production::Symbol { .. }) {
1184 let child_kind = schema.vertices.get(child_id).map(|v| v.kind.as_ref());
1185 let symbols = referenced_symbols(production);
1186 if symbols
1187 .iter()
1188 .any(|s| kind_satisfies_symbol(grammar, child_kind, s) || child_kind == Some(s))
1189 {
1190 return emit_vertex(protocol, schema, grammar, child_id, out);
1191 }
1192 }
1193 match production {
1194 Production::Symbol { .. } => emit_vertex(protocol, schema, grammar, child_id, out),
1195 _ => {
1196 let edges = children_for(schema, child_id);
1197 let mut cursor = ChildCursor::new(&edges);
1198 emit_production(
1199 protocol,
1200 schema,
1201 grammar,
1202 child_id,
1203 production,
1204 &mut cursor,
1205 out,
1206 )
1207 }
1208 }
1209}
1210
1211fn pick_choice_with_cursor<'a>(
1212 schema: &Schema,
1213 grammar: &Grammar,
1214 vertex_id: &panproto_gat::Name,
1215 cursor: &ChildCursor<'_>,
1216 alternatives: &'a [Production],
1217) -> Option<&'a Production> {
1218 let constraint_blob = schema
1231 .constraints
1232 .get(vertex_id)
1233 .map(|cs| {
1234 let fingerprint: Option<&str> = cs
1235 .iter()
1236 .find(|c| c.sort.as_ref() == "chose-alt-fingerprint")
1237 .map(|c| c.value.as_str());
1238 if let Some(fp) = fingerprint {
1239 fp.to_owned()
1240 } else {
1241 cs.iter()
1242 .filter(|c| {
1243 let s = c.sort.as_ref();
1244 s.starts_with("interstitial-") && !s.ends_with("-start-byte")
1245 })
1246 .map(|c| c.value.as_str())
1247 .collect::<Vec<&str>>()
1248 .join(" ")
1249 }
1250 })
1251 .unwrap_or_default();
1252 let child_kinds: Vec<&str> = schema
1253 .constraints
1254 .get(vertex_id)
1255 .and_then(|cs| {
1256 cs.iter()
1257 .find(|c| c.sort.as_ref() == "chose-alt-child-kinds")
1258 .map(|c| c.value.split_whitespace().collect())
1259 })
1260 .unwrap_or_default();
1261 if !constraint_blob.is_empty() {
1262 let mut best_literal: usize = 0;
1273 let mut best_symbols: usize = 0;
1274 let mut best_alt: Option<&Production> = None;
1275 let mut tied = false;
1276 for alt in alternatives {
1277 let strings = literal_strings(alt);
1278 if strings.is_empty() {
1279 continue;
1280 }
1281 let literal_score = strings
1282 .iter()
1283 .filter(|s| constraint_blob.contains(s.as_str()))
1284 .map(String::len)
1285 .sum::<usize>();
1286 if literal_score == 0 {
1287 continue;
1288 }
1289 let symbol_score = if literal_score >= best_literal && !child_kinds.is_empty() {
1296 let symbols = referenced_symbols(alt);
1297 symbols
1298 .iter()
1299 .filter(|sym| {
1300 let sym_str: &str = sym;
1301 if child_kinds.contains(&sym_str) {
1302 return true;
1303 }
1304 grammar.subtypes.get(sym_str).is_some_and(|sub_set| {
1305 sub_set
1306 .iter()
1307 .any(|sub| child_kinds.contains(&sub.as_str()))
1308 })
1309 })
1310 .count()
1311 } else {
1312 0
1313 };
1314 let better = literal_score > best_literal
1315 || (literal_score == best_literal && symbol_score > best_symbols);
1316 let same = literal_score == best_literal && symbol_score == best_symbols;
1317 if better {
1318 best_literal = literal_score;
1319 best_symbols = symbol_score;
1320 best_alt = Some(alt);
1321 tied = false;
1322 } else if same && best_alt.is_some() {
1323 tied = true;
1324 }
1325 }
1326 if let Some(alt) = best_alt {
1333 if !tied {
1334 return Some(alt);
1335 }
1336 }
1337 }
1338
1339 let first_unconsumed_kind: Option<&str> = cursor
1358 .edges
1359 .iter()
1360 .enumerate()
1361 .find(|(i, _)| !cursor.consumed[*i])
1362 .and_then(|(_, edge)| schema.vertices.get(&edge.tgt).map(|v| v.kind.as_ref()));
1363 if let Some(target_kind) = first_unconsumed_kind {
1364 for alt in alternatives {
1365 let symbols = referenced_symbols(alt);
1366 if !symbols.is_empty()
1367 && symbols
1368 .iter()
1369 .any(|s| kind_satisfies_symbol(grammar, Some(target_kind), s))
1370 {
1371 return Some(alt);
1372 }
1373 }
1374 }
1375
1376 let edge_kinds: Vec<&str> = cursor
1379 .edges
1380 .iter()
1381 .enumerate()
1382 .filter(|(i, _)| !cursor.consumed[*i])
1383 .map(|(_, e)| e.kind.as_ref())
1384 .collect();
1385 for alt in alternatives {
1386 if has_field_in(alt, &edge_kinds) {
1387 return Some(alt);
1388 }
1389 }
1390
1391 let _ = (schema, vertex_id);
1404 if alternatives.iter().any(|a| matches!(a, Production::Blank)) {
1405 return alternatives.iter().find(|a| matches!(a, Production::Blank));
1406 }
1407 alternatives
1408 .iter()
1409 .find(|alt| !matches!(alt, Production::Blank))
1410}
1411
1412fn literal_strings(production: &Production) -> Vec<String> {
1418 let mut out = Vec::new();
1419 fn walk(p: &Production, out: &mut Vec<String>) {
1420 match p {
1421 Production::String { value } if !value.is_empty() => {
1422 out.push(value.clone());
1423 }
1424 Production::Choice { members } | Production::Seq { members } => {
1425 for m in members {
1426 walk(m, out);
1427 }
1428 }
1429 Production::Repeat { content }
1430 | Production::Repeat1 { content }
1431 | Production::Optional { content }
1432 | Production::Field { content, .. }
1433 | Production::Alias { content, .. }
1434 | Production::Token { content }
1435 | Production::ImmediateToken { content }
1436 | Production::Prec { content, .. }
1437 | Production::PrecLeft { content, .. }
1438 | Production::PrecRight { content, .. }
1439 | Production::PrecDynamic { content, .. }
1440 | Production::Reserved { content, .. } => walk(content, out),
1441 _ => {}
1442 }
1443 }
1444 walk(production, &mut out);
1445 out
1446}
1447
1448fn referenced_symbols(production: &Production) -> Vec<&str> {
1455 let mut out = Vec::new();
1456 fn walk<'a>(p: &'a Production, out: &mut Vec<&'a str>) {
1457 match p {
1458 Production::Symbol { name } => out.push(name.as_str()),
1459 Production::Choice { members } | Production::Seq { members } => {
1460 for m in members {
1461 walk(m, out);
1462 }
1463 }
1464 Production::Repeat { content }
1465 | Production::Repeat1 { content }
1466 | Production::Optional { content }
1467 | Production::Field { content, .. }
1468 | Production::Alias { content, .. }
1469 | Production::Token { content }
1470 | Production::ImmediateToken { content }
1471 | Production::Prec { content, .. }
1472 | Production::PrecLeft { content, .. }
1473 | Production::PrecRight { content, .. }
1474 | Production::PrecDynamic { content, .. }
1475 | Production::Reserved { content, .. } => walk(content, out),
1476 _ => {}
1477 }
1478 }
1479 walk(production, &mut out);
1480 out
1481}
1482
1483fn first_symbol(production: &Production) -> Option<&str> {
1484 match production {
1485 Production::Symbol { name } => Some(name),
1486 Production::Seq { members } => members.iter().find_map(first_symbol),
1487 Production::Choice { members } => members.iter().find_map(first_symbol),
1488 Production::Repeat { content }
1489 | Production::Repeat1 { content }
1490 | Production::Optional { content }
1491 | Production::Field { content, .. }
1492 | Production::Alias { content, .. }
1493 | Production::Token { content }
1494 | Production::ImmediateToken { content }
1495 | Production::Prec { content, .. }
1496 | Production::PrecLeft { content, .. }
1497 | Production::PrecRight { content, .. }
1498 | Production::PrecDynamic { content, .. }
1499 | Production::Reserved { content, .. } => first_symbol(content),
1500 _ => None,
1501 }
1502}
1503
1504fn has_field_in(production: &Production, edge_kinds: &[&str]) -> bool {
1505 match production {
1506 Production::Field { name, .. } => edge_kinds.contains(&name.as_str()),
1507 Production::Seq { members } | Production::Choice { members } => {
1508 members.iter().any(|m| has_field_in(m, edge_kinds))
1509 }
1510 Production::Repeat { content }
1511 | Production::Repeat1 { content }
1512 | Production::Optional { content }
1513 | Production::Alias { content, .. }
1514 | Production::Token { content }
1515 | Production::ImmediateToken { content }
1516 | Production::Prec { content, .. }
1517 | Production::PrecLeft { content, .. }
1518 | Production::PrecRight { content, .. }
1519 | Production::PrecDynamic { content, .. }
1520 | Production::Reserved { content, .. } => has_field_in(content, edge_kinds),
1521 _ => false,
1522 }
1523}
1524
1525fn has_relevant_constraint(
1526 production: &Production,
1527 schema: &Schema,
1528 vertex_id: &panproto_gat::Name,
1529) -> bool {
1530 let constraints = match schema.constraints.get(vertex_id) {
1531 Some(c) => c,
1532 None => return false,
1533 };
1534 fn walk(production: &Production, constraints: &[panproto_schema::Constraint]) -> bool {
1535 match production {
1536 Production::String { value } => constraints
1537 .iter()
1538 .any(|c| c.value == *value || c.sort.as_ref() == value),
1539 Production::Field { name, content } => {
1540 constraints.iter().any(|c| c.sort.as_ref() == name) || walk(content, constraints)
1541 }
1542 Production::Seq { members } | Production::Choice { members } => {
1543 members.iter().any(|m| walk(m, constraints))
1544 }
1545 Production::Repeat { content }
1546 | Production::Repeat1 { content }
1547 | Production::Optional { content }
1548 | Production::Alias { content, .. }
1549 | Production::Token { content }
1550 | Production::ImmediateToken { content }
1551 | Production::Prec { content, .. }
1552 | Production::PrecLeft { content, .. }
1553 | Production::PrecRight { content, .. }
1554 | Production::PrecDynamic { content, .. }
1555 | Production::Reserved { content, .. } => walk(content, constraints),
1556 _ => false,
1557 }
1558 }
1559 walk(production, constraints)
1560}
1561
1562fn children_for<'a>(schema: &'a Schema, vertex_id: &panproto_gat::Name) -> Vec<&'a Edge> {
1563 let Some(edges) = schema.outgoing.get(vertex_id) else {
1572 return Vec::new();
1573 };
1574
1575 let mut indexed: Vec<(usize, u32, &Edge)> = edges
1579 .iter()
1580 .enumerate()
1581 .map(|(i, e)| {
1582 let canonical = schema.edges.get_key_value(e).map_or(e, |(k, _)| k);
1583 let pos = schema.orderings.get(canonical).copied().unwrap_or(u32::MAX);
1584 (i, pos, canonical)
1585 })
1586 .collect();
1587
1588 indexed.sort_by_key(|(i, pos, _)| (*pos, *i));
1592 indexed.into_iter().map(|(_, _, e)| e).collect()
1593}
1594
1595fn vertex_id_kind<'a>(schema: &'a Schema, vertex_id: &panproto_gat::Name) -> Option<&'a str> {
1596 schema.vertices.get(vertex_id).map(|v| v.kind.as_ref())
1597}
1598
1599fn literal_value<'a>(schema: &'a Schema, vertex_id: &panproto_gat::Name) -> Option<&'a str> {
1600 schema
1601 .constraints
1602 .get(vertex_id)?
1603 .iter()
1604 .find(|c| c.sort.as_ref() == "literal-value")
1605 .map(|c| c.value.as_str())
1606}
1607
1608fn placeholder_for_pattern(pattern: &str) -> String {
1609 let simple_lit = decode_simple_pattern_literal(pattern);
1617 if let Some(lit) = simple_lit {
1618 return lit;
1619 }
1620
1621 if pattern.contains("[0-9]") || pattern.contains("\\d") {
1622 "0".into()
1623 } else if pattern.contains("[a-zA-Z_]") || pattern.contains("\\w") {
1624 "_x".into()
1625 } else if pattern.contains('"') || pattern.contains('\'') {
1626 "\"\"".into()
1627 } else {
1628 "_".into()
1629 }
1630}
1631
/// Decodes a regular expression that can only match one fixed string into
/// that string.
///
/// Returns `None` when the pattern:
/// * contains any of the metacharacters `[ ] ( ) * + ? | { }`;
/// * ends in a dangling backslash; or
/// * contains a backslash followed by an ASCII letter or digit other than
///   `n`, `r`, `t`, `f`, `v`. Such escapes are character classes,
///   assertions, back-references or code-point escapes (`\d`, `\w`, `\s`,
///   `\b`, `\1`, `\x41`, ...), not literal characters, so the pattern is not
///   a fixed string.
///
/// `\n`, `\r`, `\t`, `\f` and `\v` decode to the corresponding control
/// character; a backslash before any other character yields that character.
fn decode_simple_pattern_literal(pattern: &str) -> Option<String> {
    if pattern
        .chars()
        .any(|c| matches!(c, '[' | ']' | '(' | ')' | '*' | '+' | '?' | '|' | '{' | '}'))
    {
        return None;
    }

    let mut literal = String::with_capacity(pattern.len());
    let mut chars = pattern.chars();
    while let Some(c) = chars.next() {
        if c != '\\' {
            literal.push(c);
            continue;
        }
        // A trailing backslash has nothing to escape: not a usable literal.
        match chars.next()? {
            'n' => literal.push('\n'),
            'r' => literal.push('\r'),
            't' => literal.push('\t'),
            'f' => literal.push('\x0C'),
            'v' => literal.push('\x0B'),
            // Remaining alphanumeric escapes denote classes or assertions, so
            // let the caller fall back to its pattern heuristics instead of
            // emitting the bare letter.
            escaped if escaped.is_ascii_alphanumeric() => return None,
            escaped => literal.push(escaped),
        }
    }
    Some(literal)
}
1664
/// One unit of the intermediate stream that `Output` records and `layout`
/// renders to bytes.
#[derive(Clone)]
enum Token {
    /// Literal text; `layout` writes its bytes verbatim.
    Lit(String),
    /// Raises the indentation depth by one for lines started afterwards.
    IndentOpen,
    /// Lowers the indentation depth by one (never below zero) and ends the
    /// current line if it already has content.
    IndentClose,
    /// Ends the current line if it already has content; a break on an empty
    /// line emits nothing.
    LineBreak,
}
1690
/// Token accumulator paired with the formatting policy used to classify and
/// render those tokens.
struct Output<'a> {
    /// Tokens recorded so far, in emission order.
    tokens: Vec<Token>,
    /// Borrowed policy consulted by `token` and passed to `layout` on `finish`.
    policy: &'a FormatPolicy,
}
1695
/// Rollback point for an `Output`, captured by `Output::snapshot` and consumed
/// by `Output::restore`.
#[derive(Clone)]
struct OutputSnapshot {
    /// Number of tokens the `Output` held when the snapshot was taken.
    tokens_len: usize,
}
1700
1701impl<'a> Output<'a> {
1702 fn new(policy: &'a FormatPolicy) -> Self {
1703 Self {
1704 tokens: Vec::new(),
1705 policy,
1706 }
1707 }
1708
1709 fn token(&mut self, value: &str) {
1710 if value.is_empty() {
1711 return;
1712 }
1713
1714 if self.policy.indent_close.iter().any(|t| t == value) {
1715 self.tokens.push(Token::IndentClose);
1716 }
1717
1718 self.tokens.push(Token::Lit(value.to_owned()));
1719
1720 if self.policy.indent_open.iter().any(|t| t == value) {
1721 self.tokens.push(Token::IndentOpen);
1722 self.tokens.push(Token::LineBreak);
1723 } else if self.policy.line_break_after.iter().any(|t| t == value) {
1724 self.tokens.push(Token::LineBreak);
1725 }
1726 }
1727
1728 fn newline(&mut self) {
1729 self.tokens.push(Token::LineBreak);
1730 }
1731
1732 fn snapshot(&self) -> OutputSnapshot {
1733 OutputSnapshot {
1734 tokens_len: self.tokens.len(),
1735 }
1736 }
1737
1738 fn restore(&mut self, snap: OutputSnapshot) {
1739 self.tokens.truncate(snap.tokens_len);
1740 }
1741
1742 fn finish(self) -> Vec<u8> {
1743 layout(&self.tokens, self.policy)
1744 }
1745}
1746
1747fn layout(tokens: &[Token], policy: &FormatPolicy) -> Vec<u8> {
1753 let mut bytes = Vec::new();
1754 let mut indent: usize = 0;
1755 let mut at_line_start = true;
1756 let mut last_lit: Option<&str> = None;
1757 let newline = policy.newline.as_bytes();
1758 let separator = policy.separator.as_bytes();
1759
1760 for tok in tokens {
1761 match tok {
1762 Token::IndentOpen => indent += 1,
1763 Token::IndentClose => {
1764 indent = indent.saturating_sub(1);
1765 if !at_line_start {
1766 bytes.extend_from_slice(newline);
1767 at_line_start = true;
1768 }
1769 }
1770 Token::LineBreak => {
1771 if !at_line_start {
1772 bytes.extend_from_slice(newline);
1773 at_line_start = true;
1774 }
1775 }
1776 Token::Lit(value) => {
1777 if at_line_start {
1778 bytes.extend(std::iter::repeat_n(b' ', indent * policy.indent_width));
1779 } else if let Some(prev) = last_lit {
1780 if needs_space_between(prev, value) {
1781 bytes.extend_from_slice(separator);
1782 }
1783 }
1784 bytes.extend_from_slice(value.as_bytes());
1785 at_line_start = false;
1786 last_lit = Some(value.as_str());
1787 }
1788 }
1789 }
1790
1791 if !at_line_start {
1792 bytes.extend_from_slice(newline);
1793 }
1794 bytes
1795}
1796
1797fn needs_space_between(last: &str, next: &str) -> bool {
1798 if last.is_empty() || next.is_empty() {
1799 return false;
1800 }
1801 if is_punct_open(last) || is_punct_open(next) {
1802 return false;
1803 }
1804 if is_punct_close(next) {
1805 return false;
1806 }
1807 if is_punct_close(last) && is_punct_punctuation(next) {
1808 return false;
1809 }
1810 if last == "." || next == "." {
1811 return false;
1812 }
1813 if last_is_word_like(last) && first_is_word_like(next) {
1814 return true;
1815 }
1816 if last_ends_with_alnum(last) && first_is_alnum_or_underscore(next) {
1817 return true;
1818 }
1819 true
1822}
1823
/// True when `s` is exactly one of the opening tokens `(`, `[`, `{`, `"`, `'`
/// or a backtick.
fn is_punct_open(s: &str) -> bool {
    const OPENERS: [&str; 6] = ["(", "[", "{", "\"", "'", "`"];
    OPENERS.contains(&s)
}
1827
/// True when `s` is exactly one of the closing or trailing tokens `)`, `]`,
/// `}`, `,`, `;`, `:`, `"`, `'` or a backtick.
fn is_punct_close(s: &str) -> bool {
    const CLOSERS: [&str; 9] = [")", "]", "}", ",", ";", ":", "\"", "'", "`"];
    CLOSERS.contains(&s)
}
1831
/// True when `s` is exactly one of `,`, `;`, `:`, `.`, `)`, `]` or `}`.
fn is_punct_punctuation(s: &str) -> bool {
    const TRAILING: [&str; 7] = [",", ";", ":", ".", ")", "]", "}"];
    TRAILING.contains(&s)
}
1835
/// True when the final character of `s` is alphanumeric or an underscore.
/// An empty string is never word-like.
fn last_is_word_like(s: &str) -> bool {
    s.ends_with(|c: char| c == '_' || c.is_alphanumeric())
}
1842
/// True when the first character of `s` is alphanumeric or an underscore.
/// An empty string is never word-like.
fn first_is_word_like(s: &str) -> bool {
    s.starts_with(|c: char| c == '_' || c.is_alphanumeric())
}
1849
/// True when the final character of `s` is alphanumeric. Unlike
/// `last_is_word_like`, a trailing underscore does not count.
fn last_ends_with_alnum(s: &str) -> bool {
    s.ends_with(char::is_alphanumeric)
}
1856
/// True when the first character of `s` is alphanumeric or an underscore
/// (the same predicate as `first_is_word_like`). False for an empty string.
fn first_is_alnum_or_underscore(s: &str) -> bool {
    matches!(s.chars().next(), Some(c) if c == '_' || c.is_alphanumeric())
}
1863
// Unit tests for grammar loading, the production walkers, the child cursor,
// pattern placeholders, and the token layout pass.
#[cfg(test)]
mod tests {
    use super::*;

    /// A minimal grammar with one SEQ rule loads and exposes that rule by name.
    #[test]
    fn parses_simple_grammar_json() {
        let bytes = br#"{
            "name": "tiny",
            "rules": {
                "program": {
                    "type": "SEQ",
                    "members": [
                        {"type": "STRING", "value": "hello"},
                        {"type": "STRING", "value": ";"}
                    ]
                }
            }
        }"#;
        let g = Grammar::from_bytes("tiny", bytes).expect("valid tiny grammar");
        assert!(g.rules.contains_key("program"));
    }

    /// Brackets and the semicolon attach to the preceding token with no separator.
    #[test]
    fn output_emits_punctuation_without_leading_space() {
        let policy = FormatPolicy::default();
        let mut out = Output::new(&policy);
        out.token("foo");
        out.token("(");
        out.token(")");
        out.token(";");
        let bytes = out.finish();
        let s = std::str::from_utf8(&bytes).expect("ascii output");
        assert!(s.starts_with("foo();"), "got {s:?}");
    }

    /// Non-JSON input is rejected, and the error text contains the name that
    /// was passed as the first argument.
    #[test]
    fn grammar_from_bytes_rejects_malformed_input() {
        let result = Grammar::from_bytes("malformed", b"not json");
        let err = result.expect_err("malformed bytes must yield Err");
        let msg = err.to_string();
        assert!(
            msg.contains("malformed"),
            "error message should name the protocol: {msg:?}"
        );
    }

    /// With the default policy, `{` is followed by a newline and the output
    /// ends with `}` plus a newline.
    #[test]
    fn output_indents_after_open_brace() {
        let policy = FormatPolicy::default();
        let mut out = Output::new(&policy);
        out.token("fn");
        out.token("foo");
        out.token("(");
        out.token(")");
        out.token("{");
        out.token("body");
        out.token("}");
        let bytes = out.finish();
        let s = std::str::from_utf8(&bytes).expect("ascii output");
        assert!(s.contains("{\n"), "newline after opening brace: {s:?}");
        assert!(s.contains("body"), "body inside block: {s:?}");
        assert!(s.ends_with("}\n"), "newline after closing brace: {s:?}");
    }

    /// A lone `.` glues to both neighbours.
    #[test]
    fn output_no_space_between_word_and_dot() {
        let policy = FormatPolicy::default();
        let mut out = Output::new(&policy);
        out.token("foo");
        out.token(".");
        out.token("bar");
        let bytes = out.finish();
        let s = std::str::from_utf8(&bytes).expect("ascii output");
        assert!(s.starts_with("foo.bar"), "no space around dot: {s:?}");
    }

    /// Tokens recorded after a snapshot disappear on restore; tokens recorded
    /// before it, and tokens recorded after the restore, survive.
    /// (Despite the name, the rollback acts on the recorded token list.)
    #[test]
    fn output_snapshot_restore_truncates_bytes() {
        let policy = FormatPolicy::default();
        let mut out = Output::new(&policy);
        out.token("keep");
        let snap = out.snapshot();
        out.token("drop");
        out.token("more");
        out.restore(snap);
        out.token("after");
        let bytes = out.finish();
        let s = std::str::from_utf8(&bytes).expect("ascii output");
        assert!(s.contains("keep"), "kept token survives: {s:?}");
        assert!(s.contains("after"), "post-restore token visible: {s:?}");
        assert!(!s.contains("drop"), "rolled-back token removed: {s:?}");
        assert!(!s.contains("more"), "rolled-back token removed: {s:?}");
    }

    /// Taking the same field twice from a single-edge cursor succeeds only once.
    #[test]
    fn child_cursor_take_field_consumes_once() {
        let edges_owned: Vec<Edge> = vec![Edge {
            src: panproto_gat::Name::from("p"),
            tgt: panproto_gat::Name::from("c"),
            kind: panproto_gat::Name::from("name"),
            name: None,
        }];
        let edges: Vec<&Edge> = edges_owned.iter().collect();
        let mut cursor = ChildCursor::new(&edges);
        let first = cursor.take_field("name");
        let second = cursor.take_field("name");
        assert!(first.is_some(), "first take returns the edge");
        assert!(
            second.is_none(),
            "second take returns None (already consumed)"
        );
    }

    /// Taking by predicate consumes only the matching edge; the other edge
    /// remains visible to `has_matching`.
    #[test]
    fn child_cursor_take_matching_predicate() {
        let edges_owned: Vec<Edge> = vec![
            Edge {
                src: "p".into(),
                tgt: "c1".into(),
                kind: "child_of".into(),
                name: None,
            },
            Edge {
                src: "p".into(),
                tgt: "c2".into(),
                kind: "key".into(),
                name: None,
            },
        ];
        let edges: Vec<&Edge> = edges_owned.iter().collect();
        let mut cursor = ChildCursor::new(&edges);
        assert!(cursor.has_matching(|e| e.kind.as_ref() == "key"));
        let taken = cursor.take_matching(|e| e.kind.as_ref() == "key");
        assert!(taken.is_some());
        assert!(
            !cursor.has_matching(|e| e.kind.as_ref() == "key"),
            "consumed edge no longer matches"
        );
        assert!(
            cursor.has_matching(|e| e.kind.as_ref() == "child_of"),
            "the other edge is still available"
        );
    }

    /// A kind satisfies a symbol of the same name; a different kind or a
    /// missing kind does not.
    #[test]
    fn kind_satisfies_symbol_direct_match() {
        let bytes = br#"{
            "name": "tiny",
            "rules": {
                "x": {"type": "STRING", "value": "x"}
            }
        }"#;
        let g = Grammar::from_bytes("tiny", bytes).expect("valid grammar");
        assert!(kind_satisfies_symbol(&g, Some("x"), "x"));
        assert!(!kind_satisfies_symbol(&g, Some("y"), "x"));
        assert!(!kind_satisfies_symbol(&g, None, "x"));
    }

    /// Kinds listed as CHOICE alternatives of the hidden rule `_value`
    /// satisfy `_value`; an unlisted kind does not.
    #[test]
    fn kind_satisfies_symbol_through_hidden_rule() {
        let bytes = br#"{
            "name": "tiny",
            "rules": {
                "_value": {
                    "type": "CHOICE",
                    "members": [
                        {"type": "SYMBOL", "name": "object"},
                        {"type": "SYMBOL", "name": "number"}
                    ]
                },
                "object": {"type": "STRING", "value": "{}"},
                "number": {"type": "PATTERN", "value": "[0-9]+"}
            }
        }"#;
        let g = Grammar::from_bytes("tiny", bytes).expect("valid grammar");
        assert!(
            kind_satisfies_symbol(&g, Some("number"), "_value"),
            "number is reachable from _value via CHOICE"
        );
        assert!(
            kind_satisfies_symbol(&g, Some("object"), "_value"),
            "object is reachable from _value via CHOICE"
        );
        assert!(
            !kind_satisfies_symbol(&g, Some("string"), "_value"),
            "string is NOT among the alternatives"
        );
    }

    /// `first_symbol` looks past a leading STRING member of a SEQ.
    #[test]
    fn first_symbol_skips_string_terminals() {
        let prod: Production = serde_json::from_str(
            r#"{
                "type": "SEQ",
                "members": [
                    {"type": "STRING", "value": "{"},
                    {"type": "SYMBOL", "name": "body"},
                    {"type": "STRING", "value": "}"}
                ]
            }"#,
        )
        .expect("valid SEQ");
        assert_eq!(first_symbol(&prod), Some("body"));
    }

    /// Digit, identifier and quoted-string patterns map to `0`, `_x` and `""`.
    #[test]
    fn placeholder_for_pattern_routes_by_regex_class() {
        assert_eq!(placeholder_for_pattern("[0-9]+"), "0");
        assert_eq!(placeholder_for_pattern("[a-zA-Z_]\\w*"), "_x");
        assert_eq!(placeholder_for_pattern("\"[^\"]*\""), "\"\"");
        assert_eq!(placeholder_for_pattern("\\d+\\.\\d+"), "0");
    }

    /// Pins the parts of the default policy that the layout tests rely on.
    #[test]
    fn format_policy_default_breaks_after_semicolon() {
        let policy = FormatPolicy::default();
        assert!(policy.line_break_after.iter().any(|t| t == ";"));
        assert!(policy.indent_open.iter().any(|t| t == "{"));
        assert!(policy.indent_close.iter().any(|t| t == "}"));
        assert_eq!(policy.indent_width, 2);
    }

    /// Patterns that are plain literals decode to that literal; patterns with
    /// regex operators fall back to the class-based placeholders.
    #[test]
    fn placeholder_decodes_literal_pattern_separators() {
        assert_eq!(placeholder_for_pattern("\\n"), "\n");
        assert_eq!(placeholder_for_pattern("\\r\\n"), "\r\n");
        assert_eq!(placeholder_for_pattern(";"), ";");
        assert_eq!(placeholder_for_pattern("[0-9]+"), "0");
        assert_eq!(placeholder_for_pattern("a|b"), "_");
    }

    /// `supertypes` entries given as bare strings are loaded, and a listed
    /// alternative satisfies the supertype.
    #[test]
    fn supertypes_decode_from_grammar_json_strings() {
        let bytes = br#"{
            "name": "tiny",
            "supertypes": ["expression"],
            "rules": {
                "expression": {
                    "type": "CHOICE",
                    "members": [
                        {"type": "SYMBOL", "name": "binary_expression"},
                        {"type": "SYMBOL", "name": "identifier"}
                    ]
                },
                "binary_expression": {"type": "STRING", "value": "x"},
                "identifier": {"type": "PATTERN", "value": "[a-z]+"}
            }
        }"#;
        let g = Grammar::from_bytes("tiny", bytes).expect("parse");
        assert!(g.supertypes.contains("expression"));
        assert!(kind_satisfies_symbol(&g, Some("identifier"), "expression"));
        assert!(!kind_satisfies_symbol(&g, Some("string"), "expression"));
    }

    /// `supertypes` entries given as SYMBOL objects are loaded the same way.
    #[test]
    fn supertypes_decode_from_grammar_json_objects() {
        let bytes = br#"{
            "name": "tiny",
            "supertypes": [{"type": "SYMBOL", "name": "stmt"}],
            "rules": {
                "stmt": {
                    "type": "CHOICE",
                    "members": [
                        {"type": "SYMBOL", "name": "while_stmt"},
                        {"type": "SYMBOL", "name": "if_stmt"}
                    ]
                },
                "while_stmt": {"type": "STRING", "value": "while"},
                "if_stmt": {"type": "STRING", "value": "if"}
            }
        }"#;
        let g = Grammar::from_bytes("tiny", bytes).expect("parse");
        assert!(g.supertypes.contains("stmt"));
        assert!(kind_satisfies_symbol(&g, Some("while_stmt"), "stmt"));
    }

    /// A kind equal to an ALIAS rule's `value` satisfies that rule's symbol.
    #[test]
    fn alias_value_matches_kind() {
        let bytes = br#"{
            "name": "tiny",
            "rules": {
                "_package_identifier": {
                    "type": "ALIAS",
                    "named": true,
                    "value": "package_identifier",
                    "content": {"type": "SYMBOL", "name": "identifier"}
                },
                "identifier": {"type": "PATTERN", "value": "[a-z]+"}
            }
        }"#;
        let g = Grammar::from_bytes("tiny", bytes).expect("parse");
        assert!(kind_satisfies_symbol(
            &g,
            Some("package_identifier"),
            "_package_identifier"
        ));
    }

    /// Symbols nested inside CHOICE and REPEAT members of a SEQ are collected.
    #[test]
    fn referenced_symbols_walks_nested_seq() {
        let prod: Production = serde_json::from_str(
            r#"{
                "type": "SEQ",
                "members": [
                    {"type": "CHOICE", "members": [
                        {"type": "SYMBOL", "name": "attribute_item"},
                        {"type": "BLANK"}
                    ]},
                    {"type": "SYMBOL", "name": "parameter"},
                    {"type": "REPEAT", "content": {
                        "type": "SEQ",
                        "members": [
                            {"type": "STRING", "value": ","},
                            {"type": "SYMBOL", "name": "parameter"}
                        ]
                    }}
                ]
            }"#,
        )
        .expect("seq");
        let symbols = referenced_symbols(&prod);
        assert!(symbols.contains(&"attribute_item"));
        assert!(symbols.contains(&"parameter"));
    }

    /// STRING members of a CHOICE are returned in declaration order.
    #[test]
    fn literal_strings_collects_choice_members() {
        let prod: Production = serde_json::from_str(
            r#"{
                "type": "CHOICE",
                "members": [
                    {"type": "STRING", "value": "+"},
                    {"type": "STRING", "value": "-"},
                    {"type": "STRING", "value": "*"}
                ]
            }"#,
        )
        .expect("choice");
        let strings = literal_strings(&prod);
        assert_eq!(strings, vec!["+", "-", "*"]);
    }

    /// A RESERVED node deserialises into `Production::Reserved` wrapping its
    /// inner SYMBOL.
    #[test]
    fn reserved_variant_deserialises() {
        let prod: Production = serde_json::from_str(
            r#"{
                "type": "RESERVED",
                "content": {"type": "SYMBOL", "name": "_lowercase_identifier"},
                "context_name": "attribute_id"
            }"#,
        )
        .expect("RESERVED parses");
        match prod {
            Production::Reserved { content, .. } => match *content {
                Production::Symbol { name } => assert_eq!(name, "_lowercase_identifier"),
                other => panic!("expected inner SYMBOL, got {other:?}"),
            },
            other => panic!("expected RESERVED, got {other:?}"),
        }
    }

    /// A whole grammar whose rule is a RESERVED node loads successfully.
    #[test]
    fn reserved_grammar_loads_end_to_end() {
        let bytes = br#"{
            "name": "tiny_reserved",
            "rules": {
                "program": {
                    "type": "RESERVED",
                    "content": {"type": "SYMBOL", "name": "ident"},
                    "context_name": "keywords"
                },
                "ident": {"type": "PATTERN", "value": "[a-z]+"}
            }
        }"#;
        let g = Grammar::from_bytes("tiny_reserved", bytes).expect("RESERVED-using grammar loads");
        assert!(g.rules.contains_key("program"));
    }

    /// `first_symbol`, `has_field_in` and `referenced_symbols` all see through
    /// a RESERVED wrapper to the FIELD and SYMBOL inside it.
    #[test]
    fn reserved_walker_helpers_recurse_into_content() {
        let prod: Production = serde_json::from_str(
            r#"{
                "type": "RESERVED",
                "content": {
                    "type": "FIELD",
                    "name": "lhs",
                    "content": {"type": "SYMBOL", "name": "expr"}
                },
                "context_name": "ctx"
            }"#,
        )
        .expect("nested RESERVED parses");
        assert_eq!(first_symbol(&prod), Some("expr"));
        assert!(has_field_in(&prod, &["lhs"]));
        let symbols = referenced_symbols(&prod);
        assert!(symbols.contains(&"expr"));
    }
}