1#![allow(
2 clippy::module_name_repetitions,
3 clippy::too_many_lines,
4 clippy::too_many_arguments,
5 clippy::map_unwrap_or,
6 clippy::option_if_let_else,
7 clippy::elidable_lifetime_names,
8 clippy::items_after_statements,
9 clippy::needless_pass_by_value,
10 clippy::single_match_else,
11 clippy::manual_let_else,
12 clippy::match_same_arms,
13 clippy::missing_const_for_fn,
14 clippy::single_char_pattern,
15 clippy::naive_bytecount,
16 clippy::expect_used,
17 clippy::redundant_pub_crate,
18 clippy::used_underscore_binding,
19 clippy::redundant_field_names,
20 clippy::struct_field_names,
21 clippy::redundant_else,
22 clippy::similar_names
23)]
24
25use std::collections::BTreeMap;
71
72use panproto_schema::{Edge, Schema};
73use serde::Deserialize;
74
75use crate::error::ParseError;
76
/// One node of a grammar production tree as stored in `grammar.json`.
///
/// The JSON `type` field selects the variant (internally tagged), and every
/// variant is renamed to its upper-case tag.
#[derive(Debug, Clone, Deserialize)]
#[serde(tag = "type")]
#[non_exhaustive]
pub enum Production {
    /// `SEQ`: the emitter walks every member in order.
    #[serde(rename = "SEQ")]
    Seq {
        /// Member productions, in order.
        members: Vec<Self>,
    },
    /// `CHOICE`: the emitter walks at most one of the members.
    #[serde(rename = "CHOICE")]
    Choice {
        /// Alternative productions.
        members: Vec<Self>,
    },
    /// `REPEAT`: the emitter re-walks `content` for as long as each pass
    /// consumes another child edge.
    #[serde(rename = "REPEAT")]
    Repeat {
        /// Repeated production.
        content: Box<Self>,
    },
    /// `REPEAT1`: like `REPEAT`, but the emitter walks `content` once more if
    /// no pass consumed anything.
    #[serde(rename = "REPEAT1")]
    Repeat1 {
        /// Repeated production.
        content: Box<Self>,
    },
    /// `OPTIONAL`: the emitter keeps the output of `content` only if it
    /// consumed a child edge or a constraint on the vertex is relevant to it.
    #[serde(rename = "OPTIONAL")]
    Optional {
        /// Optional production.
        content: Box<Self>,
    },
    /// `SYMBOL`: a reference to another rule by name.
    #[serde(rename = "SYMBOL")]
    Symbol {
        /// Name of the referenced rule.
        name: String,
    },
    /// `STRING`: a literal token, emitted verbatim.
    #[serde(rename = "STRING")]
    String {
        /// Literal text.
        value: String,
    },
    /// `PATTERN`: a pattern token; the emitter prints the vertex's
    /// `literal-value` constraint if present, otherwise a placeholder.
    #[serde(rename = "PATTERN")]
    Pattern {
        /// Pattern source text.
        value: String,
    },
    /// `BLANK`: emits nothing.
    #[serde(rename = "BLANK")]
    Blank,
    /// `FIELD`: `content` is matched against a child edge whose kind equals
    /// `name`.
    #[serde(rename = "FIELD")]
    Field {
        /// Field name, compared with edge kinds.
        name: String,
        /// Production for the field's value.
        content: Box<Self>,
    },
    /// `ALIAS`: `content` presented under a different name.
    #[serde(rename = "ALIAS")]
    Alias {
        /// Aliased production.
        content: Box<Self>,
        /// Defaults to `false` when absent from the JSON. Only aliases with
        /// `named == true` and a non-empty `value` are matched against child
        /// vertex kinds.
        #[serde(default)]
        named: bool,
        /// Alias name; defaults to the empty string when absent.
        #[serde(default)]
        value: String,
    },
    /// `TOKEN`: the emitter passes straight through to `content`.
    #[serde(rename = "TOKEN")]
    Token {
        /// Wrapped production.
        content: Box<Self>,
    },
    /// `IMMEDIATE_TOKEN`: the emitter passes straight through to `content`.
    #[serde(rename = "IMMEDIATE_TOKEN")]
    ImmediateToken {
        /// Wrapped production.
        content: Box<Self>,
    },
    /// `PREC`: the emitter passes straight through to `content`.
    #[serde(rename = "PREC")]
    Prec {
        /// Raw JSON `value`; kept untyped and not read by the emitter.
        #[allow(dead_code)]
        value: serde_json::Value,
        /// Wrapped production.
        content: Box<Self>,
    },
    /// `PREC_LEFT`: the emitter passes straight through to `content`.
    #[serde(rename = "PREC_LEFT")]
    PrecLeft {
        /// Raw JSON `value`; kept untyped and not read by the emitter.
        #[allow(dead_code)]
        value: serde_json::Value,
        /// Wrapped production.
        content: Box<Self>,
    },
    /// `PREC_RIGHT`: the emitter passes straight through to `content`.
    #[serde(rename = "PREC_RIGHT")]
    PrecRight {
        /// Raw JSON `value`; kept untyped and not read by the emitter.
        #[allow(dead_code)]
        value: serde_json::Value,
        /// Wrapped production.
        content: Box<Self>,
    },
    /// `PREC_DYNAMIC`: the emitter passes straight through to `content`.
    #[serde(rename = "PREC_DYNAMIC")]
    PrecDynamic {
        /// Raw JSON `value`; kept untyped and not read by the emitter.
        #[allow(dead_code)]
        value: serde_json::Value,
        /// Wrapped production.
        content: Box<Self>,
    },
    /// `RESERVED`: the emitter passes straight through to `content`.
    #[serde(rename = "RESERVED")]
    Reserved {
        /// Wrapped production.
        content: Box<Self>,
        /// JSON `context_name`; defaults to the empty string when absent.
        #[allow(dead_code)]
        #[serde(default)]
        context_name: String,
    },
}
253
/// A grammar loaded from `grammar.json`.
///
/// Build it with [`Grammar::from_bytes`]: plain deserialization leaves
/// `subtypes` empty because that field is skipped by serde.
#[derive(Debug, Clone, Deserialize)]
pub struct Grammar {
    /// Value of the JSON `name` field.
    #[allow(dead_code)]
    pub name: String,
    /// Rule name → production tree.
    pub rules: BTreeMap<String, Production>,
    /// Names listed under the JSON `supertypes` key. Entries may be plain
    /// strings or objects carrying a string `name`; the set is empty when the
    /// key is missing.
    #[serde(default, deserialize_with = "deserialize_supertypes")]
    pub supertypes: std::collections::HashSet<String>,
    /// Never deserialized; `from_bytes` fills it from `compute_subtype_closure`.
    /// Maps a kind to the set of symbol names that kind is accepted for
    /// (every rule name maps at least to itself).
    #[serde(skip)]
    pub subtypes: std::collections::HashMap<String, std::collections::HashSet<String>>,
}
292
293fn deserialize_supertypes<'de, D>(
294 deserializer: D,
295) -> Result<std::collections::HashSet<String>, D::Error>
296where
297 D: serde::Deserializer<'de>,
298{
299 let entries: Vec<serde_json::Value> = Vec::deserialize(deserializer)?;
300 let mut out = std::collections::HashSet::new();
301 for entry in entries {
302 match entry {
303 serde_json::Value::String(s) => {
304 out.insert(s);
305 }
306 serde_json::Value::Object(map) => {
307 if let Some(serde_json::Value::String(name)) = map.get("name") {
308 out.insert(name.clone());
309 }
310 }
311 _ => {}
312 }
313 }
314 Ok(out)
315}
316
317impl Grammar {
318 pub fn from_bytes(protocol: &str, bytes: &[u8]) -> Result<Self, ParseError> {
338 let mut grammar: Self =
339 serde_json::from_slice(bytes).map_err(|e| ParseError::EmitFailed {
340 protocol: protocol.to_owned(),
341 reason: format!("grammar.json deserialization failed: {e}"),
342 })?;
343 grammar.subtypes = compute_subtype_closure(&grammar);
344 Ok(grammar)
345 }
346}
347
348fn compute_subtype_closure(
351 grammar: &Grammar,
352) -> std::collections::HashMap<String, std::collections::HashSet<String>> {
353 use std::collections::{HashMap, HashSet};
354 let mut subtypes: HashMap<String, HashSet<String>> = HashMap::new();
359 for name in grammar.rules.keys() {
360 subtypes
361 .entry(name.clone())
362 .or_default()
363 .insert(name.clone());
364 }
365
366 fn walk<'g>(
370 grammar: &'g Grammar,
371 production: &'g Production,
372 visited: &mut HashSet<&'g str>,
373 out: &mut HashSet<String>,
374 ) {
375 match production {
376 Production::Symbol { name } => {
377 out.insert(name.clone());
379 let expand = name.starts_with('_') || grammar.supertypes.contains(name.as_str());
382 if expand && visited.insert(name.as_str()) {
383 if let Some(rule) = grammar.rules.get(name) {
384 walk(grammar, rule, visited, out);
385 }
386 }
387 }
388 Production::Choice { members } | Production::Seq { members } => {
389 for m in members {
390 walk(grammar, m, visited, out);
391 }
392 }
393 Production::Alias {
394 content,
395 named,
396 value,
397 } => {
398 if *named && !value.is_empty() {
399 out.insert(value.clone());
400 }
401 walk(grammar, content, visited, out);
402 }
403 Production::Repeat { content }
404 | Production::Repeat1 { content }
405 | Production::Optional { content }
406 | Production::Field { content, .. }
407 | Production::Token { content }
408 | Production::ImmediateToken { content }
409 | Production::Prec { content, .. }
410 | Production::PrecLeft { content, .. }
411 | Production::PrecRight { content, .. }
412 | Production::PrecDynamic { content, .. }
413 | Production::Reserved { content, .. } => {
414 walk(grammar, content, visited, out);
415 }
416 _ => {}
417 }
418 }
419
420 for (name, rule) in &grammar.rules {
421 let expand = name.starts_with('_') || grammar.supertypes.contains(name.as_str());
422 if !expand {
423 continue;
424 }
425 let mut visited: HashSet<&str> = HashSet::new();
426 visited.insert(name.as_str());
427 let mut reachable: HashSet<String> = HashSet::new();
428 walk(grammar, rule, &mut visited, &mut reachable);
429 for kind in &reachable {
430 subtypes
431 .entry(kind.clone())
432 .or_default()
433 .insert(name.clone());
434 }
435 }
436
437 fn collect_aliases<'g>(production: &'g Production, out: &mut Vec<(String, &'g Production)>) {
443 match production {
444 Production::Alias {
445 content,
446 named,
447 value,
448 } => {
449 if *named && !value.is_empty() {
450 out.push((value.clone(), content.as_ref()));
451 }
452 collect_aliases(content, out);
453 }
454 Production::Choice { members } | Production::Seq { members } => {
455 for m in members {
456 collect_aliases(m, out);
457 }
458 }
459 Production::Repeat { content }
460 | Production::Repeat1 { content }
461 | Production::Optional { content }
462 | Production::Field { content, .. }
463 | Production::Token { content }
464 | Production::ImmediateToken { content }
465 | Production::Prec { content, .. }
466 | Production::PrecLeft { content, .. }
467 | Production::PrecRight { content, .. }
468 | Production::PrecDynamic { content, .. }
469 | Production::Reserved { content, .. } => {
470 collect_aliases(content, out);
471 }
472 _ => {}
473 }
474 }
475 let mut aliases: Vec<(String, &Production)> = Vec::new();
476 for rule in grammar.rules.values() {
477 collect_aliases(rule, &mut aliases);
478 }
479 for (alias_value, content) in aliases {
480 let mut visited: HashSet<&str> = HashSet::new();
481 let mut reachable: HashSet<String> = HashSet::new();
482 walk(grammar, content, &mut visited, &mut reachable);
483 subtypes
486 .entry(alias_value.clone())
487 .or_default()
488 .insert(alias_value.clone());
489 for kind in reachable {
490 subtypes
491 .entry(kind)
492 .or_default()
493 .insert(alias_value.clone());
494 }
495 }
496
497 for _ in 0..8 {
501 let snapshot = subtypes.clone();
502 let mut changed = false;
503 for (kind, supers) in &snapshot {
504 let extra: HashSet<String> = supers
505 .iter()
506 .flat_map(|s| snapshot.get(s).cloned().unwrap_or_default())
507 .collect();
508 let entry = subtypes.entry(kind.clone()).or_default();
509 for s in extra {
510 if entry.insert(s) {
511 changed = true;
512 }
513 }
514 }
515 if !changed {
516 break;
517 }
518 }
519
520 subtypes
521}
522
/// Layout settings for the emitted text.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct FormatPolicy {
    /// Number of spaces written per indent level at the start of a line.
    pub indent_width: usize,
    /// Separator text handed to `layout`.
    /// NOTE(review): presumably written between adjacent tokens on one line —
    /// confirm against the full `layout` body.
    pub separator: String,
    /// Text written for each line break.
    pub newline: String,
    /// Tokens that are followed by a line break.
    pub line_break_after: Vec<String>,
    /// Tokens that raise the indent level (and break the line) after they are
    /// written.
    pub indent_open: Vec<String>,
    /// Tokens that lower the indent level before they are written.
    pub indent_close: Vec<String>,
}
553
554impl Default for FormatPolicy {
555 fn default() -> Self {
556 Self {
557 indent_width: 2,
558 separator: " ".to_owned(),
559 newline: "\n".to_owned(),
560 line_break_after: vec![";".into(), "{".into(), "}".into()],
561 indent_open: vec!["{".into()],
562 indent_close: vec!["}".into()],
563 }
564 }
565}
566
567pub fn emit_pretty(
591 protocol: &str,
592 schema: &Schema,
593 grammar: &Grammar,
594 policy: &FormatPolicy,
595) -> Result<Vec<u8>, ParseError> {
596 let roots = collect_roots(schema);
597 if roots.is_empty() {
598 return Err(ParseError::EmitFailed {
599 protocol: protocol.to_owned(),
600 reason: "schema has no entry vertices".to_owned(),
601 });
602 }
603
604 let mut out = Output::new(policy);
605 for (i, root) in roots.iter().enumerate() {
606 if i > 0 {
607 out.newline();
608 }
609 emit_vertex(protocol, schema, grammar, root, &mut out)?;
610 }
611 Ok(out.finish())
612}
613
614fn collect_roots(schema: &Schema) -> Vec<&panproto_gat::Name> {
615 if !schema.entries.is_empty() {
616 return schema
617 .entries
618 .iter()
619 .filter(|name| schema.vertices.contains_key(*name))
620 .collect();
621 }
622
623 let mut targets: std::collections::HashSet<&panproto_gat::Name> =
626 std::collections::HashSet::new();
627 for edge in schema.edges.keys() {
628 targets.insert(&edge.tgt);
629 }
630 let mut roots: Vec<&panproto_gat::Name> = schema
631 .vertices
632 .keys()
633 .filter(|name| !targets.contains(name))
634 .collect();
635 roots.sort();
636 roots
637}
638
639fn emit_vertex(
640 protocol: &str,
641 schema: &Schema,
642 grammar: &Grammar,
643 vertex_id: &panproto_gat::Name,
644 out: &mut Output<'_>,
645) -> Result<(), ParseError> {
646 let vertex = schema
647 .vertices
648 .get(vertex_id)
649 .ok_or_else(|| ParseError::EmitFailed {
650 protocol: protocol.to_owned(),
651 reason: format!("vertex '{vertex_id}' not found"),
652 })?;
653
654 if let Some(literal) = literal_value(schema, vertex_id) {
660 if children_for(schema, vertex_id).is_empty() {
661 out.token(literal);
662 return Ok(());
663 }
664 }
665
666 let kind = vertex.kind.as_ref();
667 let edges = children_for(schema, vertex_id);
668 if let Some(rule) = grammar.rules.get(kind) {
669 let mut cursor = ChildCursor::new(&edges);
670 return emit_production(protocol, schema, grammar, vertex_id, rule, &mut cursor, out);
671 }
672
673 for edge in &edges {
680 emit_vertex(protocol, schema, grammar, &edge.tgt, out)?;
681 }
682 Ok(())
683}
684
/// Tracks which child edges of a vertex have already been handed out while a
/// production is being walked.
struct ChildCursor<'a> {
    /// The child edges, in the order they are searched.
    edges: &'a [&'a Edge],
    /// `consumed[i]` is `true` once `edges[i]` has been taken; same length as
    /// `edges`.
    consumed: Vec<bool>,
}
691
692impl<'a> ChildCursor<'a> {
693 fn new(edges: &'a [&'a Edge]) -> Self {
694 Self {
695 edges,
696 consumed: vec![false; edges.len()],
697 }
698 }
699
700 fn take_field(&mut self, field_name: &str) -> Option<&'a Edge> {
702 for (i, edge) in self.edges.iter().enumerate() {
703 if !self.consumed[i] && edge.kind.as_ref() == field_name {
704 self.consumed[i] = true;
705 return Some(edge);
706 }
707 }
708 None
709 }
710
711 #[cfg(test)]
716 fn has_matching(&self, predicate: impl Fn(&Edge) -> bool) -> bool {
717 self.edges
718 .iter()
719 .enumerate()
720 .any(|(i, edge)| !self.consumed[i] && predicate(edge))
721 }
722
723 fn take_matching(&mut self, predicate: impl Fn(&Edge) -> bool) -> Option<&'a Edge> {
727 for (i, edge) in self.edges.iter().enumerate() {
728 if !self.consumed[i] && predicate(edge) {
729 self.consumed[i] = true;
730 return Some(edge);
731 }
732 }
733 None
734 }
735}
736
thread_local! {
    /// Number of `emit_production` calls currently active on this thread;
    /// `emit_production` increments it on entry and decrements it on exit.
    static EMIT_DEPTH: std::cell::Cell<usize> = const { std::cell::Cell::new(0) };
    /// `(vertex id, rule name)` pairs that `walk_in_mu_frame` is currently
    /// expanding on this thread; used to skip re-entrant expansion of the same
    /// pair.
    static EMIT_MU_FRAMES: std::cell::RefCell<std::collections::HashSet<(String, String)>> =
        std::cell::RefCell::new(std::collections::HashSet::new());
}
751
752fn walk_in_mu_frame(
757 protocol: &str,
758 schema: &Schema,
759 grammar: &Grammar,
760 vertex_id: &panproto_gat::Name,
761 rule_name: &str,
762 rule: &Production,
763 cursor: &mut ChildCursor<'_>,
764 out: &mut Output<'_>,
765) -> Result<(), ParseError> {
766 let key = (vertex_id.to_string(), rule_name.to_owned());
767 let inserted = EMIT_MU_FRAMES.with(|frames| frames.borrow_mut().insert(key.clone()));
768 if !inserted {
769 return Ok(());
774 }
775 let result = emit_production(protocol, schema, grammar, vertex_id, rule, cursor, out);
776 EMIT_MU_FRAMES.with(|frames| {
777 frames.borrow_mut().remove(&key);
778 });
779 result
780}
781
782fn emit_production(
783 protocol: &str,
784 schema: &Schema,
785 grammar: &Grammar,
786 vertex_id: &panproto_gat::Name,
787 production: &Production,
788 cursor: &mut ChildCursor<'_>,
789 out: &mut Output<'_>,
790) -> Result<(), ParseError> {
791 let depth = EMIT_DEPTH.with(|d| {
792 let v = d.get() + 1;
793 d.set(v);
794 v
795 });
796 if depth > 500 {
797 EMIT_DEPTH.with(|d| d.set(d.get() - 1));
798 return Err(ParseError::EmitFailed {
799 protocol: protocol.to_owned(),
800 reason: format!(
801 "emit_production recursion >500 (likely a cyclic grammar; \
802 vertex='{vertex_id}')"
803 ),
804 });
805 }
806 let result = emit_production_inner(
807 protocol, schema, grammar, vertex_id, production, cursor, out,
808 );
809 EMIT_DEPTH.with(|d| d.set(d.get() - 1));
810 result
811}
812
813fn emit_production_inner(
814 protocol: &str,
815 schema: &Schema,
816 grammar: &Grammar,
817 vertex_id: &panproto_gat::Name,
818 production: &Production,
819 cursor: &mut ChildCursor<'_>,
820 out: &mut Output<'_>,
821) -> Result<(), ParseError> {
822 match production {
823 Production::String { value } => {
824 out.token(value);
825 Ok(())
826 }
827 Production::Pattern { value } => {
828 if let Some(literal) = literal_value(schema, vertex_id) {
829 out.token(literal);
830 } else {
831 out.token(&placeholder_for_pattern(value));
832 }
833 Ok(())
834 }
835 Production::Blank => Ok(()),
836 Production::Symbol { name } => {
837 if name.starts_with('_') {
838 if let Some(rule) = grammar.rules.get(name) {
852 walk_in_mu_frame(
853 protocol, schema, grammar, vertex_id, name, rule, cursor, out,
854 )
855 } else {
856 if name.contains("line_ending")
866 || name.contains("newline")
867 || name.ends_with("_or_eof")
868 {
869 out.newline();
870 }
871 Ok(())
872 }
873 } else if let Some(edge) = take_symbol_match(grammar, schema, cursor, name) {
874 emit_vertex(protocol, schema, grammar, &edge.tgt, out)
885 } else if vertex_id_kind(schema, vertex_id) == Some(name.as_str()) {
886 let rule = grammar
887 .rules
888 .get(name)
889 .ok_or_else(|| ParseError::EmitFailed {
890 protocol: protocol.to_owned(),
891 reason: format!("no production for SYMBOL '{name}'"),
892 })?;
893 walk_in_mu_frame(
896 protocol, schema, grammar, vertex_id, name, rule, cursor, out,
897 )
898 } else {
899 Ok(())
903 }
904 }
905 Production::Seq { members } => {
906 for member in members {
907 emit_production(protocol, schema, grammar, vertex_id, member, cursor, out)?;
908 }
909 Ok(())
910 }
911 Production::Choice { members } => {
912 if let Some(matched) =
913 pick_choice_with_cursor(schema, grammar, vertex_id, cursor, members)
914 {
915 emit_production(protocol, schema, grammar, vertex_id, matched, cursor, out)
916 } else {
917 Ok(())
918 }
919 }
920 Production::Repeat { content } | Production::Repeat1 { content } => {
921 let mut emitted_any = false;
922 loop {
923 let cursor_snap = cursor.consumed.clone();
924 let out_snap = out.snapshot();
925 let consumed_before = cursor.consumed.iter().filter(|&&c| c).count();
926 let result =
927 emit_production(protocol, schema, grammar, vertex_id, content, cursor, out);
928 let consumed_after = cursor.consumed.iter().filter(|&&c| c).count();
929 if result.is_err() || consumed_after == consumed_before {
930 cursor.consumed = cursor_snap;
931 out.restore(out_snap);
932 break;
933 }
934 emitted_any = true;
935 }
936 if matches!(production, Production::Repeat1 { .. }) && !emitted_any {
937 emit_production(protocol, schema, grammar, vertex_id, content, cursor, out)?;
938 }
939 Ok(())
940 }
941 Production::Optional { content } => {
942 let cursor_snap = cursor.consumed.clone();
943 let out_snap = out.snapshot();
944 let consumed_before = cursor.consumed.iter().filter(|&&c| c).count();
945 let result =
946 emit_production(protocol, schema, grammar, vertex_id, content, cursor, out);
947 if result.is_err() {
952 cursor.consumed = cursor_snap;
953 out.restore(out_snap);
954 return result;
955 }
956 let consumed_after = cursor.consumed.iter().filter(|&&c| c).count();
957 if consumed_after == consumed_before
958 && !has_relevant_constraint(content, schema, vertex_id)
959 {
960 cursor.consumed = cursor_snap;
961 out.restore(out_snap);
962 }
963 Ok(())
964 }
965 Production::Field { name, content } => {
966 if let Some(edge) = cursor.take_field(name) {
967 emit_in_child_context(protocol, schema, grammar, &edge.tgt, content, out)
968 } else if first_symbol(content).is_none() {
969 emit_production(protocol, schema, grammar, vertex_id, content, cursor, out)
977 } else {
978 Ok(())
979 }
980 }
981 Production::Alias {
982 content,
983 named,
984 value,
985 } => {
986 if *named && !value.is_empty() {
1001 if let Some(edge) = cursor.take_matching(|edge| {
1002 schema
1003 .vertices
1004 .get(&edge.tgt)
1005 .map(|v| v.kind.as_ref() == value.as_str())
1006 .unwrap_or(false)
1007 }) {
1008 return emit_aliased_child(protocol, schema, grammar, &edge.tgt, content, out);
1009 }
1010 }
1011 emit_production(protocol, schema, grammar, vertex_id, content, cursor, out)
1012 }
1013 Production::Token { content }
1014 | Production::ImmediateToken { content }
1015 | Production::Prec { content, .. }
1016 | Production::PrecLeft { content, .. }
1017 | Production::PrecRight { content, .. }
1018 | Production::PrecDynamic { content, .. }
1019 | Production::Reserved { content, .. } => {
1020 emit_production(protocol, schema, grammar, vertex_id, content, cursor, out)
1021 }
1022 }
1023}
1024
1025fn take_symbol_match<'a>(
1028 grammar: &Grammar,
1029 schema: &Schema,
1030 cursor: &mut ChildCursor<'a>,
1031 name: &str,
1032) -> Option<&'a Edge> {
1033 cursor.take_matching(|edge| {
1034 let target_kind = schema.vertices.get(&edge.tgt).map(|v| v.kind.as_ref());
1035 kind_satisfies_symbol(grammar, target_kind, name)
1036 })
1037}
1038
1039fn kind_satisfies_symbol(grammar: &Grammar, target_kind: Option<&str>, name: &str) -> bool {
1049 let Some(target) = target_kind else {
1050 return false;
1051 };
1052 if target == name {
1053 return true;
1054 }
1055 grammar
1056 .subtypes
1057 .get(target)
1058 .is_some_and(|set| set.contains(name))
1059}
1060
1061fn emit_aliased_child(
1095 protocol: &str,
1096 schema: &Schema,
1097 grammar: &Grammar,
1098 child_id: &panproto_gat::Name,
1099 content: &Production,
1100 out: &mut Output<'_>,
1101) -> Result<(), ParseError> {
1102 if let Some(literal) = literal_value(schema, child_id) {
1107 if children_for(schema, child_id).is_empty() {
1108 out.token(literal);
1109 return Ok(());
1110 }
1111 }
1112
1113 if let Production::Symbol { name } = content {
1116 if let Some(rule) = grammar.rules.get(name) {
1117 let edges = children_for(schema, child_id);
1118 let mut cursor = ChildCursor::new(&edges);
1119 return emit_production(protocol, schema, grammar, child_id, rule, &mut cursor, out);
1120 }
1121 }
1122
1123 let edges = children_for(schema, child_id);
1125 let mut cursor = ChildCursor::new(&edges);
1126 emit_production(
1127 protocol,
1128 schema,
1129 grammar,
1130 child_id,
1131 content,
1132 &mut cursor,
1133 out,
1134 )
1135}
1136
1137fn emit_in_child_context(
1138 protocol: &str,
1139 schema: &Schema,
1140 grammar: &Grammar,
1141 child_id: &panproto_gat::Name,
1142 production: &Production,
1143 out: &mut Output<'_>,
1144) -> Result<(), ParseError> {
1145 if !matches!(production, Production::Symbol { .. }) {
1154 let child_kind = schema.vertices.get(child_id).map(|v| v.kind.as_ref());
1155 let symbols = referenced_symbols(production);
1156 if symbols
1157 .iter()
1158 .any(|s| kind_satisfies_symbol(grammar, child_kind, s) || child_kind == Some(s))
1159 {
1160 return emit_vertex(protocol, schema, grammar, child_id, out);
1161 }
1162 }
1163 match production {
1164 Production::Symbol { .. } => emit_vertex(protocol, schema, grammar, child_id, out),
1165 _ => {
1166 let edges = children_for(schema, child_id);
1167 let mut cursor = ChildCursor::new(&edges);
1168 emit_production(
1169 protocol,
1170 schema,
1171 grammar,
1172 child_id,
1173 production,
1174 &mut cursor,
1175 out,
1176 )
1177 }
1178 }
1179}
1180
1181fn pick_choice_with_cursor<'a>(
1182 schema: &Schema,
1183 grammar: &Grammar,
1184 vertex_id: &panproto_gat::Name,
1185 cursor: &ChildCursor<'_>,
1186 alternatives: &'a [Production],
1187) -> Option<&'a Production> {
1188 let constraint_blob = schema
1201 .constraints
1202 .get(vertex_id)
1203 .map(|cs| {
1204 let fingerprint: Option<&str> = cs
1205 .iter()
1206 .find(|c| c.sort.as_ref() == "chose-alt-fingerprint")
1207 .map(|c| c.value.as_str());
1208 if let Some(fp) = fingerprint {
1209 fp.to_owned()
1210 } else {
1211 cs.iter()
1212 .filter(|c| {
1213 let s = c.sort.as_ref();
1214 s.starts_with("interstitial-") && !s.ends_with("-start-byte")
1215 })
1216 .map(|c| c.value.as_str())
1217 .collect::<Vec<&str>>()
1218 .join(" ")
1219 }
1220 })
1221 .unwrap_or_default();
1222 let child_kinds: Vec<&str> = schema
1223 .constraints
1224 .get(vertex_id)
1225 .and_then(|cs| {
1226 cs.iter()
1227 .find(|c| c.sort.as_ref() == "chose-alt-child-kinds")
1228 .map(|c| c.value.split_whitespace().collect())
1229 })
1230 .unwrap_or_default();
1231 if !constraint_blob.is_empty() {
1232 let mut best_literal: usize = 0;
1243 let mut best_symbols: usize = 0;
1244 let mut best_alt: Option<&Production> = None;
1245 let mut tied = false;
1246 for alt in alternatives {
1247 let strings = literal_strings(alt);
1248 if strings.is_empty() {
1249 continue;
1250 }
1251 let literal_score = strings
1252 .iter()
1253 .filter(|s| constraint_blob.contains(s.as_str()))
1254 .map(String::len)
1255 .sum::<usize>();
1256 if literal_score == 0 {
1257 continue;
1258 }
1259 let symbol_score = if literal_score >= best_literal && !child_kinds.is_empty() {
1266 let symbols = referenced_symbols(alt);
1267 symbols
1268 .iter()
1269 .filter(|sym| {
1270 let sym_str: &str = sym;
1271 if child_kinds.contains(&sym_str) {
1272 return true;
1273 }
1274 grammar.subtypes.get(sym_str).is_some_and(|sub_set| {
1275 sub_set
1276 .iter()
1277 .any(|sub| child_kinds.contains(&sub.as_str()))
1278 })
1279 })
1280 .count()
1281 } else {
1282 0
1283 };
1284 let better = literal_score > best_literal
1285 || (literal_score == best_literal && symbol_score > best_symbols);
1286 let same = literal_score == best_literal && symbol_score == best_symbols;
1287 if better {
1288 best_literal = literal_score;
1289 best_symbols = symbol_score;
1290 best_alt = Some(alt);
1291 tied = false;
1292 } else if same && best_alt.is_some() {
1293 tied = true;
1294 }
1295 }
1296 if let Some(alt) = best_alt {
1303 if !tied {
1304 return Some(alt);
1305 }
1306 }
1307 }
1308
1309 let first_unconsumed_kind: Option<&str> = cursor
1328 .edges
1329 .iter()
1330 .enumerate()
1331 .find(|(i, _)| !cursor.consumed[*i])
1332 .and_then(|(_, edge)| schema.vertices.get(&edge.tgt).map(|v| v.kind.as_ref()));
1333 if let Some(target_kind) = first_unconsumed_kind {
1334 for alt in alternatives {
1335 let symbols = referenced_symbols(alt);
1336 if !symbols.is_empty()
1337 && symbols
1338 .iter()
1339 .any(|s| kind_satisfies_symbol(grammar, Some(target_kind), s))
1340 {
1341 return Some(alt);
1342 }
1343 }
1344 }
1345
1346 let edge_kinds: Vec<&str> = cursor
1349 .edges
1350 .iter()
1351 .enumerate()
1352 .filter(|(i, _)| !cursor.consumed[*i])
1353 .map(|(_, e)| e.kind.as_ref())
1354 .collect();
1355 for alt in alternatives {
1356 if has_field_in(alt, &edge_kinds) {
1357 return Some(alt);
1358 }
1359 }
1360
1361 let _ = (schema, vertex_id);
1374 if alternatives.iter().any(|a| matches!(a, Production::Blank)) {
1375 return alternatives.iter().find(|a| matches!(a, Production::Blank));
1376 }
1377 alternatives
1378 .iter()
1379 .find(|alt| !matches!(alt, Production::Blank))
1380}
1381
1382fn literal_strings(production: &Production) -> Vec<String> {
1388 let mut out = Vec::new();
1389 fn walk(p: &Production, out: &mut Vec<String>) {
1390 match p {
1391 Production::String { value } if !value.is_empty() => {
1392 out.push(value.clone());
1393 }
1394 Production::Choice { members } | Production::Seq { members } => {
1395 for m in members {
1396 walk(m, out);
1397 }
1398 }
1399 Production::Repeat { content }
1400 | Production::Repeat1 { content }
1401 | Production::Optional { content }
1402 | Production::Field { content, .. }
1403 | Production::Alias { content, .. }
1404 | Production::Token { content }
1405 | Production::ImmediateToken { content }
1406 | Production::Prec { content, .. }
1407 | Production::PrecLeft { content, .. }
1408 | Production::PrecRight { content, .. }
1409 | Production::PrecDynamic { content, .. }
1410 | Production::Reserved { content, .. } => walk(content, out),
1411 _ => {}
1412 }
1413 }
1414 walk(production, &mut out);
1415 out
1416}
1417
1418fn referenced_symbols(production: &Production) -> Vec<&str> {
1425 let mut out = Vec::new();
1426 fn walk<'a>(p: &'a Production, out: &mut Vec<&'a str>) {
1427 match p {
1428 Production::Symbol { name } => out.push(name.as_str()),
1429 Production::Choice { members } | Production::Seq { members } => {
1430 for m in members {
1431 walk(m, out);
1432 }
1433 }
1434 Production::Repeat { content }
1435 | Production::Repeat1 { content }
1436 | Production::Optional { content }
1437 | Production::Field { content, .. }
1438 | Production::Alias { content, .. }
1439 | Production::Token { content }
1440 | Production::ImmediateToken { content }
1441 | Production::Prec { content, .. }
1442 | Production::PrecLeft { content, .. }
1443 | Production::PrecRight { content, .. }
1444 | Production::PrecDynamic { content, .. }
1445 | Production::Reserved { content, .. } => walk(content, out),
1446 _ => {}
1447 }
1448 }
1449 walk(production, &mut out);
1450 out
1451}
1452
1453fn first_symbol(production: &Production) -> Option<&str> {
1454 match production {
1455 Production::Symbol { name } => Some(name),
1456 Production::Seq { members } => members.iter().find_map(first_symbol),
1457 Production::Choice { members } => members.iter().find_map(first_symbol),
1458 Production::Repeat { content }
1459 | Production::Repeat1 { content }
1460 | Production::Optional { content }
1461 | Production::Field { content, .. }
1462 | Production::Alias { content, .. }
1463 | Production::Token { content }
1464 | Production::ImmediateToken { content }
1465 | Production::Prec { content, .. }
1466 | Production::PrecLeft { content, .. }
1467 | Production::PrecRight { content, .. }
1468 | Production::PrecDynamic { content, .. }
1469 | Production::Reserved { content, .. } => first_symbol(content),
1470 _ => None,
1471 }
1472}
1473
1474fn has_field_in(production: &Production, edge_kinds: &[&str]) -> bool {
1475 match production {
1476 Production::Field { name, .. } => edge_kinds.contains(&name.as_str()),
1477 Production::Seq { members } | Production::Choice { members } => {
1478 members.iter().any(|m| has_field_in(m, edge_kinds))
1479 }
1480 Production::Repeat { content }
1481 | Production::Repeat1 { content }
1482 | Production::Optional { content }
1483 | Production::Alias { content, .. }
1484 | Production::Token { content }
1485 | Production::ImmediateToken { content }
1486 | Production::Prec { content, .. }
1487 | Production::PrecLeft { content, .. }
1488 | Production::PrecRight { content, .. }
1489 | Production::PrecDynamic { content, .. }
1490 | Production::Reserved { content, .. } => has_field_in(content, edge_kinds),
1491 _ => false,
1492 }
1493}
1494
1495fn has_relevant_constraint(
1496 production: &Production,
1497 schema: &Schema,
1498 vertex_id: &panproto_gat::Name,
1499) -> bool {
1500 let constraints = match schema.constraints.get(vertex_id) {
1501 Some(c) => c,
1502 None => return false,
1503 };
1504 fn walk(production: &Production, constraints: &[panproto_schema::Constraint]) -> bool {
1505 match production {
1506 Production::String { value } => constraints
1507 .iter()
1508 .any(|c| c.value == *value || c.sort.as_ref() == value),
1509 Production::Field { name, content } => {
1510 constraints.iter().any(|c| c.sort.as_ref() == name) || walk(content, constraints)
1511 }
1512 Production::Seq { members } | Production::Choice { members } => {
1513 members.iter().any(|m| walk(m, constraints))
1514 }
1515 Production::Repeat { content }
1516 | Production::Repeat1 { content }
1517 | Production::Optional { content }
1518 | Production::Alias { content, .. }
1519 | Production::Token { content }
1520 | Production::ImmediateToken { content }
1521 | Production::Prec { content, .. }
1522 | Production::PrecLeft { content, .. }
1523 | Production::PrecRight { content, .. }
1524 | Production::PrecDynamic { content, .. }
1525 | Production::Reserved { content, .. } => walk(content, constraints),
1526 _ => false,
1527 }
1528 }
1529 walk(production, constraints)
1530}
1531
1532fn children_for<'a>(schema: &'a Schema, vertex_id: &panproto_gat::Name) -> Vec<&'a Edge> {
1533 let Some(edges) = schema.outgoing.get(vertex_id) else {
1542 return Vec::new();
1543 };
1544
1545 let mut indexed: Vec<(usize, u32, &Edge)> = edges
1549 .iter()
1550 .enumerate()
1551 .map(|(i, e)| {
1552 let canonical = schema.edges.get_key_value(e).map_or(e, |(k, _)| k);
1553 let pos = schema.orderings.get(canonical).copied().unwrap_or(u32::MAX);
1554 (i, pos, canonical)
1555 })
1556 .collect();
1557
1558 indexed.sort_by_key(|(i, pos, _)| (*pos, *i));
1562 indexed.into_iter().map(|(_, _, e)| e).collect()
1563}
1564
1565fn vertex_id_kind<'a>(schema: &'a Schema, vertex_id: &panproto_gat::Name) -> Option<&'a str> {
1566 schema.vertices.get(vertex_id).map(|v| v.kind.as_ref())
1567}
1568
1569fn literal_value<'a>(schema: &'a Schema, vertex_id: &panproto_gat::Name) -> Option<&'a str> {
1570 schema
1571 .constraints
1572 .get(vertex_id)?
1573 .iter()
1574 .find(|c| c.sort.as_ref() == "literal-value")
1575 .map(|c| c.value.as_str())
1576}
1577
1578fn placeholder_for_pattern(pattern: &str) -> String {
1579 let simple_lit = decode_simple_pattern_literal(pattern);
1587 if let Some(lit) = simple_lit {
1588 return lit;
1589 }
1590
1591 if pattern.contains("[0-9]") || pattern.contains("\\d") {
1592 "0".into()
1593 } else if pattern.contains("[a-zA-Z_]") || pattern.contains("\\w") {
1594 "_x".into()
1595 } else if pattern.contains('"') || pattern.contains('\'') {
1596 "\"\"".into()
1597 } else {
1598 "_".into()
1599 }
1600}
1601
/// Decodes `pattern` into the literal text it matches, when the pattern is
/// nothing more than a literal with backslash escapes.
///
/// Returns `None` (not a simple literal) when the pattern:
///
/// * contains any of `[ ] ( ) * + ? | { }`, escaped or not;
/// * ends in a lone backslash;
/// * uses a class, anchor or code-point escape (`\d \D \w \W \s \S \b \B \p
///   \P \x \u`). Such an escape stands for a set of characters or a position,
///   so copying the letter (turning `\d` into "d") would yield text the
///   pattern does not match.
///
/// `\n`, `\r` and `\t` decode to their control characters; any other escaped
/// character decodes to itself (for example `\.` → `.`, `\/` → `/`, `\\` → `\`).
fn decode_simple_pattern_literal(pattern: &str) -> Option<String> {
    let has_metachar = pattern
        .chars()
        .any(|c| matches!(c, '[' | ']' | '(' | ')' | '*' | '+' | '?' | '|' | '{' | '}'));
    if has_metachar {
        return None;
    }

    let mut decoded = String::with_capacity(pattern.len());
    let mut chars = pattern.chars();
    while let Some(c) = chars.next() {
        if c != '\\' {
            decoded.push(c);
            continue;
        }
        // `?` covers a trailing lone backslash.
        match chars.next()? {
            'n' => decoded.push('\n'),
            'r' => decoded.push('\r'),
            't' => decoded.push('\t'),
            // Character classes, word-boundary anchors and code-point escapes
            // are not literals.
            'd' | 'D' | 'w' | 'W' | 's' | 'S' | 'b' | 'B' | 'p' | 'P' | 'x' | 'u' => {
                return None;
            }
            escaped => decoded.push(escaped),
        }
    }
    Some(decoded)
}
1634
/// One entry in the token stream that `Output` buffers and `layout` renders.
#[derive(Clone)]
enum Token {
    /// Literal text to write.
    Lit(String),
    /// Raises the indent level by one.
    IndentOpen,
    /// Lowers the indent level by one (never below zero) and ends the current
    /// line if anything has been written on it.
    IndentClose,
    /// Ends the current line unless the output is already at a line start.
    LineBreak,
}
1660
/// Buffers layout tokens during emission; `finish` renders them to bytes.
struct Output<'a> {
    /// Tokens recorded so far, in emission order.
    tokens: Vec<Token>,
    /// Policy that decides which literals open/close indentation or break
    /// lines, and that is passed to `layout`.
    policy: &'a FormatPolicy,
}
1665
/// Rollback point for an `Output`, created by `Output::snapshot`.
#[derive(Clone)]
struct OutputSnapshot {
    /// Length of the token buffer when the snapshot was taken; `restore`
    /// truncates the buffer back to this length.
    tokens_len: usize,
}
1670
1671impl<'a> Output<'a> {
1672 fn new(policy: &'a FormatPolicy) -> Self {
1673 Self {
1674 tokens: Vec::new(),
1675 policy,
1676 }
1677 }
1678
1679 fn token(&mut self, value: &str) {
1680 if value.is_empty() {
1681 return;
1682 }
1683
1684 if self.policy.indent_close.iter().any(|t| t == value) {
1685 self.tokens.push(Token::IndentClose);
1686 }
1687
1688 self.tokens.push(Token::Lit(value.to_owned()));
1689
1690 if self.policy.indent_open.iter().any(|t| t == value) {
1691 self.tokens.push(Token::IndentOpen);
1692 self.tokens.push(Token::LineBreak);
1693 } else if self.policy.line_break_after.iter().any(|t| t == value) {
1694 self.tokens.push(Token::LineBreak);
1695 }
1696 }
1697
1698 fn newline(&mut self) {
1699 self.tokens.push(Token::LineBreak);
1700 }
1701
1702 fn snapshot(&self) -> OutputSnapshot {
1703 OutputSnapshot {
1704 tokens_len: self.tokens.len(),
1705 }
1706 }
1707
1708 fn restore(&mut self, snap: OutputSnapshot) {
1709 self.tokens.truncate(snap.tokens_len);
1710 }
1711
1712 fn finish(self) -> Vec<u8> {
1713 layout(&self.tokens, self.policy)
1714 }
1715}
1716
1717fn layout(tokens: &[Token], policy: &FormatPolicy) -> Vec<u8> {
1723 let mut bytes = Vec::new();
1724 let mut indent: usize = 0;
1725 let mut at_line_start = true;
1726 let mut last_lit: Option<&str> = None;
1727 let newline = policy.newline.as_bytes();
1728 let separator = policy.separator.as_bytes();
1729
1730 for tok in tokens {
1731 match tok {
1732 Token::IndentOpen => indent += 1,
1733 Token::IndentClose => {
1734 indent = indent.saturating_sub(1);
1735 if !at_line_start {
1736 bytes.extend_from_slice(newline);
1737 at_line_start = true;
1738 }
1739 }
1740 Token::LineBreak => {
1741 if !at_line_start {
1742 bytes.extend_from_slice(newline);
1743 at_line_start = true;
1744 }
1745 }
1746 Token::Lit(value) => {
1747 if at_line_start {
1748 bytes.extend(std::iter::repeat_n(b' ', indent * policy.indent_width));
1749 } else if let Some(prev) = last_lit {
1750 if needs_space_between(prev, value) {
1751 bytes.extend_from_slice(separator);
1752 }
1753 }
1754 bytes.extend_from_slice(value.as_bytes());
1755 at_line_start = false;
1756 last_lit = Some(value.as_str());
1757 }
1758 }
1759 }
1760
1761 if !at_line_start {
1762 bytes.extend_from_slice(newline);
1763 }
1764 bytes
1765}
1766
1767fn needs_space_between(last: &str, next: &str) -> bool {
1768 if last.is_empty() || next.is_empty() {
1769 return false;
1770 }
1771 if is_punct_open(last) || is_punct_open(next) {
1772 return false;
1773 }
1774 if is_punct_close(next) {
1775 return false;
1776 }
1777 if is_punct_close(last) && is_punct_punctuation(next) {
1778 return false;
1779 }
1780 if last == "." || next == "." {
1781 return false;
1782 }
1783 if last_is_word_like(last) && first_is_word_like(next) {
1784 return true;
1785 }
1786 if last_ends_with_alnum(last) && first_is_alnum_or_underscore(next) {
1787 return true;
1788 }
1789 true
1792}
1793
/// Reports whether `s` is exactly one of the opening tokens
/// `(`, `[`, `{`, `"`, `'` or a backtick.
fn is_punct_open(s: &str) -> bool {
    const OPENERS: [&str; 6] = ["(", "[", "{", "\"", "'", "`"];
    OPENERS.contains(&s)
}
1797
/// Reports whether `s` is exactly one of the closing or trailing tokens
/// `)`, `]`, `}`, `,`, `;`, `:`, `"`, `'` or a backtick.
fn is_punct_close(s: &str) -> bool {
    const CLOSERS: &[&str] = &[")", "]", "}", ",", ";", ":", "\"", "'", "`"];
    CLOSERS.iter().any(|&closer| closer == s)
}
1801
/// Reports whether `s` is exactly one of the trailing punctuation tokens
/// `,`, `;`, `:`, `.`, `)`, `]` or `}`.
fn is_punct_punctuation(s: &str) -> bool {
    [",", ";", ":", ".", ")", "]", "}"].contains(&s)
}
1805
/// Reports whether the final character of `s` is alphanumeric or `_`.
///
/// An empty string yields `false`.
fn last_is_word_like(s: &str) -> bool {
    s.ends_with(|c: char| c == '_' || c.is_alphanumeric())
}
1812
/// Reports whether the first character of `s` is alphanumeric or `_`.
///
/// An empty string yields `false`.
fn first_is_word_like(s: &str) -> bool {
    s.starts_with(|c: char| c == '_' || c.is_alphanumeric())
}
1819
/// Reports whether the final character of `s` is alphanumeric.
///
/// Unlike `last_is_word_like`, a trailing `_` does not count. An empty string
/// yields `false`.
fn last_ends_with_alnum(s: &str) -> bool {
    s.ends_with(char::is_alphanumeric)
}
1826
/// Reports whether `s` begins with an alphanumeric character or `_`.
///
/// An empty string yields `false`.
fn first_is_alnum_or_underscore(s: &str) -> bool {
    matches!(s.chars().next(), Some(first) if first == '_' || first.is_alphanumeric())
}
1833
// Unit tests for grammar loading, the `Output`/`layout` token renderer, the
// child cursor, and the production-walking helpers.
#[cfg(test)]
mod tests {
    use super::*;

    /// A minimal grammar with one SEQ rule loads and exposes that rule by name.
    #[test]
    fn parses_simple_grammar_json() {
        let bytes = br#"{
            "name": "tiny",
            "rules": {
                "program": {
                    "type": "SEQ",
                    "members": [
                        {"type": "STRING", "value": "hello"},
                        {"type": "STRING", "value": ";"}
                    ]
                }
            }
        }"#;
        let g = Grammar::from_bytes("tiny", bytes).expect("valid tiny grammar");
        assert!(g.rules.contains_key("program"));
    }

    /// Brackets and the trailing semicolon attach to the preceding word
    /// without any separator in between.
    #[test]
    fn output_emits_punctuation_without_leading_space() {
        let policy = FormatPolicy::default();
        let mut out = Output::new(&policy);
        out.token("foo");
        out.token("(");
        out.token(")");
        out.token(";");
        let bytes = out.finish();
        let s = std::str::from_utf8(&bytes).expect("ascii output");
        assert!(s.starts_with("foo();"), "got {s:?}");
    }

    /// Non-JSON input is rejected, and the error text contains the protocol
    /// name that was passed to `Grammar::from_bytes`.
    #[test]
    fn grammar_from_bytes_rejects_malformed_input() {
        let result = Grammar::from_bytes("malformed", b"not json");
        let err = result.expect_err("malformed bytes must yield Err");
        let msg = err.to_string();
        assert!(
            msg.contains("malformed"),
            "error message should name the protocol: {msg:?}"
        );
    }

    /// With the default policy, `{` is followed by a newline and the output
    /// ends with `}` on its own terminated line.
    #[test]
    fn output_indents_after_open_brace() {
        let policy = FormatPolicy::default();
        let mut out = Output::new(&policy);
        out.token("fn");
        out.token("foo");
        out.token("(");
        out.token(")");
        out.token("{");
        out.token("body");
        out.token("}");
        let bytes = out.finish();
        let s = std::str::from_utf8(&bytes).expect("ascii output");
        assert!(s.contains("{\n"), "newline after opening brace: {s:?}");
        assert!(s.contains("body"), "body inside block: {s:?}");
        assert!(s.ends_with("}\n"), "newline after closing brace: {s:?}");
    }

    /// A lone `.` binds tightly to both neighbours.
    #[test]
    fn output_no_space_between_word_and_dot() {
        let policy = FormatPolicy::default();
        let mut out = Output::new(&policy);
        out.token("foo");
        out.token(".");
        out.token("bar");
        let bytes = out.finish();
        let s = std::str::from_utf8(&bytes).expect("ascii output");
        assert!(s.starts_with("foo.bar"), "no space around dot: {s:?}");
    }

    /// Tokens emitted between `snapshot` and `restore` are discarded, while
    /// tokens before the snapshot and after the restore are kept.
    #[test]
    fn output_snapshot_restore_truncates_bytes() {
        let policy = FormatPolicy::default();
        let mut out = Output::new(&policy);
        out.token("keep");
        let snap = out.snapshot();
        out.token("drop");
        out.token("more");
        out.restore(snap);
        out.token("after");
        let bytes = out.finish();
        let s = std::str::from_utf8(&bytes).expect("ascii output");
        assert!(s.contains("keep"), "kept token survives: {s:?}");
        assert!(s.contains("after"), "post-restore token visible: {s:?}");
        assert!(!s.contains("drop"), "rolled-back token removed: {s:?}");
        assert!(!s.contains("more"), "rolled-back token removed: {s:?}");
    }

    /// Taking a field edge consumes it: a second take of the same field
    /// finds nothing.
    #[test]
    fn child_cursor_take_field_consumes_once() {
        let edges_owned: Vec<Edge> = vec![Edge {
            src: panproto_gat::Name::from("p"),
            tgt: panproto_gat::Name::from("c"),
            kind: panproto_gat::Name::from("name"),
            name: None,
        }];
        let edges: Vec<&Edge> = edges_owned.iter().collect();
        let mut cursor = ChildCursor::new(&edges);
        let first = cursor.take_field("name");
        let second = cursor.take_field("name");
        assert!(first.is_some(), "first take returns the edge");
        assert!(
            second.is_none(),
            "second take returns None (already consumed)"
        );
    }

    /// `take_matching` consumes only the edge selected by the predicate and
    /// leaves the other edge available.
    #[test]
    fn child_cursor_take_matching_predicate() {
        let edges_owned: Vec<Edge> = vec![
            Edge {
                src: "p".into(),
                tgt: "c1".into(),
                kind: "child_of".into(),
                name: None,
            },
            Edge {
                src: "p".into(),
                tgt: "c2".into(),
                kind: "key".into(),
                name: None,
            },
        ];
        let edges: Vec<&Edge> = edges_owned.iter().collect();
        let mut cursor = ChildCursor::new(&edges);
        assert!(cursor.has_matching(|e| e.kind.as_ref() == "key"));
        let taken = cursor.take_matching(|e| e.kind.as_ref() == "key");
        assert!(taken.is_some());
        assert!(
            !cursor.has_matching(|e| e.kind.as_ref() == "key"),
            "consumed edge no longer matches"
        );
        assert!(
            cursor.has_matching(|e| e.kind.as_ref() == "child_of"),
            "the other edge is still available"
        );
    }

    /// A kind satisfies a symbol of the same name; a different kind or a
    /// missing kind does not.
    #[test]
    fn kind_satisfies_symbol_direct_match() {
        let bytes = br#"{
            "name": "tiny",
            "rules": {
                "x": {"type": "STRING", "value": "x"}
            }
        }"#;
        let g = Grammar::from_bytes("tiny", bytes).expect("valid grammar");
        assert!(kind_satisfies_symbol(&g, Some("x"), "x"));
        assert!(!kind_satisfies_symbol(&g, Some("y"), "x"));
        assert!(!kind_satisfies_symbol(&g, None, "x"));
    }

    /// A kind satisfies an underscore-prefixed rule when it is one of that
    /// rule's CHOICE alternatives, and not otherwise.
    #[test]
    fn kind_satisfies_symbol_through_hidden_rule() {
        let bytes = br#"{
            "name": "tiny",
            "rules": {
                "_value": {
                    "type": "CHOICE",
                    "members": [
                        {"type": "SYMBOL", "name": "object"},
                        {"type": "SYMBOL", "name": "number"}
                    ]
                },
                "object": {"type": "STRING", "value": "{}"},
                "number": {"type": "PATTERN", "value": "[0-9]+"}
            }
        }"#;
        let g = Grammar::from_bytes("tiny", bytes).expect("valid grammar");
        assert!(
            kind_satisfies_symbol(&g, Some("number"), "_value"),
            "number is reachable from _value via CHOICE"
        );
        assert!(
            kind_satisfies_symbol(&g, Some("object"), "_value"),
            "object is reachable from _value via CHOICE"
        );
        assert!(
            !kind_satisfies_symbol(&g, Some("string"), "_value"),
            "string is NOT among the alternatives"
        );
    }

    /// `first_symbol` looks past a leading STRING terminal and reports the
    /// first SYMBOL in the sequence.
    #[test]
    fn first_symbol_skips_string_terminals() {
        let prod: Production = serde_json::from_str(
            r#"{
                "type": "SEQ",
                "members": [
                    {"type": "STRING", "value": "{"},
                    {"type": "SYMBOL", "name": "body"},
                    {"type": "STRING", "value": "}"}
                ]
            }"#,
        )
        .expect("valid SEQ");
        assert_eq!(first_symbol(&prod), Some("body"));
    }

    /// Non-literal patterns map to a placeholder chosen by the character
    /// class they mention: digits, identifier characters, or quotes.
    #[test]
    fn placeholder_for_pattern_routes_by_regex_class() {
        assert_eq!(placeholder_for_pattern("[0-9]+"), "0");
        assert_eq!(placeholder_for_pattern("[a-zA-Z_]\\w*"), "_x");
        assert_eq!(placeholder_for_pattern("\"[^\"]*\""), "\"\"");
        assert_eq!(placeholder_for_pattern("\\d+\\.\\d+"), "0");
    }

    /// Pins the default policy values the other layout tests rely on.
    #[test]
    fn format_policy_default_breaks_after_semicolon() {
        let policy = FormatPolicy::default();
        assert!(policy.line_break_after.iter().any(|t| t == ";"));
        assert!(policy.indent_open.iter().any(|t| t == "{"));
        assert!(policy.indent_close.iter().any(|t| t == "}"));
        assert_eq!(policy.indent_width, 2);
    }

    /// Patterns that are plain (possibly escaped) literals decode to the
    /// literal text; patterns with regex operators fall back to the
    /// class-based placeholders.
    #[test]
    fn placeholder_decodes_literal_pattern_separators() {
        // Escaped control characters decode to the real characters.
        assert_eq!(placeholder_for_pattern("\\n"), "\n");
        assert_eq!(placeholder_for_pattern("\\r\\n"), "\r\n");
        assert_eq!(placeholder_for_pattern(";"), ";");
        // Character classes are not literals.
        assert_eq!(placeholder_for_pattern("[0-9]+"), "0");
        // Alternation is not a literal and matches no known class.
        assert_eq!(placeholder_for_pattern("a|b"), "_");
    }

    /// `supertypes` entries given as bare strings are accepted, and a
    /// supertype is satisfied by its CHOICE alternatives only.
    #[test]
    fn supertypes_decode_from_grammar_json_strings() {
        let bytes = br#"{
            "name": "tiny",
            "supertypes": ["expression"],
            "rules": {
                "expression": {
                    "type": "CHOICE",
                    "members": [
                        {"type": "SYMBOL", "name": "binary_expression"},
                        {"type": "SYMBOL", "name": "identifier"}
                    ]
                },
                "binary_expression": {"type": "STRING", "value": "x"},
                "identifier": {"type": "PATTERN", "value": "[a-z]+"}
            }
        }"#;
        let g = Grammar::from_bytes("tiny", bytes).expect("parse");
        assert!(g.supertypes.contains("expression"));
        assert!(kind_satisfies_symbol(&g, Some("identifier"), "expression"));
        assert!(!kind_satisfies_symbol(&g, Some("string"), "expression"));
    }

    /// `supertypes` entries given as SYMBOL objects are accepted as well.
    #[test]
    fn supertypes_decode_from_grammar_json_objects() {
        let bytes = br#"{
            "name": "tiny",
            "supertypes": [{"type": "SYMBOL", "name": "stmt"}],
            "rules": {
                "stmt": {
                    "type": "CHOICE",
                    "members": [
                        {"type": "SYMBOL", "name": "while_stmt"},
                        {"type": "SYMBOL", "name": "if_stmt"}
                    ]
                },
                "while_stmt": {"type": "STRING", "value": "while"},
                "if_stmt": {"type": "STRING", "value": "if"}
            }
        }"#;
        let g = Grammar::from_bytes("tiny", bytes).expect("parse");
        assert!(g.supertypes.contains("stmt"));
        assert!(kind_satisfies_symbol(&g, Some("while_stmt"), "stmt"));
    }

    /// A kind equal to an ALIAS rule's `value` satisfies that rule.
    #[test]
    fn alias_value_matches_kind() {
        let bytes = br#"{
            "name": "tiny",
            "rules": {
                "_package_identifier": {
                    "type": "ALIAS",
                    "named": true,
                    "value": "package_identifier",
                    "content": {"type": "SYMBOL", "name": "identifier"}
                },
                "identifier": {"type": "PATTERN", "value": "[a-z]+"}
            }
        }"#;
        let g = Grammar::from_bytes("tiny", bytes).expect("parse");
        assert!(kind_satisfies_symbol(
            &g,
            Some("package_identifier"),
            "_package_identifier"
        ));
    }

    /// `referenced_symbols` finds SYMBOLs nested inside CHOICE, REPEAT and
    /// inner SEQ productions.
    #[test]
    fn referenced_symbols_walks_nested_seq() {
        let prod: Production = serde_json::from_str(
            r#"{
                "type": "SEQ",
                "members": [
                    {"type": "CHOICE", "members": [
                        {"type": "SYMBOL", "name": "attribute_item"},
                        {"type": "BLANK"}
                    ]},
                    {"type": "SYMBOL", "name": "parameter"},
                    {"type": "REPEAT", "content": {
                        "type": "SEQ",
                        "members": [
                            {"type": "STRING", "value": ","},
                            {"type": "SYMBOL", "name": "parameter"}
                        ]
                    }}
                ]
            }"#,
        )
        .expect("seq");
        let symbols = referenced_symbols(&prod);
        assert!(symbols.contains(&"attribute_item"));
        assert!(symbols.contains(&"parameter"));
    }

    /// `literal_strings` returns the STRING members of a CHOICE in
    /// declaration order.
    #[test]
    fn literal_strings_collects_choice_members() {
        let prod: Production = serde_json::from_str(
            r#"{
                "type": "CHOICE",
                "members": [
                    {"type": "STRING", "value": "+"},
                    {"type": "STRING", "value": "-"},
                    {"type": "STRING", "value": "*"}
                ]
            }"#,
        )
        .expect("choice");
        let strings = literal_strings(&prod);
        assert_eq!(strings, vec!["+", "-", "*"]);
    }

    /// A `RESERVED` production deserialises into `Production::Reserved` with
    /// its inner SYMBOL content intact.
    #[test]
    fn reserved_variant_deserialises() {
        let prod: Production = serde_json::from_str(
            r#"{
                "type": "RESERVED",
                "content": {"type": "SYMBOL", "name": "_lowercase_identifier"},
                "context_name": "attribute_id"
            }"#,
        )
        .expect("RESERVED parses");
        match prod {
            Production::Reserved { content, .. } => match *content {
                Production::Symbol { name } => assert_eq!(name, "_lowercase_identifier"),
                other => panic!("expected inner SYMBOL, got {other:?}"),
            },
            other => panic!("expected RESERVED, got {other:?}"),
        }
    }

    /// A whole grammar whose rule is a `RESERVED` production loads through
    /// `Grammar::from_bytes`.
    #[test]
    fn reserved_grammar_loads_end_to_end() {
        let bytes = br#"{
            "name": "tiny_reserved",
            "rules": {
                "program": {
                    "type": "RESERVED",
                    "content": {"type": "SYMBOL", "name": "ident"},
                    "context_name": "keywords"
                },
                "ident": {"type": "PATTERN", "value": "[a-z]+"}
            }
        }"#;
        let g = Grammar::from_bytes("tiny_reserved", bytes).expect("RESERVED-using grammar loads");
        assert!(g.rules.contains_key("program"));
    }

    /// `first_symbol`, `has_field_in` and `referenced_symbols` all look
    /// through a `RESERVED` wrapper into its content.
    #[test]
    fn reserved_walker_helpers_recurse_into_content() {
        let prod: Production = serde_json::from_str(
            r#"{
                "type": "RESERVED",
                "content": {
                    "type": "FIELD",
                    "name": "lhs",
                    "content": {"type": "SYMBOL", "name": "expr"}
                },
                "context_name": "ctx"
            }"#,
        )
        .expect("nested RESERVED parses");
        assert_eq!(first_symbol(&prod), Some("expr"));
        assert!(has_field_in(&prod, &["lhs"]));
        let symbols = referenced_symbols(&prod);
        assert!(symbols.contains(&"expr"));
    }
}