1#![allow(
2 clippy::module_name_repetitions,
3 clippy::too_many_lines,
4 clippy::too_many_arguments,
5 clippy::map_unwrap_or,
6 clippy::option_if_let_else,
7 clippy::elidable_lifetime_names,
8 clippy::items_after_statements,
9 clippy::needless_pass_by_value,
10 clippy::single_match_else,
11 clippy::manual_let_else,
12 clippy::match_same_arms,
13 clippy::missing_const_for_fn,
14 clippy::single_char_pattern,
15 clippy::naive_bytecount,
16 clippy::expect_used,
17 clippy::redundant_pub_crate,
18 clippy::used_underscore_binding,
19 clippy::redundant_field_names,
20 clippy::struct_field_names,
21 clippy::redundant_else,
22 clippy::similar_names
23)]
24
25use std::collections::BTreeMap;
71
72use panproto_schema::{Edge, Schema};
73use serde::Deserialize;
74
75use crate::error::ParseError;
76
/// One node of a grammar production tree.
///
/// Deserialized from a JSON object whose `"type"` field selects the variant
/// (serde internally-tagged representation). The tag strings (`SEQ`,
/// `CHOICE`, `SYMBOL`, ...) are given by the `rename` attributes below.
// NOTE(review): the tag set looks like the rule node types of a tree-sitter
// `grammar.json`; confirm against the grammar producer before relying on it.
#[derive(Debug, Clone, Deserialize)]
#[serde(tag = "type")]
#[non_exhaustive]
pub enum Production {
    /// `SEQ`: an ordered list of member productions.
    #[serde(rename = "SEQ")]
    Seq {
        /// Members, in declaration order.
        members: Vec<Self>,
    },
    /// `CHOICE`: a list of alternative productions.
    #[serde(rename = "CHOICE")]
    Choice {
        /// The alternatives, in declaration order.
        members: Vec<Self>,
    },
    /// `REPEAT`: wraps a single repeated production.
    #[serde(rename = "REPEAT")]
    Repeat {
        /// The repeated production.
        content: Box<Self>,
    },
    /// `REPEAT1`: like `Repeat`; the emitter emits the content at least once.
    #[serde(rename = "REPEAT1")]
    Repeat1 {
        /// The repeated production.
        content: Box<Self>,
    },
    /// `OPTIONAL`: wraps a production that may be omitted.
    #[serde(rename = "OPTIONAL")]
    Optional {
        /// The optional production.
        content: Box<Self>,
    },
    /// `SYMBOL`: a reference to another rule by name.
    #[serde(rename = "SYMBOL")]
    Symbol {
        /// Name of the referenced rule.
        name: String,
    },
    /// `STRING`: a literal piece of text.
    #[serde(rename = "STRING")]
    String {
        /// The literal text.
        value: String,
    },
    /// `PATTERN`: a pattern given as text.
    #[serde(rename = "PATTERN")]
    Pattern {
        /// The pattern source text.
        value: String,
    },
    /// `BLANK`: carries no data; the emitter produces nothing for it.
    #[serde(rename = "BLANK")]
    Blank,
    /// `FIELD`: a named wrapper around a production.
    #[serde(rename = "FIELD")]
    Field {
        /// Field name; the emitter matches it against child edge kinds.
        name: String,
        /// The production inside the field.
        content: Box<Self>,
    },
    /// `ALIAS`: wraps a production and gives it another name.
    #[serde(rename = "ALIAS")]
    Alias {
        /// The aliased production.
        content: Box<Self>,
        /// Whether the alias is named; defaults to `false` when absent.
        #[serde(default)]
        named: bool,
        /// The alias name; defaults to the empty string when absent.
        #[serde(default)]
        value: String,
    },
    /// `TOKEN`: wrapper around a production.
    #[serde(rename = "TOKEN")]
    Token {
        /// The wrapped production.
        content: Box<Self>,
    },
    /// `IMMEDIATE_TOKEN`: wrapper around a production.
    #[serde(rename = "IMMEDIATE_TOKEN")]
    ImmediateToken {
        /// The wrapped production.
        content: Box<Self>,
    },
    /// `PREC`: wrapper carrying a precedence value.
    #[serde(rename = "PREC")]
    Prec {
        /// Raw JSON precedence value; kept but not read in this file.
        #[allow(dead_code)]
        value: serde_json::Value,
        /// The wrapped production.
        content: Box<Self>,
    },
    /// `PREC_LEFT`: wrapper carrying a precedence value.
    #[serde(rename = "PREC_LEFT")]
    PrecLeft {
        /// Raw JSON precedence value; kept but not read in this file.
        #[allow(dead_code)]
        value: serde_json::Value,
        /// The wrapped production.
        content: Box<Self>,
    },
    /// `PREC_RIGHT`: wrapper carrying a precedence value.
    #[serde(rename = "PREC_RIGHT")]
    PrecRight {
        /// Raw JSON precedence value; kept but not read in this file.
        #[allow(dead_code)]
        value: serde_json::Value,
        /// The wrapped production.
        content: Box<Self>,
    },
    /// `PREC_DYNAMIC`: wrapper carrying a precedence value.
    #[serde(rename = "PREC_DYNAMIC")]
    PrecDynamic {
        /// Raw JSON precedence value; kept but not read in this file.
        #[allow(dead_code)]
        value: serde_json::Value,
        /// The wrapped production.
        content: Box<Self>,
    },
    /// `RESERVED`: wrapper carrying a context name.
    #[serde(rename = "RESERVED")]
    Reserved {
        /// The wrapped production.
        content: Box<Self>,
        /// Context name; defaults to the empty string when absent. Kept but
        /// not read in this file.
        #[allow(dead_code)]
        #[serde(default)]
        context_name: String,
    },
}
253
/// A deserialized grammar: its name, its rules, and supertype information.
///
/// Build instances with [`Grammar::from_bytes`], which also fills
/// `subtypes`; plain deserialization leaves that field empty.
#[derive(Debug, Clone, Deserialize)]
pub struct Grammar {
    /// Grammar name as given in the input.
    #[allow(dead_code)]
    pub name: String,
    /// Rule name → production, ordered by rule name.
    pub rules: BTreeMap<String, Production>,
    /// Names listed under `supertypes` in the input; empty when the key is
    /// absent. Entries may be plain strings or objects with a string `name`
    /// (see `deserialize_supertypes`).
    #[serde(default, deserialize_with = "deserialize_supertypes")]
    pub supertypes: std::collections::HashSet<String>,
    /// Never read from the input (`serde(skip)`); computed by
    /// `compute_subtype_closure` in `from_bytes`. Keyed by a kind, the value
    /// is the set of names that kind may stand in for — this is how
    /// `kind_satisfies_symbol` reads it.
    #[serde(skip)]
    pub subtypes: std::collections::HashMap<String, std::collections::HashSet<String>>,
}
292
293fn deserialize_supertypes<'de, D>(
294 deserializer: D,
295) -> Result<std::collections::HashSet<String>, D::Error>
296where
297 D: serde::Deserializer<'de>,
298{
299 let entries: Vec<serde_json::Value> = Vec::deserialize(deserializer)?;
300 let mut out = std::collections::HashSet::new();
301 for entry in entries {
302 match entry {
303 serde_json::Value::String(s) => {
304 out.insert(s);
305 }
306 serde_json::Value::Object(map) => {
307 if let Some(serde_json::Value::String(name)) = map.get("name") {
308 out.insert(name.clone());
309 }
310 }
311 _ => {}
312 }
313 }
314 Ok(out)
315}
316
317impl Grammar {
318 pub fn from_bytes(protocol: &str, bytes: &[u8]) -> Result<Self, ParseError> {
338 let mut grammar: Self =
339 serde_json::from_slice(bytes).map_err(|e| ParseError::EmitFailed {
340 protocol: protocol.to_owned(),
341 reason: format!("grammar.json deserialization failed: {e}"),
342 })?;
343 grammar.subtypes = compute_subtype_closure(&grammar);
344 Ok(grammar)
345 }
346}
347
348fn compute_subtype_closure(
351 grammar: &Grammar,
352) -> std::collections::HashMap<String, std::collections::HashSet<String>> {
353 use std::collections::{HashMap, HashSet};
354 let mut subtypes: HashMap<String, HashSet<String>> = HashMap::new();
359 for name in grammar.rules.keys() {
360 subtypes
361 .entry(name.clone())
362 .or_default()
363 .insert(name.clone());
364 }
365
366 fn walk<'g>(
370 grammar: &'g Grammar,
371 production: &'g Production,
372 visited: &mut HashSet<&'g str>,
373 out: &mut HashSet<String>,
374 ) {
375 match production {
376 Production::Symbol { name } => {
377 out.insert(name.clone());
379 let expand = name.starts_with('_') || grammar.supertypes.contains(name.as_str());
382 if expand && visited.insert(name.as_str()) {
383 if let Some(rule) = grammar.rules.get(name) {
384 walk(grammar, rule, visited, out);
385 }
386 }
387 }
388 Production::Choice { members } | Production::Seq { members } => {
389 for m in members {
390 walk(grammar, m, visited, out);
391 }
392 }
393 Production::Alias {
394 content,
395 named,
396 value,
397 } => {
398 if *named && !value.is_empty() {
399 out.insert(value.clone());
400 }
401 walk(grammar, content, visited, out);
402 }
403 Production::Repeat { content }
404 | Production::Repeat1 { content }
405 | Production::Optional { content }
406 | Production::Field { content, .. }
407 | Production::Token { content }
408 | Production::ImmediateToken { content }
409 | Production::Prec { content, .. }
410 | Production::PrecLeft { content, .. }
411 | Production::PrecRight { content, .. }
412 | Production::PrecDynamic { content, .. }
413 | Production::Reserved { content, .. } => {
414 walk(grammar, content, visited, out);
415 }
416 _ => {}
417 }
418 }
419
420 for (name, rule) in &grammar.rules {
421 let expand = name.starts_with('_') || grammar.supertypes.contains(name.as_str());
422 if !expand {
423 continue;
424 }
425 let mut visited: HashSet<&str> = HashSet::new();
426 visited.insert(name.as_str());
427 let mut reachable: HashSet<String> = HashSet::new();
428 walk(grammar, rule, &mut visited, &mut reachable);
429 for kind in &reachable {
430 subtypes
431 .entry(kind.clone())
432 .or_default()
433 .insert(name.clone());
434 }
435 }
436
437 fn collect_aliases<'g>(production: &'g Production, out: &mut Vec<(String, &'g Production)>) {
443 match production {
444 Production::Alias {
445 content,
446 named,
447 value,
448 } => {
449 if *named && !value.is_empty() {
450 out.push((value.clone(), content.as_ref()));
451 }
452 collect_aliases(content, out);
453 }
454 Production::Choice { members } | Production::Seq { members } => {
455 for m in members {
456 collect_aliases(m, out);
457 }
458 }
459 Production::Repeat { content }
460 | Production::Repeat1 { content }
461 | Production::Optional { content }
462 | Production::Field { content, .. }
463 | Production::Token { content }
464 | Production::ImmediateToken { content }
465 | Production::Prec { content, .. }
466 | Production::PrecLeft { content, .. }
467 | Production::PrecRight { content, .. }
468 | Production::PrecDynamic { content, .. }
469 | Production::Reserved { content, .. } => {
470 collect_aliases(content, out);
471 }
472 _ => {}
473 }
474 }
475 let mut aliases: Vec<(String, &Production)> = Vec::new();
476 for rule in grammar.rules.values() {
477 collect_aliases(rule, &mut aliases);
478 }
479 for (alias_value, content) in aliases {
480 let mut visited: HashSet<&str> = HashSet::new();
481 let mut reachable: HashSet<String> = HashSet::new();
482 walk(grammar, content, &mut visited, &mut reachable);
483 subtypes
486 .entry(alias_value.clone())
487 .or_default()
488 .insert(alias_value.clone());
489 for kind in reachable {
490 subtypes
491 .entry(kind)
492 .or_default()
493 .insert(alias_value.clone());
494 }
495 }
496
497 for _ in 0..8 {
501 let snapshot = subtypes.clone();
502 let mut changed = false;
503 for (kind, supers) in &snapshot {
504 let extra: HashSet<String> = supers
505 .iter()
506 .flat_map(|s| snapshot.get(s).cloned().unwrap_or_default())
507 .collect();
508 let entry = subtypes.entry(kind.clone()).or_default();
509 for s in extra {
510 if entry.insert(s) {
511 changed = true;
512 }
513 }
514 }
515 if !changed {
516 break;
517 }
518 }
519
520 subtypes
521}
522
/// Layout settings used when turning emitted tokens into text.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct FormatPolicy {
    /// Number of spaces written per indent level at the start of a line.
    pub indent_width: usize,
    /// Text inserted between two literals on the same line when
    /// `needs_space_between` asks for a gap.
    pub separator: String,
    /// Text written to end a line.
    pub newline: String,
    /// Literals after which a line break is requested.
    pub line_break_after: Vec<String>,
    /// Literals that raise the indent level (and request a line break) after
    /// being written.
    pub indent_open: Vec<String>,
    /// Literals that lower the indent level before being written.
    pub indent_close: Vec<String>,
}
553
554impl Default for FormatPolicy {
555 fn default() -> Self {
556 Self {
557 indent_width: 2,
558 separator: " ".to_owned(),
559 newline: "\n".to_owned(),
560 line_break_after: vec![";".into(), "{".into(), "}".into()],
561 indent_open: vec!["{".into()],
562 indent_close: vec!["}".into()],
563 }
564 }
565}
566
567pub fn emit_pretty(
591 protocol: &str,
592 schema: &Schema,
593 grammar: &Grammar,
594 policy: &FormatPolicy,
595) -> Result<Vec<u8>, ParseError> {
596 let roots = collect_roots(schema);
597 if roots.is_empty() {
598 return Err(ParseError::EmitFailed {
599 protocol: protocol.to_owned(),
600 reason: "schema has no entry vertices".to_owned(),
601 });
602 }
603
604 let mut out = Output::new(policy);
605 for (i, root) in roots.iter().enumerate() {
606 if i > 0 {
607 out.newline();
608 }
609 emit_vertex(protocol, schema, grammar, root, &mut out)?;
610 }
611 Ok(out.finish())
612}
613
614fn collect_roots(schema: &Schema) -> Vec<&panproto_gat::Name> {
615 if !schema.entries.is_empty() {
616 return schema
617 .entries
618 .iter()
619 .filter(|name| schema.vertices.contains_key(*name))
620 .collect();
621 }
622
623 let mut targets: std::collections::HashSet<&panproto_gat::Name> =
626 std::collections::HashSet::new();
627 for edge in schema.edges.keys() {
628 targets.insert(&edge.tgt);
629 }
630 let mut roots: Vec<&panproto_gat::Name> = schema
631 .vertices
632 .keys()
633 .filter(|name| !targets.contains(name))
634 .collect();
635 roots.sort();
636 roots
637}
638
639fn emit_vertex(
640 protocol: &str,
641 schema: &Schema,
642 grammar: &Grammar,
643 vertex_id: &panproto_gat::Name,
644 out: &mut Output<'_>,
645) -> Result<(), ParseError> {
646 let vertex = schema
647 .vertices
648 .get(vertex_id)
649 .ok_or_else(|| ParseError::EmitFailed {
650 protocol: protocol.to_owned(),
651 reason: format!("vertex '{vertex_id}' not found"),
652 })?;
653
654 if let Some(literal) = literal_value(schema, vertex_id) {
660 if children_for(schema, vertex_id).is_empty() {
661 out.token(literal);
662 return Ok(());
663 }
664 }
665
666 let kind = vertex.kind.as_ref();
667 let edges = children_for(schema, vertex_id);
668 if let Some(rule) = grammar.rules.get(kind) {
669 let mut cursor = ChildCursor::new(&edges);
670 return emit_production(protocol, schema, grammar, vertex_id, rule, &mut cursor, out);
671 }
672
673 for edge in &edges {
680 emit_vertex(protocol, schema, grammar, &edge.tgt, out)?;
681 }
682 Ok(())
683}
684
/// Tracks which child edges of a vertex have already been used while a
/// production is being emitted.
struct ChildCursor<'a> {
    /// The child edges being consumed, in the order they were supplied.
    edges: &'a [&'a Edge],
    /// One flag per entry of `edges`; `true` once that edge has been taken.
    consumed: Vec<bool>,
}
691
692impl<'a> ChildCursor<'a> {
693 fn new(edges: &'a [&'a Edge]) -> Self {
694 Self {
695 edges,
696 consumed: vec![false; edges.len()],
697 }
698 }
699
700 fn take_field(&mut self, field_name: &str) -> Option<&'a Edge> {
702 for (i, edge) in self.edges.iter().enumerate() {
703 if !self.consumed[i] && edge.kind.as_ref() == field_name {
704 self.consumed[i] = true;
705 return Some(edge);
706 }
707 }
708 None
709 }
710
711 fn take_matching(&mut self, predicate: impl Fn(&Edge) -> bool) -> Option<&'a Edge> {
715 for (i, edge) in self.edges.iter().enumerate() {
716 if !self.consumed[i] && predicate(edge) {
717 self.consumed[i] = true;
718 return Some(edge);
719 }
720 }
721 None
722 }
723
724 fn has_matching(&self, predicate: impl Fn(&Edge) -> bool) -> bool {
726 self.edges
727 .iter()
728 .enumerate()
729 .any(|(i, edge)| !self.consumed[i] && predicate(edge))
730 }
731}
732
thread_local! {
    // Number of `emit_production` calls currently active on this thread;
    // incremented on entry and decremented on exit, and used there to stop
    // runaway recursion.
    static EMIT_DEPTH: std::cell::Cell<usize> = const { std::cell::Cell::new(0) };
    // The (vertex id, rule name) pairs currently being expanded by
    // `walk_in_mu_frame` on this thread; a pair is inserted before the rule
    // is walked and removed afterwards, so re-entering the same pair is
    // detected and skipped.
    static EMIT_MU_FRAMES: std::cell::RefCell<std::collections::HashSet<(String, String)>> =
        std::cell::RefCell::new(std::collections::HashSet::new());
}
747
748fn walk_in_mu_frame(
753 protocol: &str,
754 schema: &Schema,
755 grammar: &Grammar,
756 vertex_id: &panproto_gat::Name,
757 rule_name: &str,
758 rule: &Production,
759 cursor: &mut ChildCursor<'_>,
760 out: &mut Output<'_>,
761) -> Result<(), ParseError> {
762 let key = (vertex_id.to_string(), rule_name.to_owned());
763 let inserted = EMIT_MU_FRAMES.with(|frames| frames.borrow_mut().insert(key.clone()));
764 if !inserted {
765 return Ok(());
770 }
771 let result = emit_production(protocol, schema, grammar, vertex_id, rule, cursor, out);
772 EMIT_MU_FRAMES.with(|frames| {
773 frames.borrow_mut().remove(&key);
774 });
775 result
776}
777
778fn emit_production(
779 protocol: &str,
780 schema: &Schema,
781 grammar: &Grammar,
782 vertex_id: &panproto_gat::Name,
783 production: &Production,
784 cursor: &mut ChildCursor<'_>,
785 out: &mut Output<'_>,
786) -> Result<(), ParseError> {
787 let depth = EMIT_DEPTH.with(|d| {
788 let v = d.get() + 1;
789 d.set(v);
790 v
791 });
792 if depth > 500 {
793 EMIT_DEPTH.with(|d| d.set(d.get() - 1));
794 return Err(ParseError::EmitFailed {
795 protocol: protocol.to_owned(),
796 reason: format!(
797 "emit_production recursion >500 (likely a cyclic grammar; \
798 vertex='{vertex_id}')"
799 ),
800 });
801 }
802 let result = emit_production_inner(
803 protocol, schema, grammar, vertex_id, production, cursor, out,
804 );
805 EMIT_DEPTH.with(|d| d.set(d.get() - 1));
806 result
807}
808
/// Emits one production node for `vertex_id`, consuming child edges from
/// `cursor` and appending tokens to `out`.
///
/// Called through `emit_production`, which applies the recursion-depth
/// guard; nested productions go back through that wrapper.
///
/// # Errors
///
/// Propagates failures from nested emission. Failures inside a `Repeat` /
/// `Repeat1` iteration are rolled back and end the repetition instead of
/// being returned.
fn emit_production_inner(
    protocol: &str,
    schema: &Schema,
    grammar: &Grammar,
    vertex_id: &panproto_gat::Name,
    production: &Production,
    cursor: &mut ChildCursor<'_>,
    out: &mut Output<'_>,
) -> Result<(), ParseError> {
    match production {
        // Literal text is emitted as-is.
        Production::String { value } => {
            out.token(value);
            Ok(())
        }
        // Prefer the vertex's recorded literal; otherwise synthesize a
        // placeholder from the pattern text.
        Production::Pattern { value } => {
            if let Some(literal) = literal_value(schema, vertex_id) {
                out.token(literal);
            } else {
                out.token(&placeholder_for_pattern(value));
            }
            Ok(())
        }
        Production::Blank => Ok(()),
        Production::Symbol { name } => {
            if name.starts_with('_') {
                // Underscore-prefixed symbol: expand its rule in place for
                // the same vertex, guarded against re-entry.
                if let Some(rule) = grammar.rules.get(name) {
                    walk_in_mu_frame(
                        protocol, schema, grammar, vertex_id, name, rule, cursor, out,
                    )
                } else {
                    // No rule with this name: names that look like a line
                    // terminator produce a line break, anything else nothing.
                    if name.contains("line_ending")
                        || name.contains("newline")
                        || name.ends_with("_or_eof")
                    {
                        out.newline();
                    }
                    Ok(())
                }
            } else if let Some(edge) = take_symbol_match(grammar, schema, cursor, name) {
                // An unconsumed child satisfies the symbol: emit that child.
                emit_vertex(protocol, schema, grammar, &edge.tgt, out)
            } else if vertex_id_kind(schema, vertex_id) == Some(name.as_str()) {
                // The symbol names the current vertex's own kind: expand its
                // rule against the current vertex.
                let rule = grammar
                    .rules
                    .get(name)
                    .ok_or_else(|| ParseError::EmitFailed {
                        protocol: protocol.to_owned(),
                        reason: format!("no production for SYMBOL '{name}'"),
                    })?;
                walk_in_mu_frame(
                    protocol, schema, grammar, vertex_id, name, rule, cursor, out,
                )
            } else {
                // Nothing matches: emit nothing.
                Ok(())
            }
        }
        // Members are emitted in order; the first failure aborts the sequence.
        Production::Seq { members } => {
            for member in members {
                emit_production(protocol, schema, grammar, vertex_id, member, cursor, out)?;
            }
            Ok(())
        }
        // Emit the alternative chosen by `pick_choice_with_cursor`, if any.
        Production::Choice { members } => {
            if let Some(matched) =
                pick_choice_with_cursor(schema, grammar, vertex_id, cursor, members)
            {
                emit_production(protocol, schema, grammar, vertex_id, matched, cursor, out)
            } else {
                Ok(())
            }
        }
        Production::Repeat { content } | Production::Repeat1 { content } => {
            let mut emitted_any = false;
            loop {
                // Snapshot both the cursor and the output so a fruitless
                // iteration can be undone completely.
                let cursor_snap = cursor.consumed.clone();
                let out_snap = out.snapshot();
                let consumed_before = cursor.consumed.iter().filter(|&&c| c).count();
                let result =
                    emit_production(protocol, schema, grammar, vertex_id, content, cursor, out);
                let consumed_after = cursor.consumed.iter().filter(|&&c| c).count();
                // Stop when the iteration failed or consumed no child edge;
                // the latter is what keeps this loop from running forever.
                if result.is_err() || consumed_after == consumed_before {
                    cursor.consumed = cursor_snap;
                    out.restore(out_snap);
                    break;
                }
                emitted_any = true;
            }
            // `Repeat1` with no successful iteration: emit the content once
            // without rollback, propagating any error.
            if matches!(production, Production::Repeat1 { .. }) && !emitted_any {
                emit_production(protocol, schema, grammar, vertex_id, content, cursor, out)?;
            }
            Ok(())
        }
        Production::Optional { content } => {
            let cursor_snap = cursor.consumed.clone();
            let out_snap = out.snapshot();
            let consumed_before = cursor.consumed.iter().filter(|&&c| c).count();
            let result =
                emit_production(protocol, schema, grammar, vertex_id, content, cursor, out);
            if result.is_err() {
                // Undo partial work, then report the failure to the caller.
                cursor.consumed = cursor_snap;
                out.restore(out_snap);
                return result;
            }
            let consumed_after = cursor.consumed.iter().filter(|&&c| c).count();
            // Keep output that consumed no child only when a constraint on
            // the vertex refers to something inside the optional content.
            if consumed_after == consumed_before
                && !has_relevant_constraint(content, schema, vertex_id)
            {
                cursor.consumed = cursor_snap;
                out.restore(out_snap);
            }
            Ok(())
        }
        Production::Field { name, content } => {
            if let Some(edge) = cursor.take_field(name) {
                // A child edge carries this field name: emit the field
                // content for that child.
                emit_in_child_context(protocol, schema, grammar, &edge.tgt, content, out)
            } else if first_symbol(content).is_none() {
                // No such edge, and the content references no symbol at all:
                // emit it against the current vertex.
                emit_production(protocol, schema, grammar, vertex_id, content, cursor, out)
            } else {
                // The content needs a child that is not there: emit nothing.
                Ok(())
            }
        }
        Production::Alias {
            content,
            named,
            value,
        } => {
            if *named && !value.is_empty() {
                // Look for an unconsumed child whose vertex kind equals the
                // alias value and emit the aliased content for it.
                if let Some(edge) = cursor.take_matching(|edge| {
                    schema
                        .vertices
                        .get(&edge.tgt)
                        .map(|v| v.kind.as_ref() == value.as_str())
                        .unwrap_or(false)
                }) {
                    return emit_aliased_child(protocol, schema, grammar, &edge.tgt, content, out);
                }
            }
            // Anonymous alias, or no matching child: emit the content in place.
            emit_production(protocol, schema, grammar, vertex_id, content, cursor, out)
        }
        // These wrappers do not affect emission; emit what they wrap.
        Production::Token { content }
        | Production::ImmediateToken { content }
        | Production::Prec { content, .. }
        | Production::PrecLeft { content, .. }
        | Production::PrecRight { content, .. }
        | Production::PrecDynamic { content, .. }
        | Production::Reserved { content, .. } => {
            emit_production(protocol, schema, grammar, vertex_id, content, cursor, out)
        }
    }
}
1020
1021fn take_symbol_match<'a>(
1024 grammar: &Grammar,
1025 schema: &Schema,
1026 cursor: &mut ChildCursor<'a>,
1027 name: &str,
1028) -> Option<&'a Edge> {
1029 cursor.take_matching(|edge| {
1030 let target_kind = schema.vertices.get(&edge.tgt).map(|v| v.kind.as_ref());
1031 kind_satisfies_symbol(grammar, target_kind, name)
1032 })
1033}
1034
1035fn kind_satisfies_symbol(grammar: &Grammar, target_kind: Option<&str>, name: &str) -> bool {
1045 let Some(target) = target_kind else {
1046 return false;
1047 };
1048 if target == name {
1049 return true;
1050 }
1051 grammar
1052 .subtypes
1053 .get(target)
1054 .is_some_and(|set| set.contains(name))
1055}
1056
1057fn emit_aliased_child(
1091 protocol: &str,
1092 schema: &Schema,
1093 grammar: &Grammar,
1094 child_id: &panproto_gat::Name,
1095 content: &Production,
1096 out: &mut Output<'_>,
1097) -> Result<(), ParseError> {
1098 if let Some(literal) = literal_value(schema, child_id) {
1103 if children_for(schema, child_id).is_empty() {
1104 out.token(literal);
1105 return Ok(());
1106 }
1107 }
1108
1109 if let Production::Symbol { name } = content {
1112 if let Some(rule) = grammar.rules.get(name) {
1113 let edges = children_for(schema, child_id);
1114 let mut cursor = ChildCursor::new(&edges);
1115 return emit_production(protocol, schema, grammar, child_id, rule, &mut cursor, out);
1116 }
1117 }
1118
1119 let edges = children_for(schema, child_id);
1121 let mut cursor = ChildCursor::new(&edges);
1122 emit_production(
1123 protocol,
1124 schema,
1125 grammar,
1126 child_id,
1127 content,
1128 &mut cursor,
1129 out,
1130 )
1131}
1132
1133fn emit_in_child_context(
1134 protocol: &str,
1135 schema: &Schema,
1136 grammar: &Grammar,
1137 child_id: &panproto_gat::Name,
1138 production: &Production,
1139 out: &mut Output<'_>,
1140) -> Result<(), ParseError> {
1141 if !matches!(production, Production::Symbol { .. }) {
1150 let child_kind = schema.vertices.get(child_id).map(|v| v.kind.as_ref());
1151 let symbols = referenced_symbols(production);
1152 if symbols
1153 .iter()
1154 .any(|s| kind_satisfies_symbol(grammar, child_kind, s) || child_kind == Some(s))
1155 {
1156 return emit_vertex(protocol, schema, grammar, child_id, out);
1157 }
1158 }
1159 match production {
1160 Production::Symbol { .. } => emit_vertex(protocol, schema, grammar, child_id, out),
1161 _ => {
1162 let edges = children_for(schema, child_id);
1163 let mut cursor = ChildCursor::new(&edges);
1164 emit_production(
1165 protocol,
1166 schema,
1167 grammar,
1168 child_id,
1169 production,
1170 &mut cursor,
1171 out,
1172 )
1173 }
1174 }
1175}
1176
/// Chooses which alternative of a `CHOICE` to emit for `vertex_id`.
///
/// Strategies are tried in order; the first one that yields an answer wins:
///
/// 1. score alternatives by how much of their literal text appears in the
///    vertex's recorded constraint text (unique best score only);
/// 2. the first alternative referencing a symbol that some unconsumed child
///    satisfies;
/// 3. the first alternative containing a field named like an unconsumed
///    child edge;
/// 4. `Blank` if it is among the alternatives, else the first alternative.
///
/// Returns `None` only when `alternatives` is empty.
fn pick_choice_with_cursor<'a>(
    schema: &Schema,
    grammar: &Grammar,
    vertex_id: &panproto_gat::Name,
    cursor: &ChildCursor<'_>,
    alternatives: &'a [Production],
) -> Option<&'a Production> {
    // Text to score literals against: the `chose-alt-fingerprint` constraint
    // value when present, otherwise the space-joined values of every
    // `interstitial-*` constraint except the `*-start-byte` ones. Empty when
    // the vertex has no constraints.
    let constraint_blob = schema
        .constraints
        .get(vertex_id)
        .map(|cs| {
            let fingerprint: Option<&str> = cs
                .iter()
                .find(|c| c.sort.as_ref() == "chose-alt-fingerprint")
                .map(|c| c.value.as_str());
            if let Some(fp) = fingerprint {
                fp.to_owned()
            } else {
                cs.iter()
                    .filter(|c| {
                        let s = c.sort.as_ref();
                        s.starts_with("interstitial-") && !s.ends_with("-start-byte")
                    })
                    .map(|c| c.value.as_str())
                    .collect::<Vec<&str>>()
                    .join(" ")
            }
        })
        .unwrap_or_default();
    // Whitespace-separated kinds from the `chose-alt-child-kinds` constraint,
    // used only to break literal-score ties.
    let child_kinds: Vec<&str> = schema
        .constraints
        .get(vertex_id)
        .and_then(|cs| {
            cs.iter()
                .find(|c| c.sort.as_ref() == "chose-alt-child-kinds")
                .map(|c| c.value.split_whitespace().collect())
        })
        .unwrap_or_default();
    if !constraint_blob.is_empty() {
        let mut best_literal: usize = 0;
        let mut best_symbols: usize = 0;
        let mut best_alt: Option<&Production> = None;
        // Set when another alternative equals the current best on both scores.
        let mut tied = false;
        for alt in alternatives {
            let strings = literal_strings(alt);
            if strings.is_empty() {
                continue;
            }
            // Literal score: total length of the alternative's literals that
            // occur somewhere in the constraint text.
            let literal_score = strings
                .iter()
                .filter(|s| constraint_blob.contains(s.as_str()))
                .map(String::len)
                .sum::<usize>();
            if literal_score == 0 {
                continue;
            }
            // Symbol score (only computed when it can still matter): number
            // of referenced symbols related to a recorded child kind.
            let symbol_score = if literal_score >= best_literal && !child_kinds.is_empty() {
                let symbols = referenced_symbols(alt);
                symbols
                    .iter()
                    .filter(|sym| {
                        let sym_str: &str = sym;
                        if child_kinds.contains(&sym_str) {
                            return true;
                        }
                        // NOTE(review): `kind_satisfies_symbol` looks up
                        // `grammar.subtypes` by the child's kind and tests
                        // for the symbol; this lookup is keyed by the symbol
                        // and tests for the child kinds. Confirm the
                        // direction here is intended.
                        grammar.subtypes.get(sym_str).is_some_and(|sub_set| {
                            sub_set
                                .iter()
                                .any(|sub| child_kinds.contains(&sub.as_str()))
                        })
                    })
                    .count()
            } else {
                0
            };
            let better = literal_score > best_literal
                || (literal_score == best_literal && symbol_score > best_symbols);
            let same = literal_score == best_literal && symbol_score == best_symbols;
            if better {
                best_literal = literal_score;
                best_symbols = symbol_score;
                best_alt = Some(alt);
                tied = false;
            } else if same && best_alt.is_some() {
                tied = true;
            }
        }
        // Only an unambiguous winner is accepted; ties fall through to the
        // structural strategies below.
        if let Some(alt) = best_alt {
            if !tied {
                return Some(alt);
            }
        }
    }

    // Strategy 2: an alternative whose symbols match an unconsumed child.
    for alt in alternatives {
        let symbols = referenced_symbols(alt);
        if !symbols.is_empty()
            && cursor.has_matching(|edge| {
                let tk = schema.vertices.get(&edge.tgt).map(|v| v.kind.as_ref());
                symbols
                    .iter()
                    .any(|s| kind_satisfies_symbol(grammar, tk, s))
            })
        {
            return Some(alt);
        }
    }

    // Strategy 3: an alternative with a field named like an unconsumed edge.
    let edge_kinds: Vec<&str> = cursor
        .edges
        .iter()
        .enumerate()
        .filter(|(i, _)| !cursor.consumed[*i])
        .map(|(_, e)| e.kind.as_ref())
        .collect();
    for alt in alternatives {
        if has_field_in(alt, &edge_kinds) {
            return Some(alt);
        }
    }

    // No effect at runtime: both values are already used above.
    let _ = (schema, vertex_id);
    // Strategy 4: prefer emitting nothing (`Blank`) over guessing.
    if alternatives.iter().any(|a| matches!(a, Production::Blank)) {
        return alternatives.iter().find(|a| matches!(a, Production::Blank));
    }
    // No `Blank` present, so this is simply the first alternative, if any.
    alternatives
        .iter()
        .find(|alt| !matches!(alt, Production::Blank))
}
1361
1362fn literal_strings(production: &Production) -> Vec<String> {
1368 let mut out = Vec::new();
1369 fn walk(p: &Production, out: &mut Vec<String>) {
1370 match p {
1371 Production::String { value } if !value.is_empty() => {
1372 out.push(value.clone());
1373 }
1374 Production::Choice { members } | Production::Seq { members } => {
1375 for m in members {
1376 walk(m, out);
1377 }
1378 }
1379 Production::Repeat { content }
1380 | Production::Repeat1 { content }
1381 | Production::Optional { content }
1382 | Production::Field { content, .. }
1383 | Production::Alias { content, .. }
1384 | Production::Token { content }
1385 | Production::ImmediateToken { content }
1386 | Production::Prec { content, .. }
1387 | Production::PrecLeft { content, .. }
1388 | Production::PrecRight { content, .. }
1389 | Production::PrecDynamic { content, .. }
1390 | Production::Reserved { content, .. } => walk(content, out),
1391 _ => {}
1392 }
1393 }
1394 walk(production, &mut out);
1395 out
1396}
1397
1398fn referenced_symbols(production: &Production) -> Vec<&str> {
1405 let mut out = Vec::new();
1406 fn walk<'a>(p: &'a Production, out: &mut Vec<&'a str>) {
1407 match p {
1408 Production::Symbol { name } => out.push(name.as_str()),
1409 Production::Choice { members } | Production::Seq { members } => {
1410 for m in members {
1411 walk(m, out);
1412 }
1413 }
1414 Production::Repeat { content }
1415 | Production::Repeat1 { content }
1416 | Production::Optional { content }
1417 | Production::Field { content, .. }
1418 | Production::Alias { content, .. }
1419 | Production::Token { content }
1420 | Production::ImmediateToken { content }
1421 | Production::Prec { content, .. }
1422 | Production::PrecLeft { content, .. }
1423 | Production::PrecRight { content, .. }
1424 | Production::PrecDynamic { content, .. }
1425 | Production::Reserved { content, .. } => walk(content, out),
1426 _ => {}
1427 }
1428 }
1429 walk(production, &mut out);
1430 out
1431}
1432
1433fn first_symbol(production: &Production) -> Option<&str> {
1434 match production {
1435 Production::Symbol { name } => Some(name),
1436 Production::Seq { members } => members.iter().find_map(first_symbol),
1437 Production::Choice { members } => members.iter().find_map(first_symbol),
1438 Production::Repeat { content }
1439 | Production::Repeat1 { content }
1440 | Production::Optional { content }
1441 | Production::Field { content, .. }
1442 | Production::Alias { content, .. }
1443 | Production::Token { content }
1444 | Production::ImmediateToken { content }
1445 | Production::Prec { content, .. }
1446 | Production::PrecLeft { content, .. }
1447 | Production::PrecRight { content, .. }
1448 | Production::PrecDynamic { content, .. }
1449 | Production::Reserved { content, .. } => first_symbol(content),
1450 _ => None,
1451 }
1452}
1453
1454fn has_field_in(production: &Production, edge_kinds: &[&str]) -> bool {
1455 match production {
1456 Production::Field { name, .. } => edge_kinds.contains(&name.as_str()),
1457 Production::Seq { members } | Production::Choice { members } => {
1458 members.iter().any(|m| has_field_in(m, edge_kinds))
1459 }
1460 Production::Repeat { content }
1461 | Production::Repeat1 { content }
1462 | Production::Optional { content }
1463 | Production::Alias { content, .. }
1464 | Production::Token { content }
1465 | Production::ImmediateToken { content }
1466 | Production::Prec { content, .. }
1467 | Production::PrecLeft { content, .. }
1468 | Production::PrecRight { content, .. }
1469 | Production::PrecDynamic { content, .. }
1470 | Production::Reserved { content, .. } => has_field_in(content, edge_kinds),
1471 _ => false,
1472 }
1473}
1474
1475fn has_relevant_constraint(
1476 production: &Production,
1477 schema: &Schema,
1478 vertex_id: &panproto_gat::Name,
1479) -> bool {
1480 let constraints = match schema.constraints.get(vertex_id) {
1481 Some(c) => c,
1482 None => return false,
1483 };
1484 fn walk(production: &Production, constraints: &[panproto_schema::Constraint]) -> bool {
1485 match production {
1486 Production::String { value } => constraints
1487 .iter()
1488 .any(|c| c.value == *value || c.sort.as_ref() == value),
1489 Production::Field { name, content } => {
1490 constraints.iter().any(|c| c.sort.as_ref() == name) || walk(content, constraints)
1491 }
1492 Production::Seq { members } | Production::Choice { members } => {
1493 members.iter().any(|m| walk(m, constraints))
1494 }
1495 Production::Repeat { content }
1496 | Production::Repeat1 { content }
1497 | Production::Optional { content }
1498 | Production::Alias { content, .. }
1499 | Production::Token { content }
1500 | Production::ImmediateToken { content }
1501 | Production::Prec { content, .. }
1502 | Production::PrecLeft { content, .. }
1503 | Production::PrecRight { content, .. }
1504 | Production::PrecDynamic { content, .. }
1505 | Production::Reserved { content, .. } => walk(content, constraints),
1506 _ => false,
1507 }
1508 }
1509 walk(production, constraints)
1510}
1511
1512fn children_for<'a>(schema: &'a Schema, vertex_id: &panproto_gat::Name) -> Vec<&'a Edge> {
1513 let mut edges: Vec<&Edge> = schema
1514 .edges
1515 .keys()
1516 .filter(|e| &e.src == vertex_id)
1517 .collect();
1518 edges.sort_by_key(|e| {
1519 let pos = schema.orderings.get(*e).copied().unwrap_or(u32::MAX);
1523 (pos, e.kind.clone(), e.tgt.clone())
1524 });
1525 edges
1526}
1527
1528fn vertex_id_kind<'a>(schema: &'a Schema, vertex_id: &panproto_gat::Name) -> Option<&'a str> {
1529 schema.vertices.get(vertex_id).map(|v| v.kind.as_ref())
1530}
1531
1532fn literal_value<'a>(schema: &'a Schema, vertex_id: &panproto_gat::Name) -> Option<&'a str> {
1533 schema
1534 .constraints
1535 .get(vertex_id)?
1536 .iter()
1537 .find(|c| c.sort.as_ref() == "literal-value")
1538 .map(|c| c.value.as_str())
1539}
1540
1541fn placeholder_for_pattern(pattern: &str) -> String {
1542 let simple_lit = decode_simple_pattern_literal(pattern);
1550 if let Some(lit) = simple_lit {
1551 return lit;
1552 }
1553
1554 if pattern.contains("[0-9]") || pattern.contains("\\d") {
1555 "0".into()
1556 } else if pattern.contains("[a-zA-Z_]") || pattern.contains("\\w") {
1557 "_x".into()
1558 } else if pattern.contains('"') || pattern.contains('\'') {
1559 "\"\"".into()
1560 } else {
1561 "_".into()
1562 }
1563}
1564
/// Decodes a pattern that denotes exactly one fixed string.
///
/// Returns `None` (meaning "not a simple literal") when the pattern:
///
/// * contains any of `[ ] ( ) * + ? | { }`;
/// * ends in a dangling backslash; or
/// * escapes a letter or digit other than `n`, `r`, `t`. Such escapes are
///   character classes (`\d`, `\w`, `\s`), assertions (`\b`) or coded
///   characters (`\x41`), so decoding them to the bare letter would yield
///   text the pattern does not match.
///
/// Otherwise returns the pattern with `\n`, `\r`, `\t` decoded and every
/// other escaped character (for example `\.`, `\/`, `\\`) replaced by the
/// character itself.
fn decode_simple_pattern_literal(pattern: &str) -> Option<String> {
    if pattern
        .chars()
        .any(|c| matches!(c, '[' | ']' | '(' | ')' | '*' | '+' | '?' | '|' | '{' | '}'))
    {
        return None;
    }

    let mut out = String::with_capacity(pattern.len());
    let mut chars = pattern.chars();
    while let Some(c) = chars.next() {
        if c != '\\' {
            out.push(c);
            continue;
        }
        // `?` handles the dangling-backslash case.
        match chars.next()? {
            'n' => out.push('\n'),
            'r' => out.push('\r'),
            't' => out.push('\t'),
            // An escaped alphanumeric is never a plain literal character.
            other if other.is_ascii_alphanumeric() => return None,
            // Escaped punctuation stands for itself.
            other => out.push(other),
        }
    }
    Some(out)
}
1597
/// One entry of the buffered output stream that `layout` turns into bytes.
#[derive(Clone)]
enum Token {
    /// Literal text to write.
    Lit(String),
    /// Raises the indent level by one for subsequent lines.
    IndentOpen,
    /// Lowers the indent level by one (never below zero) and ends the
    /// current line if anything has been written on it.
    IndentClose,
    /// Ends the current line if anything has been written on it.
    LineBreak,
}
1623
/// Buffer of layout tokens built up during emission; converted to bytes by
/// `finish`.
struct Output<'a> {
    /// Tokens recorded so far, in emission order.
    tokens: Vec<Token>,
    /// Formatting settings consulted when recording and laying out tokens.
    policy: &'a FormatPolicy,
}
1628
/// A saved position in an `Output` buffer, used to roll back speculative
/// emission.
#[derive(Clone)]
struct OutputSnapshot {
    /// Number of tokens the buffer held when the snapshot was taken.
    tokens_len: usize,
}
1633
1634impl<'a> Output<'a> {
1635 fn new(policy: &'a FormatPolicy) -> Self {
1636 Self {
1637 tokens: Vec::new(),
1638 policy,
1639 }
1640 }
1641
1642 fn token(&mut self, value: &str) {
1643 if value.is_empty() {
1644 return;
1645 }
1646
1647 if self.policy.indent_close.iter().any(|t| t == value) {
1648 self.tokens.push(Token::IndentClose);
1649 }
1650
1651 self.tokens.push(Token::Lit(value.to_owned()));
1652
1653 if self.policy.indent_open.iter().any(|t| t == value) {
1654 self.tokens.push(Token::IndentOpen);
1655 self.tokens.push(Token::LineBreak);
1656 } else if self.policy.line_break_after.iter().any(|t| t == value) {
1657 self.tokens.push(Token::LineBreak);
1658 }
1659 }
1660
1661 fn newline(&mut self) {
1662 self.tokens.push(Token::LineBreak);
1663 }
1664
1665 fn snapshot(&self) -> OutputSnapshot {
1666 OutputSnapshot {
1667 tokens_len: self.tokens.len(),
1668 }
1669 }
1670
1671 fn restore(&mut self, snap: OutputSnapshot) {
1672 self.tokens.truncate(snap.tokens_len);
1673 }
1674
1675 fn finish(self) -> Vec<u8> {
1676 layout(&self.tokens, self.policy)
1677 }
1678}
1679
1680fn layout(tokens: &[Token], policy: &FormatPolicy) -> Vec<u8> {
1686 let mut bytes = Vec::new();
1687 let mut indent: usize = 0;
1688 let mut at_line_start = true;
1689 let mut last_lit: Option<&str> = None;
1690 let newline = policy.newline.as_bytes();
1691 let separator = policy.separator.as_bytes();
1692
1693 for tok in tokens {
1694 match tok {
1695 Token::IndentOpen => indent += 1,
1696 Token::IndentClose => {
1697 indent = indent.saturating_sub(1);
1698 if !at_line_start {
1699 bytes.extend_from_slice(newline);
1700 at_line_start = true;
1701 }
1702 }
1703 Token::LineBreak => {
1704 if !at_line_start {
1705 bytes.extend_from_slice(newline);
1706 at_line_start = true;
1707 }
1708 }
1709 Token::Lit(value) => {
1710 if at_line_start {
1711 bytes.extend(std::iter::repeat_n(b' ', indent * policy.indent_width));
1712 } else if let Some(prev) = last_lit {
1713 if needs_space_between(prev, value) {
1714 bytes.extend_from_slice(separator);
1715 }
1716 }
1717 bytes.extend_from_slice(value.as_bytes());
1718 at_line_start = false;
1719 last_lit = Some(value.as_str());
1720 }
1721 }
1722 }
1723
1724 if !at_line_start {
1725 bytes.extend_from_slice(newline);
1726 }
1727 bytes
1728}
1729
1730fn needs_space_between(last: &str, next: &str) -> bool {
1731 if last.is_empty() || next.is_empty() {
1732 return false;
1733 }
1734 if is_punct_open(last) || is_punct_open(next) {
1735 return false;
1736 }
1737 if is_punct_close(next) {
1738 return false;
1739 }
1740 if is_punct_close(last) && is_punct_punctuation(next) {
1741 return false;
1742 }
1743 if last == "." || next == "." {
1744 return false;
1745 }
1746 if last_is_word_like(last) && first_is_word_like(next) {
1747 return true;
1748 }
1749 if last_ends_with_alnum(last) && first_is_alnum_or_underscore(next) {
1750 return true;
1751 }
1752 true
1755}
1756
/// Returns `true` when `s` is exactly one of the opening punctuators
/// `(`, `[`, `{`, `"`, `'` or a backtick.
fn is_punct_open(s: &str) -> bool {
    const OPENERS: [&str; 6] = ["(", "[", "{", "\"", "'", "`"];
    OPENERS.contains(&s)
}
1760
/// Returns `true` when `s` is exactly one of the closing punctuators
/// `)`, `]`, `}`, `,`, `;`, `:`, `"`, `'` or a backtick.
fn is_punct_close(s: &str) -> bool {
    const CLOSERS: [&str; 9] = [")", "]", "}", ",", ";", ":", "\"", "'", "`"];
    CLOSERS.contains(&s)
}
1764
/// Returns `true` when `s` is exactly one of the trailing punctuators
/// `,`, `;`, `:`, `.`, `)`, `]` or `}`.
fn is_punct_punctuation(s: &str) -> bool {
    const TRAILERS: [&str; 7] = [",", ";", ":", ".", ")", "]", "}"];
    TRAILERS.contains(&s)
}
1768
/// Returns `true` when the final character of `s` is alphanumeric or `_`.
/// An empty string yields `false`.
fn last_is_word_like(s: &str) -> bool {
    s.ends_with(|c: char| c == '_' || c.is_alphanumeric())
}
1775
/// Returns `true` when the first character of `s` is alphanumeric or `_`.
/// An empty string yields `false`.
fn first_is_word_like(s: &str) -> bool {
    s.starts_with(|c: char| c == '_' || c.is_alphanumeric())
}
1782
/// Returns `true` when the final character of `s` is alphanumeric.
/// Unlike `last_is_word_like`, a trailing `_` does not count. An empty
/// string yields `false`.
fn last_ends_with_alnum(s: &str) -> bool {
    s.ends_with(char::is_alphanumeric)
}
1789
/// Returns `true` when the first character of `s` is alphanumeric or `_`.
/// An empty string yields `false`.
fn first_is_alnum_or_underscore(s: &str) -> bool {
    s.chars()
        .next()
        .is_some_and(|first| first == '_' || first.is_alphanumeric())
}
1796
// Unit tests for grammar loading, the token-based output layer, the child
// cursor, and the production-walking helpers.
#[cfg(test)]
mod tests {
    use super::*;

    // A minimal grammar with a single SEQ rule loads and exposes that rule
    // under its name.
    #[test]
    fn parses_simple_grammar_json() {
        let bytes = br#"{
 "name": "tiny",
 "rules": {
 "program": {
 "type": "SEQ",
 "members": [
 {"type": "STRING", "value": "hello"},
 {"type": "STRING", "value": ";"}
 ]
 }
 }
 }"#;
        let g = Grammar::from_bytes("tiny", bytes).expect("valid tiny grammar");
        assert!(g.rules.contains_key("program"));
    }

    // With the default policy, brackets and `;` attach to the preceding
    // token without a separator.
    #[test]
    fn output_emits_punctuation_without_leading_space() {
        let policy = FormatPolicy::default();
        let mut out = Output::new(&policy);
        out.token("foo");
        out.token("(");
        out.token(")");
        out.token(";");
        let bytes = out.finish();
        let s = std::str::from_utf8(&bytes).expect("ascii output");
        assert!(s.starts_with("foo();"), "got {s:?}");
    }

    // Bytes that are not JSON produce an error whose message contains the
    // protocol name passed to `from_bytes`.
    #[test]
    fn grammar_from_bytes_rejects_malformed_input() {
        let result = Grammar::from_bytes("malformed", b"not json");
        let err = result.expect_err("malformed bytes must yield Err");
        let msg = err.to_string();
        assert!(
            msg.contains("malformed"),
            "error message should name the protocol: {msg:?}"
        );
    }

    // `{` is followed by a line break and the output ends with `}` plus a
    // newline. The indentation width of `body` itself is not asserted here.
    #[test]
    fn output_indents_after_open_brace() {
        let policy = FormatPolicy::default();
        let mut out = Output::new(&policy);
        out.token("fn");
        out.token("foo");
        out.token("(");
        out.token(")");
        out.token("{");
        out.token("body");
        out.token("}");
        let bytes = out.finish();
        let s = std::str::from_utf8(&bytes).expect("ascii output");
        assert!(s.contains("{\n"), "newline after opening brace: {s:?}");
        assert!(s.contains("body"), "body inside block: {s:?}");
        assert!(s.ends_with("}\n"), "newline after closing brace: {s:?}");
    }

    // A lone `.` token is glued to both of its neighbours.
    #[test]
    fn output_no_space_between_word_and_dot() {
        let policy = FormatPolicy::default();
        let mut out = Output::new(&policy);
        out.token("foo");
        out.token(".");
        out.token("bar");
        let bytes = out.finish();
        let s = std::str::from_utf8(&bytes).expect("ascii output");
        assert!(s.starts_with("foo.bar"), "no space around dot: {s:?}");
    }

    // Restoring a snapshot discards the tokens recorded after it was taken;
    // tokens recorded before the snapshot and after the restore survive.
    #[test]
    fn output_snapshot_restore_truncates_bytes() {
        let policy = FormatPolicy::default();
        let mut out = Output::new(&policy);
        out.token("keep");
        let snap = out.snapshot();
        out.token("drop");
        out.token("more");
        out.restore(snap);
        out.token("after");
        let bytes = out.finish();
        let s = std::str::from_utf8(&bytes).expect("ascii output");
        assert!(s.contains("keep"), "kept token survives: {s:?}");
        assert!(s.contains("after"), "post-restore token visible: {s:?}");
        assert!(!s.contains("drop"), "rolled-back token removed: {s:?}");
        assert!(!s.contains("more"), "rolled-back token removed: {s:?}");
    }

    // `take_field` hands out a matching edge once; asking again for the same
    // field yields `None`.
    #[test]
    fn child_cursor_take_field_consumes_once() {
        let edges_owned: Vec<Edge> = vec![Edge {
            src: panproto_gat::Name::from("p"),
            tgt: panproto_gat::Name::from("c"),
            kind: panproto_gat::Name::from("name"),
            name: None,
        }];
        let edges: Vec<&Edge> = edges_owned.iter().collect();
        let mut cursor = ChildCursor::new(&edges);
        let first = cursor.take_field("name");
        let second = cursor.take_field("name");
        assert!(first.is_some(), "first take returns the edge");
        assert!(
            second.is_none(),
            "second take returns None (already consumed)"
        );
    }

    // `take_matching` consumes only the edge selected by the predicate; the
    // other edge stays visible to `has_matching`.
    #[test]
    fn child_cursor_take_matching_predicate() {
        let edges_owned: Vec<Edge> = vec![
            Edge {
                src: "p".into(),
                tgt: "c1".into(),
                kind: "child_of".into(),
                name: None,
            },
            Edge {
                src: "p".into(),
                tgt: "c2".into(),
                kind: "key".into(),
                name: None,
            },
        ];
        let edges: Vec<&Edge> = edges_owned.iter().collect();
        let mut cursor = ChildCursor::new(&edges);
        assert!(cursor.has_matching(|e| e.kind.as_ref() == "key"));
        let taken = cursor.take_matching(|e| e.kind.as_ref() == "key");
        assert!(taken.is_some());
        assert!(
            !cursor.has_matching(|e| e.kind.as_ref() == "key"),
            "consumed edge no longer matches"
        );
        assert!(
            cursor.has_matching(|e| e.kind.as_ref() == "child_of"),
            "the other edge is still available"
        );
    }

    // A kind satisfies a symbol of the same name; a different kind or a
    // missing kind does not.
    #[test]
    fn kind_satisfies_symbol_direct_match() {
        let bytes = br#"{
 "name": "tiny",
 "rules": {
 "x": {"type": "STRING", "value": "x"}
 }
 }"#;
        let g = Grammar::from_bytes("tiny", bytes).expect("valid grammar");
        assert!(kind_satisfies_symbol(&g, Some("x"), "x"));
        assert!(!kind_satisfies_symbol(&g, Some("y"), "x"));
        assert!(!kind_satisfies_symbol(&g, None, "x"));
    }

    // Kinds listed as CHOICE alternatives of the `_value` rule satisfy
    // `_value`; a kind that is not among the alternatives does not.
    #[test]
    fn kind_satisfies_symbol_through_hidden_rule() {
        let bytes = br#"{
 "name": "tiny",
 "rules": {
 "_value": {
 "type": "CHOICE",
 "members": [
 {"type": "SYMBOL", "name": "object"},
 {"type": "SYMBOL", "name": "number"}
 ]
 },
 "object": {"type": "STRING", "value": "{}"},
 "number": {"type": "PATTERN", "value": "[0-9]+"}
 }
 }"#;
        let g = Grammar::from_bytes("tiny", bytes).expect("valid grammar");
        assert!(
            kind_satisfies_symbol(&g, Some("number"), "_value"),
            "number is reachable from _value via CHOICE"
        );
        assert!(
            kind_satisfies_symbol(&g, Some("object"), "_value"),
            "object is reachable from _value via CHOICE"
        );
        assert!(
            !kind_satisfies_symbol(&g, Some("string"), "_value"),
            "string is NOT among the alternatives"
        );
    }

    // `first_symbol` skips the leading STRING member and reports the first
    // SYMBOL of the sequence.
    #[test]
    fn first_symbol_skips_string_terminals() {
        let prod: Production = serde_json::from_str(
            r#"{
 "type": "SEQ",
 "members": [
 {"type": "STRING", "value": "{"},
 {"type": "SYMBOL", "name": "body"},
 {"type": "STRING", "value": "}"}
 ]
 }"#,
        )
        .expect("valid SEQ");
        assert_eq!(first_symbol(&prod), Some("body"));
    }

    // Patterns that are not plain literals get a placeholder chosen from the
    // character classes they mention.
    #[test]
    fn placeholder_for_pattern_routes_by_regex_class() {
        assert_eq!(placeholder_for_pattern("[0-9]+"), "0");
        assert_eq!(placeholder_for_pattern("[a-zA-Z_]\\w*"), "_x");
        assert_eq!(placeholder_for_pattern("\"[^\"]*\""), "\"\"");
        assert_eq!(placeholder_for_pattern("\\d+\\.\\d+"), "0");
    }

    // Pins the default policy values the other output tests rely on.
    #[test]
    fn format_policy_default_breaks_after_semicolon() {
        let policy = FormatPolicy::default();
        assert!(policy.line_break_after.iter().any(|t| t == ";"));
        assert!(policy.indent_open.iter().any(|t| t == "{"));
        assert!(policy.indent_close.iter().any(|t| t == "}"));
        assert_eq!(policy.indent_width, 2);
    }

    // Literal patterns decode to the text they match; patterns containing
    // regex metacharacters fall back to the heuristic placeholders.
    #[test]
    fn placeholder_decodes_literal_pattern_separators() {
        assert_eq!(placeholder_for_pattern("\\n"), "\n");
        assert_eq!(placeholder_for_pattern("\\r\\n"), "\r\n");
        assert_eq!(placeholder_for_pattern(";"), ";");
        assert_eq!(placeholder_for_pattern("[0-9]+"), "0");
        assert_eq!(placeholder_for_pattern("a|b"), "_");
    }

    // `supertypes` entries written as bare strings are accepted, and a
    // supertype is satisfied by the kinds listed in its CHOICE.
    #[test]
    fn supertypes_decode_from_grammar_json_strings() {
        let bytes = br#"{
 "name": "tiny",
 "supertypes": ["expression"],
 "rules": {
 "expression": {
 "type": "CHOICE",
 "members": [
 {"type": "SYMBOL", "name": "binary_expression"},
 {"type": "SYMBOL", "name": "identifier"}
 ]
 },
 "binary_expression": {"type": "STRING", "value": "x"},
 "identifier": {"type": "PATTERN", "value": "[a-z]+"}
 }
 }"#;
        let g = Grammar::from_bytes("tiny", bytes).expect("parse");
        assert!(g.supertypes.contains("expression"));
        assert!(kind_satisfies_symbol(&g, Some("identifier"), "expression"));
        assert!(!kind_satisfies_symbol(&g, Some("string"), "expression"));
    }

    // `supertypes` entries written as SYMBOL objects are accepted as well.
    #[test]
    fn supertypes_decode_from_grammar_json_objects() {
        let bytes = br#"{
 "name": "tiny",
 "supertypes": [{"type": "SYMBOL", "name": "stmt"}],
 "rules": {
 "stmt": {
 "type": "CHOICE",
 "members": [
 {"type": "SYMBOL", "name": "while_stmt"},
 {"type": "SYMBOL", "name": "if_stmt"}
 ]
 },
 "while_stmt": {"type": "STRING", "value": "while"},
 "if_stmt": {"type": "STRING", "value": "if"}
 }
 }"#;
        let g = Grammar::from_bytes("tiny", bytes).expect("parse");
        assert!(g.supertypes.contains("stmt"));
        assert!(kind_satisfies_symbol(&g, Some("while_stmt"), "stmt"));
    }

    // A rule whose body is an ALIAS is satisfied by the alias `value` used
    // as a kind.
    #[test]
    fn alias_value_matches_kind() {
        let bytes = br#"{
 "name": "tiny",
 "rules": {
 "_package_identifier": {
 "type": "ALIAS",
 "named": true,
 "value": "package_identifier",
 "content": {"type": "SYMBOL", "name": "identifier"}
 },
 "identifier": {"type": "PATTERN", "value": "[a-z]+"}
 }
 }"#;
        let g = Grammar::from_bytes("tiny", bytes).expect("parse");
        assert!(kind_satisfies_symbol(
            &g,
            Some("package_identifier"),
            "_package_identifier"
        ));
    }

    // `referenced_symbols` finds SYMBOLs nested inside CHOICE, REPEAT and
    // inner SEQ members.
    #[test]
    fn referenced_symbols_walks_nested_seq() {
        let prod: Production = serde_json::from_str(
            r#"{
 "type": "SEQ",
 "members": [
 {"type": "CHOICE", "members": [
 {"type": "SYMBOL", "name": "attribute_item"},
 {"type": "BLANK"}
 ]},
 {"type": "SYMBOL", "name": "parameter"},
 {"type": "REPEAT", "content": {
 "type": "SEQ",
 "members": [
 {"type": "STRING", "value": ","},
 {"type": "SYMBOL", "name": "parameter"}
 ]
 }}
 ]
 }"#,
        )
        .expect("seq");
        let symbols = referenced_symbols(&prod);
        assert!(symbols.contains(&"attribute_item"));
        assert!(symbols.contains(&"parameter"));
    }

    // `literal_strings` returns the STRING members of a CHOICE in
    // declaration order.
    #[test]
    fn literal_strings_collects_choice_members() {
        let prod: Production = serde_json::from_str(
            r#"{
 "type": "CHOICE",
 "members": [
 {"type": "STRING", "value": "+"},
 {"type": "STRING", "value": "-"},
 {"type": "STRING", "value": "*"}
 ]
 }"#,
        )
        .expect("choice");
        let strings = literal_strings(&prod);
        assert_eq!(strings, vec!["+", "-", "*"]);
    }

    // A RESERVED node deserialises into `Production::Reserved` wrapping its
    // inner SYMBOL.
    #[test]
    fn reserved_variant_deserialises() {
        let prod: Production = serde_json::from_str(
            r#"{
 "type": "RESERVED",
 "content": {"type": "SYMBOL", "name": "_lowercase_identifier"},
 "context_name": "attribute_id"
 }"#,
        )
        .expect("RESERVED parses");
        match prod {
            Production::Reserved { content, .. } => match *content {
                Production::Symbol { name } => assert_eq!(name, "_lowercase_identifier"),
                other => panic!("expected inner SYMBOL, got {other:?}"),
            },
            other => panic!("expected RESERVED, got {other:?}"),
        }
    }

    // A whole grammar whose rule body is RESERVED loads through
    // `Grammar::from_bytes`.
    #[test]
    fn reserved_grammar_loads_end_to_end() {
        let bytes = br#"{
 "name": "tiny_reserved",
 "rules": {
 "program": {
 "type": "RESERVED",
 "content": {"type": "SYMBOL", "name": "ident"},
 "context_name": "keywords"
 },
 "ident": {"type": "PATTERN", "value": "[a-z]+"}
 }
 }"#;
        let g = Grammar::from_bytes("tiny_reserved", bytes).expect("RESERVED-using grammar loads");
        assert!(g.rules.contains_key("program"));
    }

    // The walker helpers look through a RESERVED wrapper: the FIELD and the
    // SYMBOL nested in its content are both found.
    #[test]
    fn reserved_walker_helpers_recurse_into_content() {
        let prod: Production = serde_json::from_str(
            r#"{
 "type": "RESERVED",
 "content": {
 "type": "FIELD",
 "name": "lhs",
 "content": {"type": "SYMBOL", "name": "expr"}
 },
 "context_name": "ctx"
 }"#,
        )
        .expect("nested RESERVED parses");
        assert_eq!(first_symbol(&prod), Some("expr"));
        assert!(has_field_in(&prod, &["lhs"]));
        let symbols = referenced_symbols(&prod);
        assert!(symbols.contains(&"expr"));
    }
}