1#![allow(
2 clippy::module_name_repetitions,
3 clippy::too_many_lines,
4 clippy::too_many_arguments,
5 clippy::map_unwrap_or,
6 clippy::option_if_let_else,
7 clippy::elidable_lifetime_names,
8 clippy::items_after_statements,
9 clippy::needless_pass_by_value,
10 clippy::single_match_else,
11 clippy::manual_let_else,
12 clippy::match_same_arms,
13 clippy::missing_const_for_fn,
14 clippy::single_char_pattern,
15 clippy::naive_bytecount,
16 clippy::expect_used,
17 clippy::redundant_pub_crate,
18 clippy::used_underscore_binding,
19 clippy::redundant_field_names,
20 clippy::struct_field_names,
21 clippy::redundant_else,
22 clippy::similar_names
23)]
24
25use std::collections::BTreeMap;
71
72use panproto_schema::{Edge, Schema};
73use serde::Deserialize;
74
75use crate::error::ParseError;
76
/// One node of a grammar rule tree as stored in `grammar.json`.
///
/// The enum is internally tagged: the JSON `type` key selects the variant
/// (`"SEQ"`, `"CHOICE"`, ...) and the remaining keys of the same object fill
/// the variant's fields. The variant set appears to mirror tree-sitter's
/// grammar rule schema — confirm against the generator version in use.
#[derive(Debug, Clone, Deserialize)]
#[serde(tag = "type")]
#[non_exhaustive]
pub enum Production {
    /// `SEQ`: an ordered list of member productions.
    #[serde(rename = "SEQ")]
    Seq {
        /// Members in declaration order.
        members: Vec<Self>,
    },
    /// `CHOICE`: a list of alternative productions.
    #[serde(rename = "CHOICE")]
    Choice {
        /// The alternatives.
        members: Vec<Self>,
    },
    /// `REPEAT`: wraps one production; the yield analysis in this file treats
    /// it as possibly matching nothing.
    #[serde(rename = "REPEAT")]
    Repeat {
        /// The repeated production.
        content: Box<Self>,
    },
    /// `REPEAT1`: wraps one production; the yield analysis in this file treats
    /// it exactly like its content.
    #[serde(rename = "REPEAT1")]
    Repeat1 {
        /// The repeated production.
        content: Box<Self>,
    },
    /// `OPTIONAL`: wraps one production; the yield analysis in this file
    /// treats it as its content or nothing.
    #[serde(rename = "OPTIONAL")]
    Optional {
        /// The optional production.
        content: Box<Self>,
    },
    /// `SYMBOL`: a reference to another rule by name.
    #[serde(rename = "SYMBOL")]
    Symbol {
        /// Name of the referenced rule; looked up in [`Grammar::rules`].
        name: String,
    },
    /// `STRING`: a literal token.
    #[serde(rename = "STRING")]
    String {
        /// The literal text.
        value: String,
    },
    /// `PATTERN`: a pattern token.
    #[serde(rename = "PATTERN")]
    Pattern {
        /// The pattern source (presumably a regular expression — confirm).
        value: String,
    },
    /// `BLANK`: carries no payload; the yield analysis treats it as matching
    /// nothing.
    #[serde(rename = "BLANK")]
    Blank,
    /// `FIELD`: attaches a field name to the wrapped production.
    #[serde(rename = "FIELD")]
    Field {
        /// The field name.
        name: String,
        /// The production stored under that field.
        content: Box<Self>,
    },
    /// `ALIAS`: the wrapped production presented under another name.
    #[serde(rename = "ALIAS")]
    Alias {
        /// The aliased production.
        content: Box<Self>,
        /// Whether the alias is a named kind; `false` when the key is absent.
        #[serde(default)]
        named: bool,
        /// The alias text; empty when the key is absent.
        #[serde(default)]
        value: String,
    },
    /// `TOKEN`: wraps one production.
    #[serde(rename = "TOKEN")]
    Token {
        /// The wrapped production.
        content: Box<Self>,
    },
    /// `IMMEDIATE_TOKEN`: wraps one production.
    #[serde(rename = "IMMEDIATE_TOKEN")]
    ImmediateToken {
        /// The wrapped production.
        content: Box<Self>,
    },
    /// `PREC`: a precedence annotation around one production.
    #[serde(rename = "PREC")]
    Prec {
        /// Raw precedence value, kept as untyped JSON; not read by the code
        /// visible in this file.
        #[allow(dead_code)]
        value: serde_json::Value,
        /// The annotated production.
        content: Box<Self>,
    },
    /// `PREC_LEFT`: a precedence annotation around one production.
    #[serde(rename = "PREC_LEFT")]
    PrecLeft {
        /// Raw precedence value, kept as untyped JSON; not read by the code
        /// visible in this file.
        #[allow(dead_code)]
        value: serde_json::Value,
        /// The annotated production.
        content: Box<Self>,
    },
    /// `PREC_RIGHT`: a precedence annotation around one production.
    #[serde(rename = "PREC_RIGHT")]
    PrecRight {
        /// Raw precedence value, kept as untyped JSON; not read by the code
        /// visible in this file.
        #[allow(dead_code)]
        value: serde_json::Value,
        /// The annotated production.
        content: Box<Self>,
    },
    /// `PREC_DYNAMIC`: a precedence annotation around one production.
    #[serde(rename = "PREC_DYNAMIC")]
    PrecDynamic {
        /// Raw precedence value, kept as untyped JSON; not read by the code
        /// visible in this file.
        #[allow(dead_code)]
        value: serde_json::Value,
        /// The annotated production.
        content: Box<Self>,
    },
    /// `RESERVED`: wraps one production together with a context name.
    #[serde(rename = "RESERVED")]
    Reserved {
        /// The wrapped production.
        content: Box<Self>,
        /// Context name; empty when the key is absent. Not read by the code
        /// visible in this file.
        #[allow(dead_code)]
        #[serde(default)]
        context_name: String,
    },
}
253
/// Role assigned to a literal token within a rule by the token classifier
/// (`classify_seq` / `classify_repeat_body`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TokenRole {
    /// Opening half of a literal pair that encloses non-literal content, or a
    /// prefix sigil placed before the first non-literal member.
    BracketOpen,
    /// Closing half of a literal pair, or a non-word literal placed after the
    /// last non-literal member.
    BracketClose,
    /// Literal that leads a `SEQ` used directly as a repeat body.
    Separator,
    /// Word-like literal (as decided by `is_word_like`).
    Keyword,
    /// Fallback role for a non-word literal that matched no other case.
    Operator,
    /// The only literal of a non-choice sequence with exactly two non-literal
    /// members, when the literal is one byte long.
    Connector,
    /// Not assigned by the classification code visible in this part of the
    /// file.
    Terminal,
}
285
/// A grammar loaded from `grammar.json`, plus lookup tables derived from it.
///
/// Only `name`, `rules`, `supertypes` and `extras` are read from JSON. Every
/// field marked `#[serde(skip)]` starts empty and is filled in by
/// [`Grammar::from_bytes_with_node_types`].
#[derive(Debug, Clone, Deserialize)]
#[non_exhaustive]
pub struct Grammar {
    /// The grammar's `name` key.
    #[allow(dead_code)]
    pub name: String,
    /// Rule name -> rule body, ordered by rule name.
    pub rules: BTreeMap<String, Production>,
    /// Supertype rule names. JSON entries may be bare strings or objects with
    /// a string `name` key; anything else is ignored.
    #[serde(default, deserialize_with = "deserialize_supertypes")]
    pub supertypes: std::collections::HashSet<String>,
    /// Names taken from the `extras` list: the `name` of each `SYMBOL` entry
    /// and the `value` of each named `ALIAS` entry.
    #[serde(default, deserialize_with = "deserialize_extras")]
    pub extras: std::collections::HashSet<String>,
    /// Kind -> set of rule / alias names that kind is treated as satisfying.
    /// Built by `compute_subtype_closure` and extended by
    /// `augment_subtypes_from_node_types` when node-type data is supplied.
    #[serde(skip)]
    pub subtypes: std::collections::HashMap<String, std::collections::HashSet<String>>,
    /// For each hidden (`_`-prefixed) or supertype rule, the set produced by
    /// `yield_of_production` for its body. An empty-string member marks that
    /// the rule may match nothing.
    #[serde(skip)]
    pub yield_sets: std::collections::HashMap<String, std::collections::HashSet<String>>,
    /// Named parent kind -> every named child kind listed for it in the
    /// node-types JSON (fields and `children` combined).
    #[serde(skip)]
    pub node_type_children: std::collections::HashMap<String, std::collections::HashSet<String>>,
    /// Named parent kind -> field name -> named child kinds listed for that
    /// field in the node-types JSON.
    #[serde(skip)]
    pub node_type_field_children: std::collections::HashMap<
        String,
        std::collections::HashMap<String, std::collections::HashSet<String>>,
    >,
    /// Named parent kind -> named child kinds listed under `children` (that
    /// is, outside any field) in the node-types JSON.
    #[serde(skip)]
    pub node_type_nonfield_children:
        std::collections::HashMap<String, std::collections::HashSet<String>>,
    /// `_`-prefixed symbol that has no rule of its own -> text of the first
    /// anonymous alias found wrapping it (see `build_external_alias_map`).
    #[serde(skip)]
    pub external_alias_map: std::collections::HashMap<String, String>,
    /// Rule name -> literal text -> role, from `compute_token_roles`. Rules
    /// with no classified literal are absent.
    #[serde(skip)]
    pub token_roles:
        std::collections::HashMap<String, std::collections::HashMap<String, TokenRole>>,
    /// `(rule name, literal)` pairs recorded by the token classifier when a
    /// `"{"` opens a bracket pair whose interior passes `has_repeat_recursive`.
    #[serde(skip)]
    pub indent_triggers: std::collections::HashSet<(String, String)>,
    /// Result of `extract_line_comment_prefixes` (defined outside this view).
    #[serde(skip)]
    pub line_comment_prefixes: Vec<String>,
    /// Not assigned in the code visible here; presumably filled by
    /// `classify_external_layout_tokens` — confirm.
    #[serde(skip)]
    pub external_indent_opens: std::collections::HashSet<String>,
    /// Not assigned in the code visible here; presumably filled by
    /// `classify_external_layout_tokens` — confirm.
    #[serde(skip)]
    pub external_indent_closes: std::collections::HashSet<String>,
    /// Not assigned in the code visible here; presumably filled by
    /// `classify_external_layout_tokens` — confirm.
    #[serde(skip)]
    pub external_newlines: std::collections::HashSet<String>,
    /// Not assigned in the code visible here; presumably filled by
    /// `classify_external_layout_tokens` — confirm.
    #[serde(skip)]
    pub external_semicolons: std::collections::HashSet<String>,
    /// Named alias text -> name of the symbol it directly wraps; the first
    /// occurrence found wins (see `build_named_alias_map`).
    #[serde(skip)]
    pub named_alias_map: std::collections::HashMap<String, String>,
}
423
424fn deserialize_supertypes<'de, D>(
425 deserializer: D,
426) -> Result<std::collections::HashSet<String>, D::Error>
427where
428 D: serde::Deserializer<'de>,
429{
430 let entries: Vec<serde_json::Value> = Vec::deserialize(deserializer)?;
431 let mut out = std::collections::HashSet::new();
432 for entry in entries {
433 match entry {
434 serde_json::Value::String(s) => {
435 out.insert(s);
436 }
437 serde_json::Value::Object(map) => {
438 if let Some(serde_json::Value::String(name)) = map.get("name") {
439 out.insert(name.clone());
440 }
441 }
442 _ => {}
443 }
444 }
445 Ok(out)
446}
447
448fn deserialize_extras<'de, D>(
449 deserializer: D,
450) -> Result<std::collections::HashSet<String>, D::Error>
451where
452 D: serde::Deserializer<'de>,
453{
454 let entries: Vec<serde_json::Value> = Vec::deserialize(deserializer)?;
455 let mut out = std::collections::HashSet::new();
456 for entry in entries {
457 if let serde_json::Value::Object(map) = entry {
458 let ty = map.get("type").and_then(serde_json::Value::as_str);
459 match ty {
460 Some("SYMBOL") => {
464 if let Some(serde_json::Value::String(name)) = map.get("name") {
465 out.insert(name.clone());
466 }
467 }
468 Some("ALIAS") => {
471 let named = map
472 .get("named")
473 .and_then(serde_json::Value::as_bool)
474 .unwrap_or(false);
475 if named {
476 if let Some(serde_json::Value::String(value)) = map.get("value") {
477 out.insert(value.clone());
478 }
479 }
480 }
481 _ => {}
484 }
485 }
486 }
487 Ok(out)
488}
489
impl Grammar {
    /// Loads a grammar from `grammar.json` bytes without node-type data.
    ///
    /// Equivalent to [`Grammar::from_bytes_with_node_types`] with `None`.
    ///
    /// # Errors
    ///
    /// Returns [`ParseError::EmitFailed`] when `bytes` cannot be deserialized.
    pub fn from_bytes(protocol: &str, bytes: &[u8]) -> Result<Self, ParseError> {
        Self::from_bytes_with_node_types(protocol, bytes, None)
    }

    /// Loads a grammar from `grammar.json` bytes and fills every derived
    /// table, optionally using node-types JSON as an extra source.
    ///
    /// `protocol` is only used to label the error. When `node_types_bytes` is
    /// given but cannot be parsed, the node-type tables simply stay empty
    /// (`build_node_type_children` swallows the parse error).
    ///
    /// # Errors
    ///
    /// Returns [`ParseError::EmitFailed`] when `grammar_bytes` cannot be
    /// deserialized.
    pub fn from_bytes_with_node_types(
        protocol: &str,
        grammar_bytes: &[u8],
        node_types_bytes: Option<&[u8]>,
    ) -> Result<Self, ParseError> {
        let mut grammar: Self =
            serde_json::from_slice(grammar_bytes).map_err(|e| ParseError::EmitFailed {
                protocol: protocol.to_owned(),
                reason: format!("grammar.json deserialization failed: {e}"),
            })?;
        // Derived tables are built in dependency order; later steps read the
        // fields assigned by earlier ones.
        grammar.subtypes = compute_subtype_closure(&grammar);
        grammar.named_alias_map = build_named_alias_map(&grammar);
        grammar.yield_sets = compute_yield_sets(&grammar);
        if let Some(nt_bytes) = node_types_bytes {
            let (all_children, field_children, nonfield_children) =
                build_node_type_children(nt_bytes);
            grammar.node_type_children = all_children;
            grammar.node_type_field_children = field_children;
            grammar.node_type_nonfield_children = nonfield_children;
            augment_subtypes_from_node_types(&mut grammar);
        }
        // NOTE(review): `compute_yield_sets` as visible here reads only
        // `rules` and `supertypes`, which the steps in between do not appear
        // to modify, so this and the final recomputation look redundant —
        // confirm against `classify_external_layout_tokens` before removing.
        grammar.yield_sets = compute_yield_sets(&grammar);
        grammar.external_alias_map = build_external_alias_map(&grammar);
        let (token_roles, indent_triggers) = compute_token_roles(&grammar);
        grammar.token_roles = token_roles;
        grammar.indent_triggers = indent_triggers;
        grammar.line_comment_prefixes = extract_line_comment_prefixes(&grammar);
        classify_external_layout_tokens(&mut grammar);
        grammar.yield_sets = compute_yield_sets(&grammar);
        Ok(grammar)
    }
}
553
/// Builds the `kind -> names it satisfies` table stored in
/// [`Grammar::subtypes`].
///
/// Steps:
/// 1. every rule name satisfies itself;
/// 2. every kind reachable from a hidden (`_`-prefixed) or supertype rule
///    satisfies that rule;
/// 3. every named alias satisfies itself, and every kind reachable from the
///    aliased content satisfies the alias;
/// 4. the relation is closed transitively through hidden / supertype names,
///    using strongly connected components so that cycles terminate.
fn compute_subtype_closure(
    grammar: &Grammar,
) -> std::collections::HashMap<String, std::collections::HashSet<String>> {
    use std::collections::{HashMap, HashSet};
    let mut subtypes: HashMap<String, HashSet<String>> = HashMap::new();
    // Step 1: reflexive entries for all rules.
    for name in grammar.rules.keys() {
        subtypes
            .entry(name.clone())
            .or_default()
            .insert(name.clone());
    }

    // Collects into `out` every symbol name and named-alias value reachable
    // from `production`. Hidden and supertype symbols are additionally
    // expanded into their rule bodies; `visited` guards against re-expanding
    // the same rule.
    fn walk<'g>(
        grammar: &'g Grammar,
        production: &'g Production,
        visited: &mut HashSet<&'g str>,
        out: &mut HashSet<String>,
    ) {
        match production {
            Production::Symbol { name } => {
                out.insert(name.clone());
                let expand = name.starts_with('_') || grammar.supertypes.contains(name.as_str());
                // `visited.insert` returns false when the rule was already
                // expanded during this walk.
                if expand && visited.insert(name.as_str()) {
                    if let Some(rule) = grammar.rules.get(name) {
                        walk(grammar, rule, visited, out);
                    }
                }
            }
            Production::Choice { members } | Production::Seq { members } => {
                for m in members {
                    walk(grammar, m, visited, out);
                }
            }
            Production::Alias {
                content,
                named,
                value,
            } => {
                if *named && !value.is_empty() {
                    out.insert(value.clone());
                }
                walk(grammar, content, visited, out);
            }
            Production::Repeat { content }
            | Production::Repeat1 { content }
            | Production::Optional { content }
            | Production::Field { content, .. }
            | Production::Token { content }
            | Production::ImmediateToken { content }
            | Production::Prec { content, .. }
            | Production::PrecLeft { content, .. }
            | Production::PrecRight { content, .. }
            | Production::PrecDynamic { content, .. }
            | Production::Reserved { content, .. } => {
                walk(grammar, content, visited, out);
            }
            _ => {}
        }
    }

    // Step 2: everything reachable from a hidden / supertype rule satisfies it.
    for (name, rule) in &grammar.rules {
        let expand = name.starts_with('_') || grammar.supertypes.contains(name.as_str());
        if !expand {
            continue;
        }
        let mut visited: HashSet<&str> = HashSet::new();
        visited.insert(name.as_str());
        let mut reachable: HashSet<String> = HashSet::new();
        walk(grammar, rule, &mut visited, &mut reachable);
        for kind in &reachable {
            subtypes
                .entry(kind.clone())
                .or_default()
                .insert(name.clone());
        }
    }

    // Gathers `(alias value, aliased content)` for every named, non-empty
    // alias found anywhere inside `production`, including nested ones.
    fn collect_aliases<'g>(production: &'g Production, out: &mut Vec<(String, &'g Production)>) {
        match production {
            Production::Alias {
                content,
                named,
                value,
            } => {
                if *named && !value.is_empty() {
                    out.push((value.clone(), content.as_ref()));
                }
                collect_aliases(content, out);
            }
            Production::Choice { members } | Production::Seq { members } => {
                for m in members {
                    collect_aliases(m, out);
                }
            }
            Production::Repeat { content }
            | Production::Repeat1 { content }
            | Production::Optional { content }
            | Production::Field { content, .. }
            | Production::Token { content }
            | Production::ImmediateToken { content }
            | Production::Prec { content, .. }
            | Production::PrecLeft { content, .. }
            | Production::PrecRight { content, .. }
            | Production::PrecDynamic { content, .. }
            | Production::Reserved { content, .. } => {
                collect_aliases(content, out);
            }
            _ => {}
        }
    }
    // Step 3: named aliases behave like extra rule names.
    let mut aliases: Vec<(String, &Production)> = Vec::new();
    for rule in grammar.rules.values() {
        collect_aliases(rule, &mut aliases);
    }
    for (alias_value, content) in aliases {
        let mut visited: HashSet<&str> = HashSet::new();
        let mut reachable: HashSet<String> = HashSet::new();
        walk(grammar, content, &mut visited, &mut reachable);
        subtypes
            .entry(alias_value.clone())
            .or_default()
            .insert(alias_value.clone());
        for kind in reachable {
            subtypes
                .entry(kind)
                .or_default()
                .insert(alias_value.clone());
        }
    }

    // Step 4: transitive closure, restricted to hidden / supertype names.
    let is_dispatch = |s: &str| s.starts_with('_') || grammar.supertypes.contains(s);
    // Graph nodes: every hidden / supertype name that occurs as a key or as a
    // member of the table built so far.
    let mut nodes: HashSet<String> = HashSet::new();
    for (k, vs) in &subtypes {
        if is_dispatch(k) {
            nodes.insert(k.clone());
        }
        for v in vs {
            if is_dispatch(v) {
                nodes.insert(v.clone());
            }
        }
    }
    let nodes: Vec<String> = nodes.into_iter().collect();
    let index_of: HashMap<&str, usize> = nodes
        .iter()
        .enumerate()
        .map(|(i, n)| (n.as_str(), i))
        .collect();
    // Edge i -> j means "nodes[i] satisfies nodes[j]"; self-loops are dropped.
    let mut edges: Vec<Vec<usize>> = vec![Vec::new(); nodes.len()];
    for (i, name) in nodes.iter().enumerate() {
        if let Some(targets) = subtypes.get(name) {
            for t in targets {
                if let Some(&j) = index_of.get(t.as_str()) {
                    if i != j {
                        edges[i].push(j);
                    }
                }
            }
        }
    }

    // Strongly connected components via Tarjan's algorithm, driven by an
    // explicit work stack of `(node, next edge index)` instead of recursion.
    // Returns the component id of every node; ids are handed out in the order
    // components are completed.
    fn tarjan(edges: &[Vec<usize>]) -> Vec<usize> {
        let n = edges.len();
        let mut comp = vec![usize::MAX; n];
        // `usize::MAX` in `index_arr` marks "not visited yet".
        let mut index_arr = vec![usize::MAX; n];
        let mut lowlink = vec![0usize; n];
        let mut on_stack = vec![false; n];
        let mut stack: Vec<usize> = Vec::new();
        let mut next_index = 0usize;
        let mut next_comp = 0usize;
        let mut work: Vec<(usize, usize)> = Vec::new();
        for start in 0..n {
            if index_arr[start] != usize::MAX {
                continue;
            }
            work.push((start, 0));
            index_arr[start] = next_index;
            lowlink[start] = next_index;
            next_index += 1;
            stack.push(start);
            on_stack[start] = true;
            while let Some(&(v, i)) = work.last() {
                if i < edges[v].len() {
                    let w = edges[v][i];
                    // Advance this frame's edge cursor before descending.
                    if let Some(slot) = work.last_mut() {
                        slot.1 += 1;
                    }
                    if index_arr[w] == usize::MAX {
                        index_arr[w] = next_index;
                        lowlink[w] = next_index;
                        next_index += 1;
                        stack.push(w);
                        on_stack[w] = true;
                        work.push((w, 0));
                    } else if on_stack[w] && index_arr[w] < lowlink[v] {
                        lowlink[v] = index_arr[w];
                    }
                } else {
                    // All edges of `v` handled: close its component if `v` is
                    // a root, then fold its lowlink into the parent frame.
                    if lowlink[v] == index_arr[v] {
                        while let Some(w) = stack.pop() {
                            on_stack[w] = false;
                            comp[w] = next_comp;
                            if w == v {
                                break;
                            }
                        }
                        next_comp += 1;
                    }
                    let lv = lowlink[v];
                    work.pop();
                    if let Some(&(parent, _)) = work.last() {
                        if lv < lowlink[parent] {
                            lowlink[parent] = lv;
                        }
                    }
                }
            }
        }
        comp
    }
    let comp = tarjan(&edges);
    let num_comps = comp.iter().max().copied().map_or(0, |m| m + 1);

    let mut scc_members: Vec<Vec<usize>> = vec![Vec::new(); num_comps];
    for (v, &c) in comp.iter().enumerate() {
        scc_members[c].push(v);
    }
    // Closure of a component = its own members plus the closures of every
    // component it has an edge into. Processing ids in increasing order reads
    // `scc_closure[wc]` for successor components, so it relies on those having
    // been completed (and numbered) earlier by `tarjan`.
    let mut scc_closure: Vec<HashSet<String>> = vec![HashSet::new(); num_comps];
    for c in 0..num_comps {
        let mut closure: HashSet<String> = HashSet::new();
        for &v in &scc_members[c] {
            closure.insert(nodes[v].clone());
        }
        for &v in &scc_members[c] {
            for &w in &edges[v] {
                let wc = comp[w];
                if wc != c {
                    closure.extend(scc_closure[wc].iter().cloned());
                }
            }
        }
        scc_closure[c] = closure;
    }

    // Rewrite every entry: keep what it had and add the closure of each
    // hidden / supertype name it already contained.
    let keys: Vec<String> = subtypes.keys().cloned().collect();
    for k in keys {
        let existing = subtypes.remove(&k).unwrap_or_default();
        let mut new_set: HashSet<String> = HashSet::new();
        for s in &existing {
            new_set.insert(s.clone());
            if let Some(&i) = index_of.get(s.as_str()) {
                new_set.extend(scc_closure[comp[i]].iter().cloned());
            }
        }
        subtypes.insert(k, new_set);
    }

    subtypes
}
859
860fn compute_yield_sets(
866 grammar: &Grammar,
867) -> std::collections::HashMap<String, std::collections::HashSet<String>> {
868 let mut cache: std::collections::HashMap<String, std::collections::HashSet<String>> =
869 std::collections::HashMap::new();
870 for (name, rule) in &grammar.rules {
871 let expand = name.starts_with('_') || grammar.supertypes.contains(name.as_str());
872 if !expand {
873 continue;
874 }
875 if cache.contains_key(name) {
876 continue;
877 }
878 let mut visited = std::collections::HashSet::new();
879 let ys = yield_of_production(grammar, rule, &mut visited, &mut cache);
880 cache.insert(name.clone(), ys);
881 }
882 cache
883}
884
885fn yield_of_production(
893 grammar: &Grammar,
894 production: &Production,
895 visited: &mut std::collections::HashSet<String>,
896 cache: &mut std::collections::HashMap<String, std::collections::HashSet<String>>,
897) -> std::collections::HashSet<String> {
898 match production {
899 Production::Symbol { name } => {
900 let expand = name.starts_with('_') || grammar.supertypes.contains(name.as_str());
901 if !expand {
902 let mut set = std::collections::HashSet::new();
903 set.insert(name.clone());
904 return set;
905 }
906 if let Some(cached) = cache.get(name) {
907 return cached.clone();
908 }
909 {
910 if !visited.insert(name.clone()) {
911 return std::collections::HashSet::new();
912 }
913 let result = if let Some(rule) = grammar.rules.get(name) {
914 yield_of_production(grammar, rule, visited, cache)
915 } else {
916 std::collections::HashSet::new()
917 };
918 visited.remove(name);
919 cache.insert(name.clone(), result.clone());
920 result
921 }
922 }
923 Production::Alias {
924 content,
925 named,
926 value,
927 } => {
928 if *named && !value.is_empty() {
929 let mut set = std::collections::HashSet::new();
930 set.insert(value.clone());
931 set
932 } else {
933 yield_of_production(grammar, content, visited, cache)
934 }
935 }
936 Production::Seq { members } => {
937 if members.is_empty() {
938 let mut set = std::collections::HashSet::new();
939 set.insert(String::new());
940 set
941 } else {
942 let mut combined = std::collections::HashSet::new();
949 for m in members {
950 let ys = yield_of_production(grammar, m, visited, cache);
951 if ys.is_empty() {
952 continue;
953 }
954 let has_epsilon = ys.contains("");
955 combined.extend(ys);
956 if !has_epsilon {
957 break;
958 }
959 }
960 combined
961 }
962 }
963 Production::Choice { members } => {
964 let mut union = std::collections::HashSet::new();
965 for m in members {
966 union.extend(yield_of_production(grammar, m, visited, cache));
967 }
968 union
969 }
970 Production::Optional { content } => {
971 let mut set = yield_of_production(grammar, content, visited, cache);
972 set.insert(String::new());
973 set
974 }
975 Production::Blank => {
976 let mut set = std::collections::HashSet::new();
977 set.insert(String::new());
978 set
979 }
980 Production::String { .. } | Production::Pattern { .. } => std::collections::HashSet::new(),
981 Production::Repeat { content } => {
982 let mut set = yield_of_production(grammar, content, visited, cache);
983 set.insert(String::new());
984 set
985 }
986 Production::Repeat1 { content }
987 | Production::Field { content, .. }
988 | Production::Token { content }
989 | Production::ImmediateToken { content }
990 | Production::Prec { content, .. }
991 | Production::PrecLeft { content, .. }
992 | Production::PrecRight { content, .. }
993 | Production::PrecDynamic { content, .. }
994 | Production::Reserved { content, .. } => {
995 yield_of_production(grammar, content, visited, cache)
996 }
997 }
998}
999
/// Return type of `build_node_type_children`, in order:
///
/// 1. parent kind -> all named child kinds;
/// 2. parent kind -> field name -> named child kinds of that field;
/// 3. parent kind -> named child kinds listed outside any field.
type NodeTypeResult = (
    std::collections::HashMap<String, std::collections::HashSet<String>>,
    std::collections::HashMap<
        String,
        std::collections::HashMap<String, std::collections::HashSet<String>>,
    >,
    std::collections::HashMap<String, std::collections::HashSet<String>>,
);
1014
1015fn build_node_type_children(nt_bytes: &[u8]) -> NodeTypeResult {
1016 use std::collections::{HashMap, HashSet};
1017 let node_types: Vec<crate::theory_extract::NodeType> = match serde_json::from_slice(nt_bytes) {
1018 Ok(v) => v,
1019 Err(_) => return (HashMap::new(), HashMap::new(), HashMap::new()),
1020 };
1021 let mut all_map: HashMap<String, HashSet<String>> = HashMap::new();
1022 let mut field_map: HashMap<String, HashMap<String, HashSet<String>>> = HashMap::new();
1023 let mut nonfield_map: HashMap<String, HashSet<String>> = HashMap::new();
1024 for entry in &node_types {
1025 if !entry.named {
1026 continue;
1027 }
1028 let mut child_kinds = HashSet::new();
1029 for (field_name, field_value) in &entry.fields {
1030 if let Some(types) = field_value.get("types").and_then(|t| t.as_array()) {
1031 for t in types {
1032 if let (Some(name), Some(true)) = (
1033 t.get("type").and_then(|n| n.as_str()),
1034 t.get("named").and_then(serde_json::Value::as_bool),
1035 ) {
1036 child_kinds.insert(name.to_owned());
1037 field_map
1038 .entry(entry.node_type.clone())
1039 .or_default()
1040 .entry(field_name.clone())
1041 .or_default()
1042 .insert(name.to_owned());
1043 }
1044 }
1045 }
1046 }
1047 if let Some(ref children) = entry.children {
1048 for t in &children.types {
1049 if t.named {
1050 child_kinds.insert(t.node_type.clone());
1051 nonfield_map
1052 .entry(entry.node_type.clone())
1053 .or_default()
1054 .insert(t.node_type.clone());
1055 }
1056 }
1057 }
1058 if !child_kinds.is_empty() {
1059 all_map.insert(entry.node_type.clone(), child_kinds);
1060 }
1061 }
1062 (all_map, field_map, nonfield_map)
1063}
1064
1065fn augment_subtypes_from_node_types(grammar: &mut Grammar) {
1074 use std::collections::HashMap;
1075
1076 let mut pairs: Vec<(String, String)> = Vec::new();
1078 for parent_kind in grammar.node_type_children.keys() {
1079 let Some(rule) = grammar.rules.get(parent_kind) else {
1080 continue;
1081 };
1082
1083 let mut field_symbols: HashMap<String, Vec<String>> = HashMap::new();
1086 let mut non_field_symbols: Vec<String> = Vec::new();
1087 collect_field_symbols(rule, &mut field_symbols, &mut non_field_symbols, false);
1088
1089 if let Some(nt_fields) = grammar.node_type_field_children.get(parent_kind) {
1093 for (field_name, nt_child_kinds) in nt_fields {
1094 let Some(rule_syms) = field_symbols.get(field_name) else {
1095 continue;
1096 };
1097 for child_kind in nt_child_kinds {
1098 if grammar.rules.contains_key(child_kind) {
1099 continue;
1100 }
1101 for sym_name in rule_syms {
1102 if !kind_satisfies_symbol(grammar, Some(child_kind), sym_name) {
1103 pairs.push((child_kind.clone(), sym_name.clone()));
1104 }
1105 }
1106 }
1107 }
1108 }
1109
1110 if let Some(nt_nonfield) = grammar.node_type_nonfield_children.get(parent_kind) {
1113 for child_kind in nt_nonfield {
1114 if grammar.rules.contains_key(child_kind) {
1115 continue;
1116 }
1117 for sym_name in &non_field_symbols {
1118 if !kind_satisfies_symbol(grammar, Some(child_kind), sym_name) {
1119 pairs.push((child_kind.clone(), sym_name.clone()));
1120 }
1121 }
1122 }
1123 }
1124 }
1125 for (child_kind, sym_name) in pairs {
1126 grammar
1127 .subtypes
1128 .entry(child_kind)
1129 .or_default()
1130 .insert(sym_name);
1131 }
1132}
1133
1134fn collect_field_symbols(
1137 prod: &Production,
1138 field_map: &mut std::collections::HashMap<String, Vec<String>>,
1139 non_field: &mut Vec<String>,
1140 inside_field: bool,
1141) {
1142 match prod {
1143 Production::Symbol { name } if !inside_field => {
1144 non_field.push(name.clone());
1145 }
1146 Production::Field { name, content } => {
1147 let mut syms = Vec::new();
1148 collect_symbols_flat(content, &mut syms);
1149 field_map.entry(name.clone()).or_default().extend(syms);
1150 }
1151 Production::Choice { members } | Production::Seq { members } => {
1152 for m in members {
1153 collect_field_symbols(m, field_map, non_field, inside_field);
1154 }
1155 }
1156 Production::Repeat { content }
1157 | Production::Repeat1 { content }
1158 | Production::Optional { content }
1159 | Production::Alias { content, .. }
1160 | Production::Token { content }
1161 | Production::ImmediateToken { content }
1162 | Production::Prec { content, .. }
1163 | Production::PrecLeft { content, .. }
1164 | Production::PrecRight { content, .. }
1165 | Production::PrecDynamic { content, .. }
1166 | Production::Reserved { content, .. } => {
1167 collect_field_symbols(content, field_map, non_field, inside_field);
1168 }
1169 _ => {}
1170 }
1171}
1172
1173fn collect_symbols_flat(prod: &Production, out: &mut Vec<String>) {
1174 match prod {
1175 Production::Symbol { name } => out.push(name.clone()),
1176 Production::Choice { members } | Production::Seq { members } => {
1177 for m in members {
1178 collect_symbols_flat(m, out);
1179 }
1180 }
1181 Production::Repeat { content }
1182 | Production::Repeat1 { content }
1183 | Production::Optional { content }
1184 | Production::Alias { content, .. }
1185 | Production::Field { content, .. }
1186 | Production::Token { content }
1187 | Production::ImmediateToken { content }
1188 | Production::Prec { content, .. }
1189 | Production::PrecLeft { content, .. }
1190 | Production::PrecRight { content, .. }
1191 | Production::PrecDynamic { content, .. }
1192 | Production::Reserved { content, .. } => collect_symbols_flat(content, out),
1193 _ => {}
1194 }
1195}
1196
1197fn build_external_alias_map(grammar: &Grammar) -> std::collections::HashMap<String, String> {
1200 let mut map = std::collections::HashMap::new();
1201 fn walk(
1202 grammar: &Grammar,
1203 prod: &Production,
1204 map: &mut std::collections::HashMap<String, String>,
1205 ) {
1206 match prod {
1207 Production::Alias {
1208 content,
1209 named,
1210 value,
1211 } => {
1212 if !*named && !value.is_empty() {
1213 if let Production::Symbol { name } = content.as_ref() {
1214 if name.starts_with('_') && !grammar.rules.contains_key(name) {
1215 map.entry(name.clone()).or_insert_with(|| value.clone());
1216 }
1217 }
1218 }
1219 walk(grammar, content, map);
1220 }
1221 Production::Choice { members } | Production::Seq { members } => {
1222 for m in members {
1223 walk(grammar, m, map);
1224 }
1225 }
1226 Production::Repeat { content }
1227 | Production::Repeat1 { content }
1228 | Production::Optional { content }
1229 | Production::Field { content, .. }
1230 | Production::Token { content }
1231 | Production::ImmediateToken { content }
1232 | Production::Prec { content, .. }
1233 | Production::PrecLeft { content, .. }
1234 | Production::PrecRight { content, .. }
1235 | Production::PrecDynamic { content, .. }
1236 | Production::Reserved { content, .. } => walk(grammar, content, map),
1237 _ => {}
1238 }
1239 }
1240 for rule in grammar.rules.values() {
1241 walk(grammar, rule, &mut map);
1242 }
1243 map
1244}
1245
1246fn build_named_alias_map(grammar: &Grammar) -> std::collections::HashMap<String, String> {
1252 let mut map = std::collections::HashMap::new();
1253 fn walk(prod: &Production, map: &mut std::collections::HashMap<String, String>) {
1254 match prod {
1255 Production::Alias {
1256 content,
1257 named,
1258 value,
1259 } => {
1260 if *named && !value.is_empty() {
1261 if let Production::Symbol { name } = content.as_ref() {
1262 map.entry(value.clone()).or_insert_with(|| name.clone());
1263 }
1264 }
1265 walk(content, map);
1266 }
1267 Production::Choice { members } | Production::Seq { members } => {
1268 for m in members {
1269 walk(m, map);
1270 }
1271 }
1272 Production::Repeat { content }
1273 | Production::Repeat1 { content }
1274 | Production::Optional { content }
1275 | Production::Field { content, .. }
1276 | Production::Token { content }
1277 | Production::ImmediateToken { content }
1278 | Production::Prec { content, .. }
1279 | Production::PrecLeft { content, .. }
1280 | Production::PrecRight { content, .. }
1281 | Production::PrecDynamic { content, .. }
1282 | Production::Reserved { content, .. } => walk(content, map),
1283 _ => {}
1284 }
1285 }
1286 for rule in grammar.rules.values() {
1287 walk(rule, &mut map);
1288 }
1289 map
1290}
1291
/// Rule name -> literal text -> role assigned to that literal in the rule.
type RoleMap = std::collections::HashMap<String, std::collections::HashMap<String, TokenRole>>;
/// Set of `(rule name, literal text)` pairs recorded as indent triggers.
type IndentSet = std::collections::HashSet<(String, String)>;
1306
1307fn compute_token_roles(grammar: &Grammar) -> (RoleMap, IndentSet) {
1308 use std::collections::{HashMap, HashSet};
1309 let mut all_roles: HashMap<String, HashMap<String, TokenRole>> = HashMap::new();
1310 let mut indent_triggers: HashSet<(String, String)> = HashSet::new();
1311
1312 for (rule_name, rule) in &grammar.rules {
1313 let mut roles: HashMap<String, TokenRole> = HashMap::new();
1314 classify_production(rule, &mut roles, &mut indent_triggers, rule_name);
1315 if !roles.is_empty() {
1316 all_roles.insert(rule_name.clone(), roles);
1317 }
1318 }
1319
1320 (all_roles, indent_triggers)
1321}
1322
1323fn classify_production(
1325 prod: &Production,
1326 roles: &mut std::collections::HashMap<String, TokenRole>,
1327 indent_triggers: &mut std::collections::HashSet<(String, String)>,
1328 rule_name: &str,
1329) {
1330 match prod {
1331 Production::Seq { members } => {
1332 classify_seq(members, roles, indent_triggers, rule_name, false);
1333 }
1334 Production::Choice { members } => {
1335 for m in members {
1336 match m {
1341 Production::Seq {
1342 members: seq_members,
1343 } => {
1344 classify_seq(seq_members, roles, indent_triggers, rule_name, true);
1345 }
1346 _ => classify_production(m, roles, indent_triggers, rule_name),
1347 }
1348 }
1349 }
1350 Production::Repeat { content } | Production::Repeat1 { content } => {
1351 classify_repeat_body(content, roles, indent_triggers, rule_name);
1352 }
1353 Production::Optional { content }
1354 | Production::Field { content, .. }
1355 | Production::Token { content }
1356 | Production::ImmediateToken { content }
1357 | Production::Prec { content, .. }
1358 | Production::PrecLeft { content, .. }
1359 | Production::PrecRight { content, .. }
1360 | Production::PrecDynamic { content, .. }
1361 | Production::Reserved { content, .. } => {
1362 classify_production(content, roles, indent_triggers, rule_name);
1363 }
1364 Production::Alias { content, .. } => {
1365 classify_production(content, roles, indent_triggers, rule_name);
1366 }
1367 _ => {}
1368 }
1369}
1370
1371fn classify_seq(
1374 members: &[Production],
1375 roles: &mut std::collections::HashMap<String, TokenRole>,
1376 indent_triggers: &mut std::collections::HashSet<(String, String)>,
1377 rule_name: &str,
1378 in_choice: bool,
1379) {
1380 let string_positions: Vec<(usize, &str)> = members
1381 .iter()
1382 .enumerate()
1383 .filter_map(|(i, m)| unwrap_to_string(m).map(|s| (i, s)))
1384 .collect();
1385
1386 let content_count = members
1387 .iter()
1388 .filter(|m| unwrap_to_string(m).is_none())
1389 .count();
1390
1391 if string_positions.len() >= 2 {
1392 let (first_idx, first_val) = string_positions[0];
1393 let (last_idx, last_val) = string_positions[string_positions.len() - 1];
1394
1395 let has_content_between = members[first_idx + 1..last_idx]
1396 .iter()
1397 .any(|m| unwrap_to_string(m).is_none());
1398
1399 let both_punct = !is_word_like(first_val) && !is_word_like(last_val);
1400 let both_word = is_word_like(first_val) && is_word_like(last_val);
1401 if has_content_between && first_val != last_val && (both_punct || both_word) {
1402 roles.insert(first_val.to_owned(), TokenRole::BracketOpen);
1403 roles.insert(last_val.to_owned(), TokenRole::BracketClose);
1404
1405 let between = &members[first_idx + 1..last_idx];
1406 if first_val == "{" && has_repeat_recursive(between) {
1407 indent_triggers.insert((rule_name.to_owned(), first_val.to_owned()));
1408 }
1409 }
1410 }
1411
1412 let first_content_idx = members.iter().position(|m| unwrap_to_string(m).is_none());
1414 let last_content_idx = members.iter().rposition(|m| unwrap_to_string(m).is_none());
1415
1416 for (i, m) in members.iter().enumerate() {
1417 if let Some(value) = unwrap_to_string(m) {
1418 let value = value.to_owned();
1419 if !roles.contains_key(&value) {
1420 if is_word_like(&value) {
1421 roles.insert(value.clone(), TokenRole::Keyword);
1422 } else if !in_choice
1423 && first_content_idx.is_some_and(|fc| i < fc)
1424 && is_prefix_sigil(&value)
1425 {
1426 roles.insert(value.clone(), TokenRole::BracketOpen);
1427 } else if last_content_idx.is_some_and(|lc| i > lc) {
1428 roles.insert(value.clone(), TokenRole::BracketClose);
1432 } else if !in_choice
1433 && string_positions.len() == 1
1434 && content_count == 2
1435 && value.len() == 1
1436 {
1437 roles.insert(value.clone(), TokenRole::Connector);
1443 } else {
1444 roles.insert(value.clone(), TokenRole::Operator);
1445 }
1446 }
1447 }
1448 }
1449
1450 for m in members {
1451 if unwrap_to_string(m).is_none() {
1452 classify_production(m, roles, indent_triggers, rule_name);
1453 }
1454 }
1455}
1456
1457fn classify_repeat_body(
1461 content: &Production,
1462 roles: &mut std::collections::HashMap<String, TokenRole>,
1463 indent_triggers: &mut std::collections::HashSet<(String, String)>,
1464 rule_name: &str,
1465) {
1466 match content {
1467 Production::Seq { members } => {
1468 if let Some(Production::String { value }) = members.first() {
1469 roles.insert(value.clone(), TokenRole::Separator);
1470 }
1471 classify_seq(members, roles, indent_triggers, rule_name, false);
1472 }
1473 _ => classify_production(content, roles, indent_triggers, rule_name),
1474 }
1475}
1476
1477fn classify_seq_positions(members: &[Production], in_choice: bool) -> Vec<Option<TokenRole>> {
1482 let mut roles: Vec<Option<TokenRole>> = vec![None; members.len()];
1483
1484 let string_positions: Vec<(usize, &str)> = members
1485 .iter()
1486 .enumerate()
1487 .filter_map(|(i, m)| unwrap_to_string(m).map(|s| (i, s)))
1488 .collect();
1489
1490 let content_count = members
1491 .iter()
1492 .filter(|m| unwrap_to_string(m).is_none())
1493 .count();
1494
1495 let mut bracket_open_idx: Option<usize> = None;
1497 let mut bracket_close_idx: Option<usize> = None;
1498 if string_positions.len() >= 2 {
1499 let (first_idx, first_val) = string_positions[0];
1500 let (last_idx, last_val) = string_positions[string_positions.len() - 1];
1501
1502 let has_content_between = members[first_idx + 1..last_idx]
1503 .iter()
1504 .any(|m| unwrap_to_string(m).is_none());
1505
1506 let both_punct = !is_word_like(first_val) && !is_word_like(last_val);
1507 let both_word = is_word_like(first_val) && is_word_like(last_val);
1508 let either_immediate =
1513 is_immediate_token(&members[first_idx]) || is_immediate_token(&members[last_idx]);
1514 let same_text_immediate = first_val == last_val && either_immediate;
1515 if has_content_between
1516 && (both_punct || both_word)
1517 && (first_val != last_val || same_text_immediate)
1518 {
1519 roles[first_idx] = Some(TokenRole::BracketOpen);
1520 roles[last_idx] = Some(TokenRole::BracketClose);
1521 bracket_open_idx = Some(first_idx);
1522 bracket_close_idx = Some(last_idx);
1523 }
1524 }
1525
1526 let first_content_idx = members.iter().position(|m| unwrap_to_string(m).is_none());
1527 let last_content_idx = members.iter().rposition(|m| unwrap_to_string(m).is_none());
1528
1529 for (i, m) in members.iter().enumerate() {
1530 if roles[i].is_some() {
1531 continue;
1532 }
1533 if let Some(value) = unwrap_to_string(m) {
1534 roles[i] = Some(if is_word_like(value) {
1535 TokenRole::Keyword
1536 } else if !in_choice && first_content_idx.is_some_and(|fc| i < fc) {
1537 if is_prefix_sigil(value) {
1538 TokenRole::BracketOpen
1539 } else {
1540 TokenRole::Operator
1541 }
1542 } else if last_content_idx.is_some_and(|lc| i > lc) {
1543 TokenRole::BracketClose
1544 } else if !in_choice
1545 && string_positions.len() == 1
1546 && content_count == 2
1547 && value.len() == 1
1548 {
1549 TokenRole::Connector
1550 } else {
1551 TokenRole::Operator
1552 });
1553 }
1554 }
1555
1556 let _ = (bracket_open_idx, bracket_close_idx);
1560
1561 roles
1562}
1563
1564#[allow(clippy::branches_sharing_code)]
1566fn seq_bracket_triggers_indent(
1567 members: &[Production],
1568 open_idx: usize,
1569 _grammar: &Grammar,
1570) -> bool {
1571 let string_positions: Vec<(usize, &str)> = members
1572 .iter()
1573 .enumerate()
1574 .filter_map(|(i, m)| unwrap_to_string(m).map(|s| (i, s)))
1575 .collect();
1576 if string_positions.len() < 2 {
1577 return false;
1578 }
1579 let open_val = string_positions.iter().find(|(i, _)| *i == open_idx);
1580 let close_val = string_positions.last();
1581 if let (Some((_, open_text)), Some((close_idx, close_text))) = (open_val, close_val) {
1582 if open_idx >= *close_idx {
1583 return false;
1584 }
1585 if is_word_like(open_text) && is_word_like(close_text) {
1591 return true;
1592 }
1593 let between = &members[open_idx + 1..*close_idx];
1594 if *open_text == "{" && has_repeat_recursive(between) {
1599 return true;
1600 }
1601 if *open_text == "{" {
1606 for m in between {
1607 if let Production::Choice { members: alts } = m {
1608 let has_blank = alts.iter().any(|a| matches!(a, Production::Blank));
1609 if has_blank {
1610 for alt in alts {
1611 if let Production::Symbol { name } = alt {
1612 if let Some(rule) = _grammar.rules.get(name) {
1613 if has_repeat_in(rule) {
1614 return true;
1615 }
1616 }
1617 }
1618 }
1619 }
1620 }
1621 }
1622 }
1623 false
1624 } else {
1625 false
1626 }
1627}
1628
1629fn member_has_leading_bracket(prod: &Production, grammar: &Grammar) -> bool {
1633 match prod {
1634 Production::Symbol { name } => grammar
1635 .rules
1636 .get(name)
1637 .is_some_and(|rule| first_string_of(rule).is_some_and(|s| !is_word_like(s))),
1638 Production::Field { content, .. } => member_has_leading_bracket(content, grammar),
1639 Production::Choice { members } => {
1640 let non_blank: Vec<_> = members
1641 .iter()
1642 .filter(|m| !matches!(m, Production::Blank))
1643 .collect();
1644 !non_blank.is_empty()
1645 && non_blank
1646 .iter()
1647 .all(|m| member_has_leading_bracket(m, grammar))
1648 }
1649 Production::Alias { content, .. } => {
1650 if let Production::Symbol { name } = content.as_ref() {
1651 grammar
1652 .rules
1653 .get(name)
1654 .is_some_and(|rule| first_string_of(rule).is_some_and(|s| !is_word_like(s)))
1655 } else {
1656 false
1657 }
1658 }
1659 Production::Prec { content, .. }
1660 | Production::PrecLeft { content, .. }
1661 | Production::PrecRight { content, .. }
1662 | Production::PrecDynamic { content, .. }
1663 | Production::Optional { content } => member_has_leading_bracket(content, grammar),
1664 Production::Repeat { .. } | Production::Repeat1 { .. } => false,
1665 _ => false,
1666 }
1667}
1668
1669fn first_string_of(prod: &Production) -> Option<&str> {
1670 match prod {
1671 Production::String { value } => Some(value.as_str()),
1672 Production::Seq { members } => members.first().and_then(first_string_of),
1673 Production::Prec { content, .. }
1674 | Production::PrecLeft { content, .. }
1675 | Production::PrecRight { content, .. }
1676 | Production::PrecDynamic { content, .. }
1677 | Production::Token { content }
1678 | Production::ImmediateToken { content }
1679 | Production::Field { content, .. } => first_string_of(content),
1680 _ => None,
1681 }
1682}
1683
1684fn has_repeat_recursive(members: &[Production]) -> bool {
1686 members.iter().any(has_repeat_in)
1687}
1688
1689fn has_repeat_in(prod: &Production) -> bool {
1690 match prod {
1691 Production::Repeat { .. } | Production::Repeat1 { .. } => true,
1692 Production::Choice { members } | Production::Seq { members } => {
1693 members.iter().any(has_repeat_in)
1694 }
1695 Production::Prec { content, .. }
1696 | Production::PrecLeft { content, .. }
1697 | Production::PrecRight { content, .. }
1698 | Production::PrecDynamic { content, .. }
1699 | Production::Optional { content }
1700 | Production::Field { content, .. }
1701 | Production::Token { content }
1702 | Production::ImmediateToken { content }
1703 | Production::Reserved { content, .. }
1704 | Production::Alias { content, .. } => has_repeat_in(content),
1705 _ => false,
1706 }
1707}
1708
/// Reports whether `s` looks like an identifier or keyword.
///
/// A word-like string is non-empty, starts with an alphabetic character or `_`, and
/// consists solely of alphanumeric characters and `_`.
fn is_word_like(s: &str) -> bool {
    let mut chars = s.chars();
    match chars.next() {
        // A valid head is itself alphanumeric-or-underscore, so only the tail is left to check.
        Some(head) if head.is_alphabetic() || head == '_' => {
            chars.all(|c| c.is_alphanumeric() || c == '_')
        }
        _ => false,
    }
}
1715
/// Reports whether a punctuation literal may act as a prefix sigil.
///
/// The only strings rejected are the single-byte operator characters
/// `= + - * / < > ! ? | & ^ % ~`. Every other string, including the empty string and
/// anything longer than one byte, is accepted.
fn is_prefix_sigil(s: &str) -> bool {
    !matches!(
        s.as_bytes(),
        [b'='
            | b'+'
            | b'-'
            | b'*'
            | b'/'
            | b'<'
            | b'>'
            | b'!'
            | b'?'
            | b'|'
            | b'&'
            | b'^'
            | b'%'
            | b'~']
    )
}
1744
1745fn is_immediate_token(prod: &Production) -> bool {
1750 match prod {
1751 Production::ImmediateToken { .. } => true,
1752 Production::Prec { content, .. }
1753 | Production::PrecLeft { content, .. }
1754 | Production::PrecRight { content, .. }
1755 | Production::PrecDynamic { content, .. }
1756 | Production::Token { content }
1757 | Production::Field { content, .. }
1758 | Production::Reserved { content, .. } => is_immediate_token(content),
1759 _ => false,
1760 }
1761}
1762
1763fn unwrap_to_string(prod: &Production) -> Option<&str> {
1764 match prod {
1765 Production::String { value } => Some(value.as_str()),
1766 Production::Token { content }
1767 | Production::ImmediateToken { content }
1768 | Production::Prec { content, .. }
1769 | Production::PrecLeft { content, .. }
1770 | Production::PrecRight { content, .. }
1771 | Production::PrecDynamic { content, .. }
1772 | Production::Field { content, .. }
1773 | Production::Reserved { content, .. } => unwrap_to_string(content),
1774 _ => None,
1775 }
1776}
1777
1778fn extract_line_comment_prefixes(grammar: &Grammar) -> Vec<String> {
1785 let mut prefixes = Vec::new();
1786 for extra_name in &grammar.extras {
1787 if let Some(rule) = grammar.rules.get(extra_name) {
1788 if let Some(prefix) = extract_line_comment_prefix(rule) {
1789 prefixes.push(prefix);
1790 }
1791 }
1792 }
1793 prefixes
1794}
1795
1796fn extract_line_comment_prefix(prod: &Production) -> Option<String> {
1797 match prod {
1798 Production::Token { content } | Production::ImmediateToken { content } => {
1799 extract_line_comment_prefix(content)
1800 }
1801 Production::Seq { members } if members.len() >= 2 => {
1802 if let Production::String { value } = &members[0] {
1803 if members[1..].iter().any(|m| {
1804 matches!(m, Production::Pattern { value } if value.contains(".*") || value.contains("[^\\n]*") || value.contains("[^\\r\\n]*"))
1805 }) {
1806 return Some(value.clone());
1807 }
1808 }
1809 None
1810 }
1811 Production::Choice { members } => members.iter().find_map(extract_line_comment_prefix),
1812 _ => None,
1813 }
1814}
1815
1816fn classify_external_layout_tokens(grammar: &mut Grammar) {
1823 let all_hidden_refs = collect_all_symbol_refs(&grammar.rules);
1830 for name in &all_hidden_refs {
1831 if !name.starts_with('_') || grammar.rules.contains_key(name) {
1832 continue;
1833 }
1834 if grammar.external_alias_map.contains_key(name) {
1835 continue;
1836 }
1837 if name == "_indent" || name.ends_with("_indent") {
1838 grammar.external_indent_opens.insert(name.clone());
1839 } else if name == "_dedent" || name.ends_with("_dedent") {
1840 grammar.external_indent_closes.insert(name.clone());
1841 } else if name.contains("line_ending")
1842 || name.contains("newline")
1843 || name.ends_with("_or_eof")
1844 {
1845 grammar.external_newlines.insert(name.clone());
1846 } else if name.contains("semicolon") {
1847 grammar.external_semicolons.insert(name.clone());
1848 }
1849 }
1850}
1851
1852fn collect_all_symbol_refs(
1854 rules: &BTreeMap<String, Production>,
1855) -> std::collections::HashSet<String> {
1856 let mut refs = std::collections::HashSet::new();
1857 fn walk(prod: &Production, refs: &mut std::collections::HashSet<String>) {
1858 match prod {
1859 Production::Symbol { name } => {
1860 refs.insert(name.clone());
1861 }
1862 Production::Seq { members } | Production::Choice { members } => {
1863 for m in members {
1864 walk(m, refs);
1865 }
1866 }
1867 Production::Alias { content, .. }
1868 | Production::Repeat { content }
1869 | Production::Repeat1 { content }
1870 | Production::Optional { content }
1871 | Production::Field { content, .. }
1872 | Production::Token { content }
1873 | Production::ImmediateToken { content }
1874 | Production::Prec { content, .. }
1875 | Production::PrecLeft { content, .. }
1876 | Production::PrecRight { content, .. }
1877 | Production::PrecDynamic { content, .. }
1878 | Production::Reserved { content, .. } => walk(content, refs),
1879 _ => {}
1880 }
1881 }
1882 for rule in rules.values() {
1883 walk(rule, &mut refs);
1884 }
1885 refs
1886}
1887
1888#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
1899pub struct FormatPolicy {
1900 pub indent_width: usize,
1902 pub separator: String,
1906 pub newline: String,
1909 pub line_break_after: Vec<String>,
1911 pub indent_open: Vec<String>,
1913 pub indent_close: Vec<String>,
1915}
1916
1917impl Default for FormatPolicy {
1918 fn default() -> Self {
1919 Self {
1920 indent_width: 2,
1921 separator: " ".to_owned(),
1922 newline: "\n".to_owned(),
1923 line_break_after: vec![";".into(), "{".into(), "}".into()],
1924 indent_open: vec!["{".into()],
1925 indent_close: vec!["}".into()],
1926 }
1927 }
1928}
1929
1930pub fn emit_pretty(
1954 protocol: &str,
1955 schema: &Schema,
1956 grammar: &Grammar,
1957 policy: &FormatPolicy,
1958 cassette: Option<&dyn crate::languages::cassettes::GrammarCassette>,
1959) -> Result<Vec<u8>, ParseError> {
1960 let roots = collect_roots(schema);
1961 if roots.is_empty() {
1962 return Err(ParseError::EmitFailed {
1963 protocol: protocol.to_owned(),
1964 reason: "schema has no entry vertices".to_owned(),
1965 });
1966 }
1967
1968 let mut out = Output::new(policy, grammar, cassette);
1969 for (i, root) in roots.iter().enumerate() {
1970 if i > 0 {
1971 out.newline();
1972 }
1973 emit_vertex(protocol, schema, grammar, root, &mut out)?;
1974 }
1975 Ok(out.finish())
1976}
1977
1978fn collect_roots(schema: &Schema) -> Vec<&panproto_gat::Name> {
1979 if !schema.entries.is_empty() {
1980 return schema
1981 .entries
1982 .iter()
1983 .filter(|name| schema.vertices.contains_key(*name))
1984 .collect();
1985 }
1986
1987 let mut targets: std::collections::HashSet<&panproto_gat::Name> =
1990 std::collections::HashSet::new();
1991 for edge in schema.edges.keys() {
1992 targets.insert(&edge.tgt);
1993 }
1994 let mut roots: Vec<&panproto_gat::Name> = schema
1995 .vertices
1996 .keys()
1997 .filter(|name| !targets.contains(name))
1998 .collect();
1999 roots.sort();
2000 roots
2001}
2002
2003fn emit_vertex(
2004 protocol: &str,
2005 schema: &Schema,
2006 grammar: &Grammar,
2007 vertex_id: &panproto_gat::Name,
2008 out: &mut Output<'_>,
2009) -> Result<(), ParseError> {
2010 let vertex = schema
2011 .vertices
2012 .get(vertex_id)
2013 .ok_or_else(|| ParseError::EmitFailed {
2014 protocol: protocol.to_owned(),
2015 reason: format!("vertex '{vertex_id}' not found"),
2016 })?;
2017
2018 let kind_head = vertex.kind.as_ref();
2024 if let Some(rule) = grammar.rules.get(kind_head) {
2025 if is_immediate_token(rule) {
2026 out.no_space();
2027 }
2028 }
2029
2030 if let Some(literal) = literal_value(schema, vertex_id) {
2036 if children_for(schema, vertex_id).is_empty() {
2037 let is_bracket_pair = literal.len() >= 2
2042 && matches!(
2043 (literal.as_bytes().first(), literal.as_bytes().last()),
2044 (Some(b'('), Some(b')')) | (Some(b'['), Some(b']')) | (Some(b'{'), Some(b'}'))
2045 );
2046 let vkind = vertex.kind.as_ref();
2047 let has_alias_rule = grammar
2048 .named_alias_map
2049 .get(vkind)
2050 .is_some_and(|src| grammar.rules.contains_key(src));
2051 if !(is_bracket_pair && has_alias_rule) {
2052 out.token_with_role(literal, Some(TokenRole::Terminal));
2053 return Ok(());
2054 }
2055 }
2056 }
2057
2058 let kind = vertex.kind.as_ref();
2059 let edges = children_for(schema, vertex_id);
2060 if let Some(rule) = grammar.rules.get(kind) {
2061 let old_rule = out.current_rule.take();
2062 out.current_rule = Some(kind.to_owned());
2063 let mut cursor = ChildCursor::new(&edges);
2064 emit_production(protocol, schema, grammar, vertex_id, rule, &mut cursor, out)?;
2065 drain_extras(protocol, schema, grammar, &mut cursor, out)?;
2066 out.current_rule = old_rule;
2067 return Ok(());
2068 }
2069
2070 if let Some(source_name) = grammar.named_alias_map.get(kind) {
2075 if let Some(rule) = grammar.rules.get(source_name) {
2076 let old_rule = out.current_rule.take();
2077 out.current_rule = Some(source_name.to_owned());
2078 let mut cursor = ChildCursor::new(&edges);
2079 emit_production(protocol, schema, grammar, vertex_id, rule, &mut cursor, out)?;
2080 drain_extras(protocol, schema, grammar, &mut cursor, out)?;
2081 out.current_rule = old_rule;
2082 return Ok(());
2083 }
2084 }
2085
2086 for edge in &edges {
2090 emit_vertex(protocol, schema, grammar, &edge.tgt, out)?;
2091 }
2092 Ok(())
2093}
2094
2095struct ChildCursor<'a> {
2098 edges: &'a [&'a Edge],
2099 consumed: Vec<bool>,
2100}
2101
2102impl<'a> ChildCursor<'a> {
2103 fn new(edges: &'a [&'a Edge]) -> Self {
2104 Self {
2105 edges,
2106 consumed: vec![false; edges.len()],
2107 }
2108 }
2109
2110 fn take_field(&mut self, field_name: &str) -> Option<&'a Edge> {
2112 for (i, edge) in self.edges.iter().enumerate() {
2113 if !self.consumed[i] && edge.kind.as_ref() == field_name {
2114 self.consumed[i] = true;
2115 return Some(edge);
2116 }
2117 }
2118 None
2119 }
2120
2121 #[cfg(test)]
2126 fn has_matching(&self, predicate: impl Fn(&Edge) -> bool) -> bool {
2127 self.edges
2128 .iter()
2129 .enumerate()
2130 .any(|(i, edge)| !self.consumed[i] && predicate(edge))
2131 }
2132
2133 fn take_matching(&mut self, predicate: impl Fn(&Edge) -> bool) -> Option<&'a Edge> {
2137 for (i, edge) in self.edges.iter().enumerate() {
2138 if !self.consumed[i] && predicate(edge) {
2139 self.consumed[i] = true;
2140 return Some(edge);
2141 }
2142 }
2143 None
2144 }
2145}
2146
thread_local! {
    // Nesting depth of `emit_production` calls on this thread. `emit_production` bumps it
    // on entry, lowers it on exit, and fails once it exceeds 500.
    static EMIT_DEPTH: std::cell::Cell<usize> = const { std::cell::Cell::new(0) };
    // `(vertex id, rule name)` pairs currently being walked by `walk_in_mu_frame`. A pair
    // that is already present turns a nested walk of the same rule on the same vertex into
    // a no-op.
    static EMIT_MU_FRAMES: std::cell::RefCell<std::collections::HashSet<(String, String)>> =
        std::cell::RefCell::new(std::collections::HashSet::new());
    // Field name installed by `push_field_context`, removed by `clear_field_context`, and
    // read by `current_field_context`; `None` when no field context is active.
    static EMIT_FIELD_CONTEXT: std::cell::RefCell<Option<String>> =
        const { std::cell::RefCell::new(None) };
}
2176
2177struct FieldContextGuard(Option<String>);
2179
2180impl Drop for FieldContextGuard {
2181 fn drop(&mut self) {
2182 EMIT_FIELD_CONTEXT.with(|f| *f.borrow_mut() = self.0.take());
2183 }
2184}
2185
2186fn push_field_context(name: &str) -> FieldContextGuard {
2187 let prev = EMIT_FIELD_CONTEXT.with(|f| f.borrow_mut().replace(name.to_owned()));
2188 FieldContextGuard(prev)
2189}
2190
2191fn clear_field_context() -> FieldContextGuard {
2195 let prev = EMIT_FIELD_CONTEXT.with(|f| f.borrow_mut().take());
2196 FieldContextGuard(prev)
2197}
2198
2199fn current_field_context() -> Option<String> {
2200 EMIT_FIELD_CONTEXT.with(|f| f.borrow().clone())
2201}
2202
2203fn walk_in_mu_frame(
2208 protocol: &str,
2209 schema: &Schema,
2210 grammar: &Grammar,
2211 vertex_id: &panproto_gat::Name,
2212 rule_name: &str,
2213 rule: &Production,
2214 cursor: &mut ChildCursor<'_>,
2215 out: &mut Output<'_>,
2216) -> Result<(), ParseError> {
2217 let key = (vertex_id.to_string(), rule_name.to_owned());
2218 let inserted = EMIT_MU_FRAMES.with(|frames| frames.borrow_mut().insert(key.clone()));
2219 if !inserted {
2220 return Ok(());
2225 }
2226 let result = emit_production(protocol, schema, grammar, vertex_id, rule, cursor, out);
2227 EMIT_MU_FRAMES.with(|frames| {
2228 frames.borrow_mut().remove(&key);
2229 });
2230 result
2231}
2232
2233fn emit_production(
2234 protocol: &str,
2235 schema: &Schema,
2236 grammar: &Grammar,
2237 vertex_id: &panproto_gat::Name,
2238 production: &Production,
2239 cursor: &mut ChildCursor<'_>,
2240 out: &mut Output<'_>,
2241) -> Result<(), ParseError> {
2242 let depth = EMIT_DEPTH.with(|d| {
2243 let v = d.get() + 1;
2244 d.set(v);
2245 v
2246 });
2247 if depth > 500 {
2248 EMIT_DEPTH.with(|d| d.set(d.get() - 1));
2249 return Err(ParseError::EmitFailed {
2250 protocol: protocol.to_owned(),
2251 reason: format!(
2252 "emit_production recursion >500 (likely a cyclic grammar; \
2253 vertex='{vertex_id}')"
2254 ),
2255 });
2256 }
2257 drain_extras(protocol, schema, grammar, cursor, out)?;
2258 let result = emit_production_inner(
2259 protocol, schema, grammar, vertex_id, production, cursor, out,
2260 );
2261 EMIT_DEPTH.with(|d| d.set(d.get() - 1));
2262 result
2263}
2264
2265fn drain_extras(
2273 protocol: &str,
2274 schema: &Schema,
2275 grammar: &Grammar,
2276 cursor: &mut ChildCursor<'_>,
2277 out: &mut Output<'_>,
2278) -> Result<(), ParseError> {
2279 if grammar.extras.is_empty() {
2280 return Ok(());
2281 }
2282 loop {
2283 let next_extra: Option<usize> = cursor
2284 .edges
2285 .iter()
2286 .enumerate()
2287 .find(|(i, _)| !cursor.consumed[*i])
2288 .and_then(|(i, edge)| {
2289 let kind = schema.vertices.get(&edge.tgt).map(|v| v.kind.as_ref())?;
2290 if grammar.extras.contains(kind) {
2291 Some(i)
2292 } else {
2293 None
2294 }
2295 });
2296 let Some(idx) = next_extra else {
2297 return Ok(());
2298 };
2299 cursor.consumed[idx] = true;
2300 let target = &cursor.edges[idx].tgt;
2301 emit_vertex(protocol, schema, grammar, target, out)?;
2302 }
2303}
2304
/// Emits the members of a `SEQ`, giving every string-like member a positional token role.
///
/// Roles come from `classify_seq_positions(members, in_choice)`. The first `BracketOpen`
/// position for which `seq_bracket_triggers_indent` returns `true` becomes the indent
/// opener: an indent-open follows its literal, and any `BracketClose` literal of the same
/// sequence is preceded by `Token::IndentClose`. Members that are not string-like are
/// emitted through `emit_production`.
///
/// # Errors
///
/// Propagates the first error returned by `emit_production` for a member.
fn emit_seq_with_roles(
    protocol: &str,
    schema: &Schema,
    grammar: &Grammar,
    vertex_id: &panproto_gat::Name,
    members: &[Production],
    cursor: &mut ChildCursor<'_>,
    out: &mut Output<'_>,
    in_choice: bool,
) -> Result<(), ParseError> {
    let positional_roles = classify_seq_positions(members, in_choice);

    // Index of the first opening bracket that should start an indented block, if any.
    let indent_open_idx: Option<usize> = positional_roles.iter().enumerate().position(|(i, r)| {
        *r == Some(TokenRole::BracketOpen) && seq_bracket_triggers_indent(members, i, grammar)
    });

    // Member indices that get a newline right before they are emitted. Only populated when
    // the indent opener is a word-like literal.
    let mut line_break_positions: std::collections::HashSet<usize> =
        std::collections::HashSet::new();
    if let Some(oi) = indent_open_idx {
        let open_text = unwrap_to_string(&members[oi]);
        if open_text.is_some_and(is_word_like) {
            let mut found_body = false;
            for (j, member) in members.iter().enumerate().skip(oi + 1) {
                if let Production::Choice { members: alts } = member {
                    // A body slot is a CHOICE that may be blank and that offers a symbol
                    // whose rule contains a repetition.
                    let has_blank = alts.iter().any(|a| matches!(a, Production::Blank));
                    let has_block_symbol = alts.iter().any(|a| match a {
                        Production::Symbol { name } => {
                            grammar.rules.get(name).is_some_and(has_repeat_in)
                        }
                        _ => false,
                    });
                    if has_blank && has_block_symbol {
                        line_break_positions.insert(j);
                        found_body = true;
                    }
                } else if found_body && matches!(member, Production::Field { .. }) {
                    // FIELD members that follow a body slot also start on a new line.
                    line_break_positions.insert(j);
                }
            }
        }
    }

    // Whether the previous member pushed at least one `Token::Lit`.
    let mut prev_member_emitted_content = false;
    for (i, member) in members.iter().enumerate() {
        let tokens_before_member = out.tokens.len();
        if let Some(value) = unwrap_to_string(member) {
            // Fall back to keyword/operator when no positional role was assigned.
            let role = positional_roles[i].unwrap_or_else(|| {
                if is_word_like(value) {
                    TokenRole::Keyword
                } else {
                    TokenRole::Operator
                }
            });

            if indent_open_idx == Some(i) {
                if is_word_like(value) {
                    // Word-like opener: push the literal and the indent-open directly.
                    out.tokens.push(Token::Lit(value.to_owned(), role));
                    out.tokens.push(Token::IndentOpen);
                } else {
                    out.token_with_indent_open(value, role);
                }
            } else if role == TokenRole::BracketClose && indent_open_idx.is_some() {
                // Close the indented block before the closing bracket itself.
                out.tokens.push(Token::IndentClose);
                out.tokens.push(Token::Lit(value.to_owned(), role));
            } else {
                out.token_with_role(value, Some(role));
            }
        } else {
            // Two adjacent non-string members: force a space between them unless the
            // current member is one of the exempt shapes checked below.
            if i > 0 && unwrap_to_string(&members[i - 1]).is_none() && prev_member_emitted_content {
                let member_starts_with_bracket = member_has_leading_bracket(member, grammar);
                // A `_`-prefixed symbol without a rule (an external).
                let is_zero_width_external = matches!(
                    member,
                    Production::Symbol { name }
                        if name.starts_with('_') && !grammar.rules.contains_key(name)
                );
                // A CHOICE made up solely of BLANK and string-like alternatives.
                let is_separator_choice = matches!(member, Production::Choice { members: alts }
                    if alts.iter().all(|a| matches!(a, Production::Blank) || unwrap_to_string(a).is_some()));
                let is_repeat = matches!(
                    member,
                    Production::Repeat { .. } | Production::Repeat1 { .. }
                );
                if !member_starts_with_bracket
                    && !is_zero_width_external
                    && !is_separator_choice
                    && !is_repeat
                {
                    out.tokens.push(Token::ForceSpace);
                }
            }
            if line_break_positions.contains(&i) {
                out.newline();
            }
            emit_production(protocol, schema, grammar, vertex_id, member, cursor, out)?;
        }
        // Only literal tokens count as content; layout tokens do not.
        prev_member_emitted_content = out.tokens[tokens_before_member..]
            .iter()
            .any(|t| matches!(t, Token::Lit(_, _)));
    }
    Ok(())
}
2422
/// Emits a single production for `vertex_id`, dispatching on the production kind.
///
/// `cursor` tracks which child edges of the vertex are still available; productions that
/// correspond to child vertices consume edges from it. `REPEAT`, `REPEAT1` and `OPTIONAL`
/// snapshot both the cursor and the output so that an attempt which makes no progress can
/// be rolled back.
///
/// # Errors
///
/// Returns `ParseError::EmitFailed` when a `SYMBOL` that names the vertex's own kind has no
/// rule, and propagates errors from nested emission calls. Errors raised while iterating a
/// repetition end the repetition instead of being propagated.
fn emit_production_inner(
    protocol: &str,
    schema: &Schema,
    grammar: &Grammar,
    vertex_id: &panproto_gat::Name,
    production: &Production,
    cursor: &mut ChildCursor<'_>,
    out: &mut Output<'_>,
) -> Result<(), ParseError> {
    match production {
        Production::String { value } => {
            out.token(value);
            Ok(())
        }
        Production::Pattern { value } => {
            // Prefer the literal stored on the vertex; otherwise derive output from the
            // pattern itself.
            if let Some(literal) = literal_value(schema, vertex_id) {
                out.token_with_role(literal, Some(TokenRole::Terminal));
            } else if is_newline_like_pattern(value) {
                out.newline();
            } else if is_whitespace_only_pattern(value) {
                // Whitespace-only pattern without a literal: emit nothing.
            } else {
                out.token_with_role(&placeholder_for_pattern(value), Some(TokenRole::Terminal));
            }
            Ok(())
        }
        Production::Blank => Ok(()),
        Production::Symbol { name } => {
            // Inside a FIELD the symbol is bound to the next unconsumed edge whose kind is
            // the field name; when no such edge exists nothing is emitted.
            if let Some(field) = current_field_context() {
                if let Some(edge) = cursor.take_field(&field) {
                    return emit_in_child_context(
                        protocol, schema, grammar, &edge.tgt, production, out,
                    );
                }
                return Ok(());
            }
            if name.starts_with('_') {
                if let Some(rule) = grammar.rules.get(name) {
                    // `_`-prefixed symbol with a rule: walk that rule in place against the
                    // same vertex and cursor.
                    let old_rule = out.current_rule.take();
                    out.current_rule = Some(name.to_owned());
                    let result = walk_in_mu_frame(
                        protocol, schema, grammar, vertex_id, name, rule, cursor, out,
                    );
                    out.current_rule = old_rule;
                    result
                } else {
                    // `_`-prefixed symbol without a rule: an aliased literal wins, then the
                    // layout sets are consulted, then the cassette's default.
                    if let Some(alias_value) = grammar.external_alias_map.get(name) {
                        out.token(alias_value);
                        return Ok(());
                    }
                    if grammar.external_indent_opens.contains(name) {
                        out.indent_open();
                    } else if grammar.external_indent_closes.contains(name) {
                        out.indent_close();
                    } else if grammar.external_newlines.contains(name) {
                        out.newline();
                    } else if grammar.external_semicolons.contains(name) {
                        out.token_with_role(";", Some(TokenRole::Separator));
                    } else if let Some(default) = out
                        .cassette
                        .and_then(|c| crate::languages::cassettes::resolve_external_token(c, name))
                    {
                        if !default.is_empty() {
                            out.token(default);
                        }
                    }
                    Ok(())
                }
            } else if let Some(edge) = { take_symbol_match(grammar, schema, cursor, name) } {
                // A child vertex satisfies the symbol: emit that child.
                emit_vertex(protocol, schema, grammar, &edge.tgt, out)
            } else if vertex_id_kind(schema, vertex_id) == Some(name.as_str()) {
                // The symbol names the current vertex's own kind: walk its rule in place.
                let rule = grammar
                    .rules
                    .get(name)
                    .ok_or_else(|| ParseError::EmitFailed {
                        protocol: protocol.to_owned(),
                        reason: format!("no production for SYMBOL '{name}'"),
                    })?;
                {
                    let old_rule = out.current_rule.take();
                    out.current_rule = Some(name.to_owned());
                    let result = walk_in_mu_frame(
                        protocol, schema, grammar, vertex_id, name, rule, cursor, out,
                    );
                    out.current_rule = old_rule;
                    result
                }
            } else {
                // Nothing matches this symbol: emit nothing.
                Ok(())
            }
        }
        Production::Seq { members } => emit_seq_with_roles(
            protocol, schema, grammar, vertex_id, members, cursor, out, false,
        ),
        Production::Choice { members } => {
            if let Some(matched) =
                pick_choice_with_cursor(schema, grammar, vertex_id, cursor, members)
            {
                match matched {
                    // A sequence chosen as an alternative is classified with
                    // `in_choice = true`.
                    Production::Seq {
                        members: seq_members,
                    } => emit_seq_with_roles(
                        protocol,
                        schema,
                        grammar,
                        vertex_id,
                        seq_members,
                        cursor,
                        out,
                        true,
                    ),
                    // A bare string alternative is a keyword when word-like, otherwise a
                    // separator.
                    Production::String { value } => {
                        let role = if is_word_like(value) {
                            TokenRole::Keyword
                        } else {
                            TokenRole::Separator
                        };
                        out.token_with_role(value, Some(role));
                        Ok(())
                    }
                    _ => {
                        emit_production(protocol, schema, grammar, vertex_id, matched, cursor, out)
                    }
                }
            } else {
                // No alternative was picked: emit nothing.
                Ok(())
            }
        }
        Production::Repeat { content } | Production::Repeat1 { content } => {
            // Text of a string-like first member of a SEQ body with at least two members.
            let mandatory_sep_text: Option<&str> = match content.as_ref() {
                Production::Seq { members } if members.len() >= 2 => unwrap_to_string(&members[0]),
                _ => None,
            };
            // `Some(members)` when the body is a SEQ whose first member is treated as a
            // separator slot: a CHOICE containing BLANK, an OPTIONAL, or a string-like
            // member that the cassette reports as a line-break separator.
            let separator_leading_seq: Option<&[Production]> = match content.as_ref() {
                Production::Seq { members } if members.len() >= 2 => {
                    let first = &members[0];
                    let is_mandatory_sep = unwrap_to_string(first).is_some();
                    let cassette_overrides = is_mandatory_sep
                        && unwrap_to_string(first).is_some_and(|sep| {
                            out.cassette.is_some_and(|c| c.separator_is_line_break(sep))
                        });
                    let is_separator_slot = match first {
                        Production::Choice { members } => {
                            members.iter().any(|m| matches!(m, Production::Blank))
                        }
                        Production::Optional { .. } => true,
                        _ => cassette_overrides,
                    };
                    if is_separator_slot {
                        Some(members.as_slice())
                    } else {
                        None
                    }
                }
                _ => None,
            };

            let mut emitted_any = false;
            loop {
                // Snapshot the state so that an iteration without progress can be undone.
                let cursor_snap = cursor.consumed.clone();
                let out_snap = out.snapshot();
                let consumed_before = cursor.consumed.iter().filter(|&&c| c).count();
                let result: Result<(), ParseError> =
                    if let Some(seq_members) = separator_leading_seq {
                        // When the cassette says so, the separator literal is replaced by a
                        // newline.
                        let cassette_replaces_sep = mandatory_sep_text.is_some_and(|sep| {
                            out.cassette.is_some_and(|c| c.separator_is_line_break(sep))
                        });
                        let pre_sep = out.snapshot();
                        let sep_result = if cassette_replaces_sep {
                            out.newline();
                            Ok(())
                        } else {
                            emit_production(
                                protocol,
                                schema,
                                grammar,
                                vertex_id,
                                &seq_members[0],
                                cursor,
                                out,
                            )
                        };
                        match sep_result {
                            Err(e) => Err(e),
                            Ok(()) => {
                                // The separator slot produced no literal: call `no_space`
                                // before the rest of the body.
                                if !cassette_replaces_sep && !out.lit_emitted_since(pre_sep) {
                                    out.no_space();
                                }
                                let mut rest_result = Ok(());
                                for member in &seq_members[1..] {
                                    rest_result = emit_production(
                                        protocol, schema, grammar, vertex_id, member, cursor, out,
                                    );
                                    if rest_result.is_err() {
                                        break;
                                    }
                                }
                                rest_result
                            }
                        }
                    } else {
                        emit_production(protocol, schema, grammar, vertex_id, content, cursor, out)
                    };
                let consumed_after = cursor.consumed.iter().filter(|&&c| c).count();
                // Stop on failure or when the iteration consumed no child edge; either way
                // the partial iteration is rolled back and the error, if any, is dropped.
                if result.is_err() || consumed_after == consumed_before {
                    cursor.consumed = cursor_snap;
                    out.restore(out_snap);
                    break;
                }
                emitted_any = true;
            }
            // REPEAT1 needs at least one occurrence: emit the body once, this time
            // propagating errors.
            if matches!(production, Production::Repeat1 { .. }) && !emitted_any {
                emit_production(protocol, schema, grammar, vertex_id, content, cursor, out)?;
            }
            Ok(())
        }
        Production::Optional { content } => {
            let cursor_snap = cursor.consumed.clone();
            let out_snap = out.snapshot();
            let consumed_before = cursor.consumed.iter().filter(|&&c| c).count();
            let result =
                emit_production(protocol, schema, grammar, vertex_id, content, cursor, out);
            if result.is_err() {
                // Undo the partial output, then report the error to the caller.
                cursor.consumed = cursor_snap;
                out.restore(out_snap);
                return result;
            }
            let consumed_after = cursor.consumed.iter().filter(|&&c| c).count();
            // Output that consumed no child is kept only when `has_relevant_constraint`
            // holds for the content.
            if consumed_after == consumed_before
                && !has_relevant_constraint(content, schema, vertex_id)
            {
                cursor.consumed = cursor_snap;
                out.restore(out_snap);
            }
            Ok(())
        }
        Production::Field { name, content } => {
            // The guard reinstates the previous field context when this arm returns.
            let _guard = push_field_context(name);
            emit_production(protocol, schema, grammar, vertex_id, content, cursor, out)
        }
        Production::Alias {
            content,
            named,
            value,
        } => {
            // Named alias: look for a child vertex whose kind is the alias value.
            if *named && !value.is_empty() {
                if let Some(edge) = cursor.take_matching(|edge| {
                    schema
                        .vertices
                        .get(&edge.tgt)
                        .map(|v| v.kind.as_ref() == value.as_str())
                        .unwrap_or(false)
                }) {
                    return emit_aliased_child(protocol, schema, grammar, &edge.tgt, content, out);
                }
            }
            // Anonymous alias around a `_`-prefixed symbol without a rule: the alias value
            // is the text to emit.
            if !*named && !value.is_empty() {
                if let Production::Symbol { name: sym } = content.as_ref() {
                    if sym.starts_with('_') && !grammar.rules.contains_key(sym) {
                        out.token(value);
                        return Ok(());
                    }
                }
            }
            emit_production(protocol, schema, grammar, vertex_id, content, cursor, out)
        }
        Production::ImmediateToken { content } => {
            // `no_space` is requested before the content of an immediate token.
            out.no_space();
            emit_production(protocol, schema, grammar, vertex_id, content, cursor, out)
        }
        // Wrappers that do not affect emission: emit their content.
        Production::Token { content }
        | Production::Prec { content, .. }
        | Production::PrecLeft { content, .. }
        | Production::PrecRight { content, .. }
        | Production::PrecDynamic { content, .. }
        | Production::Reserved { content, .. } => {
            emit_production(protocol, schema, grammar, vertex_id, content, cursor, out)
        }
    }
}
2849
2850fn take_symbol_match<'a>(
2853 grammar: &Grammar,
2854 schema: &Schema,
2855 cursor: &mut ChildCursor<'a>,
2856 name: &str,
2857) -> Option<&'a Edge> {
2858 if let Some(edge) = cursor.take_matching(|edge| {
2866 edge.kind.as_ref() == "child_of" && {
2867 let target_kind = schema.vertices.get(&edge.tgt).map(|v| v.kind.as_ref());
2868 kind_satisfies_symbol(grammar, target_kind, name)
2869 }
2870 }) {
2871 return Some(edge);
2872 }
2873 cursor.take_matching(|edge| {
2874 let target_kind = schema.vertices.get(&edge.tgt).map(|v| v.kind.as_ref());
2875 kind_satisfies_symbol(grammar, target_kind, name)
2876 })
2877}
2878
2879fn kind_satisfies_symbol(grammar: &Grammar, target_kind: Option<&str>, name: &str) -> bool {
2889 let Some(target) = target_kind else {
2890 return false;
2891 };
2892 if target == name {
2893 return true;
2894 }
2895 grammar
2896 .subtypes
2897 .get(target)
2898 .is_some_and(|set| set.contains(name))
2899}
2900
2901fn emit_aliased_child(
2935 protocol: &str,
2936 schema: &Schema,
2937 grammar: &Grammar,
2938 child_id: &panproto_gat::Name,
2939 content: &Production,
2940 out: &mut Output<'_>,
2941) -> Result<(), ParseError> {
2942 if let Some(literal) = literal_value(schema, child_id) {
2946 if children_for(schema, child_id).is_empty() {
2947 let is_bracket_pair = literal.len() >= 2
2948 && matches!(
2949 (literal.as_bytes().first(), literal.as_bytes().last()),
2950 (Some(b'('), Some(b')')) | (Some(b'['), Some(b']')) | (Some(b'{'), Some(b'}'))
2951 );
2952 if !is_bracket_pair {
2953 out.token_with_role(literal, Some(TokenRole::Terminal));
2954 return Ok(());
2955 }
2956 }
2957 }
2958
2959 let _guard = clear_field_context();
2965
2966 if let Production::Symbol { name } = content {
2969 if let Some(rule) = grammar.rules.get(name) {
2970 let edges = children_for(schema, child_id);
2971 let mut cursor = ChildCursor::new(&edges);
2972 let old_rule = out.current_rule.take();
2973 out.current_rule = Some(name.to_owned());
2974 let result =
2975 emit_production(protocol, schema, grammar, child_id, rule, &mut cursor, out);
2976 out.current_rule = old_rule;
2977 return result;
2978 }
2979 }
2980
2981 let edges = children_for(schema, child_id);
2983 let mut cursor = ChildCursor::new(&edges);
2984 emit_production(
2985 protocol,
2986 schema,
2987 grammar,
2988 child_id,
2989 content,
2990 &mut cursor,
2991 out,
2992 )
2993}
2994
2995fn emit_in_child_context(
2996 protocol: &str,
2997 schema: &Schema,
2998 grammar: &Grammar,
2999 child_id: &panproto_gat::Name,
3000 production: &Production,
3001 out: &mut Output<'_>,
3002) -> Result<(), ParseError> {
3003 let _guard = clear_field_context();
3009 if !matches!(production, Production::Symbol { .. }) {
3018 let child_kind = schema.vertices.get(child_id).map(|v| v.kind.as_ref());
3019 let symbols = referenced_symbols(production);
3020 if symbols
3021 .iter()
3022 .any(|s| kind_satisfies_symbol(grammar, child_kind, s) || child_kind == Some(s))
3023 {
3024 return emit_vertex(protocol, schema, grammar, child_id, out);
3025 }
3026 }
3027 match production {
3028 Production::Symbol { .. } => emit_vertex(protocol, schema, grammar, child_id, out),
3029 _ => {
3030 let edges = children_for(schema, child_id);
3031 let mut cursor = ChildCursor::new(&edges);
3032 emit_production(
3033 protocol,
3034 schema,
3035 grammar,
3036 child_id,
3037 production,
3038 &mut cursor,
3039 out,
3040 )
3041 }
3042 }
3043}
3044
/// Choose which member of a CHOICE (`alternatives`) to emit for `vertex_id`.
///
/// The decision is a cascade of heuristics; the first one that produces an
/// answer wins. In order:
///
/// 1. Nothing left on the cursor: prefer BLANK, then a newline-like PATTERN,
///    then an alternative made only of literals, then one whose yield set
///    contains the empty string.
/// 2. A STRING alternative equal to the kind or `literal-value` of some
///    unconsumed edge target.
/// 3. Literal scoring against the vertex's recorded text constraints
///    (`interstitial-<N>` values, or `chose-alt-fingerprint` as a fallback).
/// 4. Matching on the kind of the first unconsumed edge target: indent-opening
///    alternatives, direct SYMBOL / named ALIAS matches, matches through
///    `grammar.subtypes`, then yield-set matches ranked by precedence.
/// 5. An alternative containing a FIELD named after an unconsumed edge kind.
/// 6. Newline-like PATTERN, BLANK-related fallbacks, and finally the first
///    non-BLANK alternative.
///
/// Returns `None` only when `alternatives` is empty: if a BLANK is present the
/// BLANK fallback always returns, and otherwise the final `find` returns the
/// first alternative.
fn pick_choice_with_cursor<'a>(
    schema: &Schema,
    grammar: &Grammar,
    vertex_id: &panproto_gat::Name,
    cursor: &ChildCursor<'_>,
    alternatives: &'a [Production],
) -> Option<&'a Production> {
    // Number of edges already consumed from the cursor.
    let consumed_count = cursor.consumed.iter().filter(|&&c| c).count();
    // Values of constraints whose sort is `interstitial-<N>` (sorts ending in
    // `-start-byte` are excluded), ordered by the numeric suffix N.
    let positional_interstitials: Vec<&str> = schema
        .constraints
        .get(vertex_id)
        .map(|cs| {
            let mut indexed: Vec<(usize, &str)> = cs
                .iter()
                .filter_map(|c| {
                    let s = c.sort.as_ref();
                    if !s.starts_with("interstitial-") || s.ends_with("-start-byte") {
                        return None;
                    }
                    // A non-numeric suffix drops the constraint silently.
                    let idx: usize = s["interstitial-".len()..].parse().ok()?;
                    Some((idx, c.value.as_str()))
                })
                .collect();
            indexed.sort_by_key(|&(i, _)| i);
            indexed.into_iter().map(|(_, v)| v).collect()
        })
        .unwrap_or_default();
    // Interstitials from position `consumed_count` onward, joined by a single
    // space. Empty when there are no interstitials or all were skipped.
    let positional_slice: String = if positional_interstitials.is_empty() {
        String::new()
    } else {
        positional_interstitials
            .iter()
            .skip(consumed_count)
            .copied()
            .collect::<Vec<&str>>()
            .join(" ")
    };
    // Value of the `chose-alt-fingerprint` constraint, or "" when absent.
    let fingerprint_blob = schema
        .constraints
        .get(vertex_id)
        .and_then(|cs| {
            cs.iter()
                .find(|c| c.sort.as_ref() == "chose-alt-fingerprint")
                .map(|c| c.value.clone())
        })
        .unwrap_or_default();
    // Text that literal scoring searches: the positional slice when it is
    // non-empty, otherwise the fingerprint.
    let constraint_blob: String = if positional_slice.is_empty() {
        fingerprint_blob
    } else {
        positional_slice
    };
    // Whitespace-separated entries of the `chose-alt-child-kinds` constraint.
    let child_kinds: Vec<&str> = schema
        .constraints
        .get(vertex_id)
        .and_then(|cs| {
            cs.iter()
                .find(|c| c.sort.as_ref() == "chose-alt-child-kinds")
                .map(|c| c.value.split_whitespace().collect())
        })
        .unwrap_or_default();
    let any_unconsumed = cursor
        .edges
        .iter()
        .enumerate()
        .any(|(i, _)| !cursor.consumed[i]);
    let blank_present = alternatives.iter().any(|a| matches!(a, Production::Blank));
    // Kinds of the edges that are still unconsumed, in cursor order.
    let edge_kinds: Vec<&str> = cursor
        .edges
        .iter()
        .enumerate()
        .filter(|(i, _)| !cursor.consumed[*i])
        .map(|(_, e)| e.kind.as_ref())
        .collect();
    // Heuristic 1a: nothing left to consume and BLANK is on offer.
    if !any_unconsumed && blank_present {
        return alternatives.iter().find(|a| matches!(a, Production::Blank));
    }
    // Heuristic 1b: nothing left to consume and no BLANK. Look for an
    // alternative that needs no child edge.
    if !any_unconsumed && !blank_present {
        for alt in alternatives {
            if let Production::Pattern { value } = alt {
                if is_newline_like_pattern(value) {
                    return Some(alt);
                }
            }
        }
        // An alternative with at least one literal and no symbol references.
        if let Some(pure_lit) = alternatives.iter().find(|alt| {
            let syms = referenced_symbols(alt);
            let strings = literal_strings(alt);
            syms.is_empty() && !strings.is_empty()
        }) {
            return Some(pure_lit);
        }
        // An alternative whose yield set contains the empty string.
        let mut visited = std::collections::HashSet::new();
        let mut yield_cache = grammar.yield_sets.clone();
        for alt in alternatives {
            let ys = yield_of_production(grammar, alt, &mut visited, &mut yield_cache);
            if ys.contains("") {
                return Some(alt);
            }
            visited.clear();
        }
    }

    // Heuristic 2: a STRING alternative equal to the kind or the
    // `literal-value` of an unconsumed edge target. Edges are scanned in
    // cursor order, alternatives in declaration order.
    for edge_idx in 0..cursor.edges.len() {
        if cursor.consumed[edge_idx] {
            continue;
        }
        let edge = &cursor.edges[edge_idx];
        let tgt_kind = schema.vertices.get(&edge.tgt).map(|v| v.kind.as_ref());
        let tgt_lit = literal_value(schema, &edge.tgt);
        for alt in alternatives {
            if let Production::String { value } = alt {
                if Some(value.as_str()) == tgt_kind || tgt_lit == Some(value.as_str()) {
                    return Some(alt);
                }
            }
        }
    }

    // Heuristic 3: score alternatives by the literals they share with
    // `constraint_blob`.
    if !constraint_blob.is_empty() {
        let first_uc_edge_pre = cursor
            .edges
            .iter()
            .enumerate()
            .find(|(i, _)| !cursor.consumed[*i])
            .map(|(_, e)| e);
        // Whether an alternative can begin by consuming the first unconsumed
        // edge (false when there is no such edge or its target is unknown).
        let alt_accepts = |a: &Production| -> bool {
            let Some(edge) = first_uc_edge_pre else {
                return false;
            };
            let edge_kind = edge.kind.as_ref();
            let Some(tgt_kind) = schema.vertices.get(&edge.tgt).map(|v| v.kind.as_ref()) else {
                return false;
            };
            accepts_first_edge(grammar, a, edge_kind, tgt_kind)
        };
        // When at least one alternative accepts the first edge, alternatives
        // that do not accept it are excluded from scoring below.
        let any_consumes = any_unconsumed && alternatives.iter().any(alt_accepts);

        // Ranking key, compared in this order: higher literal score, then
        // higher symbol score, then fewer total literal characters.
        let mut best_literal: usize = 0;
        let mut best_symbols: usize = 0;
        let mut best_total_chars: usize = usize::MAX;
        let mut best_alt: Option<&Production> = None;
        let mut tied = false;
        for alt in alternatives {
            let strings = literal_strings(alt);
            if strings.is_empty() {
                continue;
            }
            if any_consumes && !alt_accepts(alt) {
                continue;
            }
            // Sum of the byte lengths of the alternative's literals that
            // occur as substrings of the blob.
            let literal_score = strings
                .iter()
                .filter(|s| constraint_blob.contains(s.as_str()))
                .map(String::len)
                .sum::<usize>();
            if literal_score == 0 {
                continue;
            }
            let total_chars: usize = strings.iter().map(String::len).sum();
            // Only computed when the alternative can still tie or beat the
            // current best on literal score; otherwise left at 0.
            // NOTE(review): here the set from `grammar.subtypes.get(sym)` is
            // treated as kinds that may appear as children for `sym`, while
            // `kind_satisfies_symbol` looks the map up by the child's kind.
            // Confirm which direction the map stores.
            let symbol_score = if literal_score >= best_literal && !child_kinds.is_empty() {
                let symbols = referenced_symbols(alt);
                symbols
                    .iter()
                    .filter(|sym| {
                        let sym_str: &str = sym;
                        if child_kinds.contains(&sym_str) {
                            return true;
                        }
                        grammar.subtypes.get(sym_str).is_some_and(|sub_set| {
                            sub_set
                                .iter()
                                .any(|sub| child_kinds.contains(&sub.as_str()))
                        })
                    })
                    .count()
            } else {
                0
            };
            let better = literal_score > best_literal
                || (literal_score == best_literal && symbol_score > best_symbols)
                || (literal_score == best_literal
                    && symbol_score == best_symbols
                    && total_chars < best_total_chars);
            let same = literal_score == best_literal
                && symbol_score == best_symbols
                && total_chars == best_total_chars;
            if better {
                best_literal = literal_score;
                best_symbols = symbol_score;
                best_total_chars = total_chars;
                best_alt = Some(alt);
                tied = false;
            } else if same && best_alt.is_some() {
                tied = true;
            }
        }
        // A tied best is inconclusive and falls through to later heuristics.
        if let Some(alt) = best_alt {
            if !tied {
                if any_unconsumed {
                    if alt_accepts(alt) {
                        return Some(alt);
                    }
                    // The winner cannot consume the first edge. Still take it
                    // unless BLANK is available and the winner references
                    // symbols.
                    if !blank_present || referenced_symbols(alt).is_empty() {
                        return Some(alt);
                    }
                } else {
                    return Some(alt);
                }
            }
        }
    }

    // Heuristic 4: match on the kind of the first unconsumed edge's target.
    let first_unconsumed_kind: Option<&str> = cursor
        .edges
        .iter()
        .enumerate()
        .find(|(i, _)| !cursor.consumed[*i])
        .and_then(|(_, edge)| schema.vertices.get(&edge.tgt).map(|v| v.kind.as_ref()));
    if let Some(target_kind) = first_unconsumed_kind {
        let target_supers = grammar.subtypes.get(target_kind);

        {
            // 4a: when more than one alternative can yield the target kind,
            // prefer the first one that references a symbol listed in
            // `grammar.external_indent_opens`.
            let mut match_count = 0usize;
            let mut indent_alt_idx: Option<usize> = None;
            let mut visited = std::collections::HashSet::new();
            let mut yield_cache = grammar.yield_sets.clone();
            for (i, alt) in alternatives.iter().enumerate() {
                let ys = yield_of_production(grammar, alt, &mut visited, &mut yield_cache);
                if ys.contains(target_kind) {
                    match_count += 1;
                    if indent_alt_idx.is_none()
                        && referenced_symbols(alt)
                            .iter()
                            .any(|s| grammar.external_indent_opens.contains(*s))
                    {
                        indent_alt_idx = Some(i);
                    }
                }
                visited.clear();
            }
            if match_count > 1 {
                if let Some(idx) = indent_alt_idx {
                    return Some(&alternatives[idx]);
                }
            }
        }

        // 4b: a top-level SYMBOL or named ALIAS that names the kind exactly.
        for alt in alternatives {
            if let Production::Symbol { name } = alt {
                if name.as_str() == target_kind {
                    return Some(alt);
                }
            }
            if let Production::Alias {
                named: true, value, ..
            } = alt
            {
                if value.as_str() == target_kind {
                    return Some(alt);
                }
            }
        }

        // 4c: a top-level SYMBOL or named ALIAS that names a member of the
        // set `grammar.subtypes` records for the target kind.
        if let Some(supers) = target_supers {
            for alt in alternatives {
                if let Production::Symbol { name } = alt {
                    if supers.contains(name.as_str()) {
                        return Some(alt);
                    }
                }
                if let Production::Alias {
                    named: true, value, ..
                } = alt
                {
                    if supers.contains(value.as_str()) {
                        return Some(alt);
                    }
                }
            }
        }

        // 4d: alternatives whose yield set contains the target kind, after
        // discarding those ruled out by field names, field token
        // restrictions, or pre-alias constraints.
        let mut visited = std::collections::HashSet::new();
        let mut yield_cache = grammar.yield_sets.clone();
        let mut matching_alts: Vec<&Production> = Vec::new();
        for alt in alternatives {
            // The alternative has fields, but none matches an unconsumed
            // edge kind.
            if has_any_field(alt) && !has_field_in(alt, &edge_kinds) {
                visited.clear();
                continue;
            }
            if !alt_satisfies_field_token_restrictions(schema, cursor, alt) {
                visited.clear();
                continue;
            }
            if !alt_satisfies_pre_alias_constraints(schema, cursor, alt) {
                visited.clear();
                continue;
            }
            let ys = yield_of_production(grammar, alt, &mut visited, &mut yield_cache);
            if ys.contains(target_kind) {
                matching_alts.push(alt);
            }
            visited.clear();
        }
        if matching_alts.len() == 1 {
            return Some(matching_alts[0]);
        }
        if matching_alts.len() > 1 {
            // `sort_by_key` is stable, so among equal precedence values the
            // earliest-declared alternative stays first.
            matching_alts.sort_by_key(|alt| std::cmp::Reverse(prec_value(alt)));
            return Some(matching_alts[0]);
        }
    }

    // Heuristic 5: any alternative with a FIELD named after an unconsumed
    // edge kind.
    for alt in alternatives {
        if has_field_in(alt, &edge_kinds) {
            return Some(alt);
        }
    }

    // No-op: both bindings are already used above.
    let _ = (schema, vertex_id);
    // Heuristic 6a: the first top-level newline-like PATTERN alternative.
    let has_newline_pattern = alternatives
        .iter()
        .any(|a| matches!(a, Production::Pattern { value } if is_newline_like_pattern(value)));
    if has_newline_pattern {
        for alt in alternatives {
            if let Production::Pattern { value } = alt {
                if is_newline_like_pattern(value) {
                    return Some(alt);
                }
            }
        }
    }
    // Heuristic 6b: BLANK is available. Prefer an underscore-prefixed SYMBOL
    // whose rule contains a newline-like pattern; otherwise take BLANK.
    if alternatives.iter().any(|a| matches!(a, Production::Blank)) {
        for alt in alternatives {
            if let Production::Symbol { name } = alt {
                if name.starts_with('_') {
                    if let Some(rule) = grammar.rules.get(name) {
                        if contains_newline_pattern(rule) {
                            return Some(alt);
                        }
                    }
                }
            }
        }
        return alternatives.iter().find(|a| matches!(a, Production::Blank));
    }
    // Heuristic 6c: nothing left to consume. Take the first non-BLANK
    // alternative that references no symbols.
    if !any_unconsumed {
        if let Some(pure_lit) = alternatives.iter().find(|alt| {
            let syms = referenced_symbols(alt);
            syms.is_empty() && !matches!(alt, Production::Blank)
        }) {
            return Some(pure_lit);
        }
    }
    // Last resort: the first non-BLANK alternative.
    alternatives
        .iter()
        .find(|alt| !matches!(alt, Production::Blank))
}
3567
3568fn literal_strings(production: &Production) -> Vec<String> {
3574 let mut out = Vec::new();
3575 fn walk(p: &Production, out: &mut Vec<String>) {
3576 match p {
3577 Production::String { value } if !value.is_empty() => {
3578 out.push(value.clone());
3579 }
3580 Production::Choice { members } | Production::Seq { members } => {
3581 for m in members {
3582 walk(m, out);
3583 }
3584 }
3585 Production::Repeat { content }
3586 | Production::Repeat1 { content }
3587 | Production::Optional { content }
3588 | Production::Field { content, .. }
3589 | Production::Alias { content, .. }
3590 | Production::Token { content }
3591 | Production::ImmediateToken { content }
3592 | Production::Prec { content, .. }
3593 | Production::PrecLeft { content, .. }
3594 | Production::PrecRight { content, .. }
3595 | Production::PrecDynamic { content, .. }
3596 | Production::Reserved { content, .. } => walk(content, out),
3597 _ => {}
3598 }
3599 }
3600 walk(production, &mut out);
3601 out
3602}
3603
3604fn referenced_symbols(production: &Production) -> Vec<&str> {
3611 let mut out = Vec::new();
3612 fn walk<'a>(p: &'a Production, out: &mut Vec<&'a str>) {
3613 match p {
3614 Production::Symbol { name } => out.push(name.as_str()),
3615 Production::Choice { members } | Production::Seq { members } => {
3616 for m in members {
3617 walk(m, out);
3618 }
3619 }
3620 Production::Alias {
3621 content,
3622 named,
3623 value,
3624 } => {
3625 if *named && !value.is_empty() {
3634 out.push(value.as_str());
3635 }
3636 walk(content, out);
3637 }
3638 Production::Repeat { content }
3639 | Production::Repeat1 { content }
3640 | Production::Optional { content }
3641 | Production::Field { content, .. }
3642 | Production::Token { content }
3643 | Production::ImmediateToken { content }
3644 | Production::Prec { content, .. }
3645 | Production::PrecLeft { content, .. }
3646 | Production::PrecRight { content, .. }
3647 | Production::PrecDynamic { content, .. }
3648 | Production::Reserved { content, .. } => walk(content, out),
3649 _ => {}
3650 }
3651 }
3652 walk(production, &mut out);
3653 out
3654}
3655
/// Return the name of the first SYMBOL found by a depth-first, left-to-right
/// walk of `production`, or `None` when it contains no SYMBOL.
#[cfg(test)]
fn first_symbol(production: &Production) -> Option<&str> {
    let inner = match production {
        Production::Symbol { name } => return Some(name.as_str()),
        Production::Seq { members } | Production::Choice { members } => {
            return members.iter().find_map(first_symbol);
        }
        Production::Repeat { content }
        | Production::Repeat1 { content }
        | Production::Optional { content }
        | Production::Field { content, .. }
        | Production::Alias { content, .. }
        | Production::Token { content }
        | Production::ImmediateToken { content }
        | Production::Prec { content, .. }
        | Production::PrecLeft { content, .. }
        | Production::PrecRight { content, .. }
        | Production::PrecDynamic { content, .. }
        | Production::Reserved { content, .. } => content,
        _ => return None,
    };
    first_symbol(inner)
}
3677
3678fn prec_value(prod: &Production) -> i64 {
3679 match prod {
3680 Production::Prec { value, .. }
3681 | Production::PrecLeft { value, .. }
3682 | Production::PrecRight { value, .. }
3683 | Production::PrecDynamic { value, .. } => value.as_i64().unwrap_or(0),
3684 _ => 0,
3685 }
3686}
3687
3688fn has_any_field(production: &Production) -> bool {
3689 match production {
3690 Production::Field { .. } => true,
3691 Production::Seq { members } | Production::Choice { members } => {
3692 members.iter().any(has_any_field)
3693 }
3694 Production::Repeat { content }
3695 | Production::Repeat1 { content }
3696 | Production::Optional { content }
3697 | Production::Alias { content, .. }
3698 | Production::Token { content }
3699 | Production::ImmediateToken { content }
3700 | Production::Prec { content, .. }
3701 | Production::PrecLeft { content, .. }
3702 | Production::PrecRight { content, .. }
3703 | Production::PrecDynamic { content, .. }
3704 | Production::Reserved { content, .. } => has_any_field(content),
3705 _ => false,
3706 }
3707}
3708
3709fn has_field_in(production: &Production, edge_kinds: &[&str]) -> bool {
3710 match production {
3711 Production::Field { name, .. } => edge_kinds.contains(&name.as_str()),
3712 Production::Seq { members } | Production::Choice { members } => {
3713 members.iter().any(|m| has_field_in(m, edge_kinds))
3714 }
3715 Production::Repeat { content }
3716 | Production::Repeat1 { content }
3717 | Production::Optional { content }
3718 | Production::Alias { content, .. }
3719 | Production::Token { content }
3720 | Production::ImmediateToken { content }
3721 | Production::Prec { content, .. }
3722 | Production::PrecLeft { content, .. }
3723 | Production::PrecRight { content, .. }
3724 | Production::PrecDynamic { content, .. }
3725 | Production::Reserved { content, .. } => has_field_in(content, edge_kinds),
3726 _ => false,
3727 }
3728}
3729
3730fn collect_field_token_restrictions<'a>(
3738 production: &'a Production,
3739 out: &mut Vec<(&'a str, Vec<&'a str>)>,
3740) {
3741 match production {
3742 Production::Field { name, content } => {
3743 if let Some(strings) = literal_choice_set(content) {
3744 out.push((name.as_str(), strings));
3745 }
3746 collect_field_token_restrictions(content, out);
3747 }
3748 Production::Seq { members } | Production::Choice { members } => {
3749 for m in members {
3750 collect_field_token_restrictions(m, out);
3751 }
3752 }
3753 Production::Repeat { content }
3754 | Production::Repeat1 { content }
3755 | Production::Optional { content }
3756 | Production::Alias { content, .. }
3757 | Production::Token { content }
3758 | Production::ImmediateToken { content }
3759 | Production::Prec { content, .. }
3760 | Production::PrecLeft { content, .. }
3761 | Production::PrecRight { content, .. }
3762 | Production::PrecDynamic { content, .. }
3763 | Production::Reserved { content, .. } => {
3764 collect_field_token_restrictions(content, out);
3765 }
3766 _ => {}
3767 }
3768}
3769
3770fn literal_choice_set(p: &Production) -> Option<Vec<&str>> {
3773 fn unwrap(p: &Production) -> &Production {
3774 match p {
3775 Production::Prec { content, .. }
3776 | Production::PrecLeft { content, .. }
3777 | Production::PrecRight { content, .. }
3778 | Production::PrecDynamic { content, .. }
3779 | Production::Token { content }
3780 | Production::ImmediateToken { content }
3781 | Production::Reserved { content, .. } => unwrap(content),
3782 _ => p,
3783 }
3784 }
3785 let p = unwrap(p);
3786 let Production::Alias { content, .. } = p else {
3787 return None;
3788 };
3789 let inner = unwrap(content);
3790 match inner {
3791 Production::String { value } => Some(vec![value.as_str()]),
3792 Production::Choice { members } => {
3793 let mut out = Vec::new();
3794 for m in members {
3795 match unwrap(m) {
3796 Production::String { value } => out.push(value.as_str()),
3797 _ => return None,
3798 }
3799 }
3800 Some(out)
3801 }
3802 _ => None,
3803 }
3804}
3805
3806fn accepts_first_edge(
3825 grammar: &Grammar,
3826 production: &Production,
3827 edge_field: &str,
3828 target_kind: &str,
3829) -> bool {
3830 fn yield_contains(grammar: &Grammar, prod: &Production, kind: &str) -> bool {
3831 let mut visited = std::collections::HashSet::new();
3832 let mut cache = grammar.yield_sets.clone();
3833 let ys = yield_of_production(grammar, prod, &mut visited, &mut cache);
3834 ys.contains(kind)
3835 || grammar
3836 .subtypes
3837 .get(kind)
3838 .is_some_and(|subs| subs.iter().any(|s| ys.contains(s.as_str())))
3839 }
3840 fn yield_has_epsilon(grammar: &Grammar, prod: &Production) -> bool {
3841 let mut visited = std::collections::HashSet::new();
3842 let mut cache = grammar.yield_sets.clone();
3843 let ys = yield_of_production(grammar, prod, &mut visited, &mut cache);
3844 ys.contains("") || ys.is_empty()
3847 }
3848 match production {
3849 Production::String { .. } | Production::Pattern { .. } | Production::Blank => false,
3850 Production::Symbol { name } => {
3851 if edge_field != "child_of" {
3852 return false;
3853 }
3854 if name == target_kind {
3855 return true;
3856 }
3857 if grammar
3858 .subtypes
3859 .get(target_kind)
3860 .is_some_and(|s| s.contains(name))
3861 {
3862 return true;
3863 }
3864 let is_expand = name.starts_with('_') || grammar.supertypes.contains(name.as_str());
3866 if is_expand {
3867 if let Some(rule) = grammar.rules.get(name) {
3868 return accepts_first_edge(grammar, rule, edge_field, target_kind);
3869 }
3870 }
3871 false
3872 }
3873 Production::Alias {
3874 named,
3875 value,
3876 content,
3877 } => {
3878 if *named && !value.is_empty() {
3879 edge_field == "child_of" && value == target_kind
3880 } else {
3881 accepts_first_edge(grammar, content, edge_field, target_kind)
3882 }
3883 }
3884 Production::Field { name, content } => {
3885 edge_field == name.as_str() && yield_contains(grammar, content, target_kind)
3886 }
3887 Production::Seq { members } => {
3888 for m in members {
3889 if accepts_first_edge(grammar, m, edge_field, target_kind) {
3890 return true;
3891 }
3892 if !yield_has_epsilon(grammar, m) {
3893 return false;
3894 }
3895 }
3896 false
3897 }
3898 Production::Choice { members } => members
3899 .iter()
3900 .any(|m| accepts_first_edge(grammar, m, edge_field, target_kind)),
3901 Production::Optional { content }
3902 | Production::Repeat { content }
3903 | Production::Repeat1 { content }
3904 | Production::Token { content }
3905 | Production::ImmediateToken { content }
3906 | Production::Prec { content, .. }
3907 | Production::PrecLeft { content, .. }
3908 | Production::PrecRight { content, .. }
3909 | Production::PrecDynamic { content, .. }
3910 | Production::Reserved { content, .. } => {
3911 accepts_first_edge(grammar, content, edge_field, target_kind)
3912 }
3913 }
3914}
3915
3916fn pre_alias_symbol<'a>(schema: &'a Schema, vertex_id: &panproto_gat::Name) -> Option<&'a str> {
3920 schema.constraints.get(vertex_id).and_then(|cs| {
3921 cs.iter()
3922 .find(|c| c.sort.as_ref() == "pre-alias-symbol")
3923 .map(|c| c.value.as_str())
3924 })
3925}
3926
3927fn field_alias_sources<'a>(production: &'a Production, field_name: &str, out: &mut Vec<&'a str>) {
3933 fn unwrap_to_alias_source(p: &Production) -> Option<&str> {
3934 let inner = match p {
3935 Production::Prec { content, .. }
3936 | Production::PrecLeft { content, .. }
3937 | Production::PrecRight { content, .. }
3938 | Production::PrecDynamic { content, .. }
3939 | Production::Token { content }
3940 | Production::ImmediateToken { content }
3941 | Production::Reserved { content, .. } => content.as_ref(),
3942 _ => p,
3943 };
3944 match inner {
3945 Production::Alias { content, named, .. } if *named => {
3946 if let Production::Symbol { name } = content.as_ref() {
3947 return Some(name.as_str());
3948 }
3949 None
3950 }
3951 _ => None,
3952 }
3953 }
3954 match production {
3955 Production::Field { name, content } if name.as_str() == field_name => {
3956 if let Some(src) = unwrap_to_alias_source(content) {
3957 out.push(src);
3958 }
3959 }
3960 Production::Field { content, .. }
3961 | Production::Repeat { content }
3962 | Production::Repeat1 { content }
3963 | Production::Optional { content }
3964 | Production::Alias { content, .. }
3965 | Production::Token { content }
3966 | Production::ImmediateToken { content }
3967 | Production::Prec { content, .. }
3968 | Production::PrecLeft { content, .. }
3969 | Production::PrecRight { content, .. }
3970 | Production::PrecDynamic { content, .. }
3971 | Production::Reserved { content, .. } => {
3972 field_alias_sources(content, field_name, out);
3973 }
3974 Production::Seq { members } | Production::Choice { members } => {
3975 for m in members {
3976 field_alias_sources(m, field_name, out);
3977 }
3978 }
3979 _ => {}
3980 }
3981}
3982
3983fn alt_satisfies_pre_alias_constraints(
3991 schema: &Schema,
3992 cursor: &ChildCursor<'_>,
3993 alt: &Production,
3994) -> bool {
3995 for (i, edge) in cursor.edges.iter().enumerate() {
3996 if cursor.consumed[i] {
3997 continue;
3998 }
3999 let edge_kind = edge.kind.as_ref();
4000 if edge_kind == "child_of" {
4001 continue;
4002 }
4003 let Some(actual_source) = pre_alias_symbol(schema, &edge.tgt) else {
4004 continue;
4005 };
4006 let mut sources: Vec<&str> = Vec::new();
4007 field_alias_sources(alt, edge_kind, &mut sources);
4008 if sources.is_empty() {
4009 continue;
4013 }
4014 if !sources.contains(&actual_source) {
4015 return false;
4016 }
4017 }
4018 true
4019}
4020
4021fn alt_satisfies_field_token_restrictions(
4027 schema: &Schema,
4028 cursor: &ChildCursor<'_>,
4029 alt: &Production,
4030) -> bool {
4031 let mut restrictions: Vec<(&str, Vec<&str>)> = Vec::new();
4032 collect_field_token_restrictions(alt, &mut restrictions);
4033 for (field_name, allowed) in &restrictions {
4034 let mut field_seen = false;
4035 let mut field_admits = false;
4036 for (i, edge) in cursor.edges.iter().enumerate() {
4037 if cursor.consumed[i] {
4038 continue;
4039 }
4040 if edge.kind.as_ref() != *field_name {
4041 continue;
4042 }
4043 field_seen = true;
4044 let lit = literal_value(schema, &edge.tgt);
4045 if let Some(l) = lit {
4046 if allowed.contains(&l) {
4047 field_admits = true;
4048 break;
4049 }
4050 }
4051 }
4052 if field_seen && !field_admits {
4053 return false;
4054 }
4055 }
4056 true
4057}
4058
4059fn has_relevant_constraint(
4060 production: &Production,
4061 schema: &Schema,
4062 vertex_id: &panproto_gat::Name,
4063) -> bool {
4064 let constraints = match schema.constraints.get(vertex_id) {
4065 Some(c) => c,
4066 None => return false,
4067 };
4068 fn walk(production: &Production, constraints: &[panproto_schema::Constraint]) -> bool {
4069 match production {
4070 Production::String { value } => constraints
4071 .iter()
4072 .any(|c| c.value == *value || c.sort.as_ref() == value),
4073 Production::Field { name, content } => {
4074 constraints.iter().any(|c| c.sort.as_ref() == name) || walk(content, constraints)
4075 }
4076 Production::Seq { members } | Production::Choice { members } => {
4077 members.iter().any(|m| walk(m, constraints))
4078 }
4079 Production::Repeat { content }
4080 | Production::Repeat1 { content }
4081 | Production::Optional { content }
4082 | Production::Alias { content, .. }
4083 | Production::Token { content }
4084 | Production::ImmediateToken { content }
4085 | Production::Prec { content, .. }
4086 | Production::PrecLeft { content, .. }
4087 | Production::PrecRight { content, .. }
4088 | Production::PrecDynamic { content, .. }
4089 | Production::Reserved { content, .. } => walk(content, constraints),
4090 _ => false,
4091 }
4092 }
4093 walk(production, constraints)
4094}
4095
4096fn children_for<'a>(schema: &'a Schema, vertex_id: &panproto_gat::Name) -> Vec<&'a Edge> {
4097 let Some(edges) = schema.outgoing.get(vertex_id) else {
4106 return Vec::new();
4107 };
4108
4109 let mut indexed: Vec<(usize, u32, &Edge)> = edges
4113 .iter()
4114 .enumerate()
4115 .map(|(i, e)| {
4116 let canonical = schema.edges.get_key_value(e).map_or(e, |(k, _)| k);
4117 let pos = schema.orderings.get(canonical).copied().unwrap_or(u32::MAX);
4118 (i, pos, canonical)
4119 })
4120 .collect();
4121
4122 indexed.sort_by_key(|(i, pos, _)| (*pos, *i));
4126 indexed.into_iter().map(|(_, _, e)| e).collect()
4127}
4128
4129fn vertex_id_kind<'a>(schema: &'a Schema, vertex_id: &panproto_gat::Name) -> Option<&'a str> {
4130 schema.vertices.get(vertex_id).map(|v| v.kind.as_ref())
4131}
4132
4133fn literal_value<'a>(schema: &'a Schema, vertex_id: &panproto_gat::Name) -> Option<&'a str> {
4134 schema
4135 .constraints
4136 .get(vertex_id)?
4137 .iter()
4138 .find(|c| c.sort.as_ref() == "literal-value")
4139 .map(|c| c.value.as_str())
4140}
4141
4142fn contains_newline_pattern(prod: &Production) -> bool {
4148 match prod {
4149 Production::Pattern { value } => is_newline_like_pattern(value),
4150 Production::Choice { members } | Production::Seq { members } => {
4151 members.iter().any(contains_newline_pattern)
4152 }
4153 Production::Prec { content, .. }
4154 | Production::PrecLeft { content, .. }
4155 | Production::PrecRight { content, .. }
4156 | Production::PrecDynamic { content, .. }
4157 | Production::Token { content }
4158 | Production::ImmediateToken { content }
4159 | Production::Optional { content }
4160 | Production::Field { content, .. }
4161 | Production::Alias { content, .. }
4162 | Production::Reserved { content, .. } => contains_newline_pattern(content),
4163 _ => false,
4164 }
4165}
4166
/// Report whether the regex source `pattern` consists only of newline atoms
/// and quantifiers.
///
/// Accepted atoms are the escapes `\n` and `\r` and the raw line-feed and
/// carriage-return characters. The quantifier characters `?`, `*` and `+` are
/// skipped wherever they appear. Any other character, any other escape, or a
/// trailing backslash rejects the pattern, as does a pattern with no newline
/// atom at all (including the empty pattern).
fn is_newline_like_pattern(pattern: &str) -> bool {
    let mut found_newline = false;
    let mut rest = pattern.chars();
    loop {
        match rest.next() {
            // End of input: accept only if at least one atom was seen.
            None => return found_newline,
            Some('\\') => {
                if !matches!(rest.next(), Some('n' | 'r')) {
                    return false;
                }
                found_newline = true;
            }
            Some('\n' | '\r') => found_newline = true,
            Some('?' | '*' | '+') => {}
            Some(_) => return false,
        }
    }
}
4186
/// Report whether the regex source `pattern` matches only whitespace.
///
/// Trailing `?`, `*` and `+` characters are ignored. What remains must be
/// exactly `\s`, a single space, or `\t`; or a bracketed class whose members
/// are all drawn from `\s`, `\t`, `\r`, `\n`, a raw space and a raw tab. Empty
/// patterns, empty classes and patterns made only of quantifiers are
/// rejected.
fn is_whitespace_only_pattern(pattern: &str) -> bool {
    let core = pattern.trim_end_matches(|c| matches!(c, '?' | '*' | '+'));
    match core {
        "" => false,
        "\\s" | " " | "\\t" => true,
        _ => {
            let Some(class) = core.strip_prefix('[').and_then(|s| s.strip_suffix(']')) else {
                return false;
            };
            let mut atoms = 0usize;
            let mut members = class.chars();
            while let Some(ch) = members.next() {
                let is_whitespace_atom = match ch {
                    '\\' => matches!(members.next(), Some('s' | 't' | 'r' | 'n')),
                    ' ' | '\t' => true,
                    _ => false,
                };
                if !is_whitespace_atom {
                    return false;
                }
                atoms += 1;
            }
            atoms > 0
        }
    }
}
4223
4224fn placeholder_for_pattern(pattern: &str) -> String {
4225 let simple_lit = decode_simple_pattern_literal(pattern);
4233 if let Some(lit) = simple_lit {
4234 return lit;
4235 }
4236
4237 if pattern.contains("[0-9]") || pattern.contains("\\d") {
4238 "0".into()
4239 } else if pattern.contains("[a-zA-Z_]") || pattern.contains("\\w") {
4240 "_x".into()
4241 } else if pattern.contains('"') || pattern.contains('\'') {
4242 "\"\"".into()
4243 } else {
4244 "_".into()
4245 }
4246}
4247
/// Decode a regex source that matches exactly one fixed string into that
/// string.
///
/// Returns `None` when the pattern is not a plain literal:
///
/// * it contains any of the metacharacters `[ ] ( ) * + ? | { }`;
/// * it contains a character-class or boundary escape (`\d`, `\D`, `\w`,
///   `\W`, `\s`, `\S`, `\b`, `\B`). These stand for a set of characters or a
///   position, not for the letter after the backslash;
/// * it ends in a dangling backslash.
///
/// Otherwise `\n`, `\r` and `\t` decode to the control characters they name,
/// any other escaped character decodes to itself (`\/` becomes `/`, `\\`
/// becomes `\`), and unescaped characters are copied through. The empty
/// pattern decodes to the empty string.
fn decode_simple_pattern_literal(pattern: &str) -> Option<String> {
    let has_metachar = pattern
        .chars()
        .any(|c| matches!(c, '[' | ']' | '(' | ')' | '*' | '+' | '?' | '|' | '{' | '}'));
    if has_metachar {
        return None;
    }

    let mut out = String::with_capacity(pattern.len());
    let mut chars = pattern.chars();
    while let Some(c) = chars.next() {
        if c != '\\' {
            out.push(c);
            continue;
        }
        // `?` rejects a trailing backslash with nothing to escape.
        let decoded = match chars.next()? {
            'n' => '\n',
            'r' => '\r',
            't' => '\t',
            // Class and boundary escapes have no single literal spelling, so
            // the caller must fall back to a pattern-based placeholder.
            'd' | 'D' | 'w' | 'W' | 's' | 'S' | 'b' | 'B' => return None,
            other => other,
        };
        out.push(decoded);
    }
    Some(out)
}
4280
/// One entry in the output stream that `layout` turns into bytes.
///
/// Only `Lit` carries text; the other variants are layout directives.
#[derive(Clone)]
enum Token {
    /// Literal text together with the role `layout` uses to decide whether a
    /// separator goes in front of it.
    Lit(String, TokenRole),
    /// Raises the indent level by one for subsequently started lines.
    IndentOpen,
    /// Lowers the indent level by one (never below zero) and terminates the
    /// current line if one is in progress.
    IndentClose,
    /// Terminates the current line if one is in progress; a no-op when the
    /// output is already at the start of a line.
    LineBreak,
    /// Requests a separator before the next literal, overriding both the
    /// role-based decision and a pending `NoSpace`. Has no effect if that
    /// literal starts a line.
    ForceSpace,
    /// Suppresses the role-based separator before the next literal.
    NoSpace,
}
4318
/// Token buffer that emit code appends to and that `finish` renders to bytes.
struct Output<'a> {
    /// Tokens recorded so far, in emission order.
    tokens: Vec<Token>,
    /// Formatting configuration: consulted for `indent_close` and
    /// `line_break_after` while recording tokens, and handed to `layout`.
    policy: &'a FormatPolicy,
    /// Grammar supplying `token_roles`, `indent_triggers` and
    /// `line_comment_prefixes`.
    grammar: &'a Grammar,
    /// Rule name used as the key for the role and indent-trigger lookups;
    /// `None` disables those lookups.
    current_rule: Option<String>,
    /// Optional per-grammar cassette.
    /// NOTE(review): not read by the `Output` methods defined next to this
    /// struct; presumably consumed by emit code elsewhere. Confirm.
    cassette: Option<&'a dyn crate::languages::cassettes::GrammarCassette>,
}
4326
/// Rollback point for an `Output`: remembers how many tokens had been
/// recorded when the snapshot was taken.
#[derive(Clone)]
struct OutputSnapshot {
    /// Length of `Output::tokens` at snapshot time.
    tokens_len: usize,
}
4331
4332impl<'a> Output<'a> {
4333 fn new(
4334 policy: &'a FormatPolicy,
4335 grammar: &'a Grammar,
4336 cassette: Option<&'a dyn crate::languages::cassettes::GrammarCassette>,
4337 ) -> Self {
4338 Self {
4339 tokens: Vec::new(),
4340 policy,
4341 grammar,
4342 current_rule: None,
4343 cassette,
4344 }
4345 }
4346
4347 fn token(&mut self, value: &str) {
4348 self.token_with_role(value, None);
4349 }
4350
4351 fn token_with_role(&mut self, value: &str, explicit_role: Option<TokenRole>) {
4352 if value.is_empty() {
4353 return;
4354 }
4355
4356 if value == "\n" || value == "\r\n" || value == "\r" {
4357 self.tokens.push(Token::LineBreak);
4358 return;
4359 }
4360
4361 let trimmed = value.trim_end_matches(['\n', '\r']);
4362 let trailing_newlines = value.len() - trimmed.len();
4363 if trailing_newlines > 0 && !trimmed.is_empty() {
4364 let role = explicit_role.unwrap_or(TokenRole::Terminal);
4365 if role == TokenRole::BracketClose
4366 && self.policy.indent_close.iter().any(|t| t == trimmed)
4367 {
4368 self.tokens.push(Token::IndentClose);
4369 }
4370 self.tokens.push(Token::Lit(trimmed.to_owned(), role));
4371 if role == TokenRole::BracketOpen {
4372 if let Some(ref rule) = self.current_rule {
4373 if self
4374 .grammar
4375 .indent_triggers
4376 .contains(&(rule.clone(), trimmed.to_owned()))
4377 {
4378 self.tokens.push(Token::IndentOpen);
4379 }
4380 }
4381 }
4382 self.tokens.push(Token::LineBreak);
4383 return;
4384 }
4385
4386 let role = explicit_role.unwrap_or_else(|| self.lookup_role(value));
4387
4388 if role == TokenRole::BracketClose && self.policy.indent_close.iter().any(|t| t == value) {
4389 self.tokens.push(Token::IndentClose);
4390 }
4391
4392 self.tokens.push(Token::Lit(value.to_owned(), role));
4393
4394 if role == TokenRole::BracketOpen {
4395 let grammar_indent = self.current_rule.as_ref().is_some_and(|rule| {
4396 self.grammar
4397 .indent_triggers
4398 .contains(&(rule.clone(), value.to_owned()))
4399 });
4400 if grammar_indent {
4401 self.tokens.push(Token::IndentOpen);
4402 self.tokens.push(Token::LineBreak);
4403 }
4404 }
4405 let is_non_indent_bracket = self.current_rule.is_some()
4410 && (role == TokenRole::BracketOpen || role == TokenRole::BracketClose)
4411 && !self.current_rule.as_ref().is_some_and(|rule| {
4412 self.grammar
4413 .indent_triggers
4414 .contains(&(rule.clone(), value.to_owned()))
4415 });
4416 if !is_non_indent_bracket && self.policy.line_break_after.iter().any(|t| t == value) {
4417 self.tokens.push(Token::LineBreak);
4418 }
4419 }
4420
4421 fn lookup_role(&self, value: &str) -> TokenRole {
4422 if let Some(ref rule) = self.current_rule {
4423 if let Some(role_map) = self.grammar.token_roles.get(rule) {
4424 if let Some(role) = role_map.get(value) {
4425 return *role;
4426 }
4427 }
4428 }
4429 if is_word_like(value) {
4430 TokenRole::Keyword
4431 } else {
4432 TokenRole::Operator
4433 }
4434 }
4435
4436 fn token_with_indent_open(&mut self, value: &str, role: TokenRole) {
4441 if value.is_empty() {
4442 return;
4443 }
4444 if role == TokenRole::BracketClose && self.policy.indent_close.iter().any(|t| t == value) {
4445 self.tokens.push(Token::IndentClose);
4446 }
4447 self.tokens.push(Token::Lit(value.to_owned(), role));
4448 if role == TokenRole::BracketOpen {
4449 self.tokens.push(Token::IndentOpen);
4450 self.tokens.push(Token::LineBreak);
4451 }
4452 }
4453
4454 fn newline(&mut self) {
4455 self.tokens.push(Token::LineBreak);
4456 }
4457
4458 fn indent_open(&mut self) {
4463 self.tokens.push(Token::IndentOpen);
4464 self.tokens.push(Token::LineBreak);
4465 }
4466
4467 fn indent_close(&mut self) {
4469 self.tokens.push(Token::IndentClose);
4470 }
4471
4472 fn snapshot(&self) -> OutputSnapshot {
4473 OutputSnapshot {
4474 tokens_len: self.tokens.len(),
4475 }
4476 }
4477
4478 fn restore(&mut self, snap: OutputSnapshot) {
4479 self.tokens.truncate(snap.tokens_len);
4480 }
4481
4482 fn lit_emitted_since(&self, snap: OutputSnapshot) -> bool {
4489 self.tokens[snap.tokens_len..]
4490 .iter()
4491 .any(|t| matches!(t, Token::Lit(_, _)))
4492 }
4493
4494 fn no_space(&mut self) {
4499 self.tokens.push(Token::NoSpace);
4500 }
4501
4502 fn finish(self) -> Vec<u8> {
4503 layout(
4504 &self.tokens,
4505 self.policy,
4506 &self.grammar.line_comment_prefixes,
4507 )
4508 }
4509}
4510
4511fn layout(tokens: &[Token], policy: &FormatPolicy, line_comment_prefixes: &[String]) -> Vec<u8> {
4517 let mut bytes = Vec::new();
4518 let mut indent: usize = 0;
4519 let mut at_line_start = true;
4520 let mut last_role: Option<TokenRole> = None;
4521 let mut last_text: String = String::new();
4522 let mut suppress_next_separator = false;
4523 let mut force_next_separator = false;
4524 let newline = policy.newline.as_bytes();
4525 let separator = policy.separator.as_bytes();
4526
4527 for (tok_idx, tok) in tokens.iter().enumerate() {
4528 if std::env::var("DBG_LAYOUT").is_ok() {
4529 match tok {
4530 Token::Lit(v, r) => eprintln!(
4531 " TOK: Lit({v:?}, {r:?}) at_line_start={at_line_start} last_role={last_role:?}"
4532 ),
4533 Token::IndentOpen => eprintln!(" TOK: IndentOpen"),
4534 Token::IndentClose => eprintln!(" TOK: IndentClose"),
4535 Token::LineBreak => eprintln!(" TOK: LineBreak"),
4536 Token::NoSpace => eprintln!(" TOK: NoSpace"),
4537 Token::ForceSpace => eprintln!(" TOK: ForceSpace"),
4538 }
4539 }
4540 match tok {
4541 Token::IndentOpen => indent += 1,
4542 Token::IndentClose => {
4543 indent = indent.saturating_sub(1);
4544 if !at_line_start {
4545 bytes.extend_from_slice(newline);
4546 at_line_start = true;
4547 }
4548 }
4549 Token::LineBreak => {
4550 if !at_line_start {
4551 bytes.extend_from_slice(newline);
4552 at_line_start = true;
4553 }
4554 }
4555 Token::NoSpace => {
4556 suppress_next_separator = true;
4557 }
4558 Token::ForceSpace => {
4559 force_next_separator = true;
4560 }
4561 Token::Lit(value, role) => {
4562 let is_block_open = *role == TokenRole::BracketOpen
4566 && tokens
4567 .get(tok_idx + 1)
4568 .is_some_and(|t| matches!(t, Token::IndentOpen));
4569 if at_line_start {
4570 bytes.extend(std::iter::repeat_n(b' ', indent * policy.indent_width));
4571 } else if let Some(prev_role) = last_role {
4572 let want_space = force_next_separator
4573 || (!suppress_next_separator
4574 && needs_space_by_role(prev_role, &last_text, *role, value))
4575 || (is_block_open
4576 && !suppress_next_separator
4577 && matches!(prev_role, TokenRole::Terminal | TokenRole::BracketClose));
4578 if want_space {
4579 bytes.extend_from_slice(separator);
4580 }
4581 }
4582 suppress_next_separator = false;
4583 force_next_separator = false;
4584 bytes.extend_from_slice(value.as_bytes());
4585 at_line_start = false;
4586 last_role = Some(*role);
4587 last_text.clear();
4588 last_text.push_str(value);
4589 if line_comment_prefixes
4590 .iter()
4591 .any(|p| value.starts_with(p.as_str()))
4592 {
4593 bytes.extend_from_slice(newline);
4594 at_line_start = true;
4595 last_role = None;
4596 }
4597 }
4598 }
4599 }
4600
4601 if !at_line_start {
4602 bytes.extend_from_slice(newline);
4603 }
4604 bytes
4605}
4606
4607fn effective_spacing_role(role: TokenRole, text: &str) -> TokenRole {
4611 match role {
4612 TokenRole::BracketOpen | TokenRole::BracketClose if is_word_like(text) => {
4613 TokenRole::Keyword
4614 }
4615 other => other,
4616 }
4617}
4618
4619fn needs_space_by_role(last: TokenRole, last_text: &str, next: TokenRole, next_text: &str) -> bool {
4623 let last = effective_spacing_role(last, last_text);
4624 let next = effective_spacing_role(next, next_text);
4625 match (last, next) {
4626 (TokenRole::BracketOpen, _) | (_, TokenRole::BracketClose) => false,
4628 (_, TokenRole::Separator) => false,
4630 (TokenRole::Separator, _) => true,
4631 (TokenRole::Connector, _) | (_, TokenRole::Connector) => false,
4633 (TokenRole::Terminal, TokenRole::BracketOpen) => false,
4635 (TokenRole::BracketClose, TokenRole::BracketOpen) => false,
4637 (TokenRole::Keyword, _) | (_, TokenRole::Keyword) => true,
4639 (TokenRole::Terminal, TokenRole::Terminal) => true,
4641 (TokenRole::Terminal, TokenRole::Operator) | (TokenRole::Operator, TokenRole::Terminal) => {
4642 true
4643 }
4644 (TokenRole::Operator, TokenRole::Operator) => true,
4645 (TokenRole::BracketClose, _) => true,
4647 (TokenRole::Operator, TokenRole::BracketOpen) => true,
4649 }
4650}
4651
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// Grammar with no rules, loaded through `Grammar::from_bytes` with a
    /// plain serde deserialisation as the fallback.
    fn test_grammar() -> Grammar {
        Grammar::from_bytes("test", br#"{"name":"test","rules":{}}"#).unwrap_or_else(|_| {
            serde_json::from_str::<Grammar>(r#"{"name":"test","rules":{}}"#).unwrap()
        })
    }

    /// Deserialises a grammar directly with serde, bypassing
    /// `Grammar::from_bytes`.
    fn raw_grammar(json: &str) -> Grammar {
        serde_json::from_str::<Grammar>(json).unwrap()
    }

    /// Rule-less grammar named `t`, deserialised directly with serde.
    fn blank_grammar() -> Grammar {
        raw_grammar(r#"{"name":"t","rules":{}}"#)
    }

    /// Feeds every `(text, role)` pair to `Output::token_with_role` in order.
    fn emit_all(out: &mut Output<'_>, pieces: &[(&str, TokenRole)]) {
        for &(text, role) in pieces {
            out.token_with_role(text, Some(role));
        }
    }

    /// Finishes `out` and returns the rendered bytes as text.
    fn rendered(out: Output<'_>) -> String {
        let bytes = out.finish();
        std::str::from_utf8(&bytes).expect("ascii output").to_owned()
    }

    /// Computes the yield set of `prod`, starting from the grammar's cached
    /// yield sets.
    fn yield_of(grammar: &Grammar, prod: &Production) -> std::collections::HashSet<String> {
        let mut cache = grammar.yield_sets.clone();
        let mut visited = std::collections::HashSet::new();
        yield_of_production(grammar, prod, &mut visited, &mut cache)
    }

    #[test]
    fn parses_simple_grammar_json() {
        let grammar = Grammar::from_bytes(
            "tiny",
            br#"{
                "name": "tiny",
                "rules": {
                    "program": {
                        "type": "SEQ",
                        "members": [
                            {"type": "STRING", "value": "hello"},
                            {"type": "STRING", "value": ";"}
                        ]
                    }
                }
            }"#,
        )
        .expect("valid tiny grammar");
        assert!(grammar.rules.contains_key("program"));
    }

    #[test]
    fn output_emits_punctuation_without_leading_space() {
        let policy = FormatPolicy::default();
        let grammar = test_grammar();
        let mut out = Output::new(&policy, &grammar, None);
        emit_all(
            &mut out,
            &[
                ("foo", TokenRole::Terminal),
                ("(", TokenRole::BracketOpen),
                (")", TokenRole::BracketClose),
                (";", TokenRole::Separator),
            ],
        );
        let s = rendered(out);
        assert!(s.starts_with("foo();"), "got {s:?}");
    }

    #[test]
    fn grammar_from_bytes_rejects_malformed_input() {
        let msg = Grammar::from_bytes("malformed", b"not json")
            .expect_err("malformed bytes must yield Err")
            .to_string();
        assert!(
            msg.contains("malformed"),
            "error message should name the protocol: {msg:?}"
        );
    }

    #[test]
    fn output_indents_after_open_brace() {
        let policy = FormatPolicy::default();
        let grammar = test_grammar();
        let mut out = Output::new(&policy, &grammar, None);
        emit_all(
            &mut out,
            &[
                ("fn", TokenRole::Keyword),
                ("foo", TokenRole::Terminal),
                ("(", TokenRole::BracketOpen),
                (")", TokenRole::BracketClose),
                ("{", TokenRole::BracketOpen),
                ("body", TokenRole::Terminal),
                ("}", TokenRole::BracketClose),
            ],
        );
        let s = rendered(out);
        assert!(s.contains("{\n"), "newline after opening brace: {s:?}");
        assert!(s.contains("body"), "body inside block: {s:?}");
        assert!(s.ends_with("}\n"), "newline after closing brace: {s:?}");
    }

    #[test]
    fn output_no_space_between_word_and_dot() {
        let policy = FormatPolicy::default();
        let grammar = test_grammar();
        let mut out = Output::new(&policy, &grammar, None);
        emit_all(
            &mut out,
            &[
                ("foo", TokenRole::Terminal),
                (".", TokenRole::Operator),
                ("bar", TokenRole::Terminal),
            ],
        );
        let s = rendered(out);
        // NOTE(review): despite the test name, only the presence of both
        // identifiers is pinned here, not the spacing around the dot.
        assert!(
            s.contains("foo") && s.contains("bar"),
            "both identifiers present: {s:?}"
        );
    }

    #[test]
    fn output_snapshot_restore_truncates_bytes() {
        let policy = FormatPolicy::default();
        let grammar = test_grammar();
        let mut out = Output::new(&policy, &grammar, None);
        out.token("keep");
        let checkpoint = out.snapshot();
        for discarded in ["drop", "more"] {
            out.token(discarded);
        }
        out.restore(checkpoint);
        out.token("after");
        let s = rendered(out);
        assert!(s.contains("keep"), "kept token survives: {s:?}");
        assert!(s.contains("after"), "post-restore token visible: {s:?}");
        assert!(!s.contains("drop"), "rolled-back token removed: {s:?}");
        assert!(!s.contains("more"), "rolled-back token removed: {s:?}");
    }

    #[test]
    fn child_cursor_take_field_consumes_once() {
        let owned: Vec<Edge> = vec![Edge {
            src: panproto_gat::Name::from("p"),
            tgt: panproto_gat::Name::from("c"),
            kind: panproto_gat::Name::from("name"),
            name: None,
        }];
        let edges: Vec<&Edge> = owned.iter().collect();
        let mut cursor = ChildCursor::new(&edges);
        let first = cursor.take_field("name");
        let second = cursor.take_field("name");
        assert!(first.is_some(), "first take returns the edge");
        assert!(
            second.is_none(),
            "second take returns None (already consumed)"
        );
    }

    #[test]
    fn child_cursor_take_matching_predicate() {
        let owned: Vec<Edge> = vec![
            Edge {
                src: "p".into(),
                tgt: "c1".into(),
                kind: "child_of".into(),
                name: None,
            },
            Edge {
                src: "p".into(),
                tgt: "c2".into(),
                kind: "key".into(),
                name: None,
            },
        ];
        let edges: Vec<&Edge> = owned.iter().collect();
        let mut cursor = ChildCursor::new(&edges);
        assert!(cursor.has_matching(|e| e.kind.as_ref() == "key"));
        let taken = cursor.take_matching(|e| e.kind.as_ref() == "key");
        assert!(taken.is_some());
        assert!(
            !cursor.has_matching(|e| e.kind.as_ref() == "key"),
            "consumed edge no longer matches"
        );
        assert!(
            cursor.has_matching(|e| e.kind.as_ref() == "child_of"),
            "the other edge is still available"
        );
    }

    #[test]
    fn kind_satisfies_symbol_direct_match() {
        let grammar = Grammar::from_bytes(
            "tiny",
            br#"{
                "name": "tiny",
                "rules": {
                    "x": {"type": "STRING", "value": "x"}
                }
            }"#,
        )
        .expect("valid grammar");
        assert!(kind_satisfies_symbol(&grammar, Some("x"), "x"));
        assert!(!kind_satisfies_symbol(&grammar, Some("y"), "x"));
        assert!(!kind_satisfies_symbol(&grammar, None, "x"));
    }

    #[test]
    fn kind_satisfies_symbol_through_hidden_rule() {
        let grammar = Grammar::from_bytes(
            "tiny",
            br#"{
                "name": "tiny",
                "rules": {
                    "_value": {
                        "type": "CHOICE",
                        "members": [
                            {"type": "SYMBOL", "name": "object"},
                            {"type": "SYMBOL", "name": "number"}
                        ]
                    },
                    "object": {"type": "STRING", "value": "{}"},
                    "number": {"type": "PATTERN", "value": "[0-9]+"}
                }
            }"#,
        )
        .expect("valid grammar");
        assert!(
            kind_satisfies_symbol(&grammar, Some("number"), "_value"),
            "number is reachable from _value via CHOICE"
        );
        assert!(
            kind_satisfies_symbol(&grammar, Some("object"), "_value"),
            "object is reachable from _value via CHOICE"
        );
        assert!(
            !kind_satisfies_symbol(&grammar, Some("string"), "_value"),
            "string is NOT among the alternatives"
        );
    }

    #[test]
    fn first_symbol_skips_string_terminals() {
        let seq = serde_json::from_str::<Production>(
            r#"{
                "type": "SEQ",
                "members": [
                    {"type": "STRING", "value": "{"},
                    {"type": "SYMBOL", "name": "body"},
                    {"type": "STRING", "value": "}"}
                ]
            }"#,
        )
        .expect("valid SEQ");
        assert_eq!(first_symbol(&seq), Some("body"));
    }

    #[test]
    fn placeholder_for_pattern_routes_by_regex_class() {
        let cases = [
            ("[0-9]+", "0"),
            ("[a-zA-Z_]\\w*", "_x"),
            ("\"[^\"]*\"", "\"\""),
            ("\\d+\\.\\d+", "0"),
        ];
        for (pattern, expected) in cases {
            assert_eq!(placeholder_for_pattern(pattern), expected);
        }
    }

    #[test]
    fn format_policy_default_breaks_after_semicolon() {
        let defaults = FormatPolicy::default();
        assert!(defaults.line_break_after.iter().any(|t| t == ";"));
        assert!(defaults.indent_open.iter().any(|t| t == "{"));
        assert!(defaults.indent_close.iter().any(|t| t == "}"));
        assert_eq!(defaults.indent_width, 2);
    }

    #[test]
    fn placeholder_decodes_literal_pattern_separators() {
        let cases = [
            // Escaped newline sequences decode to the real characters.
            ("\\n", "\n"),
            ("\\r\\n", "\r\n"),
            (";", ";"),
            // Patterns with regex operators fall back to the heuristics.
            ("[0-9]+", "0"),
            ("a|b", "_"),
        ];
        for (pattern, expected) in cases {
            assert_eq!(placeholder_for_pattern(pattern), expected);
        }
    }

    #[test]
    fn supertypes_decode_from_grammar_json_strings() {
        let grammar = Grammar::from_bytes(
            "tiny",
            br#"{
                "name": "tiny",
                "supertypes": ["expression"],
                "rules": {
                    "expression": {
                        "type": "CHOICE",
                        "members": [
                            {"type": "SYMBOL", "name": "binary_expression"},
                            {"type": "SYMBOL", "name": "identifier"}
                        ]
                    },
                    "binary_expression": {"type": "STRING", "value": "x"},
                    "identifier": {"type": "PATTERN", "value": "[a-z]+"}
                }
            }"#,
        )
        .expect("parse");
        assert!(grammar.supertypes.contains("expression"));
        assert!(kind_satisfies_symbol(
            &grammar,
            Some("identifier"),
            "expression"
        ));
        assert!(!kind_satisfies_symbol(
            &grammar,
            Some("string"),
            "expression"
        ));
    }

    #[test]
    fn supertypes_decode_from_grammar_json_objects() {
        let grammar = Grammar::from_bytes(
            "tiny",
            br#"{
                "name": "tiny",
                "supertypes": [{"type": "SYMBOL", "name": "stmt"}],
                "rules": {
                    "stmt": {
                        "type": "CHOICE",
                        "members": [
                            {"type": "SYMBOL", "name": "while_stmt"},
                            {"type": "SYMBOL", "name": "if_stmt"}
                        ]
                    },
                    "while_stmt": {"type": "STRING", "value": "while"},
                    "if_stmt": {"type": "STRING", "value": "if"}
                }
            }"#,
        )
        .expect("parse");
        assert!(grammar.supertypes.contains("stmt"));
        assert!(kind_satisfies_symbol(&grammar, Some("while_stmt"), "stmt"));
    }

    #[test]
    fn alias_value_matches_kind() {
        let grammar = Grammar::from_bytes(
            "tiny",
            br#"{
                "name": "tiny",
                "rules": {
                    "_package_identifier": {
                        "type": "ALIAS",
                        "named": true,
                        "value": "package_identifier",
                        "content": {"type": "SYMBOL", "name": "identifier"}
                    },
                    "identifier": {"type": "PATTERN", "value": "[a-z]+"}
                }
            }"#,
        )
        .expect("parse");
        assert!(kind_satisfies_symbol(
            &grammar,
            Some("package_identifier"),
            "_package_identifier"
        ));
    }

    #[test]
    fn referenced_symbols_walks_nested_seq() {
        let seq = serde_json::from_str::<Production>(
            r#"{
                "type": "SEQ",
                "members": [
                    {"type": "CHOICE", "members": [
                        {"type": "SYMBOL", "name": "attribute_item"},
                        {"type": "BLANK"}
                    ]},
                    {"type": "SYMBOL", "name": "parameter"},
                    {"type": "REPEAT", "content": {
                        "type": "SEQ",
                        "members": [
                            {"type": "STRING", "value": ","},
                            {"type": "SYMBOL", "name": "parameter"}
                        ]
                    }}
                ]
            }"#,
        )
        .expect("seq");
        let symbols = referenced_symbols(&seq);
        assert!(symbols.contains(&"attribute_item"));
        assert!(symbols.contains(&"parameter"));
    }

    #[test]
    fn literal_strings_collects_choice_members() {
        let choice = serde_json::from_str::<Production>(
            r#"{
                "type": "CHOICE",
                "members": [
                    {"type": "STRING", "value": "+"},
                    {"type": "STRING", "value": "-"},
                    {"type": "STRING", "value": "*"}
                ]
            }"#,
        )
        .expect("choice");
        let strings = literal_strings(&choice);
        assert_eq!(strings, vec!["+", "-", "*"]);
    }

    #[test]
    fn reserved_variant_deserialises() {
        let parsed = serde_json::from_str::<Production>(
            r#"{
                "type": "RESERVED",
                "content": {"type": "SYMBOL", "name": "_lowercase_identifier"},
                "context_name": "attribute_id"
            }"#,
        )
        .expect("RESERVED parses");
        match parsed {
            Production::Reserved { content, .. } => match *content {
                Production::Symbol { name } => assert_eq!(name, "_lowercase_identifier"),
                other => panic!("expected inner SYMBOL, got {other:?}"),
            },
            other => panic!("expected RESERVED, got {other:?}"),
        }
    }

    #[test]
    fn reserved_grammar_loads_end_to_end() {
        let grammar = Grammar::from_bytes(
            "tiny_reserved",
            br#"{
                "name": "tiny_reserved",
                "rules": {
                    "program": {
                        "type": "RESERVED",
                        "content": {"type": "SYMBOL", "name": "ident"},
                        "context_name": "keywords"
                    },
                    "ident": {"type": "PATTERN", "value": "[a-z]+"}
                }
            }"#,
        )
        .expect("RESERVED-using grammar loads");
        assert!(grammar.rules.contains_key("program"));
    }

    #[test]
    fn reserved_walker_helpers_recurse_into_content() {
        let reserved = serde_json::from_str::<Production>(
            r#"{
                "type": "RESERVED",
                "content": {
                    "type": "FIELD",
                    "name": "lhs",
                    "content": {"type": "SYMBOL", "name": "expr"}
                },
                "context_name": "ctx"
            }"#,
        )
        .expect("nested RESERVED parses");
        assert_eq!(first_symbol(&reserved), Some("expr"));
        assert!(has_field_in(&reserved, &["lhs"]));
        let symbols = referenced_symbols(&reserved);
        assert!(symbols.contains(&"expr"));
    }

    #[test]
    fn yield_set_seq_only_first_member() {
        let seq = serde_json::from_str::<Production>(
            r#"{
                "type": "SEQ",
                "members": [
                    {"type": "SYMBOL", "name": "identifier"},
                    {"type": "STRING", "value": "as"},
                    {"type": "SYMBOL", "name": "target"}
                ]
            }"#,
        )
        .expect("valid SEQ");
        let grammar = Grammar::from_bytes("test", b"{}").unwrap_or_else(|_| blank_grammar());
        let ys = yield_of(&grammar, &seq);
        assert!(ys.contains("identifier"), "SEQ yields first member");
        assert!(
            !ys.contains("target"),
            "SEQ must NOT yield non-first members"
        );
    }

    #[test]
    fn yield_set_choice_union() {
        let choice = serde_json::from_str::<Production>(
            r#"{
                "type": "CHOICE",
                "members": [
                    {"type": "SYMBOL", "name": "a"},
                    {"type": "SYMBOL", "name": "b"}
                ]
            }"#,
        )
        .expect("valid CHOICE");
        let grammar = blank_grammar();
        let ys = yield_of(&grammar, &choice);
        assert_eq!(ys.len(), 2);
        assert!(ys.contains("a"));
        assert!(ys.contains("b"));
    }

    #[test]
    fn yield_set_hidden_expansion() {
        let mut grammar = raw_grammar(
            r#"{"name":"t","rules":{
                "_value": {
                    "type": "CHOICE",
                    "members": [
                        {"type": "SYMBOL", "name": "number"},
                        {"type": "SYMBOL", "name": "object"}
                    ]
                }
            }}"#,
        );
        // Direct serde deserialisation leaves the derived tables to be
        // filled in by hand before the yield query.
        grammar.subtypes = compute_subtype_closure(&grammar);
        grammar.yield_sets = compute_yield_sets(&grammar);
        let hidden: Production =
            serde_json::from_str(r#"{"type": "SYMBOL", "name": "_value"}"#).unwrap();
        let ys = yield_of(&grammar, &hidden);
        assert!(
            ys.contains("number"),
            "hidden rule expands into its CHOICE members"
        );
        assert!(ys.contains("object"));
        assert!(
            !ys.contains("_value"),
            "hidden rule name is not in yield set"
        );
    }

    #[test]
    fn yield_set_optional_includes_epsilon() {
        let optional: Production = serde_json::from_str(
            r#"{"type": "OPTIONAL", "content": {"type": "SYMBOL", "name": "x"}}"#,
        )
        .unwrap();
        let grammar = blank_grammar();
        let ys = yield_of(&grammar, &optional);
        assert!(ys.contains("x"));
        assert!(ys.contains(""), "OPTIONAL includes epsilon");
    }

    #[test]
    fn yield_set_alias_uses_value() {
        let alias: Production = serde_json::from_str(
            r#"{"type": "ALIAS", "content": {"type": "SYMBOL", "name": "real"},
                "named": true, "value": "alias_name"}"#,
        )
        .unwrap();
        let grammar = blank_grammar();
        let ys = yield_of(&grammar, &alias);
        assert_eq!(ys.len(), 1);
        assert!(ys.contains("alias_name"), "named ALIAS yields its value");
    }
}