1use crate::token::{Token, TokenType, BlockType};
20use logicaffeine_base::{Interner, Symbol};
21use super::registry::{TypeRegistry, TypeDef, FieldDef, FieldType, VariantDef};
22use super::policy::{PolicyRegistry, PredicateDef, CapabilityDef, PolicyCondition};
23use super::dependencies::scan_dependencies;
24
/// Combined output of a full discovery scan (see `DiscoveryPass::run_full`).
pub struct DiscoveryResult {
    /// Type definitions collected during the scan; the registry is seeded
    /// with `TypeRegistry::with_primitives` before scanning starts.
    pub types: TypeRegistry,
    /// Predicate and capability definitions collected from policy blocks.
    pub policies: PolicyRegistry,
}
30
/// Cursor over a token slice used to discover type and policy definitions.
pub struct DiscoveryPass<'a> {
    /// The token stream being scanned.
    tokens: &'a [Token],
    /// Index of the next token to inspect; only moved by `advance` or by an
    /// explicit rewind inside the type-definition parser.
    pos: usize,
    /// Interner used to resolve token symbols and to intern synthetic names
    /// such as `"Unknown"`.
    interner: &'a mut Interner,
}
44
45impl<'a> DiscoveryPass<'a> {
46 pub fn new(tokens: &'a [Token], interner: &'a mut Interner) -> Self {
47 Self { tokens, pos: 0, interner }
48 }
49
50 pub fn run(&mut self) -> TypeRegistry {
53 self.run_full().types
54 }
55
56 pub fn run_full(&mut self) -> DiscoveryResult {
58 let mut type_registry = TypeRegistry::with_primitives(self.interner);
59 let mut policy_registry = PolicyRegistry::new();
60
61 while self.pos < self.tokens.len() {
62 if self.check_block_header(BlockType::Definition) {
64 self.advance(); self.scan_definition_block(&mut type_registry);
66 } else if self.check_block_header(BlockType::TypeDef) {
67 self.advance(); self.parse_type_definition_inline(&mut type_registry);
71 } else if self.check_block_header(BlockType::Policy) {
72 self.advance(); self.scan_policy_block(&mut policy_registry);
75 } else if self.check_block_header(BlockType::Requires) {
76 self.advance(); while self.pos < self.tokens.len() {
80 if matches!(self.tokens.get(self.pos), Some(Token { kind: TokenType::BlockHeader { .. }, .. })) {
81 break;
82 }
83 self.advance();
84 }
85 } else {
86 self.advance();
87 }
88 }
89
90 DiscoveryResult {
91 types: type_registry,
92 policies: policy_registry,
93 }
94 }
95
96 fn check_block_header(&self, expected: BlockType) -> bool {
97 matches!(
98 self.tokens.get(self.pos),
99 Some(Token { kind: TokenType::BlockHeader { block_type }, .. })
100 if *block_type == expected
101 )
102 }
103
104 fn scan_definition_block(&mut self, registry: &mut TypeRegistry) {
105 while self.pos < self.tokens.len() {
107 if matches!(self.peek(), Some(Token { kind: TokenType::BlockHeader { .. }, .. })) {
108 break;
109 }
110
111 if self.check_article() {
113 self.try_parse_type_definition(registry);
114 } else {
115 self.advance();
116 }
117 }
118 }
119
120 fn scan_policy_block(&mut self, registry: &mut PolicyRegistry) {
125 while self.pos < self.tokens.len() {
126 if matches!(self.peek(), Some(Token { kind: TokenType::BlockHeader { .. }, .. })) {
127 break;
128 }
129
130 if self.check_newline() || self.check_indent() || self.check_dedent() {
132 self.advance();
133 continue;
134 }
135
136 if self.check_article() {
138 self.try_parse_policy_definition(registry);
139 } else {
140 self.advance();
141 }
142 }
143 }
144
145 fn try_parse_policy_definition(&mut self, registry: &mut PolicyRegistry) {
147 self.advance(); let subject_type = match self.consume_noun_or_proper() {
151 Some(sym) => sym,
152 None => return,
153 };
154
155 if self.check_copula() {
157 self.advance(); let predicate_name = match self.consume_noun_or_proper() {
162 Some(sym) => sym,
163 None => return,
164 };
165
166 if !self.check_word("if") {
168 self.skip_to_period();
169 return;
170 }
171 self.advance(); if self.check_colon() {
175 self.advance();
176 }
177 if self.check_newline() {
178 self.advance();
179 }
180 if self.check_indent() {
181 self.advance();
182 }
183
184 let condition = self.parse_policy_condition(subject_type, None);
186
187 registry.register_predicate(PredicateDef {
188 subject_type,
189 predicate_name,
190 condition,
191 });
192
193 self.skip_to_period();
194 } else if self.check_word("can") {
195 self.advance(); let action = match self.consume_noun_or_proper() {
200 Some(sym) => sym,
201 None => {
202 if let Some(Token { kind: TokenType::Verb { lemma, .. }, .. }) = self.peek() {
204 let sym = *lemma;
205 self.advance();
206 sym
207 } else {
208 return;
209 }
210 }
211 };
212
213 if self.check_article() {
215 self.advance();
216 }
217
218 let object_type = match self.consume_noun_or_proper() {
220 Some(sym) => sym,
221 None => return,
222 };
223
224 if !self.check_word("if") {
226 self.skip_to_period();
227 return;
228 }
229 self.advance(); if self.check_colon() {
233 self.advance();
234 }
235 if self.check_newline() {
236 self.advance();
237 }
238 if self.check_indent() {
239 self.advance();
240 }
241
242 let condition = self.parse_policy_condition(subject_type, Some(object_type));
243
244 registry.register_capability(CapabilityDef {
245 subject_type,
246 action,
247 object_type,
248 condition,
249 });
250
251 self.skip_policy_definition();
253 } else {
254 self.skip_to_period();
255 }
256 }
257
258 fn parse_policy_condition(&mut self, subject_type: Symbol, object_type: Option<Symbol>) -> PolicyCondition {
261 let first = self.parse_atomic_condition(subject_type, object_type);
262
263 loop {
265 while self.check_newline() {
267 self.advance();
268 }
269
270 if self.check_comma() {
272 self.advance(); while self.check_newline() {
275 self.advance();
276 }
277 }
278
279 if self.check_word("AND") {
280 self.advance();
281 while self.check_newline() {
283 self.advance();
284 }
285 let right = self.parse_atomic_condition(subject_type, object_type);
286 return PolicyCondition::And(Box::new(first), Box::new(right));
287 } else if self.check_word("OR") {
288 self.advance();
289 while self.check_newline() {
291 self.advance();
292 }
293 let right = self.parse_atomic_condition(subject_type, object_type);
294 return PolicyCondition::Or(Box::new(first), Box::new(right));
295 } else {
296 break;
297 }
298 }
299
300 first
301 }
302
303 fn parse_atomic_condition(&mut self, subject_type: Symbol, object_type: Option<Symbol>) -> PolicyCondition {
305 if self.check_article() {
307 self.advance();
308 }
309
310 let subject_ref = match self.consume_noun_or_proper() {
312 Some(sym) => sym,
313 None => return PolicyCondition::FieldEquals {
314 field: self.interner.intern("unknown"),
315 value: self.interner.intern("unknown"),
316 is_string_literal: false,
317 },
318 };
319
320 if self.check_possessive() {
322 self.advance(); let field = match self.consume_noun_or_proper() {
326 Some(sym) => sym,
327 None => return PolicyCondition::FieldEquals {
328 field: self.interner.intern("unknown"),
329 value: self.interner.intern("unknown"),
330 is_string_literal: false,
331 },
332 };
333
334 if self.check_word("equals") {
336 self.advance();
337
338 let (value, is_string_literal) = self.consume_value();
340
341 return PolicyCondition::FieldEquals { field, value, is_string_literal };
342 }
343 } else if self.check_copula() {
344 self.advance(); let predicate = match self.consume_noun_or_proper() {
349 Some(sym) => sym,
350 None => return PolicyCondition::FieldEquals {
351 field: self.interner.intern("unknown"),
352 value: self.interner.intern("unknown"),
353 is_string_literal: false,
354 },
355 };
356
357 return PolicyCondition::Predicate {
358 subject: subject_ref,
359 predicate,
360 };
361 } else if self.check_word("equals") {
362 self.advance(); if self.check_article() {
367 self.advance();
368 }
369
370 if let Some(obj_ref) = self.consume_noun_or_proper() {
372 if self.check_possessive() {
373 self.advance(); if let Some(field) = self.consume_noun_or_proper() {
375 return PolicyCondition::ObjectFieldEquals {
376 subject: subject_ref,
377 object: obj_ref,
378 field,
379 };
380 }
381 }
382 }
383 }
384
385 PolicyCondition::FieldEquals {
387 field: self.interner.intern("unknown"),
388 value: self.interner.intern("unknown"),
389 is_string_literal: false,
390 }
391 }
392
393 fn consume_value(&mut self) -> (Symbol, bool) {
395 if let Some(Token { kind: TokenType::StringLiteral(sym), .. }) = self.peek() {
396 let s = *sym;
397 self.advance();
398 (s, true)
399 } else if let Some(sym) = self.consume_noun_or_proper() {
400 (sym, false)
401 } else {
402 (self.interner.intern("unknown"), false)
403 }
404 }
405
406 fn check_possessive(&self) -> bool {
408 matches!(self.peek(), Some(Token { kind: TokenType::Possessive, .. }))
409 }
410
411 fn skip_policy_definition(&mut self) {
413 let mut depth = 0;
414 while self.pos < self.tokens.len() {
415 if self.check_indent() {
416 depth += 1;
417 } else if self.check_dedent() {
418 if depth == 0 {
419 break;
420 }
421 depth -= 1;
422 }
423 if self.check_period() && depth == 0 {
424 self.advance();
425 break;
426 }
427 if matches!(self.peek(), Some(Token { kind: TokenType::BlockHeader { .. }, .. })) {
428 break;
429 }
430 self.advance();
431 }
432 }
433
434 fn parse_type_definition_inline(&mut self, registry: &mut TypeRegistry) {
436 self.parse_type_definition_body(registry);
438 }
439
440 fn try_parse_type_definition(&mut self, registry: &mut TypeRegistry) {
441 self.advance(); self.parse_type_definition_body(registry);
443 }
444
445 fn parse_type_definition_body(&mut self, registry: &mut TypeRegistry) {
446 if let Some(name_sym) = self.consume_noun_or_proper() {
447 let type_params = if self.check_preposition("of") {
449 self.advance(); self.parse_type_params()
451 } else {
452 vec![]
453 };
454
455 let mut is_portable = false;
457 let mut is_shared = false;
458 if self.check_copula() {
459 let copula_pos = self.pos;
460 self.advance(); loop {
464 if self.check_portable() {
465 self.advance(); is_portable = true;
467 if self.check_word("and") {
468 self.advance(); }
470 } else if self.check_shared() {
471 self.advance(); is_shared = true;
473 if self.check_word("and") {
474 self.advance(); }
476 } else {
477 break;
478 }
479 }
480
481 if !is_portable && !is_shared {
483 self.pos = copula_pos;
484 }
485 }
486
487 if self.check_word("has") {
490 self.advance(); if self.check_colon() {
492 self.advance(); if self.check_newline() {
495 self.advance();
496 }
497 if self.check_indent() {
498 self.advance(); let fields = self.parse_struct_fields_with_params(&type_params);
500 registry.register(name_sym, TypeDef::Struct { fields, generics: type_params, is_portable, is_shared });
501 return;
502 }
503 }
504 }
505
506 if self.check_copula() {
508 self.advance(); let is_enum_pattern = if self.check_either() {
512 self.advance(); true
514 } else if self.check_word("one") {
515 self.advance(); if self.check_word("of") {
517 self.advance(); true
519 } else {
520 false
521 }
522 } else {
523 false
524 };
525
526 if is_enum_pattern {
527 if self.check_colon() {
528 self.advance(); if self.check_newline() {
531 self.advance();
532 }
533 if self.check_indent() {
534 self.advance(); let variants = self.parse_enum_variants_with_params(&type_params);
536 registry.register(name_sym, TypeDef::Enum { variants, generics: type_params, is_portable, is_shared });
537 return;
538 }
539 }
540 }
541
542 if self.check_article() {
543 self.advance(); if self.check_word("generic") {
547 registry.register(name_sym, TypeDef::Generic { param_count: 1 });
548 self.skip_to_period();
549 } else if self.check_word("record") || self.check_word("struct") || self.check_word("structure") {
550 registry.register(name_sym, TypeDef::Struct { fields: vec![], generics: vec![], is_portable: false, is_shared: false });
551 self.skip_to_period();
552 } else if self.check_word("sum") || self.check_word("enum") || self.check_word("choice") {
553 registry.register(name_sym, TypeDef::Enum { variants: vec![], generics: vec![], is_portable: false, is_shared: false });
554 self.skip_to_period();
555 }
556 }
557 } else if !type_params.is_empty() {
558 registry.register(name_sym, TypeDef::Generic { param_count: type_params.len() });
560 self.skip_to_period();
561 }
562 }
563 }
564
565 fn parse_enum_variants_with_params(&mut self, type_params: &[Symbol]) -> Vec<VariantDef> {
569 let mut variants = Vec::new();
570
571 while self.pos < self.tokens.len() {
572 if self.check_dedent() {
574 self.advance();
575 break;
576 }
577 if matches!(self.peek(), Some(Token { kind: TokenType::BlockHeader { .. }, .. })) {
578 break;
579 }
580
581 if self.check_newline() {
583 self.advance();
584 continue;
585 }
586
587 if self.check_article() {
590 self.advance(); }
592
593 if let Some(variant_name) = self.consume_noun_or_proper() {
595 let fields = if self.check_word("with") {
597 self.parse_variant_fields_natural_with_params(type_params)
599 } else if self.check_lparen() {
600 self.parse_variant_fields_concise_with_params(type_params)
602 } else {
603 vec![]
605 };
606
607 variants.push(VariantDef {
608 name: variant_name,
609 fields,
610 });
611
612 if self.check_period() {
614 self.advance();
615 }
616 } else {
617 self.advance(); }
619 }
620
621 variants
622 }
623
624 fn parse_enum_variants(&mut self) -> Vec<VariantDef> {
626 self.parse_enum_variants_with_params(&[])
627 }
628
629 fn parse_variant_fields_natural_with_params(&mut self, type_params: &[Symbol]) -> Vec<FieldDef> {
634 let mut fields = Vec::new();
635
636 self.advance();
638
639 loop {
640 if self.check_article() {
642 self.advance();
643 }
644
645 if let Some(field_name) = self.consume_noun_or_proper() {
647 let ty = if self.check_comma() {
651 self.advance(); if self.check_word("which") {
654 self.advance();
655 }
656 if self.check_copula() {
658 self.advance();
659 }
660 self.consume_field_type_with_params(type_params)
661 } else {
662 self.consume_field_type_with_params(type_params)
664 };
665
666 fields.push(FieldDef {
667 name: field_name,
668 ty,
669 is_public: true, });
671
672 if self.check_comma() {
675 self.advance(); }
677 if self.check_word("and") {
678 self.advance();
679 continue;
680 }
681 }
682 break;
683 }
684
685 fields
686 }
687
688 fn parse_variant_fields_natural(&mut self) -> Vec<FieldDef> {
690 self.parse_variant_fields_natural_with_params(&[])
691 }
692
693 fn parse_variant_fields_concise_with_params(&mut self, type_params: &[Symbol]) -> Vec<FieldDef> {
695 let mut fields = Vec::new();
696
697 self.advance();
699
700 loop {
701 if let Some(field_name) = self.consume_noun_or_proper() {
703 let ty = if self.check_colon() {
705 self.advance(); self.consume_field_type_with_params(type_params)
707 } else {
708 FieldType::Primitive(self.interner.intern("Unknown"))
709 };
710
711 fields.push(FieldDef {
712 name: field_name,
713 ty,
714 is_public: true, });
716
717 if self.check_comma() {
719 self.advance();
720 continue;
721 }
722 }
723 break;
724 }
725
726 if self.check_rparen() {
728 self.advance();
729 }
730
731 fields
732 }
733
734 fn parse_variant_fields_concise(&mut self) -> Vec<FieldDef> {
736 self.parse_variant_fields_concise_with_params(&[])
737 }
738
739 fn parse_struct_fields_with_params(&mut self, type_params: &[Symbol]) -> Vec<FieldDef> {
742 let mut fields = Vec::new();
743
744 while self.pos < self.tokens.len() {
745 if self.check_dedent() {
747 self.advance();
748 break;
749 }
750 if matches!(self.peek(), Some(Token { kind: TokenType::BlockHeader { .. }, .. })) {
751 break;
752 }
753
754 if self.check_newline() {
756 self.advance();
757 continue;
758 }
759
760 let has_article = self.check_article();
763 if has_article {
764 self.advance(); }
766
767 let has_public_keyword = if self.check_word("public") {
769 self.advance();
770 true
771 } else {
772 false
773 };
774 let mut is_public = has_public_keyword;
776
777 if let Some(field_name) = self.consume_noun_or_proper() {
779 let ty = if self.check_colon() {
783 is_public = true;
785 self.advance(); self.consume_field_type_with_params(type_params)
787 } else if self.check_comma() {
788 is_public = true;
790 self.advance(); if self.check_word("which") {
793 self.advance();
794 }
795 if self.check_copula() {
797 self.advance();
798 }
799 self.consume_field_type_with_params(type_params)
800 } else if !has_article {
801 continue;
803 } else {
804 FieldType::Primitive(self.interner.intern("Unknown"))
806 };
807
808 fields.push(FieldDef {
809 name: field_name,
810 ty,
811 is_public,
812 });
813
814 if self.check_period() {
816 self.advance();
817 }
818 } else if !has_article {
819 self.advance();
821 }
822 }
823
824 fields
825 }
826
827 fn parse_struct_fields(&mut self) -> Vec<FieldDef> {
829 self.parse_struct_fields_with_params(&[])
830 }
831
832 fn consume_field_type(&mut self) -> FieldType {
834 if self.check_lparen() {
836 self.advance(); let inner_type = self.consume_field_type();
838 if self.check_rparen() {
839 self.advance(); }
841 return inner_type;
842 }
843
844 if self.check_article() {
846 self.advance();
847 }
848
849 if let Some(name) = self.consume_noun_or_proper() {
850 let name_str = self.interner.resolve(name);
851
852 let modified_name = if name_str == "SharedSet" || name_str == "ORSet" {
854 if self.check_lparen() {
855 self.advance(); let modifier = if self.check_removewins() {
857 self.advance(); Some("SharedSet_RemoveWins")
859 } else if self.check_addwins() {
860 self.advance(); Some("SharedSet_AddWins")
862 } else {
863 None
864 };
865 if self.check_rparen() {
866 self.advance(); }
868 modifier.map(|m| self.interner.intern(m))
869 } else {
870 None
871 }
872 } else if name_str == "SharedSequence" {
873 if self.check_lparen() {
875 self.advance(); let modifier = if self.check_yata() {
877 self.advance(); Some("SharedSequence_YATA")
879 } else {
880 None
881 };
882 if self.check_rparen() {
883 self.advance(); }
885 modifier.map(|m| self.interner.intern(m))
886 } else {
887 None
888 }
889 } else {
890 None
891 };
892
893 let final_name = modified_name.unwrap_or(name);
895 let final_name_str = self.interner.resolve(final_name);
896
897 if (final_name_str == "SharedMap" || final_name_str == "ORMap") && self.check_from() {
899 self.advance(); let key_type = self.consume_field_type();
901 if self.check_to() {
903 self.advance(); }
905 let value_type = self.consume_field_type();
906 return FieldType::Generic { base: final_name, params: vec![key_type, value_type] };
907 }
908
909 if self.check_preposition("of") {
911 let is_map_type = final_name_str == "Map" || final_name_str == "HashMap";
913
914 self.advance();
915 let first_param = self.consume_field_type();
916
917 if is_map_type && self.check_to() {
919 self.advance(); let second_param = self.consume_field_type();
921 return FieldType::Generic { base: final_name, params: vec![first_param, second_param] };
922 }
923
924 return FieldType::Generic { base: final_name, params: vec![first_param] };
925 }
926
927 if final_name_str == "Divergent" {
929 let param = self.consume_field_type();
931 return FieldType::Generic { base: final_name, params: vec![param] };
932 }
933
934 match final_name_str {
936 "Int" | "Nat" | "Text" | "Bool" | "Real" | "Unit" => FieldType::Primitive(final_name),
937 _ => FieldType::Named(final_name),
938 }
939 } else {
940 FieldType::Primitive(self.interner.intern("Unknown"))
941 }
942 }
943
944 fn peek(&self) -> Option<&Token> {
946 self.tokens.get(self.pos)
947 }
948
949 fn advance(&mut self) {
950 if self.pos < self.tokens.len() {
951 self.pos += 1;
952 }
953 }
954
955 fn check_article(&self) -> bool {
956 match self.peek() {
957 Some(Token { kind: TokenType::Article(_), .. }) => true,
958 Some(Token { kind: TokenType::ProperName(sym), .. }) => {
960 let text = self.interner.resolve(*sym);
961 text.eq_ignore_ascii_case("a") || text.eq_ignore_ascii_case("an")
962 }
963 _ => false,
964 }
965 }
966
967 fn check_copula(&self) -> bool {
968 match self.peek() {
969 Some(Token { kind: TokenType::Is | TokenType::Are, .. }) => true,
970 Some(Token { kind: TokenType::Verb { lemma, .. }, .. }) => {
972 let word = self.interner.resolve(*lemma).to_lowercase();
973 word == "is" || word == "are"
974 }
975 _ => false,
976 }
977 }
978
979 fn check_preposition(&self, word: &str) -> bool {
980 if let Some(Token { kind: TokenType::Preposition(sym), .. }) = self.peek() {
981 self.interner.resolve(*sym) == word
982 } else {
983 false
984 }
985 }
986
987 fn consume_noun_or_proper(&mut self) -> Option<Symbol> {
988 let t = self.peek()?;
989 match &t.kind {
990 TokenType::Noun(s) | TokenType::ProperName(s) => {
991 let sym = *s;
992 self.advance();
993 Some(sym)
994 }
995 TokenType::Adjective(s) => {
997 let sym = *s;
998 self.advance();
999 Some(sym)
1000 }
1001 TokenType::Performative(s) => {
1003 let sym = *s;
1004 self.advance();
1005 Some(sym)
1006 }
1007 TokenType::Items | TokenType::Some => {
1009 let sym = t.lexeme;
1010 self.advance();
1011 Some(sym)
1012 }
1013 TokenType::Verb { .. } => {
1018 let sym = t.lexeme;
1019 self.advance();
1020 Some(sym)
1021 }
1022 TokenType::Tally => {
1024 self.advance();
1025 Some(self.interner.intern("Tally"))
1026 }
1027 TokenType::SharedSet => {
1028 self.advance();
1029 Some(self.interner.intern("SharedSet"))
1030 }
1031 TokenType::SharedSequence => {
1032 self.advance();
1033 Some(self.interner.intern("SharedSequence"))
1034 }
1035 TokenType::CollaborativeSequence => {
1036 self.advance();
1037 Some(self.interner.intern("CollaborativeSequence"))
1038 }
1039 TokenType::SharedMap => {
1040 self.advance();
1041 Some(self.interner.intern("SharedMap"))
1042 }
1043 TokenType::Divergent => {
1044 self.advance();
1045 Some(self.interner.intern("Divergent"))
1046 }
1047 TokenType::Ambiguous { .. } => {
1050 let sym = t.lexeme;
1051 self.advance();
1052 Some(sym)
1053 }
1054 TokenType::Escape => {
1056 let sym = t.lexeme;
1057 self.advance();
1058 Some(sym)
1059 }
1060 TokenType::Focus(_) => {
1062 let sym = t.lexeme;
1063 self.advance();
1064 Some(sym)
1065 }
1066 TokenType::Nothing => {
1068 let sym = t.lexeme;
1069 self.advance();
1070 Some(sym)
1071 }
1072 TokenType::Article(_) => {
1074 let sym = t.lexeme;
1075 self.advance();
1076 Some(sym)
1077 }
1078 TokenType::Either => {
1080 let sym = t.lexeme;
1081 self.advance();
1082 Some(sym)
1083 }
1084 TokenType::CalendarUnit(_) => {
1086 let sym = t.lexeme;
1087 self.advance();
1088 Some(sym)
1089 }
1090 _ => None
1091 }
1092 }
1093
1094 fn check_word(&self, word: &str) -> bool {
1095 if let Some(token) = self.peek() {
1096 self.interner.resolve(token.lexeme).eq_ignore_ascii_case(word)
1098 } else {
1099 false
1100 }
1101 }
1102
1103 fn skip_to_period(&mut self) {
1104 while self.pos < self.tokens.len() {
1105 if matches!(self.peek(), Some(Token { kind: TokenType::Period, .. })) {
1106 self.advance();
1107 break;
1108 }
1109 self.advance();
1110 }
1111 }
1112
1113 fn check_colon(&self) -> bool {
1114 matches!(self.peek(), Some(Token { kind: TokenType::Colon, .. }))
1115 }
1116
1117 fn check_newline(&self) -> bool {
1118 matches!(self.peek(), Some(Token { kind: TokenType::Newline, .. }))
1119 }
1120
1121 fn check_indent(&self) -> bool {
1122 matches!(self.peek(), Some(Token { kind: TokenType::Indent, .. }))
1123 }
1124
1125 fn check_dedent(&self) -> bool {
1126 matches!(self.peek(), Some(Token { kind: TokenType::Dedent, .. }))
1127 }
1128
1129 fn check_comma(&self) -> bool {
1130 matches!(self.peek(), Some(Token { kind: TokenType::Comma, .. }))
1131 }
1132
1133 fn check_period(&self) -> bool {
1134 matches!(self.peek(), Some(Token { kind: TokenType::Period, .. }))
1135 }
1136
1137 fn check_either(&self) -> bool {
1138 matches!(self.peek(), Some(Token { kind: TokenType::Either, .. }))
1139 }
1140
1141 fn check_lparen(&self) -> bool {
1142 matches!(self.peek(), Some(Token { kind: TokenType::LParen, .. }))
1143 }
1144
1145 fn check_rparen(&self) -> bool {
1146 matches!(self.peek(), Some(Token { kind: TokenType::RParen, .. }))
1147 }
1148
1149 fn check_addwins(&self) -> bool {
1151 matches!(self.peek(), Some(Token { kind: TokenType::AddWins, .. }))
1152 }
1153
1154 fn check_removewins(&self) -> bool {
1156 matches!(self.peek(), Some(Token { kind: TokenType::RemoveWins, .. }))
1157 }
1158
1159 fn check_yata(&self) -> bool {
1161 matches!(self.peek(), Some(Token { kind: TokenType::YATA, .. }))
1162 }
1163
1164 fn check_to(&self) -> bool {
1166 match self.peek() {
1167 Some(Token { kind: TokenType::To, .. }) => true,
1168 Some(Token { kind: TokenType::Preposition(sym), .. }) => {
1169 self.interner.resolve(*sym) == "to"
1170 }
1171 _ => false,
1172 }
1173 }
1174
1175 fn check_from(&self) -> bool {
1177 match self.peek() {
1178 Some(Token { kind: TokenType::From, .. }) => true,
1179 Some(Token { kind: TokenType::Preposition(sym), .. }) => {
1180 self.interner.resolve(*sym) == "from"
1181 }
1182 _ => false,
1183 }
1184 }
1185
1186 fn check_portable(&self) -> bool {
1188 matches!(self.peek(), Some(Token { kind: TokenType::Portable, .. }))
1189 }
1190
1191 fn check_shared(&self) -> bool {
1193 matches!(self.peek(), Some(Token { kind: TokenType::Shared, .. }))
1194 }
1195
1196 fn check_lbracket(&self) -> bool {
1198 matches!(self.peek(), Some(Token { kind: TokenType::LBracket, .. }))
1199 }
1200
1201 fn check_rbracket(&self) -> bool {
1202 matches!(self.peek(), Some(Token { kind: TokenType::RBracket, .. }))
1203 }
1204
1205 fn parse_type_params(&mut self) -> Vec<Symbol> {
1207 let mut params = Vec::new();
1208
1209 loop {
1210 if self.check_lbracket() {
1211 self.advance(); if let Some(param) = self.consume_noun_or_proper() {
1213 params.push(param);
1214 }
1215 if self.check_rbracket() {
1216 self.advance(); }
1218 }
1219
1220 if self.check_word("and") {
1222 self.advance();
1223 continue;
1224 }
1225 break;
1226 }
1227 params
1228 }
1229
1230 fn consume_field_type_with_params(&mut self, type_params: &[Symbol]) -> FieldType {
1232 if self.check_lparen() {
1234 self.advance(); let inner_type = self.consume_field_type_with_params(type_params);
1236 if self.check_rparen() {
1237 self.advance(); }
1239 return inner_type;
1240 }
1241
1242 if let Some(Token { kind: TokenType::Article(_), lexeme, .. }) = self.peek() {
1245 let text = self.interner.resolve(*lexeme);
1246 for ¶m_sym in type_params {
1248 let param_name = self.interner.resolve(param_sym);
1249 if text.eq_ignore_ascii_case(param_name) {
1250 self.advance(); return FieldType::TypeParam(param_sym);
1252 }
1253 }
1254 self.advance();
1256 }
1257
1258 if let Some(name) = self.consume_noun_or_proper() {
1259 if type_params.contains(&name) {
1261 return FieldType::TypeParam(name);
1262 }
1263
1264 let name_str = self.interner.resolve(name);
1265
1266 let modified_name = if name_str == "SharedSet" || name_str == "ORSet" {
1268 if self.check_lparen() {
1269 self.advance(); let modifier = if self.check_removewins() {
1271 self.advance(); Some("SharedSet_RemoveWins")
1273 } else if self.check_addwins() {
1274 self.advance(); Some("SharedSet_AddWins")
1276 } else {
1277 None
1278 };
1279 if self.check_rparen() {
1280 self.advance(); }
1282 modifier.map(|m| self.interner.intern(m))
1283 } else {
1284 None
1285 }
1286 } else if name_str == "SharedSequence" {
1287 if self.check_lparen() {
1289 self.advance(); let modifier = if self.check_yata() {
1291 self.advance(); Some("SharedSequence_YATA")
1293 } else {
1294 None
1295 };
1296 if self.check_rparen() {
1297 self.advance(); }
1299 modifier.map(|m| self.interner.intern(m))
1300 } else {
1301 None
1302 }
1303 } else {
1304 None
1305 };
1306
1307 let final_name = modified_name.unwrap_or(name);
1309 let final_name_str = self.interner.resolve(final_name);
1310
1311 if (final_name_str == "SharedMap" || final_name_str == "ORMap") && self.check_from() {
1313 self.advance(); let key_type = self.consume_field_type_with_params(type_params);
1315 if self.check_to() {
1317 self.advance(); }
1319 let value_type = self.consume_field_type_with_params(type_params);
1320 return FieldType::Generic { base: final_name, params: vec![key_type, value_type] };
1321 }
1322
1323 if self.check_preposition("of") {
1325 let is_map_type = final_name_str == "Map" || final_name_str == "HashMap";
1327
1328 self.advance();
1329 let first_param = self.consume_field_type_with_params(type_params);
1330
1331 if is_map_type && self.check_to() {
1333 self.advance(); let second_param = self.consume_field_type_with_params(type_params);
1335 return FieldType::Generic { base: final_name, params: vec![first_param, second_param] };
1336 }
1337
1338 return FieldType::Generic { base: final_name, params: vec![first_param] };
1339 }
1340
1341 if final_name_str == "Divergent" {
1343 let param = self.consume_field_type_with_params(type_params);
1345 return FieldType::Generic { base: final_name, params: vec![param] };
1346 }
1347
1348 match final_name_str {
1350 "Int" | "Nat" | "Text" | "Bool" | "Real" | "Unit" => FieldType::Primitive(final_name),
1351 _ => FieldType::Named(final_name),
1352 }
1353 } else {
1354 FieldType::Primitive(self.interner.intern("Unknown"))
1355 }
1356 }
1357}
1358
1359#[cfg(test)]
1363mod tests {
1364 use super::*;
1365 use crate::Lexer;
1366 use crate::mwe;
1367
1368 fn make_tokens(source: &str, interner: &mut Interner) -> Vec<Token> {
1369 let mut lexer = Lexer::new(source, interner);
1370 let tokens = lexer.tokenize();
1371 let mwe_trie = mwe::build_mwe_trie();
1372 mwe::apply_mwe_pipeline(tokens, &mwe_trie, interner)
1373 }
1374
1375 #[test]
1376 fn discovery_finds_generic_in_definition_block() {
1377 let source = "## Definition\nA Stack is a generic collection.";
1378 let mut interner = Interner::new();
1379 let tokens = make_tokens(source, &mut interner);
1380
1381 let mut discovery = DiscoveryPass::new(&tokens, &mut interner);
1382 let registry = discovery.run();
1383
1384 let stack = interner.intern("Stack");
1385 assert!(registry.is_generic(stack), "Stack should be discovered as generic");
1386 }
1387
1388 #[test]
1389 fn discovery_parses_struct_with_fields() {
1390 let source = r#"## Definition
1391A Point has:
1392 an x, which is Int.
1393 a y, which is Int.
1394"#;
1395 let mut interner = Interner::new();
1396 let tokens = make_tokens(source, &mut interner);
1397
1398 let mut discovery = DiscoveryPass::new(&tokens, &mut interner);
1399 let registry = discovery.run();
1400
1401 let point = interner.intern("Point");
1402 assert!(registry.is_type(point), "Point should be registered");
1403
1404 if let Some(TypeDef::Struct { fields, generics, .. }) = registry.get(point) {
1405 assert_eq!(fields.len(), 2, "Point should have 2 fields, got {:?}", fields);
1406 assert_eq!(interner.resolve(fields[0].name), "x");
1407 assert_eq!(interner.resolve(fields[1].name), "y");
1408 assert!(generics.is_empty(), "Point should have no generics");
1409 } else {
1410 panic!("Point should be a struct with fields");
1411 }
1412 }
1413
1414 #[test]
1415 fn discovery_works_with_markdown_header() {
1416 let source = r#"# Geometry
1418
1419## Definition
1420A Point has:
1421 an x, which is Int.
1422"#;
1423 let mut interner = Interner::new();
1424 let tokens = make_tokens(source, &mut interner);
1425
1426 for (i, tok) in tokens.iter().enumerate() {
1428 eprintln!("Token {}: {:?}", i, tok.kind);
1429 }
1430
1431 let mut discovery = DiscoveryPass::new(&tokens, &mut interner);
1432 let registry = discovery.run();
1433 let point = interner.intern("Point");
1434 assert!(registry.is_type(point), "Point should be discovered even with # header");
1435 }
1436
1437 #[test]
1438 fn discovery_parses_portable_enum() {
1439 let source = r#"## Definition
1440A Command is Portable and is either:
1441 a Start.
1442 a Stop.
1443 a Pause.
1444"#;
1445 let mut interner = Interner::new();
1446 let tokens = make_tokens(source, &mut interner);
1447
1448 eprintln!("Tokens for portable enum:");
1450 for (i, tok) in tokens.iter().enumerate() {
1451 eprintln!("Token {}: {:?} ({})", i, tok.kind, interner.resolve(tok.lexeme));
1452 }
1453
1454 let mut discovery = DiscoveryPass::new(&tokens, &mut interner);
1455 let registry = discovery.run();
1456
1457 let command = interner.intern("Command");
1458 assert!(registry.is_type(command), "Command should be registered as type");
1459
1460 if let Some(TypeDef::Enum { variants, is_portable, .. }) = registry.get(command) {
1461 eprintln!("Command is_portable: {}", is_portable);
1462 eprintln!("Variants: {:?}", variants.iter().map(|v| interner.resolve(v.name)).collect::<Vec<_>>());
1463 assert!(*is_portable, "Command should be portable");
1464 assert_eq!(variants.len(), 3, "Command should have 3 variants");
1465 } else {
1466 panic!("Command should be an enum, got: {:?}", registry.get(command));
1467 }
1468 }
1469
1470 #[test]
1471 fn discovery_parses_lww_int_field() {
1472 let source = r#"## Definition
1473A Setting is Shared and has:
1474 a volume, which is LastWriteWins of Int.
1475"#;
1476 let mut interner = Interner::new();
1477 let tokens = make_tokens(source, &mut interner);
1478
1479 eprintln!("Tokens for LWW of Int:");
1481 for (i, tok) in tokens.iter().enumerate() {
1482 eprintln!("{:3}: {:?} ({})", i, tok.kind, interner.resolve(tok.lexeme));
1483 }
1484
1485 let mut discovery = DiscoveryPass::new(&tokens, &mut interner);
1486 let registry = discovery.run();
1487
1488 let setting = interner.intern("Setting");
1489 assert!(registry.is_type(setting), "Setting should be registered");
1490
1491 if let Some(TypeDef::Struct { fields, is_shared, .. }) = registry.get(setting) {
1492 eprintln!("is_shared: {}", is_shared);
1493 eprintln!("Fields: {:?}", fields.len());
1494 for f in fields {
1495 eprintln!(" field: {} = {:?}", interner.resolve(f.name), f.ty);
1496 }
1497 assert!(*is_shared, "Setting should be shared");
1498 assert_eq!(fields.len(), 1, "Setting should have 1 field");
1499 } else {
1500 panic!("Setting should be a struct, got: {:?}", registry.get(setting));
1501 }
1502 }
1503}