1use crate::token::{Token, TokenType, BlockType};
20use logicaffeine_base::{Interner, Symbol};
21use super::registry::{TypeRegistry, TypeDef, FieldDef, FieldType, VariantDef};
22use super::policy::{PolicyRegistry, PredicateDef, CapabilityDef, PolicyCondition};
23use super::dependencies::scan_dependencies;
24
/// Combined output of a full discovery run.
///
/// Produced by `DiscoveryPass::run_full`; `DiscoveryPass::run` returns only
/// the `types` half.
pub struct DiscoveryResult {
    /// Type definitions registered while scanning definition / typedef blocks.
    pub types: TypeRegistry,
    /// Predicates and capabilities registered while scanning policy blocks.
    pub policies: PolicyRegistry,
}
30
/// Cursor-based scanner over a token slice that registers type and policy
/// definitions as it walks the stream.
pub struct DiscoveryPass<'a> {
    /// The token stream being scanned.
    tokens: &'a [Token],
    /// Index of the current token in `tokens`; never advanced past `tokens.len()`.
    pos: usize,
    /// Interner used to resolve token symbols and to intern synthetic names.
    interner: &'a mut Interner,
}
44
45impl<'a> DiscoveryPass<'a> {
46 pub fn new(tokens: &'a [Token], interner: &'a mut Interner) -> Self {
47 Self { tokens, pos: 0, interner }
48 }
49
50 pub fn run(&mut self) -> TypeRegistry {
53 self.run_full().types
54 }
55
56 pub fn run_full(&mut self) -> DiscoveryResult {
58 let mut type_registry = TypeRegistry::with_primitives(self.interner);
59 let mut policy_registry = PolicyRegistry::new();
60
61 while self.pos < self.tokens.len() {
62 if self.check_block_header(BlockType::Definition) {
64 self.advance(); self.scan_definition_block(&mut type_registry);
66 } else if self.check_block_header(BlockType::TypeDef) {
67 self.advance(); self.parse_type_definition_inline(&mut type_registry);
71 } else if self.check_block_header(BlockType::Policy) {
72 self.advance(); self.scan_policy_block(&mut policy_registry);
75 } else if self.check_block_header(BlockType::Requires) {
76 self.advance(); while self.pos < self.tokens.len() {
80 if matches!(self.tokens.get(self.pos), Some(Token { kind: TokenType::BlockHeader { .. }, .. })) {
81 break;
82 }
83 self.advance();
84 }
85 } else {
86 self.advance();
87 }
88 }
89
90 DiscoveryResult {
91 types: type_registry,
92 policies: policy_registry,
93 }
94 }
95
96 fn check_block_header(&self, expected: BlockType) -> bool {
97 matches!(
98 self.tokens.get(self.pos),
99 Some(Token { kind: TokenType::BlockHeader { block_type }, .. })
100 if *block_type == expected
101 )
102 }
103
104 fn scan_definition_block(&mut self, registry: &mut TypeRegistry) {
105 while self.pos < self.tokens.len() {
107 if matches!(self.peek(), Some(Token { kind: TokenType::BlockHeader { .. }, .. })) {
108 break;
109 }
110
111 if self.check_article() {
113 self.try_parse_type_definition(registry);
114 } else {
115 self.advance();
116 }
117 }
118 }
119
120 fn scan_policy_block(&mut self, registry: &mut PolicyRegistry) {
125 while self.pos < self.tokens.len() {
126 if matches!(self.peek(), Some(Token { kind: TokenType::BlockHeader { .. }, .. })) {
127 break;
128 }
129
130 if self.check_newline() || self.check_indent() || self.check_dedent() {
132 self.advance();
133 continue;
134 }
135
136 if self.check_article() {
138 self.try_parse_policy_definition(registry);
139 } else {
140 self.advance();
141 }
142 }
143 }
144
145 fn try_parse_policy_definition(&mut self, registry: &mut PolicyRegistry) {
147 self.advance(); let subject_type = match self.consume_noun_or_proper() {
151 Some(sym) => sym,
152 None => return,
153 };
154
155 if self.check_copula() {
157 self.advance(); let predicate_name = match self.consume_noun_or_proper() {
162 Some(sym) => sym,
163 None => return,
164 };
165
166 if !self.check_word("if") {
168 self.skip_to_period();
169 return;
170 }
171 self.advance(); if self.check_colon() {
175 self.advance();
176 }
177 if self.check_newline() {
178 self.advance();
179 }
180 if self.check_indent() {
181 self.advance();
182 }
183
184 let condition = self.parse_policy_condition(subject_type, None);
186
187 registry.register_predicate(PredicateDef {
188 subject_type,
189 predicate_name,
190 condition,
191 });
192
193 self.skip_to_period();
194 } else if self.check_word("can") {
195 self.advance(); let action = match self.consume_noun_or_proper() {
200 Some(sym) => sym,
201 None => {
202 if let Some(Token { kind: TokenType::Verb { lemma, .. }, .. }) = self.peek() {
204 let sym = *lemma;
205 self.advance();
206 sym
207 } else {
208 return;
209 }
210 }
211 };
212
213 if self.check_article() {
215 self.advance();
216 }
217
218 let object_type = match self.consume_noun_or_proper() {
220 Some(sym) => sym,
221 None => return,
222 };
223
224 if !self.check_word("if") {
226 self.skip_to_period();
227 return;
228 }
229 self.advance(); if self.check_colon() {
233 self.advance();
234 }
235 if self.check_newline() {
236 self.advance();
237 }
238 if self.check_indent() {
239 self.advance();
240 }
241
242 let condition = self.parse_policy_condition(subject_type, Some(object_type));
243
244 registry.register_capability(CapabilityDef {
245 subject_type,
246 action,
247 object_type,
248 condition,
249 });
250
251 self.skip_policy_definition();
253 } else {
254 self.skip_to_period();
255 }
256 }
257
258 fn parse_policy_condition(&mut self, subject_type: Symbol, object_type: Option<Symbol>) -> PolicyCondition {
261 let first = self.parse_atomic_condition(subject_type, object_type);
262
263 loop {
265 while self.check_newline() {
267 self.advance();
268 }
269
270 if self.check_comma() {
272 self.advance(); while self.check_newline() {
275 self.advance();
276 }
277 }
278
279 if self.check_word("AND") {
280 self.advance();
281 while self.check_newline() {
283 self.advance();
284 }
285 let right = self.parse_atomic_condition(subject_type, object_type);
286 return PolicyCondition::And(Box::new(first), Box::new(right));
287 } else if self.check_word("OR") {
288 self.advance();
289 while self.check_newline() {
291 self.advance();
292 }
293 let right = self.parse_atomic_condition(subject_type, object_type);
294 return PolicyCondition::Or(Box::new(first), Box::new(right));
295 } else {
296 break;
297 }
298 }
299
300 first
301 }
302
303 fn parse_atomic_condition(&mut self, subject_type: Symbol, object_type: Option<Symbol>) -> PolicyCondition {
305 if self.check_article() {
307 self.advance();
308 }
309
310 let subject_ref = match self.consume_noun_or_proper() {
312 Some(sym) => sym,
313 None => return PolicyCondition::FieldEquals {
314 field: self.interner.intern("unknown"),
315 value: self.interner.intern("unknown"),
316 is_string_literal: false,
317 },
318 };
319
320 if self.check_possessive() {
322 self.advance(); let field = match self.consume_noun_or_proper() {
326 Some(sym) => sym,
327 None => return PolicyCondition::FieldEquals {
328 field: self.interner.intern("unknown"),
329 value: self.interner.intern("unknown"),
330 is_string_literal: false,
331 },
332 };
333
334 if self.check_word("equals") {
336 self.advance();
337
338 let (value, is_string_literal) = self.consume_value();
340
341 return PolicyCondition::FieldEquals { field, value, is_string_literal };
342 }
343 } else if self.check_copula() {
344 self.advance(); let predicate = match self.consume_noun_or_proper() {
349 Some(sym) => sym,
350 None => return PolicyCondition::FieldEquals {
351 field: self.interner.intern("unknown"),
352 value: self.interner.intern("unknown"),
353 is_string_literal: false,
354 },
355 };
356
357 return PolicyCondition::Predicate {
358 subject: subject_ref,
359 predicate,
360 };
361 } else if self.check_word("equals") {
362 self.advance(); if self.check_article() {
367 self.advance();
368 }
369
370 if let Some(obj_ref) = self.consume_noun_or_proper() {
372 if self.check_possessive() {
373 self.advance(); if let Some(field) = self.consume_noun_or_proper() {
375 return PolicyCondition::ObjectFieldEquals {
376 subject: subject_ref,
377 object: obj_ref,
378 field,
379 };
380 }
381 }
382 }
383 }
384
385 PolicyCondition::FieldEquals {
387 field: self.interner.intern("unknown"),
388 value: self.interner.intern("unknown"),
389 is_string_literal: false,
390 }
391 }
392
393 fn consume_value(&mut self) -> (Symbol, bool) {
395 if let Some(Token { kind: TokenType::StringLiteral(sym), .. }) = self.peek() {
396 let s = *sym;
397 self.advance();
398 (s, true)
399 } else if let Some(sym) = self.consume_noun_or_proper() {
400 (sym, false)
401 } else {
402 (self.interner.intern("unknown"), false)
403 }
404 }
405
406 fn check_possessive(&self) -> bool {
408 matches!(self.peek(), Some(Token { kind: TokenType::Possessive, .. }))
409 }
410
411 fn skip_policy_definition(&mut self) {
413 let mut depth = 0;
414 while self.pos < self.tokens.len() {
415 if self.check_indent() {
416 depth += 1;
417 } else if self.check_dedent() {
418 if depth == 0 {
419 break;
420 }
421 depth -= 1;
422 }
423 if self.check_period() && depth == 0 {
424 self.advance();
425 break;
426 }
427 if matches!(self.peek(), Some(Token { kind: TokenType::BlockHeader { .. }, .. })) {
428 break;
429 }
430 self.advance();
431 }
432 }
433
    /// Parses a type definition that starts at the current token, without
    /// first skipping a leading article. Used by `run_full` right after a
    /// `TypeDef` block header.
    fn parse_type_definition_inline(&mut self, registry: &mut TypeRegistry) {
        self.parse_type_definition_body(registry);
    }
439
    /// Steps over the current token (the article detected by the caller in
    /// `scan_definition_block`) and then parses a type definition body.
    fn try_parse_type_definition(&mut self, registry: &mut TypeRegistry) {
        self.advance();
        self.parse_type_definition_body(registry);
    }
444
445 fn parse_type_definition_body(&mut self, registry: &mut TypeRegistry) {
446 let mut is_portable = false;
448 let mut is_shared = false;
449 loop {
450 if self.check_portable() {
451 is_portable = true;
452 self.advance();
453 } else if self.check_shared() {
454 is_shared = true;
455 self.advance();
456 } else {
457 break;
458 }
459 }
460
461 if let Some(name_sym) = self.consume_noun_or_proper() {
462 let type_params = if self.check_preposition("of") {
464 self.advance(); self.parse_type_params()
466 } else {
467 vec![]
468 };
469 if self.check_copula() {
470 let copula_pos = self.pos;
471 self.advance(); loop {
475 if self.check_portable() {
476 self.advance(); is_portable = true;
478 if self.check_word("and") {
479 self.advance(); }
481 } else if self.check_shared() {
482 self.advance(); is_shared = true;
484 if self.check_word("and") {
485 self.advance(); }
487 } else {
488 break;
489 }
490 }
491
492 if !is_portable && !is_shared {
494 self.pos = copula_pos;
495 }
496 }
497
498 if self.check_word("has") {
501 self.advance(); if self.check_colon() {
503 self.advance(); if self.check_newline() {
506 self.advance();
507 }
508 if self.check_indent() {
509 self.advance(); let fields = self.parse_struct_fields_with_params(&type_params);
511 registry.register(name_sym, TypeDef::Struct { fields, generics: type_params, is_portable, is_shared });
512 return;
513 }
514 }
515 }
516
517 if self.check_copula() {
519 self.advance(); let is_enum_pattern = if self.check_either() {
523 self.advance(); true
525 } else if self.check_word("one") {
526 self.advance(); if self.check_word("of") {
528 self.advance(); true
530 } else {
531 false
532 }
533 } else {
534 false
535 };
536
537 if is_enum_pattern {
538 if self.check_colon() {
539 self.advance(); if self.check_newline() {
542 self.advance();
543 }
544 if self.check_indent() {
545 self.advance(); let variants = self.parse_enum_variants_with_params(&type_params);
547 registry.register(name_sym, TypeDef::Enum { variants, generics: type_params, is_portable, is_shared });
548 return;
549 }
550 }
551 }
552
553 if self.check_article() {
554 self.advance(); if self.check_word("generic") {
558 registry.register(name_sym, TypeDef::Generic { param_count: 1 });
559 self.skip_to_period();
560 } else if self.check_word("record") || self.check_word("struct") || self.check_word("structure") {
561 registry.register(name_sym, TypeDef::Struct { fields: vec![], generics: vec![], is_portable: false, is_shared: false });
562 self.skip_to_period();
563 } else if self.check_word("sum") || self.check_word("enum") || self.check_word("choice") {
564 registry.register(name_sym, TypeDef::Enum { variants: vec![], generics: vec![], is_portable: false, is_shared: false });
565 self.skip_to_period();
566 }
567 }
568 } else if !type_params.is_empty() {
569 registry.register(name_sym, TypeDef::Generic { param_count: type_params.len() });
571 self.skip_to_period();
572 }
573 }
574 }
575
576 fn parse_enum_variants_with_params(&mut self, type_params: &[Symbol]) -> Vec<VariantDef> {
580 let mut variants = Vec::new();
581
582 while self.pos < self.tokens.len() {
583 if self.check_dedent() {
585 self.advance();
586 break;
587 }
588 if matches!(self.peek(), Some(Token { kind: TokenType::BlockHeader { .. }, .. })) {
589 break;
590 }
591
592 if self.check_newline() {
594 self.advance();
595 continue;
596 }
597
598 if self.check_article() {
601 self.advance(); }
603
604 if let Some(variant_name) = self.consume_noun_or_proper() {
606 let fields = if self.check_word("with") {
608 self.parse_variant_fields_natural_with_params(type_params)
610 } else if self.check_lparen() {
611 self.parse_variant_fields_concise_with_params(type_params)
613 } else {
614 vec![]
616 };
617
618 variants.push(VariantDef {
619 name: variant_name,
620 fields,
621 });
622
623 if self.check_period() {
625 self.advance();
626 }
627 } else {
628 self.advance(); }
630 }
631
632 variants
633 }
634
    /// Parses an enum variant list with no generic type parameters in scope.
    fn parse_enum_variants(&mut self) -> Vec<VariantDef> {
        self.parse_enum_variants_with_params(&[])
    }
639
640 fn parse_variant_fields_natural_with_params(&mut self, type_params: &[Symbol]) -> Vec<FieldDef> {
645 let mut fields = Vec::new();
646
647 self.advance();
649
650 loop {
651 if self.check_article() {
653 self.advance();
654 }
655
656 if let Some(field_name) = self.consume_noun_or_proper() {
658 let ty = if self.check_comma() {
662 self.advance(); if self.check_word("which") {
665 self.advance();
666 }
667 if self.check_copula() {
669 self.advance();
670 }
671 self.consume_field_type_with_params(type_params)
672 } else if self.check_colon() {
673 self.advance(); self.consume_field_type_with_params(type_params)
675 } else {
676 self.consume_field_type_with_params(type_params)
678 };
679
680 fields.push(FieldDef {
681 name: field_name,
682 ty,
683 is_public: true, });
685
686 if self.check_comma() {
689 self.advance(); }
691 if self.check_word("and") {
692 self.advance();
693 continue;
694 }
695 }
696 break;
697 }
698
699 fields
700 }
701
    /// Parses natural-form (`with ...`) variant fields with no generic type
    /// parameters in scope.
    fn parse_variant_fields_natural(&mut self) -> Vec<FieldDef> {
        self.parse_variant_fields_natural_with_params(&[])
    }
706
707 fn parse_variant_fields_concise_with_params(&mut self, type_params: &[Symbol]) -> Vec<FieldDef> {
709 let mut fields = Vec::new();
710
711 self.advance();
713
714 loop {
715 if let Some(field_name) = self.consume_noun_or_proper() {
717 let ty = if self.check_colon() {
719 self.advance(); self.consume_field_type_with_params(type_params)
721 } else {
722 FieldType::Primitive(self.interner.intern("Unknown"))
723 };
724
725 fields.push(FieldDef {
726 name: field_name,
727 ty,
728 is_public: true, });
730
731 if self.check_comma() {
733 self.advance();
734 continue;
735 }
736 }
737 break;
738 }
739
740 if self.check_rparen() {
742 self.advance();
743 }
744
745 fields
746 }
747
    /// Parses concise-form (parenthesised) variant fields with no generic
    /// type parameters in scope.
    fn parse_variant_fields_concise(&mut self) -> Vec<FieldDef> {
        self.parse_variant_fields_concise_with_params(&[])
    }
752
753 fn parse_struct_fields_with_params(&mut self, type_params: &[Symbol]) -> Vec<FieldDef> {
756 let mut fields = Vec::new();
757
758 while self.pos < self.tokens.len() {
759 if self.check_dedent() {
761 self.advance();
762 break;
763 }
764 if matches!(self.peek(), Some(Token { kind: TokenType::BlockHeader { .. }, .. })) {
765 break;
766 }
767
768 if self.check_newline() {
770 self.advance();
771 continue;
772 }
773
774 let has_article = self.check_article();
777 if has_article {
778 self.advance(); }
780
781 let has_public_keyword = if self.check_word("public") {
783 self.advance();
784 true
785 } else {
786 false
787 };
788 let mut is_public = has_public_keyword;
790
791 if let Some(field_name) = self.consume_noun_or_proper() {
793 let ty = if self.check_colon() {
797 is_public = true;
799 self.advance(); self.consume_field_type_with_params(type_params)
801 } else if self.check_comma() {
802 is_public = true;
804 self.advance(); if self.check_word("which") {
807 self.advance();
808 }
809 if self.check_copula() {
811 self.advance();
812 }
813 self.consume_field_type_with_params(type_params)
814 } else if !has_article {
815 continue;
817 } else {
818 FieldType::Primitive(self.interner.intern("Unknown"))
820 };
821
822 fields.push(FieldDef {
823 name: field_name,
824 ty,
825 is_public,
826 });
827
828 if self.check_period() {
830 self.advance();
831 }
832 } else if !has_article {
833 self.advance();
835 }
836 }
837
838 fields
839 }
840
    /// Parses a struct field list with no generic type parameters in scope.
    fn parse_struct_fields(&mut self) -> Vec<FieldDef> {
        self.parse_struct_fields_with_params(&[])
    }
845
846 fn consume_field_type(&mut self) -> FieldType {
848 if self.check_lparen() {
850 self.advance(); let inner_type = self.consume_field_type();
852 if self.check_rparen() {
853 self.advance(); }
855 return inner_type;
856 }
857
858 if self.check_article() {
860 self.advance();
861 }
862
863 if let Some(name) = self.consume_noun_or_proper() {
864 let name_str = self.interner.resolve(name);
865
866 let modified_name = if name_str == "SharedSet" || name_str == "ORSet" {
868 if self.check_lparen() {
869 self.advance(); let modifier = if self.check_removewins() {
871 self.advance(); Some("SharedSet_RemoveWins")
873 } else if self.check_addwins() {
874 self.advance(); Some("SharedSet_AddWins")
876 } else {
877 None
878 };
879 if self.check_rparen() {
880 self.advance(); }
882 modifier.map(|m| self.interner.intern(m))
883 } else {
884 None
885 }
886 } else if name_str == "SharedSequence" {
887 if self.check_lparen() {
889 self.advance(); let modifier = if self.check_yata() {
891 self.advance(); Some("SharedSequence_YATA")
893 } else {
894 None
895 };
896 if self.check_rparen() {
897 self.advance(); }
899 modifier.map(|m| self.interner.intern(m))
900 } else {
901 None
902 }
903 } else {
904 None
905 };
906
907 let final_name = modified_name.unwrap_or(name);
909 let final_name_str = self.interner.resolve(final_name);
910
911 if (final_name_str == "SharedMap" || final_name_str == "ORMap") && self.check_from() {
913 self.advance(); let key_type = self.consume_field_type();
915 if self.check_to() {
917 self.advance(); }
919 let value_type = self.consume_field_type();
920 return FieldType::Generic { base: final_name, params: vec![key_type, value_type] };
921 }
922
923 if self.check_preposition("of") {
925 let is_map_type = final_name_str == "Map" || final_name_str == "HashMap";
927
928 self.advance();
929 let first_param = self.consume_field_type();
930
931 if is_map_type && self.check_to() {
933 self.advance(); let second_param = self.consume_field_type();
935 return FieldType::Generic { base: final_name, params: vec![first_param, second_param] };
936 }
937
938 return FieldType::Generic { base: final_name, params: vec![first_param] };
939 }
940
941 if final_name_str == "Divergent" {
943 let param = self.consume_field_type();
945 return FieldType::Generic { base: final_name, params: vec![param] };
946 }
947
948 match final_name_str {
950 "Int" | "Nat" | "Text" | "Bool" | "Real" | "Unit" => FieldType::Primitive(final_name),
951 _ => FieldType::Named(final_name),
952 }
953 } else {
954 FieldType::Primitive(self.interner.intern("Unknown"))
955 }
956 }
957
    /// Returns the token at the cursor, or `None` once the cursor has reached
    /// the end of the token slice.
    fn peek(&self) -> Option<&Token> {
        self.tokens.get(self.pos)
    }
962
963 fn advance(&mut self) {
964 if self.pos < self.tokens.len() {
965 self.pos += 1;
966 }
967 }
968
969 fn check_article(&self) -> bool {
970 match self.peek() {
971 Some(Token { kind: TokenType::Article(_), .. }) => true,
972 Some(Token { kind: TokenType::ProperName(sym), .. }) => {
974 let text = self.interner.resolve(*sym);
975 text.eq_ignore_ascii_case("a") || text.eq_ignore_ascii_case("an")
976 }
977 _ => false,
978 }
979 }
980
981 fn check_copula(&self) -> bool {
982 match self.peek() {
983 Some(Token { kind: TokenType::Is | TokenType::Are, .. }) => true,
984 Some(Token { kind: TokenType::Verb { lemma, .. }, .. }) => {
986 let word = self.interner.resolve(*lemma).to_lowercase();
987 word == "is" || word == "are"
988 }
989 _ => false,
990 }
991 }
992
993 fn check_preposition(&self, word: &str) -> bool {
994 if let Some(Token { kind: TokenType::Preposition(sym), .. }) = self.peek() {
995 self.interner.resolve(*sym) == word
996 } else {
997 false
998 }
999 }
1000
1001 fn consume_noun_or_proper(&mut self) -> Option<Symbol> {
1002 let t = self.peek()?;
1003 match &t.kind {
1004 TokenType::Noun(s) | TokenType::ProperName(s) => {
1005 let sym = *s;
1006 self.advance();
1007 Some(sym)
1008 }
1009 TokenType::Adjective(s) => {
1011 let sym = *s;
1012 self.advance();
1013 Some(sym)
1014 }
1015 TokenType::Performative(s) => {
1017 let sym = *s;
1018 self.advance();
1019 Some(sym)
1020 }
1021 TokenType::Items | TokenType::Some => {
1023 let sym = t.lexeme;
1024 self.advance();
1025 Some(sym)
1026 }
1027 TokenType::Verb { .. } => {
1032 let sym = t.lexeme;
1033 self.advance();
1034 Some(sym)
1035 }
1036 TokenType::Tally => {
1038 self.advance();
1039 Some(self.interner.intern("Tally"))
1040 }
1041 TokenType::SharedSet => {
1042 self.advance();
1043 Some(self.interner.intern("SharedSet"))
1044 }
1045 TokenType::SharedSequence => {
1046 self.advance();
1047 Some(self.interner.intern("SharedSequence"))
1048 }
1049 TokenType::CollaborativeSequence => {
1050 self.advance();
1051 Some(self.interner.intern("CollaborativeSequence"))
1052 }
1053 TokenType::SharedMap => {
1054 self.advance();
1055 Some(self.interner.intern("SharedMap"))
1056 }
1057 TokenType::Divergent => {
1058 self.advance();
1059 Some(self.interner.intern("Divergent"))
1060 }
1061 TokenType::Ambiguous { .. } => {
1064 let sym = t.lexeme;
1065 self.advance();
1066 Some(sym)
1067 }
1068 TokenType::Escape => {
1070 let sym = t.lexeme;
1071 self.advance();
1072 Some(sym)
1073 }
1074 TokenType::Focus(_) => {
1076 let sym = t.lexeme;
1077 self.advance();
1078 Some(sym)
1079 }
1080 TokenType::Nothing => {
1082 let sym = t.lexeme;
1083 self.advance();
1084 Some(sym)
1085 }
1086 TokenType::Article(_) => {
1088 let sym = t.lexeme;
1089 self.advance();
1090 Some(sym)
1091 }
1092 TokenType::Either => {
1094 let sym = t.lexeme;
1095 self.advance();
1096 Some(sym)
1097 }
1098 TokenType::CalendarUnit(_) => {
1100 let sym = t.lexeme;
1101 self.advance();
1102 Some(sym)
1103 }
1104 _ => None
1105 }
1106 }
1107
1108 fn check_word(&self, word: &str) -> bool {
1109 if let Some(token) = self.peek() {
1110 self.interner.resolve(token.lexeme).eq_ignore_ascii_case(word)
1112 } else {
1113 false
1114 }
1115 }
1116
1117 fn skip_to_period(&mut self) {
1118 while self.pos < self.tokens.len() {
1119 if matches!(self.peek(), Some(Token { kind: TokenType::Period, .. })) {
1120 self.advance();
1121 break;
1122 }
1123 self.advance();
1124 }
1125 }
1126
1127 fn check_colon(&self) -> bool {
1128 matches!(self.peek(), Some(Token { kind: TokenType::Colon, .. }))
1129 }
1130
1131 fn check_newline(&self) -> bool {
1132 matches!(self.peek(), Some(Token { kind: TokenType::Newline, .. }))
1133 }
1134
1135 fn check_indent(&self) -> bool {
1136 matches!(self.peek(), Some(Token { kind: TokenType::Indent, .. }))
1137 }
1138
1139 fn check_dedent(&self) -> bool {
1140 matches!(self.peek(), Some(Token { kind: TokenType::Dedent, .. }))
1141 }
1142
1143 fn check_comma(&self) -> bool {
1144 matches!(self.peek(), Some(Token { kind: TokenType::Comma, .. }))
1145 }
1146
1147 fn check_period(&self) -> bool {
1148 matches!(self.peek(), Some(Token { kind: TokenType::Period, .. }))
1149 }
1150
1151 fn check_either(&self) -> bool {
1152 matches!(self.peek(), Some(Token { kind: TokenType::Either, .. }))
1153 }
1154
1155 fn check_lparen(&self) -> bool {
1156 matches!(self.peek(), Some(Token { kind: TokenType::LParen, .. }))
1157 }
1158
1159 fn check_rparen(&self) -> bool {
1160 matches!(self.peek(), Some(Token { kind: TokenType::RParen, .. }))
1161 }
1162
1163 fn check_addwins(&self) -> bool {
1165 matches!(self.peek(), Some(Token { kind: TokenType::AddWins, .. }))
1166 }
1167
1168 fn check_removewins(&self) -> bool {
1170 matches!(self.peek(), Some(Token { kind: TokenType::RemoveWins, .. }))
1171 }
1172
1173 fn check_yata(&self) -> bool {
1175 matches!(self.peek(), Some(Token { kind: TokenType::YATA, .. }))
1176 }
1177
1178 fn check_to(&self) -> bool {
1180 match self.peek() {
1181 Some(Token { kind: TokenType::To, .. }) => true,
1182 Some(Token { kind: TokenType::Preposition(sym), .. }) => {
1183 self.interner.resolve(*sym) == "to"
1184 }
1185 _ => false,
1186 }
1187 }
1188
1189 fn check_from(&self) -> bool {
1191 match self.peek() {
1192 Some(Token { kind: TokenType::From, .. }) => true,
1193 Some(Token { kind: TokenType::Preposition(sym), .. }) => {
1194 self.interner.resolve(*sym) == "from"
1195 }
1196 _ => false,
1197 }
1198 }
1199
1200 fn check_portable(&self) -> bool {
1202 matches!(self.peek(), Some(Token { kind: TokenType::Portable, .. }))
1203 }
1204
1205 fn check_shared(&self) -> bool {
1207 matches!(self.peek(), Some(Token { kind: TokenType::Shared, .. }))
1208 }
1209
1210 fn check_lbracket(&self) -> bool {
1212 matches!(self.peek(), Some(Token { kind: TokenType::LBracket, .. }))
1213 }
1214
1215 fn check_rbracket(&self) -> bool {
1216 matches!(self.peek(), Some(Token { kind: TokenType::RBracket, .. }))
1217 }
1218
1219 fn parse_type_params(&mut self) -> Vec<Symbol> {
1221 let mut params = Vec::new();
1222
1223 loop {
1224 if self.check_lbracket() {
1225 self.advance(); if let Some(param) = self.consume_noun_or_proper() {
1227 params.push(param);
1228 }
1229 if self.check_rbracket() {
1230 self.advance(); }
1232 }
1233
1234 if self.check_word("and") {
1236 self.advance();
1237 continue;
1238 }
1239 break;
1240 }
1241 params
1242 }
1243
1244 fn consume_field_type_with_params(&mut self, type_params: &[Symbol]) -> FieldType {
1246 if self.check_lparen() {
1248 self.advance(); let inner_type = self.consume_field_type_with_params(type_params);
1250 if self.check_rparen() {
1251 self.advance(); }
1253 return inner_type;
1254 }
1255
1256 if let Some(Token { kind: TokenType::Article(_), lexeme, .. }) = self.peek() {
1259 let text = self.interner.resolve(*lexeme);
1260 for ¶m_sym in type_params {
1262 let param_name = self.interner.resolve(param_sym);
1263 if text.eq_ignore_ascii_case(param_name) {
1264 self.advance(); return FieldType::TypeParam(param_sym);
1266 }
1267 }
1268 self.advance();
1270 }
1271
1272 if let Some(name) = self.consume_noun_or_proper() {
1273 if type_params.contains(&name) {
1275 return FieldType::TypeParam(name);
1276 }
1277
1278 let name_str = self.interner.resolve(name);
1279
1280 let modified_name = if name_str == "SharedSet" || name_str == "ORSet" {
1282 if self.check_lparen() {
1283 self.advance(); let modifier = if self.check_removewins() {
1285 self.advance(); Some("SharedSet_RemoveWins")
1287 } else if self.check_addwins() {
1288 self.advance(); Some("SharedSet_AddWins")
1290 } else {
1291 None
1292 };
1293 if self.check_rparen() {
1294 self.advance(); }
1296 modifier.map(|m| self.interner.intern(m))
1297 } else {
1298 None
1299 }
1300 } else if name_str == "SharedSequence" {
1301 if self.check_lparen() {
1303 self.advance(); let modifier = if self.check_yata() {
1305 self.advance(); Some("SharedSequence_YATA")
1307 } else {
1308 None
1309 };
1310 if self.check_rparen() {
1311 self.advance(); }
1313 modifier.map(|m| self.interner.intern(m))
1314 } else {
1315 None
1316 }
1317 } else {
1318 None
1319 };
1320
1321 let final_name = modified_name.unwrap_or(name);
1323 let final_name_str = self.interner.resolve(final_name);
1324
1325 if (final_name_str == "SharedMap" || final_name_str == "ORMap") && self.check_from() {
1327 self.advance(); let key_type = self.consume_field_type_with_params(type_params);
1329 if self.check_to() {
1331 self.advance(); }
1333 let value_type = self.consume_field_type_with_params(type_params);
1334 return FieldType::Generic { base: final_name, params: vec![key_type, value_type] };
1335 }
1336
1337 if self.check_preposition("of") {
1339 let is_map_type = final_name_str == "Map" || final_name_str == "HashMap";
1341
1342 self.advance();
1343 let first_param = self.consume_field_type_with_params(type_params);
1344
1345 if is_map_type && self.check_to() {
1347 self.advance(); let second_param = self.consume_field_type_with_params(type_params);
1349 return FieldType::Generic { base: final_name, params: vec![first_param, second_param] };
1350 }
1351
1352 return FieldType::Generic { base: final_name, params: vec![first_param] };
1353 }
1354
1355 if final_name_str == "Divergent" {
1357 let param = self.consume_field_type_with_params(type_params);
1359 return FieldType::Generic { base: final_name, params: vec![param] };
1360 }
1361
1362 match final_name_str {
1364 "Int" | "Nat" | "Text" | "Bool" | "Real" | "Unit" => FieldType::Primitive(final_name),
1365 _ => FieldType::Named(final_name),
1366 }
1367 } else {
1368 FieldType::Primitive(self.interner.intern("Unknown"))
1369 }
1370 }
1371}
1372
// Unit tests for the discovery pass. Each test lexes a small source snippet,
// runs the pass, and inspects the resulting type registry.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Lexer;
    use crate::mwe;

    // Lexes `source` and runs the multi-word-expression pipeline over the
    // tokens, returning the token list the discovery pass is run on.
    fn make_tokens(source: &str, interner: &mut Interner) -> Vec<Token> {
        let mut lexer = Lexer::new(source, interner);
        let tokens = lexer.tokenize();
        let mwe_trie = mwe::build_mwe_trie();
        mwe::apply_mwe_pipeline(tokens, &mwe_trie, interner)
    }

    // "X is a generic ..." inside a Definition block registers X as generic.
    #[test]
    fn discovery_finds_generic_in_definition_block() {
        let source = "## Definition\nA Stack is a generic collection.";
        let mut interner = Interner::new();
        let tokens = make_tokens(source, &mut interner);

        let mut discovery = DiscoveryPass::new(&tokens, &mut interner);
        let registry = discovery.run();

        let stack = interner.intern("Stack");
        assert!(registry.is_generic(stack), "Stack should be discovered as generic");
    }

    // "X has:" followed by an indented field list registers a struct whose
    // fields keep their declaration order.
    #[test]
    fn discovery_parses_struct_with_fields() {
        let source = r#"## Definition
A Point has:
 an x, which is Int.
 a y, which is Int.
"#;
        let mut interner = Interner::new();
        let tokens = make_tokens(source, &mut interner);

        let mut discovery = DiscoveryPass::new(&tokens, &mut interner);
        let registry = discovery.run();

        let point = interner.intern("Point");
        assert!(registry.is_type(point), "Point should be registered");

        if let Some(TypeDef::Struct { fields, generics, .. }) = registry.get(point) {
            assert_eq!(fields.len(), 2, "Point should have 2 fields, got {:?}", fields);
            assert_eq!(interner.resolve(fields[0].name), "x");
            assert_eq!(interner.resolve(fields[1].name), "y");
            assert!(generics.is_empty(), "Point should have no generics");
        } else {
            panic!("Point should be a struct with fields");
        }
    }

    // A leading "# ..." markdown title before the Definition block must not
    // prevent discovery of the types that follow.
    #[test]
    fn discovery_works_with_markdown_header() {
        let source = r#"# Geometry

## Definition
A Point has:
 an x, which is Int.
"#;
        let mut interner = Interner::new();
        let tokens = make_tokens(source, &mut interner);

        // Token dump to stderr, useful when this test fails.
        for (i, tok) in tokens.iter().enumerate() {
            eprintln!("Token {}: {:?}", i, tok.kind);
        }

        let mut discovery = DiscoveryPass::new(&tokens, &mut interner);
        let registry = discovery.run();
        let point = interner.intern("Point");
        assert!(registry.is_type(point), "Point should be discovered even with # header");
    }

    // "X is Portable and is either:" registers a portable enum with one
    // variant per indented line.
    #[test]
    fn discovery_parses_portable_enum() {
        let source = r#"## Definition
A Command is Portable and is either:
 a Start.
 a Stop.
 a Pause.
"#;
        let mut interner = Interner::new();
        let tokens = make_tokens(source, &mut interner);

        // Token dump to stderr, useful when this test fails.
        eprintln!("Tokens for portable enum:");
        for (i, tok) in tokens.iter().enumerate() {
            eprintln!("Token {}: {:?} ({})", i, tok.kind, interner.resolve(tok.lexeme));
        }

        let mut discovery = DiscoveryPass::new(&tokens, &mut interner);
        let registry = discovery.run();

        let command = interner.intern("Command");
        assert!(registry.is_type(command), "Command should be registered as type");

        if let Some(TypeDef::Enum { variants, is_portable, .. }) = registry.get(command) {
            eprintln!("Command is_portable: {}", is_portable);
            eprintln!("Variants: {:?}", variants.iter().map(|v| interner.resolve(v.name)).collect::<Vec<_>>());
            assert!(*is_portable, "Command should be portable");
            assert_eq!(variants.len(), 3, "Command should have 3 variants");
        } else {
            panic!("Command should be an enum, got: {:?}", registry.get(command));
        }
    }

    // "X is Shared and has:" registers a shared struct; the field here has a
    // generic type written as "LastWriteWins of Int".
    #[test]
    fn discovery_parses_lww_int_field() {
        let source = r#"## Definition
A Setting is Shared and has:
 a volume, which is LastWriteWins of Int.
"#;
        let mut interner = Interner::new();
        let tokens = make_tokens(source, &mut interner);

        // Token dump to stderr, useful when this test fails.
        eprintln!("Tokens for LWW of Int:");
        for (i, tok) in tokens.iter().enumerate() {
            eprintln!("{:3}: {:?} ({})", i, tok.kind, interner.resolve(tok.lexeme));
        }

        let mut discovery = DiscoveryPass::new(&tokens, &mut interner);
        let registry = discovery.run();

        let setting = interner.intern("Setting");
        assert!(registry.is_type(setting), "Setting should be registered");

        if let Some(TypeDef::Struct { fields, is_shared, .. }) = registry.get(setting) {
            eprintln!("is_shared: {}", is_shared);
            eprintln!("Fields: {:?}", fields.len());
            for f in fields {
                eprintln!(" field: {} = {:?}", interner.resolve(f.name), f.ty);
            }
            assert!(*is_shared, "Setting should be shared");
            assert_eq!(fields.len(), 1, "Setting should have 1 field");
        } else {
            panic!("Setting should be a struct, got: {:?}", registry.get(setting));
        }
    }
}