1use std::collections::BTreeMap;
13
14use thiserror::Error;
15
16use crate::clock::ClockTime;
17use crate::lex::{LexError, Position, Spanned, Token};
18
/// A symbol name exactly as written in source, before any binding step.
///
/// Carries the name text plus an optional kind annotation.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct RawSymbolName {
    /// The symbol's name text.
    pub name: String,
    /// The kind annotation, present only when built via [`RawSymbolName::with_kind`].
    pub kind: Option<String>,
}

impl RawSymbolName {
    /// Builds an unannotated symbol name (`kind` is `None`).
    #[must_use]
    pub fn new(name: impl Into<String>) -> Self {
        let name = name.into();
        Self { name, kind: None }
    }

    /// Builds a symbol name that carries a kind annotation.
    #[must_use]
    pub fn with_kind(name: impl Into<String>, kind: impl Into<String>) -> Self {
        let mut symbol = Self::new(name);
        symbol.kind = Some(kind.into());
        symbol
    }

    /// Borrows the name text; the kind annotation is not included.
    #[must_use]
    pub fn as_str(&self) -> &str {
        self.name.as_str()
    }
}
61
/// A value as parsed from source, before any symbol binding.
///
/// `Parser::parse_value` builds each variant from a single token, except
/// [`RawValue::List`], which is built from a parenthesised run of values.
#[derive(Clone, Debug, PartialEq)]
pub enum RawValue {
    /// An unannotated symbol (from `Token::Symbol`).
    RawSymbol(RawSymbolName),
    /// A symbol with a kind annotation (from `Token::TypedSymbol`).
    TypedSymbol {
        /// The symbol name. `parse_value` builds it with `RawSymbolName::new`,
        /// so its own `kind` field is `None`; the annotation lives in `kind` below.
        name: RawSymbolName,
        /// The kind annotation text.
        kind: String,
    },
    /// A bare word (from `Token::Bareword`).
    Bareword(String),
    /// A string literal (from `Token::String`).
    String(String),
    /// An integer literal.
    Integer(i64),
    /// A floating-point literal.
    Float(f64),
    /// A boolean literal.
    Boolean(bool),
    /// The nil literal.
    Nil,
    /// A parenthesised list of values; may nest up to [`MAX_NESTING_DEPTH`].
    List(Vec<RawValue>),
    /// A timestamp token that `parse_timestamp` converted successfully.
    Timestamp(ClockTime),
    /// The original text of a timestamp token that `parse_timestamp` rejected.
    TimestampRaw(String),
}
110
/// Keyword arguments of a form: keyword name (without the leading `:`) mapped
/// to its value. A `BTreeMap`, so iteration is ordered by keyword name.
pub type KeywordArgs = BTreeMap<String, RawValue>;

/// The optional positional selector of a `query` form; any [`RawValue`].
pub type QuerySelector = RawValue;
121
/// One top-level form as parsed, with symbols still unbound.
///
/// Each variant corresponds to one opcode dispatched by `Parser::parse_form`.
#[derive(Clone, Debug, PartialEq)]
pub enum UnboundForm {
    /// `(sem s p o …)`. Keywords `src`, `c`, `v` are required; `projected` is optional.
    Sem {
        /// Subject symbol.
        s: RawSymbolName,
        /// Predicate; the parser accepts a symbol or a bareword here.
        p: RawSymbolName,
        /// Object value.
        o: RawValue,
        /// Keyword arguments.
        keywords: KeywordArgs,
    },
    /// `(epi event_id kind (participants…) location …)`.
    /// Keywords `at`, `obs`, `src`, `c` are all required.
    Epi {
        /// Event identifier symbol.
        event_id: RawSymbolName,
        /// Event kind symbol.
        kind: RawSymbolName,
        /// Parenthesised list of participant symbols (may be empty).
        participants: Vec<RawSymbolName>,
        /// Location symbol.
        location: RawSymbolName,
        /// Keyword arguments.
        keywords: KeywordArgs,
    },
    /// `(pro rule_id trigger action …)`.
    /// Keywords `scp`, `src`, `c` are required; `pre` is optional.
    Pro {
        /// Rule identifier symbol.
        rule_id: RawSymbolName,
        /// Trigger value.
        trigger: RawValue,
        /// Action value.
        action: RawValue,
        /// Keyword arguments.
        keywords: KeywordArgs,
    },
    /// `(inf s p o (derived_from…) method …)`.
    /// Keywords `c`, `v` are required; `projected` is optional.
    Inf {
        /// Subject symbol.
        s: RawSymbolName,
        /// Predicate; the parser accepts a symbol or a bareword here.
        p: RawSymbolName,
        /// Object value.
        o: RawValue,
        /// Parenthesised list of symbols.
        derived_from: Vec<RawSymbolName>,
        /// Method symbol.
        method: RawSymbolName,
        /// Keyword arguments.
        keywords: KeywordArgs,
    },
    /// `(alias a b)`.
    Alias {
        /// First symbol.
        a: RawSymbolName,
        /// Second symbol.
        b: RawSymbolName,
    },
    /// `(rename old new)`.
    Rename {
        /// The first symbol argument.
        old: RawSymbolName,
        /// The second symbol argument.
        new: RawSymbolName,
    },
    /// `(retire name …)`. The only keyword accepted is the optional `reason`.
    Retire {
        /// The symbol argument.
        name: RawSymbolName,
        /// Keyword arguments.
        keywords: KeywordArgs,
    },
    /// `(correct target_episode (epi …))`.
    Correct {
        /// The symbol argument preceding the nested form.
        target_episode: RawSymbolName,
        /// The nested form; the parser rejects anything other than [`UnboundForm::Epi`].
        corrected: Box<UnboundForm>,
    },
    /// `(promote name)`.
    Promote {
        /// The symbol argument.
        name: RawSymbolName,
    },
    /// `(query [selector] …)`.
    Query {
        /// Positional selector; `None` when the form goes straight to keywords or `)`.
        selector: Option<QuerySelector>,
        /// Keyword arguments (see `Parser::parse_query` for the accepted names).
        keywords: KeywordArgs,
    },
    /// `(episode :start …)` or `(episode :close)`.
    Episode {
        /// Which of `:start` / `:close` was written.
        action: EpisodeAction,
        /// Value of `:label` (a string literal); always `None` for `:close`.
        label: Option<String>,
        /// Value of `:parent_episode`; always `None` for `:close`.
        parent_episode: Option<RawSymbolName>,
        /// Symbols listed under `:retracts`; empty when absent or for `:close`.
        retracts: Vec<RawSymbolName>,
    },
    /// `(pin …)`, `(unpin …)`, `(authoritative_set …)` or `(authoritative_clear …)`.
    Flag {
        /// Which of the four flag opcodes was written.
        action: FlagAction,
        /// The positional symbol argument.
        memory: RawSymbolName,
        /// Value of the required `:actor` keyword; must be a symbol.
        actor: RawSymbolName,
    },
}
267
/// The action keyword of an `episode` form.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum EpisodeAction {
    /// `(episode :start …)`.
    Start,
    /// `(episode :close)`; the parser accepts no further arguments after it.
    Close,
}
277
/// Which flag opcode produced an [`UnboundForm::Flag`].
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum FlagAction {
    /// The `pin` opcode.
    Pin,
    /// The `unpin` opcode.
    Unpin,
    /// The `authoritative_set` opcode.
    AuthoritativeSet,
    /// The `authoritative_clear` opcode.
    AuthoritativeClear,
}
293
/// Everything that can go wrong while turning source text into [`UnboundForm`]s.
#[derive(Debug, Error, PartialEq)]
pub enum ParseError {
    /// The lexer failed; wraps its error unchanged.
    #[error("lex error: {0}")]
    Lex(#[from] LexError),

    /// A token was present but was not what the grammar needs at this point.
    #[error("unexpected token {found:?} at {pos:?}; expected {expected}")]
    UnexpectedToken {
        /// The token that was actually read.
        found: Token,
        /// Human-readable description of what was expected.
        expected: &'static str,
        /// Position reported for the error.
        pos: Position,
    },

    /// Input ended while a token was still required.
    #[error("unexpected end of input; expected {expected}")]
    UnexpectedEof {
        /// Human-readable description of what was expected.
        expected: &'static str,
    },

    /// The bareword at the head of a form is not a known opcode.
    #[error("unknown opcode {found:?} at {pos:?}")]
    UnknownOpcode {
        /// The unrecognised opcode text.
        found: String,
        /// Position of the opcode token.
        pos: Position,
    },

    /// A keyword is not accepted by the form it appears in, or (for flag
    /// forms) its value has the wrong shape.
    #[error("unexpected keyword {keyword:?} for form {form:?} at {pos:?}")]
    BadKeyword {
        /// The keyword name, without the leading `:`.
        keyword: String,
        /// Name of the form being parsed.
        form: &'static str,
        /// Position reported for the error.
        pos: Position,
    },

    /// The same keyword was given more than once in one form.
    #[error("duplicate keyword {keyword:?} at {pos:?}")]
    DuplicateKeyword {
        /// The repeated keyword name.
        keyword: String,
        /// Position of the repeated occurrence.
        pos: Position,
    },

    /// A keyword the form requires was not supplied.
    #[error("missing required keyword {missing:?} for form {form:?}")]
    MissingRequiredKeyword {
        /// The first required keyword found to be absent.
        missing: &'static str,
        /// Name of the form being parsed.
        form: &'static str,
    },

    /// Wrong number of positional arguments.
    /// NOTE(review): not constructed anywhere in the code visible here.
    #[error("arity mismatch for {form:?}: expected {expected}, found {found} at {pos:?}")]
    ArityMismatch {
        /// Name of the form.
        form: &'static str,
        /// Expected argument count.
        expected: usize,
        /// Actual argument count.
        found: usize,
        /// Position reported for the error.
        pos: Position,
    },

    /// A value was required but a `)` or a keyword was found instead.
    #[error("expected value at {pos:?}, got {found:?}")]
    ExpectedValue {
        /// The offending token.
        found: Token,
        /// Position of the offending token.
        pos: Position,
    },

    /// Input ended before a list's closing `)`.
    #[error("unbalanced list at {pos:?}")]
    UnbalancedList {
        /// Position of the list's opening `(`.
        pos: Position,
    },

    /// A list that must contain only symbols held some other value.
    #[error("expected symbol list element at {pos:?}, got {found:?}")]
    ExpectedSymbolInList {
        /// The non-symbol value.
        found: RawValue,
        /// Position of the list's opening `(` (elements do not carry positions).
        pos: Position,
    },

    /// Timestamp text could not be converted. `parse_value` swallows this and
    /// falls back to [`RawValue::TimestampRaw`], so it does not escape from there.
    #[error("invalid timestamp {text:?} at {pos:?}")]
    InvalidTimestamp {
        /// The rejected timestamp text.
        text: String,
        /// Position of the timestamp token.
        pos: Position,
    },

    /// Lists or `correct` forms were nested beyond [`MAX_NESTING_DEPTH`].
    #[error("nesting too deep at {pos:?}: limit is {max}")]
    NestingTooDeep {
        /// Position of the `(` that would have exceeded the limit.
        pos: Position,
        /// The limit that was hit.
        max: usize,
    },
}
422
/// Maximum recursion depth the parser allows for nested value lists and
/// nested `correct` forms before failing with [`ParseError::NestingTooDeep`].
/// Bounding it keeps recursion depth independent of input size.
pub const MAX_NESTING_DEPTH: usize = 256;
431
432pub fn parse(input: &str) -> Result<Vec<UnboundForm>, ParseError> {
448 let tokens = crate::lex::tokenize(input)?;
449 let mut parser = Parser::new(tokens);
450 let mut out = Vec::new();
451 while parser.peek().is_some() {
452 out.push(parser.parse_form()?);
453 }
454 Ok(out)
455}
456
/// Recursive-descent parser over a pre-lexed token stream.
struct Parser {
    /// All tokens of the input, in order.
    tokens: Vec<Spanned>,
    /// Index of the next token to consume.
    idx: usize,
    /// Current nesting depth of lists / `correct` forms, checked against
    /// [`MAX_NESTING_DEPTH`].
    depth: usize,
}
466
467impl Parser {
468 fn new(tokens: Vec<Spanned>) -> Self {
469 Self {
470 tokens,
471 idx: 0,
472 depth: 0,
473 }
474 }
475
476 fn peek(&self) -> Option<&Spanned> {
477 self.tokens.get(self.idx)
478 }
479
480 fn bump(&mut self) -> Option<Spanned> {
481 let t = self.tokens.get(self.idx).cloned()?;
482 self.idx += 1;
483 Some(t)
484 }
485
486 fn expect_lparen(&mut self, expected: &'static str) -> Result<Position, ParseError> {
487 let Some(spanned) = self.bump() else {
488 return Err(ParseError::UnexpectedEof { expected });
489 };
490 if spanned.token == Token::LParen {
491 Ok(spanned.position)
492 } else {
493 Err(ParseError::UnexpectedToken {
494 found: spanned.token,
495 expected,
496 pos: spanned.position,
497 })
498 }
499 }
500
501 fn expect_rparen(&mut self, expected: &'static str) -> Result<(), ParseError> {
502 let Some(spanned) = self.bump() else {
503 return Err(ParseError::UnexpectedEof { expected });
504 };
505 if spanned.token == Token::RParen {
506 Ok(())
507 } else {
508 Err(ParseError::UnexpectedToken {
509 found: spanned.token,
510 expected,
511 pos: spanned.position,
512 })
513 }
514 }
515
516 fn expect_symbol(&mut self, expected: &'static str) -> Result<RawSymbolName, ParseError> {
517 let Some(spanned) = self.bump() else {
518 return Err(ParseError::UnexpectedEof { expected });
519 };
520 match spanned.token {
521 Token::Symbol(name) => Ok(RawSymbolName::new(name)),
522 Token::TypedSymbol { name, kind } => Ok(RawSymbolName::with_kind(name, kind)),
523 other => Err(ParseError::UnexpectedToken {
524 found: other,
525 expected,
526 pos: spanned.position,
527 }),
528 }
529 }
530
531 fn expect_predicate(&mut self, expected: &'static str) -> Result<RawSymbolName, ParseError> {
535 let Some(spanned) = self.bump() else {
536 return Err(ParseError::UnexpectedEof { expected });
537 };
538 match spanned.token {
539 Token::Symbol(name) | Token::Bareword(name) => Ok(RawSymbolName::new(name)),
540 Token::TypedSymbol { name, kind } => Ok(RawSymbolName::with_kind(name, kind)),
541 other => Err(ParseError::UnexpectedToken {
542 found: other,
543 expected,
544 pos: spanned.position,
545 }),
546 }
547 }
548
549 fn parse_form(&mut self) -> Result<UnboundForm, ParseError> {
550 let open = self.expect_lparen("top-level `(`")?;
551 let Some(head) = self.bump() else {
552 return Err(ParseError::UnexpectedEof {
553 expected: "opcode after `(`",
554 });
555 };
556 let opcode = match head.token {
557 Token::Bareword(ref b) => b.clone(),
558 other => {
559 return Err(ParseError::UnexpectedToken {
560 found: other,
561 expected: "opcode bareword at form head",
562 pos: head.position,
563 });
564 }
565 };
566 match opcode.as_str() {
567 "sem" => self.parse_sem(open),
568 "epi" => self.parse_epi(open),
569 "pro" => self.parse_pro(open),
570 "inf" => self.parse_inf(open),
571 "alias" => self.parse_alias(),
572 "rename" => self.parse_rename(),
573 "retire" => self.parse_retire(),
574 "correct" => self.parse_correct(open),
575 "promote" => self.parse_promote(),
576 "query" => self.parse_query(),
577 "episode" => self.parse_episode(),
578 "pin" => self.parse_flag(FlagAction::Pin, "pin"),
579 "unpin" => self.parse_flag(FlagAction::Unpin, "unpin"),
580 "authoritative_set" => {
584 self.parse_flag(FlagAction::AuthoritativeSet, "authoritative_set")
585 }
586 "authoritative_clear" => {
587 self.parse_flag(FlagAction::AuthoritativeClear, "authoritative_clear")
588 }
589 _ => Err(ParseError::UnknownOpcode {
590 found: opcode,
591 pos: head.position,
592 }),
593 }
594 }
595
596 fn parse_sem(&mut self, _open: Position) -> Result<UnboundForm, ParseError> {
599 let s = self.expect_symbol("sem subject")?;
600 let p = self.expect_predicate("sem predicate")?;
601 let o = self.parse_value("sem object")?;
602 let keywords = self.parse_keywords("sem", &["src", "c", "v", "projected"])?;
603 Self::require_keywords("sem", &keywords, &["src", "c", "v"])?;
604 self.expect_rparen("closing `)` for sem")?;
605 Ok(UnboundForm::Sem { s, p, o, keywords })
606 }
607
608 fn parse_epi(&mut self, open: Position) -> Result<UnboundForm, ParseError> {
609 let event_id = self.expect_symbol("epi event_id")?;
610 let kind = self.expect_symbol("epi kind")?;
611 let participants = self.parse_symbol_list(open, "epi participants")?;
612 let location = self.expect_symbol("epi location")?;
613 let keywords = self.parse_keywords("epi", &["at", "obs", "src", "c"])?;
614 Self::require_keywords("epi", &keywords, &["at", "obs", "src", "c"])?;
615 self.expect_rparen("closing `)` for epi")?;
616 Ok(UnboundForm::Epi {
617 event_id,
618 kind,
619 participants,
620 location,
621 keywords,
622 })
623 }
624
625 fn parse_pro(&mut self, _open: Position) -> Result<UnboundForm, ParseError> {
626 let rule_id = self.expect_symbol("pro rule_id")?;
627 let trigger = self.parse_value("pro trigger")?;
628 let action = self.parse_value("pro action")?;
629 let keywords = self.parse_keywords("pro", &["scp", "src", "c", "pre"])?;
630 Self::require_keywords("pro", &keywords, &["scp", "src", "c"])?;
631 self.expect_rparen("closing `)` for pro")?;
632 Ok(UnboundForm::Pro {
633 rule_id,
634 trigger,
635 action,
636 keywords,
637 })
638 }
639
640 fn parse_inf(&mut self, open: Position) -> Result<UnboundForm, ParseError> {
641 let s = self.expect_symbol("inf subject")?;
642 let p = self.expect_predicate("inf predicate")?;
643 let o = self.parse_value("inf object")?;
644 let derived_from = self.parse_symbol_list(open, "inf derived_from")?;
645 let method = self.expect_symbol("inf method")?;
646 let keywords = self.parse_keywords("inf", &["c", "v", "projected"])?;
647 Self::require_keywords("inf", &keywords, &["c", "v"])?;
648 self.expect_rparen("closing `)` for inf")?;
649 Ok(UnboundForm::Inf {
650 s,
651 p,
652 o,
653 derived_from,
654 method,
655 keywords,
656 })
657 }
658
659 fn parse_alias(&mut self) -> Result<UnboundForm, ParseError> {
660 let a = self.expect_symbol("alias first arg")?;
661 let b = self.expect_symbol("alias second arg")?;
662 self.expect_rparen("closing `)` for alias")?;
663 Ok(UnboundForm::Alias { a, b })
664 }
665
666 fn parse_rename(&mut self) -> Result<UnboundForm, ParseError> {
667 let old = self.expect_symbol("rename old name")?;
668 let new = self.expect_symbol("rename new name")?;
669 self.expect_rparen("closing `)` for rename")?;
670 Ok(UnboundForm::Rename { old, new })
671 }
672
673 fn parse_retire(&mut self) -> Result<UnboundForm, ParseError> {
674 let name = self.expect_symbol("retire target")?;
675 let keywords = self.parse_keywords("retire", &["reason"])?;
676 self.expect_rparen("closing `)` for retire")?;
677 Ok(UnboundForm::Retire { name, keywords })
678 }
679
680 fn parse_correct(&mut self, open: Position) -> Result<UnboundForm, ParseError> {
681 let target_episode = self.expect_symbol("correct target_episode")?;
682 if self.depth >= MAX_NESTING_DEPTH {
686 return Err(ParseError::NestingTooDeep {
687 pos: open,
688 max: MAX_NESTING_DEPTH,
689 });
690 }
691 self.depth += 1;
692 let inner = self.parse_form();
693 self.depth -= 1;
694 let corrected = Box::new(inner?);
695 if !matches!(&*corrected, UnboundForm::Epi { .. }) {
696 return Err(ParseError::UnexpectedToken {
697 found: Token::LParen,
698 expected: "corrected body must be an `epi` form",
699 pos: Position::start(),
700 });
701 }
702 self.expect_rparen("closing `)` for correct")?;
703 Ok(UnboundForm::Correct {
704 target_episode,
705 corrected,
706 })
707 }
708
709 fn parse_promote(&mut self) -> Result<UnboundForm, ParseError> {
710 let name = self.expect_symbol("promote target")?;
711 self.expect_rparen("closing `)` for promote")?;
712 Ok(UnboundForm::Promote { name })
713 }
714
715 #[allow(clippy::too_many_lines)]
716 fn parse_episode(&mut self) -> Result<UnboundForm, ParseError> {
717 let head = self.bump().ok_or(ParseError::UnexpectedEof {
721 expected: "`:start` or `:close`",
722 })?;
723 let action_name = match head.token {
724 Token::Keyword(name) => name,
725 other => {
726 return Err(ParseError::UnexpectedToken {
727 found: other,
728 expected: "`:start` or `:close`",
729 pos: head.position,
730 });
731 }
732 };
733 let action = match action_name.as_str() {
734 "start" => EpisodeAction::Start,
735 "close" => EpisodeAction::Close,
736 other => {
737 return Err(ParseError::BadKeyword {
738 keyword: other.to_string(),
739 form: "episode",
740 pos: head.position,
741 });
742 }
743 };
744
745 if matches!(action, EpisodeAction::Close) {
746 self.expect_rparen("closing `)` for episode :close")?;
748 return Ok(UnboundForm::Episode {
749 action,
750 label: None,
751 parent_episode: None,
752 retracts: Vec::new(),
753 });
754 }
755
756 let mut label: Option<String> = None;
759 let mut parent_episode: Option<RawSymbolName> = None;
760 let mut retracts: Option<Vec<RawSymbolName>> = None;
761
762 while let Some(spanned) = self.peek() {
763 match &spanned.token {
764 Token::RParen => break,
765 Token::Keyword(k) => {
766 let key = k.clone();
767 let pos = spanned.position;
768 self.bump();
769 match key.as_str() {
770 "label" => {
771 if label.is_some() {
772 return Err(ParseError::DuplicateKeyword { keyword: key, pos });
773 }
774 let Some(v) = self.bump() else {
775 return Err(ParseError::UnexpectedEof {
776 expected: "`:label` string value",
777 });
778 };
779 match v.token {
780 Token::String(s) => label = Some(s),
781 other => {
782 return Err(ParseError::UnexpectedToken {
783 found: other,
784 expected: "string literal for `:label`",
785 pos: v.position,
786 });
787 }
788 }
789 }
790 "parent_episode" => {
791 if parent_episode.is_some() {
792 return Err(ParseError::DuplicateKeyword { keyword: key, pos });
793 }
794 parent_episode = Some(self.expect_symbol("`:parent_episode` symbol")?);
795 }
796 "retracts" => {
797 if retracts.is_some() {
798 return Err(ParseError::DuplicateKeyword { keyword: key, pos });
799 }
800 let list_open = self.expect_lparen("`:retracts (`")?;
804 retracts = Some(self.parse_retracts_list(list_open)?);
805 }
806 _ => {
807 return Err(ParseError::BadKeyword {
808 form: "episode :start",
809 keyword: key,
810 pos,
811 });
812 }
813 }
814 }
815 _ => {
816 let t = spanned.token.clone();
817 let pos = spanned.position;
818 return Err(ParseError::UnexpectedToken {
819 found: t,
820 expected: "keyword argument in `episode :start`",
821 pos,
822 });
823 }
824 }
825 }
826
827 self.expect_rparen("closing `)` for episode :start")?;
828 Ok(UnboundForm::Episode {
829 action,
830 label,
831 parent_episode,
832 retracts: retracts.unwrap_or_default(),
833 })
834 }
835
836 fn parse_flag(
837 &mut self,
838 action: FlagAction,
839 form: &'static str,
840 ) -> Result<UnboundForm, ParseError> {
841 let memory = self.expect_symbol(match action {
845 FlagAction::Pin => "pin target",
846 FlagAction::Unpin => "unpin target",
847 FlagAction::AuthoritativeSet => "authoritative_set target",
848 FlagAction::AuthoritativeClear => "authoritative_clear target",
849 })?;
850 let keywords = self.parse_keywords(form, &["actor"])?;
851 Self::require_keywords(form, &keywords, &["actor"])?;
852 self.expect_rparen("closing `)` for flag form")?;
853 let actor = match keywords.get("actor") {
854 Some(RawValue::RawSymbol(s) | RawValue::TypedSymbol { name: s, .. }) => s.clone(),
855 _ => {
856 return Err(ParseError::BadKeyword {
857 keyword: "actor".into(),
858 form,
859 pos: Position::start(),
860 });
861 }
862 };
863 Ok(UnboundForm::Flag {
864 action,
865 memory,
866 actor,
867 })
868 }
869
870 fn parse_retracts_list(&mut self, open: Position) -> Result<Vec<RawSymbolName>, ParseError> {
871 let raw = self.parse_value_list_body(open)?;
872 raw.into_iter()
873 .map(|v| match v {
874 RawValue::RawSymbol(name) | RawValue::TypedSymbol { name, .. } => Ok(name),
875 other => Err(ParseError::ExpectedSymbolInList {
876 found: other,
877 pos: open,
878 }),
879 })
880 .collect()
881 }
882
883 fn parse_query(&mut self) -> Result<UnboundForm, ParseError> {
884 let selector = if matches!(self.peek().map(|s| &s.token), Some(Token::Keyword(_)))
887 || matches!(self.peek().map(|s| &s.token), Some(Token::RParen))
888 {
889 None
890 } else {
891 Some(self.parse_value("query selector")?)
892 };
893 let keywords = self.parse_keywords(
894 "query",
895 &[
896 "kind",
897 "s",
898 "p",
899 "o",
900 "in_episode",
901 "after_episode",
902 "before_episode",
903 "episode_chain",
904 "as_of",
905 "as_committed",
906 "include_retired",
907 "include_projected",
908 "confidence_threshold",
909 "limit",
910 "explain_filtered",
911 "show_framing",
912 "debug_mode",
913 "read_after",
914 "timeout_ms",
915 ],
916 )?;
917 self.expect_rparen("closing `)` for query")?;
918 Ok(UnboundForm::Query { selector, keywords })
919 }
920
921 fn parse_value(&mut self, expected: &'static str) -> Result<RawValue, ParseError> {
924 let Some(spanned) = self.bump() else {
925 return Err(ParseError::UnexpectedEof { expected });
926 };
927 match spanned.token {
928 Token::Symbol(name) => Ok(RawValue::RawSymbol(RawSymbolName::new(name))),
929 Token::TypedSymbol { name, kind } => Ok(RawValue::TypedSymbol {
930 name: RawSymbolName::new(name),
931 kind,
932 }),
933 Token::Bareword(b) => Ok(RawValue::Bareword(b)),
934 Token::String(s) => Ok(RawValue::String(s)),
935 Token::Integer(i) => Ok(RawValue::Integer(i)),
936 Token::Float(f) => Ok(RawValue::Float(f)),
937 Token::Boolean(b) => Ok(RawValue::Boolean(b)),
938 Token::Nil => Ok(RawValue::Nil),
939 Token::Timestamp(text) => parse_timestamp(&text, spanned.position)
940 .map(RawValue::Timestamp)
941 .or(Ok(RawValue::TimestampRaw(text))),
942 Token::LParen => {
943 if self.depth >= MAX_NESTING_DEPTH {
948 return Err(ParseError::NestingTooDeep {
949 pos: spanned.position,
950 max: MAX_NESTING_DEPTH,
951 });
952 }
953 self.depth += 1;
954 let result = self.parse_value_list_body(spanned.position);
955 self.depth -= 1;
956 let inner = result?;
957 Ok(RawValue::List(inner))
958 }
959 other @ (Token::RParen | Token::Keyword(_)) => Err(ParseError::ExpectedValue {
960 found: other,
961 pos: spanned.position,
962 }),
963 }
964 }
965
966 fn parse_value_list_body(&mut self, open: Position) -> Result<Vec<RawValue>, ParseError> {
967 let mut out = Vec::new();
968 loop {
969 match self.peek().map(|s| &s.token) {
970 None => return Err(ParseError::UnbalancedList { pos: open }),
971 Some(Token::RParen) => {
972 self.bump();
973 return Ok(out);
974 }
975 _ => {
976 out.push(self.parse_value("list element")?);
977 }
978 }
979 }
980 }
981
982 fn parse_symbol_list(
983 &mut self,
984 _open: Position,
985 expected: &'static str,
986 ) -> Result<Vec<RawSymbolName>, ParseError> {
987 let list_open = self.expect_lparen(expected)?;
988 let raw = self.parse_value_list_body(list_open)?;
989 raw.into_iter()
990 .map(|v| match v {
991 RawValue::RawSymbol(name) | RawValue::TypedSymbol { name, .. } => Ok(name),
992 other => Err(ParseError::ExpectedSymbolInList {
993 found: other,
994 pos: list_open,
995 }),
996 })
997 .collect()
998 }
999
1000 fn parse_keywords(
1001 &mut self,
1002 form: &'static str,
1003 allowed: &[&str],
1004 ) -> Result<KeywordArgs, ParseError> {
1005 let mut out = BTreeMap::new();
1006 while let Some(spanned) = self.peek() {
1007 match &spanned.token {
1008 Token::RParen => break,
1009 Token::Keyword(k) => {
1010 let key = k.clone();
1011 let pos = spanned.position;
1012 if !allowed.iter().any(|allowed| *allowed == key) {
1013 return Err(ParseError::BadKeyword {
1014 keyword: key,
1015 form,
1016 pos,
1017 });
1018 }
1019 self.bump(); let value = self.parse_value("keyword value")?;
1021 if out.insert(key.clone(), value).is_some() {
1022 return Err(ParseError::DuplicateKeyword { keyword: key, pos });
1023 }
1024 }
1025 other => {
1026 return Err(ParseError::UnexpectedToken {
1027 found: other.clone(),
1028 expected: "`:keyword value` pair or closing `)`",
1029 pos: spanned.position,
1030 });
1031 }
1032 }
1033 }
1034 Ok(out)
1035 }
1036
1037 fn require_keywords(
1038 form: &'static str,
1039 keywords: &KeywordArgs,
1040 required: &[&'static str],
1041 ) -> Result<(), ParseError> {
1042 for k in required {
1043 if !keywords.contains_key(*k) {
1044 return Err(ParseError::MissingRequiredKeyword { missing: k, form });
1045 }
1046 }
1047 Ok(())
1048 }
1049}
1050
1051fn parse_timestamp(text: &str, pos: Position) -> Result<ClockTime, ParseError> {
1052 let bad = || ParseError::InvalidTimestamp {
1055 text: text.to_string(),
1056 pos,
1057 };
1058 if text.len() == 10 {
1059 let millis = date_to_millis(text).ok_or_else(bad)?;
1061 return ClockTime::try_from_millis(millis).map_err(|_| bad());
1062 }
1063 if text.len() < 20 || !text.is_char_boundary(10) || &text[10..11] != "T" {
1065 return Err(bad());
1066 }
1067 let (date_part, rest) = text.split_at(10);
1068 let rest = rest
1070 .strip_prefix('T')
1071 .ok_or_else(bad)?
1072 .trim_end_matches('Z');
1073 let (hms_part, frac_millis) = if let Some(dot) = rest.find('.') {
1074 let (hms, frac) = rest.split_at(dot);
1075 let frac = &frac[1..];
1076 if frac.is_empty() || !frac.chars().all(|c| c.is_ascii_digit()) {
1077 return Err(bad());
1078 }
1079 let millis_str = if frac.len() >= 3 { &frac[..3] } else { frac };
1080 let mut millis: u64 = millis_str.parse().map_err(|_| bad())?;
1081 for _ in millis_str.len()..3 {
1083 millis *= 10;
1084 }
1085 (hms, millis)
1086 } else {
1087 (rest, 0_u64)
1088 };
1089 let parts: Vec<&str> = hms_part.split(':').collect();
1090 if parts.len() != 3 {
1091 return Err(bad());
1092 }
1093 let hours: u64 = parts[0].parse().map_err(|_| bad())?;
1094 let minutes: u64 = parts[1].parse().map_err(|_| bad())?;
1095 let seconds: u64 = parts[2].parse().map_err(|_| bad())?;
1096 let date_millis = date_to_millis(date_part).ok_or_else(bad)?;
1097 let total = date_millis + hours * 3_600_000 + minutes * 60_000 + seconds * 1_000 + frac_millis;
1098 ClockTime::try_from_millis(total).map_err(|_| bad())
1099}
1100
/// Converts a `YYYY-MM-DD` calendar date into milliseconds since
/// 1970-01-01T00:00:00 (midnight at the start of that date).
///
/// Returns `None` when:
///
/// * the text is not exactly ten bytes shaped `dddd-dd-dd` with ASCII digits
///   (signs such as `+1` are rejected);
/// * the month is outside `01..=12`, or the day does not exist in that month
///   of the proleptic Gregorian calendar (e.g. `2023-02-29`);
/// * the date is before 1970-01-01.
fn date_to_millis(date: &str) -> Option<u64> {
    let b = date.as_bytes();
    if b.len() != 10 || b[4] != b'-' || b[7] != b'-' {
        return None;
    }
    // Read a fixed-width run of ASCII digits. Integer `parse` is not used
    // because it would also accept a leading `+`.
    let digits = |range: std::ops::Range<usize>| -> Option<u32> {
        b[range].iter().try_fold(0_u32, |acc, &byte| {
            byte.is_ascii_digit()
                .then(|| acc * 10 + u32::from(byte - b'0'))
        })
    };
    let year = digits(0..4)?;
    let month = digits(5..7)?;
    let day = digits(8..10)?;

    let is_leap = year % 4 == 0 && (year % 100 != 0 || year % 400 == 0);
    let days_in_month = match month {
        1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
        4 | 6 | 9 | 11 => 30,
        2 if is_leap => 29,
        2 => 28,
        _ => return None,
    };
    if !(1..=days_in_month).contains(&day) {
        return None;
    }

    // Days-from-civil computation: shift the year so it starts on 1 March,
    // which puts the leap day at the end of the shifted year.
    let year_adjusted = i64::from(year) - i64::from(month <= 2);
    let era = year_adjusted.div_euclid(400);
    let year_of_era = u32::try_from(year_adjusted.rem_euclid(400)).ok()?;
    let shifted_month = if month > 2 { month - 3 } else { month + 9 };
    let day_of_year = (153 * shifted_month + 2) / 5 + day - 1;
    let day_of_era = year_of_era * 365 + year_of_era / 4 - year_of_era / 100 + day_of_year;
    // 719_468 is the number of days from 0000-03-01 to 1970-01-01.
    let days = era * 146_097 + i64::from(day_of_era) - 719_468;
    // Dates before the epoch make `days` negative and are rejected here.
    let days = u64::try_from(days).ok()?;
    Some(days * 86_400_000)
}
1135
// Unit tests for the parser: one happy-path test per form family, the main
// keyword error cases, timestamp conversion, and the nesting-depth limit.
#[cfg(test)]
mod tests {
    use super::*;

    // `promote` takes exactly one symbol.
    #[test]
    fn promote_form_is_single_symbol() {
        let forms = parse("(promote @scratch_42)").unwrap();
        assert_eq!(
            forms[0],
            UnboundForm::Promote {
                name: RawSymbolName::new("scratch_42"),
            }
        );
    }

    // `alias` and `rename` both take two symbols, kept in source order.
    #[test]
    fn alias_and_rename() {
        let a = parse("(alias @a @b)").unwrap();
        assert_eq!(
            a[0],
            UnboundForm::Alias {
                a: RawSymbolName::new("a"),
                b: RawSymbolName::new("b"),
            }
        );
        let r = parse("(rename @old @new)").unwrap();
        assert_eq!(
            r[0],
            UnboundForm::Rename {
                old: RawSymbolName::new("old"),
                new: RawSymbolName::new("new"),
            }
        );
    }

    // A complete `sem` form; the predicate is a bareword and the date-only
    // `:v` value is converted to a `Timestamp`.
    #[test]
    fn sem_form_with_all_required_keywords() {
        let src = r#"(sem @alice email "alice@example.com" :src @profile :c 0.95 :v 2024-01-15)"#;
        let forms = parse(src).unwrap();
        let UnboundForm::Sem { s, p, o, keywords } = &forms[0] else {
            panic!("expected sem form");
        };
        assert_eq!(s, &RawSymbolName::new("alice"));
        assert_eq!(p, &RawSymbolName::new("email"));
        assert_eq!(o, &RawValue::String("alice@example.com".into()));
        assert!(keywords.contains_key("src"));
        assert!(keywords.contains_key("c"));
        assert!(keywords.contains_key("v"));
        assert!(matches!(keywords.get("v"), Some(RawValue::Timestamp(_))));
    }

    // Omitting `:v` from `sem` is reported by name.
    #[test]
    fn sem_missing_required_keyword_errors() {
        let src = r#"(sem @alice email "a" :src @profile :c 0.95)"#;
        let err = parse(src).unwrap_err();
        assert!(matches!(
            err,
            ParseError::MissingRequiredKeyword {
                missing: "v",
                form: "sem"
            }
        ));
    }

    // A head bareword that is not a known opcode is rejected.
    #[test]
    fn unknown_opcode_errors() {
        let err = parse("(xyz @a @b)").unwrap_err();
        assert!(matches!(err, ParseError::UnknownOpcode { .. }));
    }

    // A keyword outside the form's allowed set is rejected.
    #[test]
    fn unknown_keyword_errors() {
        let src = r#"(sem @a b "x" :src @y :c 0.5 :v 2024-01-15 :bogus 1)"#;
        let err = parse(src).unwrap_err();
        assert!(matches!(err, ParseError::BadKeyword { .. }));
    }

    // Repeating a keyword is rejected.
    #[test]
    fn duplicate_keyword_errors() {
        let src = r#"(sem @a b "x" :src @y :src @y :c 0.5 :v 2024-01-15)"#;
        let err = parse(src).unwrap_err();
        assert!(matches!(err, ParseError::DuplicateKeyword { .. }));
    }

    // `epi` positional layout: event id, kind, participant list, location.
    #[test]
    fn epi_parses_participants_list() {
        let src = r"(epi @ep_001 @rename (@old @new) @github
            :at 2026-04-17T10:00:00Z :obs 2026-04-17T10:00:00Z
            :src @alice :c 1.0)";
        let forms = parse(src).unwrap();
        let UnboundForm::Epi {
            event_id,
            kind,
            participants,
            location,
            ..
        } = &forms[0]
        else {
            panic!("expected epi form");
        };
        assert_eq!(event_id, &RawSymbolName::new("ep_001"));
        assert_eq!(kind, &RawSymbolName::new("rename"));
        assert_eq!(participants.len(), 2);
        assert_eq!(participants[0], RawSymbolName::new("old"));
        assert_eq!(location, &RawSymbolName::new("github"));
    }

    // `:pre` is optional on `pro`, may appear before the required keywords,
    // and may be `nil`.
    #[test]
    fn pro_with_optional_precondition() {
        let src = r#"(pro @rule_1 "agent about to write" "route via librarian"
            :pre nil :scp @mimir :src @agents_md :c 1.0)"#;
        let forms = parse(src).unwrap();
        let UnboundForm::Pro {
            rule_id, keywords, ..
        } = &forms[0]
        else {
            panic!("expected pro form");
        };
        assert_eq!(rule_id, &RawSymbolName::new("rule_1"));
        assert_eq!(keywords.get("pre"), Some(&RawValue::Nil));
    }

    // `inf` carries a `derived_from` list and a method symbol positionally.
    #[test]
    fn inf_requires_method_and_derived_from() {
        let src = r"(inf @a p @b (@m1 @m2) @pattern_summarize :c 0.7 :v 2024-03-15)";
        let forms = parse(src).unwrap();
        let UnboundForm::Inf {
            derived_from,
            method,
            ..
        } = &forms[0]
        else {
            panic!("expected inf form");
        };
        assert_eq!(derived_from.len(), 2);
        assert_eq!(method, &RawSymbolName::new("pattern_summarize"));
    }

    // A query that starts with a keyword has no positional selector.
    #[test]
    fn query_with_keywords_only() {
        let src = "(query :s @alice :p email :debug_mode true)";
        let forms = parse(src).unwrap();
        let UnboundForm::Query {
            selector, keywords, ..
        } = &forms[0]
        else {
            panic!("expected query form");
        };
        assert!(selector.is_none());
        assert_eq!(keywords.get("debug_mode"), Some(&RawValue::Boolean(true)));
    }

    // A leading non-keyword value becomes the selector.
    #[test]
    fn query_with_positional_selector() {
        let src = "(query @mem_x)";
        let forms = parse(src).unwrap();
        let UnboundForm::Query {
            selector,
            keywords: _,
        } = &forms[0]
        else {
            panic!("expected query form");
        };
        assert_eq!(
            selector.as_ref(),
            Some(&RawValue::RawSymbol(RawSymbolName::new("mem_x"))),
        );
    }

    // A date-only timestamp converts to midnight of that date.
    #[test]
    fn timestamp_converts_to_clocktime() {
        let src = r#"(sem @a b "x" :src @y :c 0.5 :v 2024-01-15)"#;
        let forms = parse(src).unwrap();
        let UnboundForm::Sem { keywords, .. } = &forms[0] else {
            panic!();
        };
        match keywords.get("v") {
            Some(RawValue::Timestamp(ct)) => {
                // 19_737 whole days x 86_400_000 ms per day.
                assert_eq!(ct.as_millis(), 1_705_276_800_000);
            }
            other => panic!("expected Timestamp, got {other:?}"),
        }
    }

    // Several forms separated by whitespace parse into one vector.
    #[test]
    fn multiple_forms_in_one_input() {
        let src = r"
            (alias @a @b)
            (rename @old @new)
            (promote @tmp)
        ";
        let forms = parse(src).unwrap();
        assert_eq!(forms.len(), 3);
    }

    // Builds a `sem` form whose object is `0` wrapped in `depth` nested lists.
    fn nested_value_input(depth: usize) -> String {
        let opens = "(".repeat(depth);
        let closes = ")".repeat(depth);
        format!("(sem @s @p {opens}0{closes} :src @observation :c 0.5 :v 2024-01-15)")
    }

    // Exactly `MAX_NESTING_DEPTH` nested lists is still accepted.
    #[test]
    fn parser_accepts_value_nesting_at_limit() {
        let src = nested_value_input(MAX_NESTING_DEPTH);
        let forms = parse(&src).expect("must accept depth at the limit");
        assert_eq!(forms.len(), 1);
    }

    // One more level than the limit is rejected, and the error reports the limit.
    #[test]
    fn parser_rejects_value_nesting_one_over_limit() {
        let src = nested_value_input(MAX_NESTING_DEPTH + 1);
        let err = parse(&src).expect_err("must reject depth over the limit");
        match err {
            ParseError::NestingTooDeep { max, .. } => {
                assert_eq!(max, MAX_NESTING_DEPTH);
            }
            other => panic!("expected NestingTooDeep, got {other:?}"),
        }
    }

    // Nesting far beyond the limit must fail with an error rather than
    // exhausting the stack.
    #[test]
    fn parser_rejects_pathologically_deep_value_nesting_without_stack_overflow() {
        let src = nested_value_input(MAX_NESTING_DEPTH * 10);
        let err = parse(&src).expect_err("must reject pathological nesting");
        assert!(
            matches!(err, ParseError::NestingTooDeep { .. }),
            "expected NestingTooDeep, got {err:?}"
        );
    }

    // The same depth limit applies to `correct` forms nested in each other.
    #[test]
    fn parser_rejects_nested_correct_forms_past_limit() {
        let depth = MAX_NESTING_DEPTH + 1;
        let mut src = String::new();
        for i in 0..depth {
            std::fmt::Write::write_fmt(&mut src, format_args!("(correct @e{i} "))
                .expect("write to String never fails");
        }
        src.push_str("(epi @ev @kind () @loc :at 2024-01-15 :obs 2024-01-15 :src @y :c 0.5)");
        for _ in 0..depth {
            src.push(')');
        }
        let err = parse(&src).expect_err("must reject deep `correct` nesting");
        assert!(
            matches!(err, ParseError::NestingTooDeep { .. }),
            "expected NestingTooDeep, got {err:?}"
        );
    }
}