Skip to main content

logicaffeine_language/
token.rs

1//! Token types for the LOGOS lexer and parser.
2//!
3//! This module defines the vocabulary of the LOGOS language at the token level.
4//! Tokens represent the atomic syntactic units produced by the lexer and consumed
5//! by the parser.
6//!
7//! ## Token Categories
8//!
9//! | Category | Examples | Description |
10//! |----------|----------|-------------|
11//! | **Quantifiers** | every, some, no | Bind variables over domains |
12//! | **Determiners** | the, a, this | Select referents |
13//! | **Nouns** | cat, philosopher | Predicates over individuals |
14//! | **Verbs** | runs, loves | Relations between arguments |
15//! | **Adjectives** | red, happy | Modify noun denotations |
16//! | **Connectives** | and, or, implies | Combine propositions |
17//! | **Pronouns** | he, she, it | Resolve to antecedents |
18//!
19//! ## Block Types
20//!
21//! LOGOS uses markdown-style block headers for structured documents:
22//!
23//! - `## Theorem`: Declares a proposition to be proved
24//! - `## Proof`: Contains the proof steps
25//! - `## Definition`: Introduces new terminology
26//! - `## Main`: Program entry point
27
28use logicaffeine_base::Symbol;
29use logicaffeine_lexicon::{Aspect, Case, Definiteness, Gender, Number, Time, VerbClass};
30
31#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
32pub struct Span {
33    pub start: usize,
34    pub end: usize,
35}
36
37impl Span {
38    pub fn new(start: usize, end: usize) -> Self {
39        Self { start, end }
40    }
41}
42
43#[derive(Debug, Clone, Copy, PartialEq, Eq)]
44pub enum PresupKind {
45    Stop,
46    Start,
47    Regret,
48    Continue,
49    Realize,
50    Know,
51}
52
53#[derive(Debug, Clone, Copy, PartialEq, Eq)]
54pub enum FocusKind {
55    Only,
56    Even,
57    Just,
58}
59
60#[derive(Debug, Clone, Copy, PartialEq, Eq)]
61pub enum MeasureKind {
62    Much,
63    Little,
64}
65
66/// Calendar time units for Span expressions.
67///
68/// These represent variable-length calendar durations, as opposed to
69/// fixed SI time units (ns, ms, s, etc.) used in Duration.
70#[derive(Debug, Clone, Copy, PartialEq, Eq)]
71pub enum CalendarUnit {
72    Day,
73    Week,
74    Month,
75    Year,
76}
77
78/// Document structure block type markers.
79///
80/// LOGOS uses markdown-style `## Header` syntax to delimit different
81/// sections of a program or proof document.
82#[derive(Debug, Clone, Copy, PartialEq, Eq)]
83pub enum BlockType {
84    /// `## Theorem` - Declares a proposition to be proved.
85    Theorem,
86    /// `## Main` - Program entry point for imperative code.
87    Main,
88    /// `## Definition` - Introduces new terminology or type definitions.
89    Definition,
90    /// `## Proof` - Contains proof steps for a theorem.
91    Proof,
92    /// `## Example` - Illustrative examples.
93    Example,
94    /// `## Logic` - Direct logical notation input.
95    Logic,
96    /// `## Note` - Explanatory documentation.
97    Note,
98    /// `## To` - Function definition block.
99    Function,
100    /// Inline type definition: `## A Point has:` or `## A Color is one of:`.
101    TypeDef,
102    /// `## Policy` - Security policy rule definitions.
103    Policy,
104    /// `## Requires` - External crate dependency declarations.
105    Requires,
106    /// `## No` - Optimization annotation (followed by Memo, TCO, Peephole, Borrow, or Optimize).
107    No,
108}
109
110#[derive(Debug, Clone, PartialEq, Eq)]
111pub enum TokenType {
112    // Document Structure
113    BlockHeader { block_type: BlockType },
114
115    // Quantifiers
116    All,
117    No,
118    Some,
119    Any,
120    Both, // Correlative conjunction marker: "both X and Y"
121    Most,
122    Few,
123    Many,
124    Cardinal(u32),
125    AtLeast(u32),
126    AtMost(u32),
127
128    // Negative Polarity Items (NPIs)
129    Anything,
130    Anyone,
131    Nothing,
132    Nobody,
133    NoOne,
134    Nowhere,
135    Ever,
136    Never,
137
138    // Logical Connectives
139    And,
140    Or,
141    If,
142    Then,
143    Not,
144    Iff,
145    Because,
146
147    // Modal Operators
148    Must,
149    Shall,
150    Should,
151    Can,
152    May,
153    Cannot,
154    Would,
155    Could,
156    Might,
157    Had,
158
159    // Imperative Statement Keywords
160    Let,
161    Set,
162    Return,
163    /// Exits the innermost while loop: `Break.`
164    Break,
165    Be,
166    While,
167    Repeat,
168    For,
169    In,
170    From,
171    Assert,
172    /// Documented assertion with justification string.
173    Trust,
174    Otherwise,
175    /// Alias for `Otherwise` - Pythonic else clause
176    Else,
177    /// Python-style else-if shorthand
178    Elif,
179    Call,
180    /// Constructor keyword for struct instantiation.
181    New,
182    /// Sum type definition keyword.
183    Either,
184    /// Pattern matching statement keyword.
185    Inspect,
186    /// Native function modifier for FFI bindings.
187    Native,
188    /// Escape hatch header keyword: "Escape to Rust:"
189    Escape,
190    /// Raw code block captured verbatim from an escape hatch body.
191    /// The Symbol holds the interned raw foreign code (indentation-stripped).
192    EscapeBlock(Symbol),
193
194    // Theorem Keywords
195    /// Premise marker in theorem blocks.
196    Given,
197    /// Goal marker in theorem blocks.
198    Prove,
199    /// Automatic proof strategy directive.
200    Auto,
201
202    // IO Keywords
203    /// "Read input from..."
204    Read,
205    /// "Write x to file..."
206    Write,
207    /// "...from the console"
208    Console,
209    /// "...from file..." or "...to file..."
210    File,
211
212    // Ownership Keywords (Move/Borrow Semantics)
213    /// Move ownership: "Give x to processor"
214    Give,
215    /// Immutable borrow: "Show x to console"
216    Show,
217
218    // Collection Operations
219    /// "Push x to items"
220    Push,
221    /// "Pop from items"
222    Pop,
223    /// "copy of slice" → slice.to_vec()
224    Copy,
225    /// "items 1 through 3" → inclusive slice
226    Through,
227    /// "length of items" → items.len()
228    Length,
229    /// "items at i" → `items[i]`
230    At,
231
232    // Set Operations
233    /// "Add x to set" (insert)
234    Add,
235    /// "Remove x from set"
236    Remove,
237    /// "set contains x"
238    Contains,
239    /// "a union b"
240    Union,
241    /// "a intersection b"
242    Intersection,
243
244    // Memory Management (Zones)
245    /// "Inside a new zone..."
246    Inside,
247    /// "...zone called..."
248    Zone,
249    /// "...called 'Scratch'"
250    Called,
251    /// "...of size 1 MB"
252    Size,
253    /// "...mapped from 'file.bin'"
254    Mapped,
255
256    // Structured Concurrency
257    /// "Attempt all of the following:" → concurrent (async, I/O-bound)
258    Attempt,
259    /// "the following"
260    Following,
261    /// "Simultaneously:" → parallel (CPU-bound)
262    Simultaneously,
263
264    // Agent System (Actor Model)
265    /// "Spawn a Worker called 'w1'" → create agent
266    Spawn,
267    /// "Send Ping to 'agent'" → send message to agent
268    Send,
269    /// "Await response from 'agent' into result" → receive message
270    Await,
271
272    // Serialization
273    /// "A Message is Portable and has:" → serde derives
274    Portable,
275
276    // Sipping Protocol
277    /// "the manifest of Zone" → FileSipper manifest
278    Manifest,
279    /// "the chunk at N in Zone" → FileSipper chunk
280    Chunk,
281
282    // CRDT Keywords
283    /// "A Counter is Shared and has:" → CRDT struct
284    Shared,
285    /// "Merge remote into local" → CRDT merge
286    Merge,
287    /// "Increase x's count by 10" → GCounter increment
288    Increase,
289
290    // Extended CRDT Keywords
291    /// "Decrease x's count by 5" → PNCounter decrement
292    Decrease,
293    /// "which is a Tally" → PNCounter type
294    Tally,
295    /// "which is a SharedSet of T" → ORSet type
296    SharedSet,
297    /// "which is a SharedSequence of T" → RGA type
298    SharedSequence,
299    /// "which is a CollaborativeSequence of T" → YATA type
300    CollaborativeSequence,
301    /// "which is a SharedMap from K to V" → ORMap type
302    SharedMap,
303    /// "which is a Divergent T" → MVRegister type
304    Divergent,
305    /// "Append x to seq" → RGA append
306    Append,
307    /// "Resolve x to value" → MVRegister resolve
308    Resolve,
309    /// "(RemoveWins)" → ORSet bias
310    RemoveWins,
311    /// "(AddWins)" → ORSet bias (default)
312    AddWins,
313    /// "(YATA)" → Sequence algorithm
314    YATA,
315    /// "x's values" → MVRegister values accessor
316    Values,
317
318    // Security Keywords
319    /// "Check that user is admin" → mandatory runtime guard
320    Check,
321
322    // P2P Networking Keywords
323    /// "Listen on \[addr\]" → bind to network address
324    Listen,
325    /// "Connect to \[addr\]" → dial a peer (NetConnect to avoid conflict)
326    NetConnect,
327    /// "Sleep N." → pause execution for N milliseconds
328    Sleep,
329
330    // GossipSub Keywords
331    /// "Sync x on 'topic'" → automatic CRDT replication
332    Sync,
333
334    // Persistence Keywords
335    /// "Mount x at \[path\]" → load/create persistent CRDT from journal
336    Mount,
337    /// "Persistent Counter" → type wrapped with journaling
338    Persistent,
339    /// "x combined with y" → string concatenation
340    Combined,
341
342    // Go-like Concurrency Keywords
343    /// "Launch a task to..." → spawn green thread
344    Launch,
345    /// "a task" → identifier for task context
346    Task,
347    /// "Pipe of Type" → channel creation
348    Pipe,
349    /// "Receive from pipe" → recv from channel
350    Receive,
351    /// "Stop handle" → abort task
352    Stop,
353    /// "Try to send/receive" → non-blocking variant
354    Try,
355    /// "Send value into pipe" → channel send
356    Into,
357    /// "Await the first of:" → select statement
358    First,
359    /// "After N seconds:" → timeout branch
360    After,
361
362    // Block Scoping
363    Colon,
364    Indent,
365    Dedent,
366    Newline,
367
368    // Content Words
369    Noun(Symbol),
370    Adjective(Symbol),
371    NonIntersectiveAdjective(Symbol),
372    Adverb(Symbol),
373    ScopalAdverb(Symbol),
374    TemporalAdverb(Symbol),
375    Verb {
376        lemma: Symbol,
377        time: Time,
378        aspect: Aspect,
379        class: VerbClass,
380    },
381    ProperName(Symbol),
382
383    /// Lexically ambiguous token (e.g., "fish" as noun or verb).
384    ///
385    /// The parser tries the primary interpretation first, then alternatives
386    /// if parsing fails. Used for parse forest generation.
387    Ambiguous {
388        primary: Box<TokenType>,
389        alternatives: Vec<TokenType>,
390    },
391
392    // Speech Acts (Performatives)
393    Performative(Symbol),
394    Exclamation,
395
396    // Articles (Definiteness)
397    Article(Definiteness),
398
399    // Temporal Auxiliaries
400    Auxiliary(Time),
401
402    // Copula & Functional
403    Is,
404    Are,
405    Was,
406    Were,
407    That,
408    Who,
409    What,
410    Where,
411    When,
412    Why,
413    Does,
414    Do,
415
416    // Identity & Reflexive (FOL)
417    Identity,
418    Equals,
419    Reflexive,
420    Reciprocal,
421    /// Pairwise list coordination: "A and B respectively love C and D"
422    Respectively,
423
424    // Pronouns (Discourse)
425    Pronoun {
426        gender: Gender,
427        number: Number,
428        case: Case,
429    },
430
431    // Prepositions (for N-ary relations)
432    Preposition(Symbol),
433
434    // Phrasal Verb Particles (up, down, out, in, off, on, away)
435    Particle(Symbol),
436
437    // Comparatives & Superlatives (Pillar 3 - Degree Semantics)
438    Comparative(Symbol),
439    Superlative(Symbol),
440    Than,
441
442    // Control Verbs (Chomsky's Control Theory)
443    To,
444
445    // Presupposition Triggers (Austin/Strawson)
446    PresupTrigger(PresupKind),
447
448    // Focus Particles (Rooth)
449    Focus(FocusKind),
450
451    // Mass Noun Measure
452    Measure(MeasureKind),
453
454    // Numeric Literals (prover-ready: stores raw string for symbolic math)
455    Number(Symbol),
456
457    /// Duration literal with SI suffix: 500ms, 2s, 50ns
458    /// Stores the value normalized to nanoseconds and preserves the original unit.
459    DurationLiteral {
460        nanos: i64,
461        original_unit: Symbol,
462    },
463
464    /// Date literal in ISO-8601 format: 2026-05-20
465    /// Stores days since Unix epoch (1970-01-01).
466    DateLiteral {
467        days: i32,
468    },
469
470    /// Time-of-day literal: 4pm, 9:30am, noon, midnight
471    /// Stores nanoseconds from midnight (00:00:00).
472    TimeLiteral {
473        nanos_from_midnight: i64,
474    },
475
476    /// Calendar time unit word: day, week, month, year (or plurals)
477    /// Used in Span expressions like "3 days" or "2 months and 5 days"
478    CalendarUnit(CalendarUnit),
479
480    /// Postfix operator for relative past time: "3 days ago"
481    Ago,
482
483    /// Postfix operator for relative future time: "3 days hence"
484    Hence,
485
486    /// Binary operator for span subtraction from a date: "3 days before 2026-05-20"
487    Before,
488
489    /// String literal: `"hello world"`
490    StringLiteral(Symbol),
491
492    /// Interpolated string literal: `"Hello, {name}!"`
493    /// Contains raw content with {} holes preserved
494    InterpolatedString(Symbol),
495
496    // Character literal: `x` (backtick syntax)
497    CharLiteral(Symbol),
498
499    // Index Access (1-indexed)
500    Item,
501    Items,
502
503    // Possession (Genitive Case)
504    Possessive,
505
506    // Punctuation
507    LParen,
508    RParen,
509    LBracket,
510    RBracket,
511    Comma,
512    Period,
513
514    // Bitwise Operators
515    /// "x xor y" → bitwise XOR (`^`)
516    Xor,
517    /// "x shifted left/right by y" → bit shift (`<<`/`>>`)
518    Shifted,
519
520    // Arithmetic Operators
521    Plus,
522    Minus,
523    Star,
524    Slash,
525    Percent,  // Modulo operator
526
527    // Comparison Operators
528    /// `<`
529    Lt,
530    /// `>`
531    Gt,
532    /// `<=`
533    LtEq,
534    /// `>=`
535    GtEq,
536    /// `==`
537    EqEq,
538    /// `!=`
539    NotEq,
540
541    /// Arrow for return type syntax: `->`
542    Arrow,
543
544    /// Assignment operator `=` for `identifier = value` syntax
545    Assign,
546
547    /// Mutability keyword `mut` for explicit mutable declarations
548    Mut,
549
550    /// Generic identifier (for equals-style assignment)
551    Identifier,
552
553    EOF,
554}
555
556#[derive(Debug, Clone)]
557pub struct Token {
558    pub kind: TokenType,
559    pub lexeme: Symbol,
560    pub span: Span,
561}
562
563impl Token {
564    pub fn new(kind: TokenType, lexeme: Symbol, span: Span) -> Self {
565        Token { kind, lexeme, span }
566    }
567}
568
569impl TokenType {
570    pub const WH_WORDS: &'static [TokenType] = &[
571        TokenType::Who,
572        TokenType::What,
573        TokenType::Where,
574        TokenType::When,
575        TokenType::Why,
576    ];
577
578    pub const MODALS: &'static [TokenType] = &[
579        TokenType::Must,
580        TokenType::Shall,
581        TokenType::Should,
582        TokenType::Can,
583        TokenType::May,
584        TokenType::Cannot,
585        TokenType::Would,
586        TokenType::Could,
587        TokenType::Might,
588    ];
589}
590
591#[cfg(test)]
592mod tests {
593    use super::*;
594
595    #[test]
596    fn span_new_stores_positions() {
597        let span = Span::new(5, 10);
598        assert_eq!(span.start, 5);
599        assert_eq!(span.end, 10);
600    }
601
602    #[test]
603    fn span_default_is_zero() {
604        let span = Span::default();
605        assert_eq!(span.start, 0);
606        assert_eq!(span.end, 0);
607    }
608
609    #[test]
610    fn token_has_span_field() {
611        use logicaffeine_base::Interner;
612        let mut interner = Interner::new();
613        let lexeme = interner.intern("test");
614        let token = Token::new(TokenType::Noun(lexeme), lexeme, Span::new(0, 4));
615        assert_eq!(token.span.start, 0);
616        assert_eq!(token.span.end, 4);
617    }
618
619    #[test]
620    fn wh_words_contains_all_wh_tokens() {
621        assert_eq!(TokenType::WH_WORDS.len(), 5);
622        assert!(TokenType::WH_WORDS.contains(&TokenType::Who));
623        assert!(TokenType::WH_WORDS.contains(&TokenType::What));
624        assert!(TokenType::WH_WORDS.contains(&TokenType::Where));
625        assert!(TokenType::WH_WORDS.contains(&TokenType::When));
626        assert!(TokenType::WH_WORDS.contains(&TokenType::Why));
627    }
628
629    #[test]
630    fn modals_contains_all_modal_tokens() {
631        assert_eq!(TokenType::MODALS.len(), 9);
632        assert!(TokenType::MODALS.contains(&TokenType::Must));
633        assert!(TokenType::MODALS.contains(&TokenType::Shall));
634        assert!(TokenType::MODALS.contains(&TokenType::Should));
635        assert!(TokenType::MODALS.contains(&TokenType::Can));
636        assert!(TokenType::MODALS.contains(&TokenType::May));
637        assert!(TokenType::MODALS.contains(&TokenType::Cannot));
638        assert!(TokenType::MODALS.contains(&TokenType::Would));
639        assert!(TokenType::MODALS.contains(&TokenType::Could));
640        assert!(TokenType::MODALS.contains(&TokenType::Might));
641    }
642}