Skip to main content

logicaffeine_language/
token.rs

1//! Token types for the LOGOS lexer and parser.
2//!
3//! This module defines the vocabulary of the LOGOS language at the token level.
4//! Tokens represent the atomic syntactic units produced by the lexer and consumed
5//! by the parser.
6//!
7//! ## Token Categories
8//!
9//! | Category | Examples | Description |
10//! |----------|----------|-------------|
11//! | **Quantifiers** | every, some, no | Bind variables over domains |
12//! | **Determiners** | the, a, this | Select referents |
13//! | **Nouns** | cat, philosopher | Predicates over individuals |
14//! | **Verbs** | runs, loves | Relations between arguments |
15//! | **Adjectives** | red, happy | Modify noun denotations |
16//! | **Connectives** | and, or, implies | Combine propositions |
17//! | **Pronouns** | he, she, it | Resolve to antecedents |
18//!
19//! ## Block Types
20//!
21//! LOGOS uses markdown-style block headers for structured documents:
22//!
23//! - `## Theorem`: Declares a proposition to be proved
24//! - `## Proof`: Contains the proof steps
25//! - `## Definition`: Introduces new terminology
26//! - `## Main`: Program entry point
27
28use logicaffeine_base::Symbol;
29use logicaffeine_lexicon::{Aspect, Case, Definiteness, Gender, Number, Time, VerbClass};
30
31#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
32pub struct Span {
33    pub start: usize,
34    pub end: usize,
35}
36
37impl Span {
38    pub fn new(start: usize, end: usize) -> Self {
39        Self { start, end }
40    }
41}
42
43#[derive(Debug, Clone, Copy, PartialEq, Eq)]
44pub enum PresupKind {
45    Stop,
46    Start,
47    Regret,
48    Continue,
49    Realize,
50    Know,
51}
52
53#[derive(Debug, Clone, Copy, PartialEq, Eq)]
54pub enum FocusKind {
55    Only,
56    Even,
57    Just,
58}
59
60#[derive(Debug, Clone, Copy, PartialEq, Eq)]
61pub enum MeasureKind {
62    Much,
63    Little,
64}
65
66/// Calendar time units for Span expressions.
67///
68/// These represent variable-length calendar durations, as opposed to
69/// fixed SI time units (ns, ms, s, etc.) used in Duration.
70#[derive(Debug, Clone, Copy, PartialEq, Eq)]
71pub enum CalendarUnit {
72    Day,
73    Week,
74    Month,
75    Year,
76}
77
78/// Document structure block type markers.
79///
80/// LOGOS uses markdown-style `## Header` syntax to delimit different
81/// sections of a program or proof document.
82#[derive(Debug, Clone, Copy, PartialEq, Eq)]
83pub enum BlockType {
84    /// `## Theorem` - Declares a proposition to be proved.
85    Theorem,
86    /// `## Main` - Program entry point for imperative code.
87    Main,
88    /// `## Definition` - Introduces new terminology or type definitions.
89    Definition,
90    /// `## Proof` - Contains proof steps for a theorem.
91    Proof,
92    /// `## Example` - Illustrative examples.
93    Example,
94    /// `## Logic` - Direct logical notation input.
95    Logic,
96    /// `## Note` - Explanatory documentation.
97    Note,
98    /// `## To` - Function definition block.
99    Function,
100    /// Inline type definition: `## A Point has:` or `## A Color is one of:`.
101    TypeDef,
102    /// `## Policy` - Security policy rule definitions.
103    Policy,
104    /// `## Requires` - External crate dependency declarations.
105    Requires,
106}
107
108#[derive(Debug, Clone, PartialEq, Eq)]
109pub enum TokenType {
110    // Document Structure
111    BlockHeader { block_type: BlockType },
112
113    // Quantifiers
114    All,
115    No,
116    Some,
117    Any,
118    Both, // Correlative conjunction marker: "both X and Y"
119    Most,
120    Few,
121    Many,
122    Cardinal(u32),
123    AtLeast(u32),
124    AtMost(u32),
125
126    // Negative Polarity Items (NPIs)
127    Anything,
128    Anyone,
129    Nothing,
130    Nobody,
131    NoOne,
132    Nowhere,
133    Ever,
134    Never,
135
136    // Logical Connectives
137    And,
138    Or,
139    If,
140    Then,
141    Not,
142    Iff,
143    Because,
144
145    // Modal Operators
146    Must,
147    Shall,
148    Should,
149    Can,
150    May,
151    Cannot,
152    Would,
153    Could,
154    Might,
155    Had,
156
157    // Imperative Statement Keywords
158    Let,
159    Set,
160    Return,
161    Be,
162    While,
163    Repeat,
164    For,
165    In,
166    From,
167    Assert,
168    /// Documented assertion with justification string.
169    Trust,
170    Otherwise,
171    /// Alias for `Otherwise` - Pythonic else clause
172    Else,
173    /// Python-style else-if shorthand
174    Elif,
175    Call,
176    /// Constructor keyword for struct instantiation.
177    New,
178    /// Sum type definition keyword.
179    Either,
180    /// Pattern matching statement keyword.
181    Inspect,
182    /// Native function modifier for FFI bindings.
183    Native,
184    /// Escape hatch header keyword: "Escape to Rust:"
185    Escape,
186    /// Raw code block captured verbatim from an escape hatch body.
187    /// The Symbol holds the interned raw foreign code (indentation-stripped).
188    EscapeBlock(Symbol),
189
190    // Theorem Keywords
191    /// Premise marker in theorem blocks.
192    Given,
193    /// Goal marker in theorem blocks.
194    Prove,
195    /// Automatic proof strategy directive.
196    Auto,
197
198    // IO Keywords
199    /// "Read input from..."
200    Read,
201    /// "Write x to file..."
202    Write,
203    /// "...from the console"
204    Console,
205    /// "...from file..." or "...to file..."
206    File,
207
208    // Ownership Keywords (Move/Borrow Semantics)
209    /// Move ownership: "Give x to processor"
210    Give,
211    /// Immutable borrow: "Show x to console"
212    Show,
213
214    // Collection Operations
215    /// "Push x to items"
216    Push,
217    /// "Pop from items"
218    Pop,
219    /// "copy of slice" → slice.to_vec()
220    Copy,
221    /// "items 1 through 3" → inclusive slice
222    Through,
223    /// "length of items" → items.len()
224    Length,
225    /// "items at i" → `items[i]`
226    At,
227
228    // Set Operations
229    /// "Add x to set" (insert)
230    Add,
231    /// "Remove x from set"
232    Remove,
233    /// "set contains x"
234    Contains,
235    /// "a union b"
236    Union,
237    /// "a intersection b"
238    Intersection,
239
240    // Memory Management (Zones)
241    /// "Inside a new zone..."
242    Inside,
243    /// "...zone called..."
244    Zone,
245    /// "...called 'Scratch'"
246    Called,
247    /// "...of size 1 MB"
248    Size,
249    /// "...mapped from 'file.bin'"
250    Mapped,
251
252    // Structured Concurrency
253    /// "Attempt all of the following:" → concurrent (async, I/O-bound)
254    Attempt,
255    /// "the following"
256    Following,
257    /// "Simultaneously:" → parallel (CPU-bound)
258    Simultaneously,
259
260    // Agent System (Actor Model)
261    /// "Spawn a Worker called 'w1'" → create agent
262    Spawn,
263    /// "Send Ping to 'agent'" → send message to agent
264    Send,
265    /// "Await response from 'agent' into result" → receive message
266    Await,
267
268    // Serialization
269    /// "A Message is Portable and has:" → serde derives
270    Portable,
271
272    // Sipping Protocol
273    /// "the manifest of Zone" → FileSipper manifest
274    Manifest,
275    /// "the chunk at N in Zone" → FileSipper chunk
276    Chunk,
277
278    // CRDT Keywords
279    /// "A Counter is Shared and has:" → CRDT struct
280    Shared,
281    /// "Merge remote into local" → CRDT merge
282    Merge,
283    /// "Increase x's count by 10" → GCounter increment
284    Increase,
285
286    // Extended CRDT Keywords
287    /// "Decrease x's count by 5" → PNCounter decrement
288    Decrease,
289    /// "which is a Tally" → PNCounter type
290    Tally,
291    /// "which is a SharedSet of T" → ORSet type
292    SharedSet,
293    /// "which is a SharedSequence of T" → RGA type
294    SharedSequence,
295    /// "which is a CollaborativeSequence of T" → YATA type
296    CollaborativeSequence,
297    /// "which is a SharedMap from K to V" → ORMap type
298    SharedMap,
299    /// "which is a Divergent T" → MVRegister type
300    Divergent,
301    /// "Append x to seq" → RGA append
302    Append,
303    /// "Resolve x to value" → MVRegister resolve
304    Resolve,
305    /// "(RemoveWins)" → ORSet bias
306    RemoveWins,
307    /// "(AddWins)" → ORSet bias (default)
308    AddWins,
309    /// "(YATA)" → Sequence algorithm
310    YATA,
311    /// "x's values" → MVRegister values accessor
312    Values,
313
314    // Security Keywords
315    /// "Check that user is admin" → mandatory runtime guard
316    Check,
317
318    // P2P Networking Keywords
319    /// "Listen on \[addr\]" → bind to network address
320    Listen,
321    /// "Connect to \[addr\]" → dial a peer (NetConnect to avoid conflict)
322    NetConnect,
323    /// "Sleep N." → pause execution for N milliseconds
324    Sleep,
325
326    // GossipSub Keywords
327    /// "Sync x on 'topic'" → automatic CRDT replication
328    Sync,
329
330    // Persistence Keywords
331    /// "Mount x at \[path\]" → load/create persistent CRDT from journal
332    Mount,
333    /// "Persistent Counter" → type wrapped with journaling
334    Persistent,
335    /// "x combined with y" → string concatenation
336    Combined,
337
338    // Go-like Concurrency Keywords
339    /// "Launch a task to..." → spawn green thread
340    Launch,
341    /// "a task" → identifier for task context
342    Task,
343    /// "Pipe of Type" → channel creation
344    Pipe,
345    /// "Receive from pipe" → recv from channel
346    Receive,
347    /// "Stop handle" → abort task
348    Stop,
349    /// "Try to send/receive" → non-blocking variant
350    Try,
351    /// "Send value into pipe" → channel send
352    Into,
353    /// "Await the first of:" → select statement
354    First,
355    /// "After N seconds:" → timeout branch
356    After,
357
358    // Block Scoping
359    Colon,
360    Indent,
361    Dedent,
362    Newline,
363
364    // Content Words
365    Noun(Symbol),
366    Adjective(Symbol),
367    NonIntersectiveAdjective(Symbol),
368    Adverb(Symbol),
369    ScopalAdverb(Symbol),
370    TemporalAdverb(Symbol),
371    Verb {
372        lemma: Symbol,
373        time: Time,
374        aspect: Aspect,
375        class: VerbClass,
376    },
377    ProperName(Symbol),
378
379    /// Lexically ambiguous token (e.g., "fish" as noun or verb).
380    ///
381    /// The parser tries the primary interpretation first, then alternatives
382    /// if parsing fails. Used for parse forest generation.
383    Ambiguous {
384        primary: Box<TokenType>,
385        alternatives: Vec<TokenType>,
386    },
387
388    // Speech Acts (Performatives)
389    Performative(Symbol),
390    Exclamation,
391
392    // Articles (Definiteness)
393    Article(Definiteness),
394
395    // Temporal Auxiliaries
396    Auxiliary(Time),
397
398    // Copula & Functional
399    Is,
400    Are,
401    Was,
402    Were,
403    That,
404    Who,
405    What,
406    Where,
407    When,
408    Why,
409    Does,
410    Do,
411
412    // Identity & Reflexive (FOL)
413    Identity,
414    Equals,
415    Reflexive,
416    Reciprocal,
417    /// Pairwise list coordination: "A and B respectively love C and D"
418    Respectively,
419
420    // Pronouns (Discourse)
421    Pronoun {
422        gender: Gender,
423        number: Number,
424        case: Case,
425    },
426
427    // Prepositions (for N-ary relations)
428    Preposition(Symbol),
429
430    // Phrasal Verb Particles (up, down, out, in, off, on, away)
431    Particle(Symbol),
432
433    // Comparatives & Superlatives (Pillar 3 - Degree Semantics)
434    Comparative(Symbol),
435    Superlative(Symbol),
436    Than,
437
438    // Control Verbs (Chomsky's Control Theory)
439    To,
440
441    // Presupposition Triggers (Austin/Strawson)
442    PresupTrigger(PresupKind),
443
444    // Focus Particles (Rooth)
445    Focus(FocusKind),
446
447    // Mass Noun Measure
448    Measure(MeasureKind),
449
450    // Numeric Literals (prover-ready: stores raw string for symbolic math)
451    Number(Symbol),
452
453    /// Duration literal with SI suffix: 500ms, 2s, 50ns
454    /// Stores the value normalized to nanoseconds and preserves the original unit.
455    DurationLiteral {
456        nanos: i64,
457        original_unit: Symbol,
458    },
459
460    /// Date literal in ISO-8601 format: 2026-05-20
461    /// Stores days since Unix epoch (1970-01-01).
462    DateLiteral {
463        days: i32,
464    },
465
466    /// Time-of-day literal: 4pm, 9:30am, noon, midnight
467    /// Stores nanoseconds from midnight (00:00:00).
468    TimeLiteral {
469        nanos_from_midnight: i64,
470    },
471
472    /// Calendar time unit word: day, week, month, year (or plurals)
473    /// Used in Span expressions like "3 days" or "2 months and 5 days"
474    CalendarUnit(CalendarUnit),
475
476    /// Postfix operator for relative past time: "3 days ago"
477    Ago,
478
479    /// Postfix operator for relative future time: "3 days hence"
480    Hence,
481
482    /// Binary operator for span subtraction from a date: "3 days before 2026-05-20"
483    Before,
484
485    /// String literal: `"hello world"`
486    StringLiteral(Symbol),
487
488    // Character literal: `x` (backtick syntax)
489    CharLiteral(Symbol),
490
491    // Index Access (1-indexed)
492    Item,
493    Items,
494
495    // Possession (Genitive Case)
496    Possessive,
497
498    // Punctuation
499    LParen,
500    RParen,
501    LBracket,
502    RBracket,
503    Comma,
504    Period,
505
506    // Arithmetic Operators
507    Plus,
508    Minus,
509    Star,
510    Slash,
511    Percent,  // Modulo operator
512
513    // Comparison Operators
514    /// `<`
515    Lt,
516    /// `>`
517    Gt,
518    /// `<=`
519    LtEq,
520    /// `>=`
521    GtEq,
522    /// `==`
523    EqEq,
524    /// `!=`
525    NotEq,
526
527    /// Arrow for return type syntax: `->`
528    Arrow,
529
530    /// Assignment operator `=` for `identifier = value` syntax
531    Assign,
532
533    /// Mutability keyword `mut` for explicit mutable declarations
534    Mut,
535
536    /// Generic identifier (for equals-style assignment)
537    Identifier,
538
539    EOF,
540}
541
542#[derive(Debug, Clone)]
543pub struct Token {
544    pub kind: TokenType,
545    pub lexeme: Symbol,
546    pub span: Span,
547}
548
549impl Token {
550    pub fn new(kind: TokenType, lexeme: Symbol, span: Span) -> Self {
551        Token { kind, lexeme, span }
552    }
553}
554
555impl TokenType {
556    pub const WH_WORDS: &'static [TokenType] = &[
557        TokenType::Who,
558        TokenType::What,
559        TokenType::Where,
560        TokenType::When,
561        TokenType::Why,
562    ];
563
564    pub const MODALS: &'static [TokenType] = &[
565        TokenType::Must,
566        TokenType::Shall,
567        TokenType::Should,
568        TokenType::Can,
569        TokenType::May,
570        TokenType::Cannot,
571        TokenType::Would,
572        TokenType::Could,
573        TokenType::Might,
574    ];
575}
576
577#[cfg(test)]
578mod tests {
579    use super::*;
580
581    #[test]
582    fn span_new_stores_positions() {
583        let span = Span::new(5, 10);
584        assert_eq!(span.start, 5);
585        assert_eq!(span.end, 10);
586    }
587
588    #[test]
589    fn span_default_is_zero() {
590        let span = Span::default();
591        assert_eq!(span.start, 0);
592        assert_eq!(span.end, 0);
593    }
594
595    #[test]
596    fn token_has_span_field() {
597        use logicaffeine_base::Interner;
598        let mut interner = Interner::new();
599        let lexeme = interner.intern("test");
600        let token = Token::new(TokenType::Noun(lexeme), lexeme, Span::new(0, 4));
601        assert_eq!(token.span.start, 0);
602        assert_eq!(token.span.end, 4);
603    }
604
605    #[test]
606    fn wh_words_contains_all_wh_tokens() {
607        assert_eq!(TokenType::WH_WORDS.len(), 5);
608        assert!(TokenType::WH_WORDS.contains(&TokenType::Who));
609        assert!(TokenType::WH_WORDS.contains(&TokenType::What));
610        assert!(TokenType::WH_WORDS.contains(&TokenType::Where));
611        assert!(TokenType::WH_WORDS.contains(&TokenType::When));
612        assert!(TokenType::WH_WORDS.contains(&TokenType::Why));
613    }
614
615    #[test]
616    fn modals_contains_all_modal_tokens() {
617        assert_eq!(TokenType::MODALS.len(), 9);
618        assert!(TokenType::MODALS.contains(&TokenType::Must));
619        assert!(TokenType::MODALS.contains(&TokenType::Shall));
620        assert!(TokenType::MODALS.contains(&TokenType::Should));
621        assert!(TokenType::MODALS.contains(&TokenType::Can));
622        assert!(TokenType::MODALS.contains(&TokenType::May));
623        assert!(TokenType::MODALS.contains(&TokenType::Cannot));
624        assert!(TokenType::MODALS.contains(&TokenType::Would));
625        assert!(TokenType::MODALS.contains(&TokenType::Could));
626        assert!(TokenType::MODALS.contains(&TokenType::Might));
627    }
628}