logicaffeine_language/token.rs
1//! Token types for the LOGOS lexer and parser.
2//!
3//! This module defines the vocabulary of the LOGOS language at the token level.
4//! Tokens represent the atomic syntactic units produced by the lexer and consumed
5//! by the parser.
6//!
7//! ## Token Categories
8//!
9//! | Category | Examples | Description |
10//! |----------|----------|-------------|
11//! | **Quantifiers** | every, some, no | Bind variables over domains |
12//! | **Determiners** | the, a, this | Select referents |
13//! | **Nouns** | cat, philosopher | Predicates over individuals |
14//! | **Verbs** | runs, loves | Relations between arguments |
15//! | **Adjectives** | red, happy | Modify noun denotations |
16//! | **Connectives** | and, or, implies | Combine propositions |
17//! | **Pronouns** | he, she, it | Resolve to antecedents |
18//!
19//! ## Block Types
20//!
21//! LOGOS uses markdown-style block headers for structured documents:
22//!
23//! - `## Theorem`: Declares a proposition to be proved
24//! - `## Proof`: Contains the proof steps
25//! - `## Definition`: Introduces new terminology
26//! - `## Main`: Program entry point
27
28use logicaffeine_base::Symbol;
29use logicaffeine_lexicon::{Aspect, Case, Definiteness, Gender, Number, Time, VerbClass};
30
/// Source location of a token: a pair of offsets into the input text.
///
/// Attached to every [`Token`] for diagnostics. NOTE(review): whether the
/// offsets are byte- or char-based, and whether `end` is exclusive, is
/// decided by the lexer and not visible from this file — confirm there.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct Span {
    /// Offset where the token begins.
    pub start: usize,
    /// Offset where the token ends (see convention note above).
    pub end: usize,
}

impl Span {
    /// Creates a span from the given start/end offsets.
    pub fn new(start: usize, end: usize) -> Self {
        Self { start, end }
    }
}
42
/// Presupposition-trigger verbs (Austin/Strawson).
///
/// Carried by [`TokenType::PresupTrigger`]; each variant names the English
/// verb the lexer recognized as a trigger.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PresupKind {
    /// "stop"
    Stop,
    /// "start"
    Start,
    /// "regret"
    Regret,
    /// "continue"
    Continue,
    /// "realize"
    Realize,
    /// "know"
    Know,
}
52
/// Focus particles (Rooth-style focus semantics).
///
/// Carried by [`TokenType::Focus`]; each variant names the recognized particle.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FocusKind {
    /// "only"
    Only,
    /// "even"
    Even,
    /// "just"
    Just,
}
59
/// Mass-noun measure words.
///
/// Carried by [`TokenType::Measure`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MeasureKind {
    /// "much"
    Much,
    /// "little"
    Little,
}
65
/// Calendar time units for Span expressions.
///
/// These represent variable-length calendar durations, as opposed to
/// fixed SI time units (ns, ms, s, etc.) used in Duration.
///
/// Carried by [`TokenType::CalendarUnit`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CalendarUnit {
    /// "day" / "days"
    Day,
    /// "week" / "weeks"
    Week,
    /// "month" / "months" — variable length (28–31 days).
    Month,
    /// "year" / "years" — variable length (leap years).
    Year,
}
77
/// Document structure block type markers.
///
/// LOGOS uses markdown-style `## Header` syntax to delimit different
/// sections of a program or proof document. Carried by
/// [`TokenType::BlockHeader`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BlockType {
    /// `## Theorem` - Declares a proposition to be proved.
    Theorem,
    /// `## Main` - Program entry point for imperative code.
    Main,
    /// `## Definition` - Introduces new terminology or type definitions.
    Definition,
    /// `## Proof` - Contains proof steps for a theorem.
    Proof,
    /// `## Example` - Illustrative examples.
    Example,
    /// `## Logic` - Direct logical notation input.
    Logic,
    /// `## Note` - Explanatory documentation.
    Note,
    /// `## To` - Function definition block.
    Function,
    /// Inline type definition: `## A Point has:` or `## A Color is one of:`.
    TypeDef,
    /// `## Policy` - Security policy rule definitions.
    Policy,
    /// `## Requires` - External crate dependency declarations.
    Requires,
    /// `## No` - Optimization annotation (followed by Memo, TCO, Peephole, Borrow, or Optimize).
    /// Distinct from the quantifier [`TokenType::No`].
    No,
}
109
/// The complete token vocabulary of the LOGOS language.
///
/// Produced by the lexer and consumed by the parser. Variants are grouped
/// by syntactic/semantic category (see the module docs for an overview);
/// payload-carrying variants store interned [`Symbol`]s or normalized values.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TokenType {
    // Document Structure
    BlockHeader { block_type: BlockType },

    // Quantifiers
    All,
    No,
    Some,
    Any,
    Both, // Correlative conjunction marker: "both X and Y"
    Most,
    Few,
    Many,
    Cardinal(u32),
    AtLeast(u32),
    AtMost(u32),

    // Negative Polarity Items (NPIs)
    Anything,
    Anyone,
    Nothing,
    Nobody,
    NoOne,
    Nowhere,
    Ever,
    Never,

    // Logical Connectives
    And,
    Or,
    If,
    Then,
    Not,
    Iff,
    Because,

    // Modal Operators
    Must,
    Shall,
    Should,
    Can,
    May,
    Cannot,
    Would,
    Could,
    Might,
    Had,

    // Imperative Statement Keywords
    Let,
    Set,
    Return,
    /// Exits the innermost while loop: `Break.`
    Break,
    Be,
    While,
    Repeat,
    For,
    In,
    From,
    Assert,
    /// Documented assertion with justification string.
    Trust,
    Otherwise,
    /// Alias for `Otherwise` - Pythonic else clause
    Else,
    /// Python-style else-if shorthand
    Elif,
    Call,
    /// Constructor keyword for struct instantiation.
    New,
    /// Sum type definition keyword.
    Either,
    /// Pattern matching statement keyword.
    Inspect,
    /// Native function modifier for FFI bindings.
    Native,
    /// Escape hatch header keyword: "Escape to Rust:"
    Escape,
    /// Raw code block captured verbatim from an escape hatch body.
    /// The Symbol holds the interned raw foreign code (indentation-stripped).
    EscapeBlock(Symbol),

    // Theorem Keywords
    /// Premise marker in theorem blocks.
    Given,
    /// Goal marker in theorem blocks.
    Prove,
    /// Automatic proof strategy directive.
    Auto,

    // IO Keywords
    /// "Read input from..."
    Read,
    /// "Write x to file..."
    Write,
    /// "...from the console"
    Console,
    /// "...from file..." or "...to file..."
    File,

    // Ownership Keywords (Move/Borrow Semantics)
    /// Move ownership: "Give x to processor"
    Give,
    /// Immutable borrow: "Show x to console"
    Show,

    // Collection Operations
    /// "Push x to items"
    Push,
    /// "Pop from items"
    Pop,
    /// "copy of slice" → slice.to_vec()
    Copy,
    /// "items 1 through 3" → inclusive slice
    Through,
    /// "length of items" → items.len()
    Length,
    /// "items at i" → `items[i]`
    At,

    // Set Operations
    /// "Add x to set" (insert)
    Add,
    /// "Remove x from set"
    Remove,
    /// "set contains x"
    Contains,
    /// "a union b"
    Union,
    /// "a intersection b"
    Intersection,

    // Memory Management (Zones)
    /// "Inside a new zone..."
    Inside,
    /// "...zone called..."
    Zone,
    /// "...called 'Scratch'"
    Called,
    /// "...of size 1 MB"
    Size,
    /// "...mapped from 'file.bin'"
    Mapped,

    // Structured Concurrency
    /// "Attempt all of the following:" → concurrent (async, I/O-bound)
    Attempt,
    /// "the following"
    Following,
    /// "Simultaneously:" → parallel (CPU-bound)
    Simultaneously,

    // Agent System (Actor Model)
    /// "Spawn a Worker called 'w1'" → create agent
    Spawn,
    /// "Send Ping to 'agent'" → send message to agent
    Send,
    /// "Await response from 'agent' into result" → receive message
    Await,

    // Serialization
    /// "A Message is Portable and has:" → serde derives
    Portable,

    // Sipping Protocol
    /// "the manifest of Zone" → FileSipper manifest
    Manifest,
    /// "the chunk at N in Zone" → FileSipper chunk
    Chunk,

    // CRDT Keywords
    /// "A Counter is Shared and has:" → CRDT struct
    Shared,
    /// "Merge remote into local" → CRDT merge
    Merge,
    /// "Increase x's count by 10" → GCounter increment
    Increase,

    // Extended CRDT Keywords
    /// "Decrease x's count by 5" → PNCounter decrement
    Decrease,
    /// "which is a Tally" → PNCounter type
    Tally,
    /// "which is a SharedSet of T" → ORSet type
    SharedSet,
    /// "which is a SharedSequence of T" → RGA type
    SharedSequence,
    /// "which is a CollaborativeSequence of T" → YATA type
    CollaborativeSequence,
    /// "which is a SharedMap from K to V" → ORMap type
    SharedMap,
    /// "which is a Divergent T" → MVRegister type
    Divergent,
    /// "Append x to seq" → RGA append
    Append,
    /// "Resolve x to value" → MVRegister resolve
    Resolve,
    /// "(RemoveWins)" → ORSet bias
    RemoveWins,
    /// "(AddWins)" → ORSet bias (default)
    AddWins,
    /// "(YATA)" → Sequence algorithm
    YATA,
    /// "x's values" → MVRegister values accessor
    Values,

    // Security Keywords
    /// "Check that user is admin" → mandatory runtime guard
    Check,

    // P2P Networking Keywords
    /// "Listen on \[addr\]" → bind to network address
    Listen,
    /// "Connect to \[addr\]" → dial a peer (NetConnect to avoid conflict)
    NetConnect,
    /// "Sleep N." → pause execution for N milliseconds
    Sleep,

    // GossipSub Keywords
    /// "Sync x on 'topic'" → automatic CRDT replication
    Sync,

    // Persistence Keywords
    /// "Mount x at \[path\]" → load/create persistent CRDT from journal
    Mount,
    /// "Persistent Counter" → type wrapped with journaling
    Persistent,
    /// "x combined with y" → string concatenation
    Combined,

    // Go-like Concurrency Keywords
    /// "Launch a task to..." → spawn green thread
    Launch,
    /// "a task" → identifier for task context
    Task,
    /// "Pipe of Type" → channel creation
    Pipe,
    /// "Receive from pipe" → recv from channel
    Receive,
    /// "Stop handle" → abort task
    Stop,
    /// "Try to send/receive" → non-blocking variant
    Try,
    /// "Send value into pipe" → channel send
    Into,
    /// "Await the first of:" → select statement
    First,
    /// "After N seconds:" → timeout branch
    After,

    // Block Scoping (Python-style layout tokens)
    Colon,
    Indent,
    Dedent,
    Newline,

    // Content Words
    Noun(Symbol),
    Adjective(Symbol),
    NonIntersectiveAdjective(Symbol),
    Adverb(Symbol),
    ScopalAdverb(Symbol),
    TemporalAdverb(Symbol),
    Verb {
        lemma: Symbol,
        time: Time,
        aspect: Aspect,
        class: VerbClass,
    },
    ProperName(Symbol),

    /// Lexically ambiguous token (e.g., "fish" as noun or verb).
    ///
    /// The parser tries the primary interpretation first, then alternatives
    /// if parsing fails. Used for parse forest generation.
    Ambiguous {
        primary: Box<TokenType>,
        alternatives: Vec<TokenType>,
    },

    // Speech Acts (Performatives)
    Performative(Symbol),
    Exclamation,

    // Articles (Definiteness)
    Article(Definiteness),

    // Temporal Auxiliaries
    Auxiliary(Time),

    // Copula & Functional
    Is,
    Are,
    Was,
    Were,
    That,
    Who,
    What,
    Where,
    When,
    Why,
    Does,
    Do,

    // Identity & Reflexive (FOL)
    Identity,
    Equals,
    Reflexive,
    Reciprocal,
    /// Pairwise list coordination: "A and B respectively love C and D"
    Respectively,

    // Pronouns (Discourse)
    Pronoun {
        gender: Gender,
        number: Number,
        case: Case,
    },

    // Prepositions (for N-ary relations)
    Preposition(Symbol),

    // Phrasal Verb Particles (up, down, out, in, off, on, away)
    Particle(Symbol),

    // Comparatives & Superlatives (Pillar 3 - Degree Semantics)
    Comparative(Symbol),
    Superlative(Symbol),
    Than,

    // Control Verbs (Chomsky's Control Theory)
    To,

    // Presupposition Triggers (Austin/Strawson)
    PresupTrigger(PresupKind),

    // Focus Particles (Rooth)
    Focus(FocusKind),

    // Mass Noun Measure
    Measure(MeasureKind),

    // Numeric Literals (prover-ready: stores raw string for symbolic math)
    Number(Symbol),

    /// Duration literal with SI suffix: 500ms, 2s, 50ns
    /// Stores the value normalized to nanoseconds and preserves the original unit.
    DurationLiteral {
        nanos: i64,
        original_unit: Symbol,
    },

    /// Date literal in ISO-8601 format: 2026-05-20
    /// Stores days since Unix epoch (1970-01-01).
    DateLiteral {
        days: i32,
    },

    /// Time-of-day literal: 4pm, 9:30am, noon, midnight
    /// Stores nanoseconds from midnight (00:00:00).
    TimeLiteral {
        nanos_from_midnight: i64,
    },

    /// Calendar time unit word: day, week, month, year (or plurals)
    /// Used in Span expressions like "3 days" or "2 months and 5 days"
    CalendarUnit(CalendarUnit),

    /// Postfix operator for relative past time: "3 days ago"
    Ago,

    /// Postfix operator for relative future time: "3 days hence"
    Hence,

    /// Binary operator for span subtraction from a date: "3 days before 2026-05-20"
    Before,

    /// String literal: `"hello world"`
    StringLiteral(Symbol),

    /// Interpolated string literal: `"Hello, {name}!"`
    /// Contains raw content with {} holes preserved
    InterpolatedString(Symbol),

    /// Character literal: `x` (backtick syntax)
    CharLiteral(Symbol),

    // Index Access (1-indexed)
    Item,
    Items,

    // Possession (Genitive Case)
    Possessive,

    // Punctuation
    LParen,
    RParen,
    LBracket,
    RBracket,
    Comma,
    Period,

    // Bitwise Operators
    /// "x xor y" → bitwise XOR (`^`)
    Xor,
    /// "x shifted left/right by y" → bit shift (`<<`/`>>`)
    Shifted,

    // Arithmetic Operators
    Plus,
    Minus,
    Star,
    Slash,
    Percent, // Modulo operator

    // Comparison Operators
    /// `<`
    Lt,
    /// `>`
    Gt,
    /// `<=`
    LtEq,
    /// `>=`
    GtEq,
    /// `==`
    EqEq,
    /// `!=`
    NotEq,

    /// Arrow for return type syntax: `->`
    Arrow,

    /// Assignment operator `=` for `identifier = value` syntax
    Assign,

    /// Mutability keyword `mut` for explicit mutable declarations
    Mut,

    /// Generic identifier (for equals-style assignment)
    Identifier,

    /// End-of-input marker emitted once the source is exhausted.
    EOF,
}
555
/// A single lexed token: its kind, the text it was lexed from, and its location.
#[derive(Debug, Clone)]
pub struct Token {
    /// Syntactic category plus any payload (see [`TokenType`]).
    pub kind: TokenType,
    /// Interned source text of the token.
    pub lexeme: Symbol,
    /// Location of the lexeme in the input (see [`Span`]).
    pub span: Span,
}

impl Token {
    /// Bundles a kind, lexeme, and span into a `Token`.
    pub fn new(kind: TokenType, lexeme: Symbol, span: Span) -> Self {
        Token { kind, lexeme, span }
    }
}
568
impl TokenType {
    /// The wh-question words: who, what, where, when, why.
    pub const WH_WORDS: &'static [TokenType] = &[
        TokenType::Who,
        TokenType::What,
        TokenType::Where,
        TokenType::When,
        TokenType::Why,
    ];

    /// Modal auxiliary tokens.
    ///
    /// NOTE(review): `TokenType::Had` is grouped under "Modal Operators" in
    /// the enum but is intentionally(?) absent here — confirm with the parser
    /// before adding it (tests pin this list at 9 entries).
    pub const MODALS: &'static [TokenType] = &[
        TokenType::Must,
        TokenType::Shall,
        TokenType::Should,
        TokenType::Can,
        TokenType::May,
        TokenType::Cannot,
        TokenType::Would,
        TokenType::Could,
        TokenType::Might,
    ];
}
590
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn span_new_stores_positions() {
        // Constructor must pass both offsets through untouched.
        let span = Span::new(5, 10);
        assert_eq!((span.start, span.end), (5, 10));
    }

    #[test]
    fn span_default_is_zero() {
        // Derived Default zeroes both fields.
        let span = Span::default();
        assert_eq!((span.start, span.end), (0, 0));
    }

    #[test]
    fn token_has_span_field() {
        use logicaffeine_base::Interner;
        let mut interner = Interner::new();
        let lexeme = interner.intern("test");
        let token = Token::new(TokenType::Noun(lexeme), lexeme, Span::new(0, 4));
        assert_eq!((token.span.start, token.span.end), (0, 4));
    }

    #[test]
    fn wh_words_contains_all_wh_tokens() {
        // Table-driven: the const must hold exactly these five words.
        let expected = [
            TokenType::Who,
            TokenType::What,
            TokenType::Where,
            TokenType::When,
            TokenType::Why,
        ];
        assert_eq!(TokenType::WH_WORDS.len(), expected.len());
        assert!(expected.iter().all(|t| TokenType::WH_WORDS.contains(t)));
    }

    #[test]
    fn modals_contains_all_modal_tokens() {
        // Table-driven: the const must hold exactly these nine modals.
        let expected = [
            TokenType::Must,
            TokenType::Shall,
            TokenType::Should,
            TokenType::Can,
            TokenType::May,
            TokenType::Cannot,
            TokenType::Would,
            TokenType::Could,
            TokenType::Might,
        ];
        assert_eq!(TokenType::MODALS.len(), expected.len());
        assert!(expected.iter().all(|t| TokenType::MODALS.contains(t)));
    }
}