logicaffeine_language/token.rs
//! Token types for the LOGOS lexer and parser.
//!
//! This module defines the vocabulary of the LOGOS language at the token level.
//! Tokens represent the atomic syntactic units produced by the lexer and consumed
//! by the parser.
//!
//! ## Token Categories
//!
//! | Category | Examples | Description |
//! |----------|----------|-------------|
//! | **Quantifiers** | every, some, no | Bind variables over domains |
//! | **Determiners** | the, a, this | Select referents |
//! | **Nouns** | cat, philosopher | Predicates over individuals |
//! | **Verbs** | runs, loves | Relations between arguments |
//! | **Adjectives** | red, happy | Modify noun denotations |
//! | **Connectives** | and, or, implies | Combine propositions |
//! | **Pronouns** | he, she, it | Resolve to antecedents |
//!
//! ## Block Types
//!
//! LOGOS uses markdown-style block headers for structured documents:
//!
//! - `## Theorem`: Declares a proposition to be proved
//! - `## Proof`: Contains the proof steps
//! - `## Definition`: Introduces new terminology
//! - `## Main`: Program entry point
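//!
//! ## Example
//!
//! A minimal sketch of constructing a token by hand, assuming an `Interner`
//! from `logicaffeine_base` (mirroring the tests at the bottom of this file):
//!
//! ```ignore
//! let mut interner = Interner::new();
//! let lexeme = interner.intern("cat");
//! let token = Token::new(TokenType::Noun(lexeme), lexeme, Span::new(0, 3));
//! assert_eq!(token.span, Span::new(0, 3));
//! ```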

use logicaffeine_base::Symbol;
use logicaffeine_lexicon::{Aspect, Case, Definiteness, Gender, Number, Time, VerbClass};

#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct Span {
    pub start: usize,
    pub end: usize,
}

impl Span {
    pub fn new(start: usize, end: usize) -> Self {
        Self { start, end }
    }
}

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PresupKind {
    Stop,
    Start,
    Regret,
    Continue,
    Realize,
    Know,
}

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FocusKind {
    Only,
    Even,
    Just,
}

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MeasureKind {
    Much,
    Little,
}

/// Calendar time units for Span expressions.
///
/// These represent variable-length calendar durations, as opposed to
/// the fixed SI time units (ns, ms, s, etc.) used in `Duration`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CalendarUnit {
    Day,
    Week,
    Month,
    Year,
}

/// Document structure block type markers.
///
/// LOGOS uses markdown-style `## Header` syntax to delimit different
/// sections of a program or proof document.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BlockType {
    /// `## Theorem` - Declares a proposition to be proved.
    Theorem,
    /// `## Main` - Program entry point for imperative code.
    Main,
    /// `## Definition` - Introduces new terminology or type definitions.
    Definition,
    /// `## Proof` - Contains proof steps for a theorem.
    Proof,
    /// `## Example` - Illustrative examples.
    Example,
    /// `## Logic` - Direct logical notation input.
    Logic,
    /// `## Note` - Explanatory documentation.
    Note,
    /// `## To` - Function definition block.
    Function,
    /// Inline type definition: `## A Point has:` or `## A Color is one of:`.
    TypeDef,
    /// `## Policy` - Security policy rule definitions.
    Policy,
    /// `## Requires` - External crate dependency declarations.
    Requires,
}
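
// A lexer that recognizes a header line is expected to pair it with the matching
// marker, e.g. "## Theorem" becomes `TokenType::BlockHeader { block_type: BlockType::Theorem }`.
// This is only a sketch of the intended correspondence; the recognition logic itself
// lives in the lexer, not in this module.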

#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TokenType {
    // Document Structure
    BlockHeader { block_type: BlockType },

    // Quantifiers
    All,
    No,
    Some,
    Any,
    Both, // Correlative conjunction marker: "both X and Y"
    Most,
    Few,
    Many,
    Cardinal(u32),
    AtLeast(u32),
    AtMost(u32),

    // Negative Polarity Items (NPIs)
    Anything,
    Anyone,
    Nothing,
    Nobody,
    NoOne,
    Nowhere,
    Ever,
    Never,

    // Logical Connectives
    And,
    Or,
    If,
    Then,
    Not,
    Iff,
    Because,

    // Modal Operators
    Must,
    Shall,
    Should,
    Can,
    May,
    Cannot,
    Would,
    Could,
    Might,
    Had,

    // Imperative Statement Keywords
    Let,
    Set,
    Return,
    Be,
    While,
    Repeat,
    For,
    In,
    From,
    Assert,
    /// Documented assertion with justification string.
    Trust,
    Otherwise,
    /// Alias for `Otherwise` - Pythonic else clause.
    Else,
    /// Python-style else-if shorthand.
    Elif,
    Call,
    /// Constructor keyword for struct instantiation.
    New,
    /// Sum type definition keyword.
    Either,
    /// Pattern matching statement keyword.
    Inspect,
    /// Native function modifier for FFI bindings.
    Native,
    /// Escape hatch header keyword: "Escape to Rust:"
    Escape,
    /// Raw code block captured verbatim from an escape hatch body.
    /// The Symbol holds the interned raw foreign code (indentation-stripped).
    EscapeBlock(Symbol),

    // Theorem Keywords
    /// Premise marker in theorem blocks.
    Given,
    /// Goal marker in theorem blocks.
    Prove,
    /// Automatic proof strategy directive.
    Auto,

    // IO Keywords
    /// "Read input from..."
    Read,
    /// "Write x to file..."
    Write,
    /// "...from the console"
    Console,
    /// "...from file..." or "...to file..."
    File,

    // Ownership Keywords (Move/Borrow Semantics)
    /// Move ownership: "Give x to processor"
    Give,
    /// Immutable borrow: "Show x to console"
    Show,

    // Collection Operations
    /// "Push x to items"
    Push,
    /// "Pop from items"
    Pop,
    /// "copy of slice" → slice.to_vec()
    Copy,
    /// "items 1 through 3" → inclusive slice
    Through,
    /// "length of items" → items.len()
    Length,
    /// "items at i" → `items[i]`
    At,

    // Set Operations
    /// "Add x to set" (insert)
    Add,
    /// "Remove x from set"
    Remove,
    /// "set contains x"
    Contains,
    /// "a union b"
    Union,
    /// "a intersection b"
    Intersection,

    // Memory Management (Zones)
    /// "Inside a new zone..."
    Inside,
    /// "...zone called..."
    Zone,
    /// "...called 'Scratch'"
    Called,
    /// "...of size 1 MB"
    Size,
    /// "...mapped from 'file.bin'"
    Mapped,

    // Structured Concurrency
    /// "Attempt all of the following:" → concurrent (async, I/O-bound)
    Attempt,
    /// "the following"
    Following,
    /// "Simultaneously:" → parallel (CPU-bound)
    Simultaneously,

    // Agent System (Actor Model)
    /// "Spawn a Worker called 'w1'" → create agent
    Spawn,
    /// "Send Ping to 'agent'" → send message to agent
    Send,
    /// "Await response from 'agent' into result" → receive message
    Await,

    // Serialization
    /// "A Message is Portable and has:" → serde derives
    Portable,

    // Sipping Protocol
    /// "the manifest of Zone" → FileSipper manifest
    Manifest,
    /// "the chunk at N in Zone" → FileSipper chunk
    Chunk,

    // CRDT Keywords
    /// "A Counter is Shared and has:" → CRDT struct
    Shared,
    /// "Merge remote into local" → CRDT merge
    Merge,
    /// "Increase x's count by 10" → GCounter increment
    Increase,

    // Extended CRDT Keywords
    /// "Decrease x's count by 5" → PNCounter decrement
    Decrease,
    /// "which is a Tally" → PNCounter type
    Tally,
    /// "which is a SharedSet of T" → ORSet type
    SharedSet,
    /// "which is a SharedSequence of T" → RGA type
    SharedSequence,
    /// "which is a CollaborativeSequence of T" → YATA type
    CollaborativeSequence,
    /// "which is a SharedMap from K to V" → ORMap type
    SharedMap,
    /// "which is a Divergent T" → MVRegister type
    Divergent,
    /// "Append x to seq" → RGA append
    Append,
    /// "Resolve x to value" → MVRegister resolve
    Resolve,
    /// "(RemoveWins)" → ORSet bias
    RemoveWins,
    /// "(AddWins)" → ORSet bias (default)
    AddWins,
    /// "(YATA)" → Sequence algorithm
    YATA,
    /// "x's values" → MVRegister values accessor
    Values,

    // Security Keywords
    /// "Check that user is admin" → mandatory runtime guard
    Check,

    // P2P Networking Keywords
    /// "Listen on \[addr\]" → bind to network address
    Listen,
    /// "Connect to \[addr\]" → dial a peer (NetConnect to avoid conflict)
    NetConnect,
    /// "Sleep N." → pause execution for N milliseconds
    Sleep,

    // GossipSub Keywords
    /// "Sync x on 'topic'" → automatic CRDT replication
    Sync,

    // Persistence Keywords
    /// "Mount x at \[path\]" → load/create persistent CRDT from journal
    Mount,
    /// "Persistent Counter" → type wrapped with journaling
    Persistent,
    /// "x combined with y" → string concatenation
    Combined,

    // Go-like Concurrency Keywords
    /// "Launch a task to..." → spawn green thread
    Launch,
    /// "a task" → identifier for task context
    Task,
    /// "Pipe of Type" → channel creation
    Pipe,
    /// "Receive from pipe" → recv from channel
    Receive,
    /// "Stop handle" → abort task
    Stop,
    /// "Try to send/receive" → non-blocking variant
    Try,
    /// "Send value into pipe" → channel send
    Into,
    /// "Await the first of:" → select statement
    First,
    /// "After N seconds:" → timeout branch
    After,

    // Block Scoping
    Colon,
    Indent,
    Dedent,
    Newline,

    // Content Words
    Noun(Symbol),
    Adjective(Symbol),
    NonIntersectiveAdjective(Symbol),
    Adverb(Symbol),
    ScopalAdverb(Symbol),
    TemporalAdverb(Symbol),
    Verb {
        lemma: Symbol,
        time: Time,
        aspect: Aspect,
        class: VerbClass,
    },
    ProperName(Symbol),

    /// Lexically ambiguous token (e.g., "fish" as noun or verb).
    ///
    /// The parser tries the primary interpretation first, then alternatives
    /// if parsing fails. Used for parse forest generation.
    Ambiguous {
        primary: Box<TokenType>,
        alternatives: Vec<TokenType>,
    },
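
    // A minimal construction sketch for "fish" (illustrative only; `fish` is assumed
    // to be an interned Symbol and `fish_verb` a pre-built `Verb` token):
    //
    //     TokenType::Ambiguous {
    //         primary: Box::new(TokenType::Noun(fish)),
    //         alternatives: vec![fish_verb],
    //     }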

    // Speech Acts (Performatives)
    Performative(Symbol),
    Exclamation,

    // Articles (Definiteness)
    Article(Definiteness),

    // Temporal Auxiliaries
    Auxiliary(Time),

    // Copula & Functional
    Is,
    Are,
    Was,
    Were,
    That,
    Who,
    What,
    Where,
    When,
    Why,
    Does,
    Do,

    // Identity & Reflexive (FOL)
    Identity,
    Equals,
    Reflexive,
    Reciprocal,
    /// Pairwise list coordination: "A and B respectively love C and D"
    Respectively,

    // Pronouns (Discourse)
    Pronoun {
        gender: Gender,
        number: Number,
        case: Case,
    },

    // Prepositions (for N-ary relations)
    Preposition(Symbol),

    // Phrasal Verb Particles (up, down, out, in, off, on, away)
    Particle(Symbol),

    // Comparatives & Superlatives (Pillar 3 - Degree Semantics)
    Comparative(Symbol),
    Superlative(Symbol),
    Than,

    // Control Verbs (Chomsky's Control Theory)
    To,

    // Presupposition Triggers (Austin/Strawson)
    PresupTrigger(PresupKind),

    // Focus Particles (Rooth)
    Focus(FocusKind),

    // Mass Noun Measure
    Measure(MeasureKind),

    // Numeric Literals (prover-ready: stores raw string for symbolic math)
    Number(Symbol),

    /// Duration literal with SI suffix: 500ms, 2s, 50ns.
    /// Stores the value normalized to nanoseconds and preserves the original unit.
    DurationLiteral {
        nanos: i64,
        original_unit: Symbol,
    },

    /// Date literal in ISO-8601 format: 2026-05-20.
    /// Stores days since the Unix epoch (1970-01-01).
    DateLiteral {
        days: i32,
    },

    /// Time-of-day literal: 4pm, 9:30am, noon, midnight.
    /// Stores nanoseconds from midnight (00:00:00).
    TimeLiteral {
        nanos_from_midnight: i64,
    },
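
    // Worked examples of the normalizations above (illustrative values only; the
    // Symbol placeholder stands for an interned string):
    //
    //     500ms      → DurationLiteral { nanos: 500_000_000, original_unit: <interned "ms"> }
    //     2026-05-20 → DateLiteral { days: 20_593 }                  // days since 1970-01-01
    //     4pm        → TimeLiteral { nanos_from_midnight: 57_600_000_000_000 }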

    /// Calendar time unit word: day, week, month, year (or plurals).
    /// Used in Span expressions like "3 days" or "2 months and 5 days".
    CalendarUnit(CalendarUnit),

    /// Postfix operator for relative past time: "3 days ago"
    Ago,

    /// Postfix operator for relative future time: "3 days hence"
    Hence,

    /// Binary operator for span subtraction from a date: "3 days before 2026-05-20"
    Before,

    /// String literal: `"hello world"`
    StringLiteral(Symbol),

    /// Character literal: `x` (backtick syntax)
    CharLiteral(Symbol),

    // Index Access (1-indexed)
    Item,
    Items,

    // Possession (Genitive Case)
    Possessive,

    // Punctuation
    LParen,
    RParen,
    LBracket,
    RBracket,
    Comma,
    Period,

    // Arithmetic Operators
    Plus,
    Minus,
    Star,
    Slash,
    Percent, // Modulo operator

    // Comparison Operators
    /// `<`
    Lt,
    /// `>`
    Gt,
    /// `<=`
    LtEq,
    /// `>=`
    GtEq,
    /// `==`
    EqEq,
    /// `!=`
    NotEq,

    /// Arrow for return type syntax: `->`
    Arrow,

    /// Assignment operator `=` for `identifier = value` syntax.
    Assign,

    /// Mutability keyword `mut` for explicit mutable declarations.
    Mut,

    /// Generic identifier (for equals-style assignment).
    Identifier,

    EOF,
}

#[derive(Debug, Clone)]
pub struct Token {
    pub kind: TokenType,
    pub lexeme: Symbol,
    pub span: Span,
}

impl Token {
    pub fn new(kind: TokenType, lexeme: Symbol, span: Span) -> Self {
        Token { kind, lexeme, span }
    }
}

impl TokenType {
    pub const WH_WORDS: &'static [TokenType] = &[
        TokenType::Who,
        TokenType::What,
        TokenType::Where,
        TokenType::When,
        TokenType::Why,
    ];

    pub const MODALS: &'static [TokenType] = &[
        TokenType::Must,
        TokenType::Shall,
        TokenType::Should,
        TokenType::Can,
        TokenType::May,
        TokenType::Cannot,
        TokenType::Would,
        TokenType::Could,
        TokenType::Might,
    ];
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn span_new_stores_positions() {
        let span = Span::new(5, 10);
        assert_eq!(span.start, 5);
        assert_eq!(span.end, 10);
    }

    #[test]
    fn span_default_is_zero() {
        let span = Span::default();
        assert_eq!(span.start, 0);
        assert_eq!(span.end, 0);
    }

    #[test]
    fn token_has_span_field() {
        use logicaffeine_base::Interner;
        let mut interner = Interner::new();
        let lexeme = interner.intern("test");
        let token = Token::new(TokenType::Noun(lexeme), lexeme, Span::new(0, 4));
        assert_eq!(token.span.start, 0);
        assert_eq!(token.span.end, 4);
    }

    #[test]
    fn wh_words_contains_all_wh_tokens() {
        assert_eq!(TokenType::WH_WORDS.len(), 5);
        assert!(TokenType::WH_WORDS.contains(&TokenType::Who));
        assert!(TokenType::WH_WORDS.contains(&TokenType::What));
        assert!(TokenType::WH_WORDS.contains(&TokenType::Where));
        assert!(TokenType::WH_WORDS.contains(&TokenType::When));
        assert!(TokenType::WH_WORDS.contains(&TokenType::Why));
    }

    #[test]
    fn modals_contains_all_modal_tokens() {
        assert_eq!(TokenType::MODALS.len(), 9);
        assert!(TokenType::MODALS.contains(&TokenType::Must));
        assert!(TokenType::MODALS.contains(&TokenType::Shall));
        assert!(TokenType::MODALS.contains(&TokenType::Should));
        assert!(TokenType::MODALS.contains(&TokenType::Can));
        assert!(TokenType::MODALS.contains(&TokenType::May));
        assert!(TokenType::MODALS.contains(&TokenType::Cannot));
        assert!(TokenType::MODALS.contains(&TokenType::Would));
        assert!(TokenType::MODALS.contains(&TokenType::Could));
        assert!(TokenType::MODALS.contains(&TokenType::Might));
    }
628}