// patch_prolog_frontend/tokenizer/token.rs

//! Token kinds and the `Token` value (kind + source line/col + byte span).
//!
//! Ported from patch-prolog's `tokenizer.rs`, minus the serde derives —
//! tokens never cross a serialization boundary in the compiler.

/// Token types for Edinburgh Prolog.
///
/// Every variant is one lexical category. The first four carry the payload
/// read from the source text; all remaining variants are fixed lexemes.
/// Only `PartialEq` is derived (not `Eq`) because `Float` holds an `f64`.
#[derive(Debug, Clone, PartialEq)]
pub enum TokenKind {
    // Identifiers and numbers
    /// Atom: lowercase-starting or single-quoted.
    Atom(String),
    /// Variable: uppercase-starting or `_`.
    Variable(String),
    /// Integer literal.
    Integer(i64),
    /// Floating-point literal.
    Float(f64),

    // Operators
    /// `:-`
    Neck,
    /// `?-`
    QueryOp,
    /// `=`
    Equals,
    /// `\=`
    NotEquals,
    /// `==`
    TermEq,
    /// `\==`
    TermNeq,
    /// `is`
    Is,
    /// `<`
    Lt,
    /// `>`
    Gt,
    /// `=<`
    Lte,
    /// `>=`
    Gte,
    /// `=:=`
    ArithEq,
    /// `=\=`
    ArithNeq,
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Star,
    /// `/`
    Slash,
    /// `//`
    IntDiv,
    /// `mod`
    Mod,
    /// `rem`
    Rem,
    /// `\+`
    Not,
    /// `\` (prefix bitwise complement; #28)
    Backslash,
    /// `!`
    Cut,
    /// `->`
    Arrow,
    /// `;`
    Semicolon,

    // Issue #29: missing standard operators
    /// `**` (xfx 200, float power)
    Pow,
    /// `^` (xfy 200, int power)
    Caret,
    /// `:` (xfy 200, module qualifier — parser only)
    Colon,
    /// `<<` (yfx 400)
    ShiftLeft,
    /// `>>` (yfx 400)
    ShiftRight,
    /// `div` (yfx 400, floor division)
    Div,
    /// `/\` (yfx 500)
    BitAnd,
    /// `\/` (yfx 500)
    BitOr,
    /// `xor` (yfx 500)
    Xor,

    // Punctuation
    /// `.`
    Dot,
    /// `,`
    Comma,
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `[`
    LBracket,
    /// `]`
    RBracket,
    /// `|`
    Pipe,

    /// End of input.
    Eof,
}

65impl std::fmt::Display for TokenKind {
66    /// Surface lexeme for error messages (issue #20). Symbolic operators and
67    /// punctuation are backticked so they stand out from prose; named tokens
68    /// (atoms, variables, numbers) are written as the user would have typed
69    /// them; EOF is described in words.
70    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
71        match self {
72            TokenKind::Atom(s) => write!(f, "atom `{s}`"),
73            TokenKind::Variable(s) => write!(f, "variable `{s}`"),
74            TokenKind::Integer(n) => write!(f, "integer `{n}`"),
75            TokenKind::Float(x) => write!(f, "float `{x}`"),
76            TokenKind::Neck => f.write_str("`:-`"),
77            TokenKind::QueryOp => f.write_str("`?-`"),
78            TokenKind::Equals => f.write_str("`=`"),
79            TokenKind::NotEquals => f.write_str("`\\=`"),
80            TokenKind::TermEq => f.write_str("`==`"),
81            TokenKind::TermNeq => f.write_str("`\\==`"),
82            TokenKind::Is => f.write_str("`is`"),
83            TokenKind::Lt => f.write_str("`<`"),
84            TokenKind::Gt => f.write_str("`>`"),
85            TokenKind::Lte => f.write_str("`=<`"),
86            TokenKind::Gte => f.write_str("`>=`"),
87            TokenKind::ArithEq => f.write_str("`=:=`"),
88            TokenKind::ArithNeq => f.write_str("`=\\=`"),
89            TokenKind::Plus => f.write_str("`+`"),
90            TokenKind::Minus => f.write_str("`-`"),
91            TokenKind::Star => f.write_str("`*`"),
92            TokenKind::Slash => f.write_str("`/`"),
93            TokenKind::IntDiv => f.write_str("`//`"),
94            TokenKind::Mod => f.write_str("`mod`"),
95            TokenKind::Rem => f.write_str("`rem`"),
96            TokenKind::Not => f.write_str("`\\+`"),
97            TokenKind::Backslash => f.write_str("`\\`"),
98            TokenKind::Pow => f.write_str("`**`"),
99            TokenKind::Caret => f.write_str("`^`"),
100            TokenKind::Colon => f.write_str("`:`"),
101            TokenKind::ShiftLeft => f.write_str("`<<`"),
102            TokenKind::ShiftRight => f.write_str("`>>`"),
103            TokenKind::Div => f.write_str("`div`"),
104            TokenKind::BitAnd => f.write_str("`/\\`"),
105            TokenKind::BitOr => f.write_str("`\\/`"),
106            TokenKind::Xor => f.write_str("`xor`"),
107            TokenKind::Cut => f.write_str("`!`"),
108            TokenKind::Arrow => f.write_str("`->`"),
109            TokenKind::Semicolon => f.write_str("`;`"),
110            TokenKind::Dot => f.write_str("`.`"),
111            TokenKind::Comma => f.write_str("`,`"),
112            TokenKind::LParen => f.write_str("`(`"),
113            TokenKind::RParen => f.write_str("`)`"),
114            TokenKind::LBracket => f.write_str("`[`"),
115            TokenKind::RBracket => f.write_str("`]`"),
116            TokenKind::Pipe => f.write_str("`|`"),
117            TokenKind::Eof => f.write_str("end of input"),
118        }
119    }
120}
121
122#[derive(Debug, Clone, PartialEq)]
123pub struct Token {
124    pub kind: TokenKind,
125    pub line: usize,
126    pub col: usize,
127    /// Byte offset of the token's first byte into the source.
128    pub lo: u32,
129    /// Byte offset one past the token's last byte.
130    pub hi: u32,
131}
132
133impl Token {
134    /// Construct with `lo`/`hi` left as `0`; `Tokenizer::next_token` stamps
135    /// the real byte offsets after the token is read (a single point, so the
136    /// per-kind helpers don't each need to track offsets).
137    pub fn new(kind: TokenKind, line: usize, col: usize) -> Self {
138        Token {
139            kind,
140            line,
141            col,
142            lo: 0,
143            hi: 0,
144        }
145    }
146}