// harn_lexer/token.rs

1use std::fmt;
2
/// One piece of an interpolated string.
#[derive(Debug, Clone, PartialEq)]
pub enum StringSegment {
    /// Literal text carried as-is.
    Literal(String),
    /// An interpolated expression: its text, then the source position of the
    /// expression as (line, column).
    Expression(String, usize, usize),
}

impl fmt::Display for StringSegment {
    /// Writes literal text unchanged and wraps expression text as `${...}`.
    /// The position stored in `Expression` is not part of the output.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Literal(text) => f.write_str(text),
            Self::Expression(source, _line, _column) => {
                f.write_str("${")?;
                f.write_str(source)?;
                f.write_str("}")
            }
        }
    }
}
19
/// Location of a region of source text, used for error reporting.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Span {
    /// Inclusive byte offset of the first byte, measured from the start of the source.
    pub start: usize,
    /// Exclusive byte offset of the end, measured from the start of the source.
    pub end: usize,
    /// Line number (1-based) where the span starts.
    pub line: usize,
    /// Column number (1-based) where the span starts.
    pub column: usize,
    /// Line number (1-based) where the span ends; lets callers detect multiline spans.
    pub end_line: usize,
}

impl Span {
    /// Builds a span from byte offsets and a start position.
    ///
    /// `end_line` is set equal to `line`, i.e. the span is recorded as ending
    /// on the line it starts on.
    pub fn with_offsets(start: usize, end: usize, line: usize, column: usize) -> Self {
        let end_line = line;
        Self {
            start,
            end,
            line,
            column,
            end_line,
        }
    }

    /// Returns a span running from the start of `a` to the end of `b`.
    ///
    /// The start offset, line and column come from `a`; the end offset and
    /// end line come from `b`.
    pub fn merge(a: Span, b: Span) -> Span {
        Span {
            end: b.end,
            end_line: b.end_line,
            ..a
        }
    }

    /// An all-zero placeholder span for synthetic/generated nodes.
    pub fn dummy() -> Self {
        Self::with_offsets(0, 0, 0, 0)
    }
}

impl fmt::Display for Span {
    /// Formats the start position as `line:column`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Span { line, column, .. } = *self;
        write!(f, "{line}:{column}")
    }
}
74
/// The canonical set of Harn language keywords.
///
/// Treat this constant as the single source of truth for keyword tokens.
/// The identifier-to-keyword match in `lexer.rs` has to be kept in sync with
/// it; the unit test `test_keywords_const_covers_lexer` checks that the two
/// agree.
///
/// Tools that need the keyword set (syntax highlighters, the LSP, etc.)
/// should read `KEYWORDS` instead of maintaining their own copy.
///
/// Entries are listed in ascending alphabetical order, one row per initial
/// letter.
#[rustfmt::skip] // keep the row-per-initial-letter layout
pub const KEYWORDS: &[&str] = &[
    "ask",
    "break",
    "catch", "continue",
    "deadline",
    "else", "enum", "extends",
    "false", "finally", "fn", "for", "from",
    "guard",
    "if", "impl", "import", "in", "interface",
    "let",
    "match", "mutex",
    "nil",
    "override",
    "parallel", "parallel_map", "parallel_settle", "pipeline", "pub",
    "require", "retry", "return",
    "select", "spawn", "struct",
    "throw", "thru", "true", "try", "type",
    "upto",
    "var",
    "while",
    "yield",
];
129
130/// Token kinds produced by the lexer.
131#[derive(Debug, Clone, PartialEq)]
132pub enum TokenKind {
133    // Keywords
134    Pipeline,
135    Extends,
136    Override,
137    Let,
138    Var,
139    If,
140    Else,
141    For,
142    In,
143    Match,
144    Retry,
145    Parallel,
146    ParallelMap,
147    ParallelSettle,
148    Return,
149    Import,
150    True,
151    False,
152    Nil,
153    Try,
154    Catch,
155    Throw,
156    Finally,
157    Fn,
158    Spawn,
159    While,
160    TypeKw,
161    Enum,
162    Struct,
163    Interface,
164    Pub,
165    From,
166    Thru,
167    Upto,
168    Guard,
169    Require,
170    Ask,
171    Deadline,
172    Yield,
173    Mutex,
174    Break,
175    Continue,
176    Select,
177    Impl,
178
179    // Literals
180    Identifier(String),
181    StringLiteral(String),
182    InterpolatedString(Vec<StringSegment>),
183    IntLiteral(i64),
184    FloatLiteral(f64),
185    /// Duration literal in milliseconds: 500ms, 5s, 30m, 2h
186    DurationLiteral(u64),
187
188    // Two-character operators
189    Eq,            // ==
190    Neq,           // !=
191    And,           // &&
192    Or,            // ||
193    Pipe,          // |>
194    NilCoal,       // ??
195    QuestionDot,   // ?.
196    Arrow,         // ->
197    Lte,           // <=
198    Gte,           // >=
199    PlusAssign,    // +=
200    MinusAssign,   // -=
201    StarAssign,    // *=
202    SlashAssign,   // /=
203    PercentAssign, // %=
204
205    // Single-character operators
206    Assign,   // =
207    Not,      // !
208    Dot,      // .
209    Plus,     // +
210    Minus,    // -
211    Star,     // *
212    Slash,    // /
213    Percent,  // %
214    Lt,       // <
215    Gt,       // >
216    Question, // ?
217    Bar,      // |  (for union types)
218
219    // Delimiters
220    LBrace,    // {
221    RBrace,    // }
222    LParen,    // (
223    RParen,    // )
224    LBracket,  // [
225    RBracket,  // ]
226    Comma,     // ,
227    Colon,     // :
228    Semicolon, // ;
229
230    // Comments
231    LineComment(String),  // // text
232    BlockComment(String), // /* text */
233
234    // Special
235    Newline,
236    Eof,
237}
238
239impl fmt::Display for TokenKind {
240    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
241        match self {
242            TokenKind::Pipeline => write!(f, "pipeline"),
243            TokenKind::Extends => write!(f, "extends"),
244            TokenKind::Override => write!(f, "override"),
245            TokenKind::Let => write!(f, "let"),
246            TokenKind::Var => write!(f, "var"),
247            TokenKind::If => write!(f, "if"),
248            TokenKind::Else => write!(f, "else"),
249            TokenKind::For => write!(f, "for"),
250            TokenKind::In => write!(f, "in"),
251            TokenKind::Match => write!(f, "match"),
252            TokenKind::Retry => write!(f, "retry"),
253            TokenKind::Parallel => write!(f, "parallel"),
254            TokenKind::ParallelMap => write!(f, "parallel_map"),
255            TokenKind::ParallelSettle => write!(f, "parallel_settle"),
256            TokenKind::Return => write!(f, "return"),
257            TokenKind::Import => write!(f, "import"),
258            TokenKind::True => write!(f, "true"),
259            TokenKind::False => write!(f, "false"),
260            TokenKind::Nil => write!(f, "nil"),
261            TokenKind::Try => write!(f, "try"),
262            TokenKind::Catch => write!(f, "catch"),
263            TokenKind::Throw => write!(f, "throw"),
264            TokenKind::Finally => write!(f, "finally"),
265            TokenKind::Fn => write!(f, "fn"),
266            TokenKind::Spawn => write!(f, "spawn"),
267            TokenKind::While => write!(f, "while"),
268            TokenKind::TypeKw => write!(f, "type"),
269            TokenKind::Enum => write!(f, "enum"),
270            TokenKind::Struct => write!(f, "struct"),
271            TokenKind::Interface => write!(f, "interface"),
272            TokenKind::Pub => write!(f, "pub"),
273            TokenKind::From => write!(f, "from"),
274            TokenKind::Thru => write!(f, "thru"),
275            TokenKind::Upto => write!(f, "upto"),
276            TokenKind::Guard => write!(f, "guard"),
277            TokenKind::Require => write!(f, "require"),
278            TokenKind::Ask => write!(f, "ask"),
279            TokenKind::Deadline => write!(f, "deadline"),
280            TokenKind::Yield => write!(f, "yield"),
281            TokenKind::Mutex => write!(f, "mutex"),
282            TokenKind::Break => write!(f, "break"),
283            TokenKind::Continue => write!(f, "continue"),
284            TokenKind::Select => write!(f, "select"),
285            TokenKind::Impl => write!(f, "impl"),
286            TokenKind::Identifier(s) => write!(f, "id({s})"),
287            TokenKind::StringLiteral(s) => write!(f, "str({s})"),
288            TokenKind::InterpolatedString(_) => write!(f, "istr(...)"),
289            TokenKind::IntLiteral(n) => write!(f, "int({n})"),
290            TokenKind::FloatLiteral(n) => write!(f, "float({n})"),
291            TokenKind::DurationLiteral(ms) => write!(f, "duration({ms}ms)"),
292            TokenKind::Eq => write!(f, "=="),
293            TokenKind::Neq => write!(f, "!="),
294            TokenKind::And => write!(f, "&&"),
295            TokenKind::Or => write!(f, "||"),
296            TokenKind::Pipe => write!(f, "|>"),
297            TokenKind::NilCoal => write!(f, "??"),
298            TokenKind::QuestionDot => write!(f, "?."),
299            TokenKind::Arrow => write!(f, "->"),
300            TokenKind::Lte => write!(f, "<="),
301            TokenKind::Gte => write!(f, ">="),
302            TokenKind::PlusAssign => write!(f, "+="),
303            TokenKind::MinusAssign => write!(f, "-="),
304            TokenKind::StarAssign => write!(f, "*="),
305            TokenKind::SlashAssign => write!(f, "/="),
306            TokenKind::PercentAssign => write!(f, "%="),
307            TokenKind::Assign => write!(f, "="),
308            TokenKind::Not => write!(f, "!"),
309            TokenKind::Dot => write!(f, "."),
310            TokenKind::Plus => write!(f, "+"),
311            TokenKind::Minus => write!(f, "-"),
312            TokenKind::Star => write!(f, "*"),
313            TokenKind::Slash => write!(f, "/"),
314            TokenKind::Percent => write!(f, "%"),
315            TokenKind::Lt => write!(f, "<"),
316            TokenKind::Gt => write!(f, ">"),
317            TokenKind::Question => write!(f, "?"),
318            TokenKind::Bar => write!(f, "|"),
319            TokenKind::LBrace => write!(f, "{{"),
320            TokenKind::RBrace => write!(f, "}}"),
321            TokenKind::LParen => write!(f, "("),
322            TokenKind::RParen => write!(f, ")"),
323            TokenKind::LBracket => write!(f, "["),
324            TokenKind::RBracket => write!(f, "]"),
325            TokenKind::Comma => write!(f, ","),
326            TokenKind::Colon => write!(f, ":"),
327            TokenKind::Semicolon => write!(f, ";"),
328            TokenKind::LineComment(s) => write!(f, "// {s}"),
329            TokenKind::BlockComment(s) => write!(f, "/* {s} */"),
330            TokenKind::Newline => write!(f, "\\n"),
331            TokenKind::Eof => write!(f, "EOF"),
332        }
333    }
334}
335
336/// A token with its kind and source location.
337#[derive(Debug, Clone, PartialEq)]
338pub struct Token {
339    pub kind: TokenKind,
340    pub span: Span,
341}
342
343impl Token {
344    pub fn with_span(kind: TokenKind, span: Span) -> Self {
345        Self { kind, span }
346    }
347}