Skip to main content

harn_lexer/
token.rs

1use std::fmt;
2
/// A segment of an interpolated string.
///
/// An interpolated string token (`TokenKind::InterpolatedString`) carries a
/// sequence of these segments.
///
/// Every payload is a `String` or `usize`, so in addition to `PartialEq` the
/// type derives `Eq` and `Hash`, which lets segments be used in hashed
/// collections and satisfies `Eq` bounds.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum StringSegment {
    /// Literal text of the string.
    Literal(String),
    /// An interpolated expression with its source position (line, column).
    Expression(String, usize, usize),
}
10
11impl fmt::Display for StringSegment {
12    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
13        match self {
14            StringSegment::Literal(s) => write!(f, "{s}"),
15            StringSegment::Expression(e, _, _) => write!(f, "${{{e}}}"),
16        }
17    }
18}
19
/// Source location for error reporting.
///
/// A span records a half-open byte range (`start..end`) together with the
/// line/column of its first byte and the line of its end position.
///
/// The type is a small bundle of `usize` values, so it is `Copy`, and it
/// derives `Hash` alongside `Eq` so spans can key hashed collections
/// (for example when de-duplicating diagnostics by location).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Span {
    /// Byte offset from start of source (inclusive).
    pub start: usize,
    /// Byte offset from start of source (exclusive).
    pub end: usize,
    /// 1-based line number of start position.
    pub line: usize,
    /// 1-based column number of start position.
    pub column: usize,
    /// 1-based line number of end position (for multiline span detection).
    pub end_line: usize,
}
34
35impl Span {
36    pub fn with_offsets(start: usize, end: usize, line: usize, column: usize) -> Self {
37        Self {
38            start,
39            end,
40            line,
41            column,
42            end_line: line,
43        }
44    }
45
46    /// Create a span covering two spans (from start of `a` to end of `b`).
47    pub fn merge(a: Span, b: Span) -> Span {
48        Span {
49            start: a.start,
50            end: b.end,
51            line: a.line,
52            column: a.column,
53            end_line: b.end_line,
54        }
55    }
56
57    /// A dummy span for synthetic/generated nodes.
58    pub fn dummy() -> Self {
59        Self {
60            start: 0,
61            end: 0,
62            line: 0,
63            column: 0,
64            end_line: 0,
65        }
66    }
67}
68
69impl fmt::Display for Span {
70    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
71        write!(f, "{}:{}", self.line, self.column)
72    }
73}
74
75/// A machine-applicable text replacement for autofixing diagnostics.
76#[derive(Debug, Clone)]
77pub struct FixEdit {
78    /// The source span to replace.
79    pub span: Span,
80    /// The replacement text (empty string = deletion).
81    pub replacement: String,
82}
83
/// Canonical list of Harn language keywords.
///
/// This is the single source of truth for keyword tokens. The lexer's
/// identifier-to-keyword match in `lexer.rs` must stay in sync; the unit test
/// `test_keywords_const_covers_lexer` verifies parity between the two.
///
/// Tooling that needs the keyword set (syntax highlighters, the LSP, etc.)
/// should read `KEYWORDS` rather than hard-coding a duplicate list.
///
/// Each entry is the source spelling of one keyword variant of `TokenKind`
/// (for example `"type"` corresponds to `TokenKind::TypeKw`). The entries are
/// currently listed in alphabetical order.
// NOTE(review): nothing visible here relies on the ordering — confirm before
// reordering or depending on it (e.g. for binary search).
pub const KEYWORDS: &[&str] = &[
    "ask",
    "break",
    "catch",
    "continue",
    "deadline",
    "else",
    "enum",
    "extends",
    "false",
    "finally",
    "fn",
    "for",
    "from",
    "guard",
    "if",
    "impl",
    "import",
    "in",
    "interface",
    "let",
    "match",
    "mutex",
    "nil",
    "override",
    "parallel",
    "parallel_map",
    "parallel_settle",
    "pipeline",
    "pub",
    "require",
    "retry",
    "return",
    "select",
    "spawn",
    "struct",
    "throw",
    "thru",
    "tool",
    "true",
    "try",
    "type",
    "upto",
    "var",
    "while",
    "yield",
];
139
/// Token kinds produced by the lexer.
///
/// Variants are grouped as: keywords, literals, two-character operators,
/// single-character operators, delimiters, comments, and special markers.
/// Keyword variants are unit variants; literal and comment variants carry
/// their payload.
///
/// Only `PartialEq` is derived (not `Eq`) because `FloatLiteral` holds an
/// `f64`.
#[derive(Debug, Clone, PartialEq)]
pub enum TokenKind {
    // Keywords
    // One unit variant per entry in `KEYWORDS`.
    Pipeline,
    Extends,
    Override,
    Let,
    Var,
    If,
    Else,
    For,
    In,
    Match,
    Retry,
    Parallel,
    ParallelMap,
    ParallelSettle,
    Return,
    Import,
    True,
    False,
    Nil,
    Try,
    Catch,
    Throw,
    Finally,
    Fn,
    Spawn,
    While,
    /// The `type` keyword.
    TypeKw,
    Enum,
    Struct,
    Interface,
    Pub,
    From,
    Thru,
    Tool,
    Upto,
    Guard,
    Require,
    Ask,
    Deadline,
    Yield,
    Mutex,
    Break,
    Continue,
    Select,
    Impl,

    // Literals
    /// An identifier, carrying its text.
    Identifier(String),
    /// A string literal, carrying its text.
    StringLiteral(String),
    /// A string made of literal and interpolated-expression segments.
    InterpolatedString(Vec<StringSegment>),
    /// Raw string literal `r"..."` — no escape processing, no interpolation.
    RawStringLiteral(String),
    /// An integer literal.
    IntLiteral(i64),
    /// A floating-point literal.
    FloatLiteral(f64),
    /// Duration literal in milliseconds: 500ms, 5s, 30m, 2h
    DurationLiteral(u64),

    // Two-character operators
    Eq,            // ==
    Neq,           // !=
    And,           // &&
    Or,            // ||
    Pipe,          // |>
    NilCoal,       // ??
    QuestionDot,   // ?.
    Arrow,         // ->
    Lte,           // <=
    Gte,           // >=
    PlusAssign,    // +=
    MinusAssign,   // -=
    StarAssign,    // *=
    SlashAssign,   // /=
    PercentAssign, // %=

    // Single-character operators
    Assign,   // =
    Not,      // !
    Dot,      // .
    Plus,     // +
    Minus,    // -
    Star,     // *
    Slash,    // /
    Percent,  // %
    Lt,       // <
    Gt,       // >
    Question, // ?
    Bar,      // |  (for union types)

    // Delimiters
    LBrace,    // {
    RBrace,    // }
    LParen,    // (
    RParen,    // )
    LBracket,  // [
    RBracket,  // ]
    Comma,     // ,
    Colon,     // :
    Semicolon, // ;

    // Comments
    LineComment(String),  // // text
    BlockComment(String), // /* text */

    // Special
    /// A line break in the source.
    Newline,
    /// End of input.
    Eof,
}
251
252impl fmt::Display for TokenKind {
253    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
254        match self {
255            TokenKind::Pipeline => write!(f, "pipeline"),
256            TokenKind::Extends => write!(f, "extends"),
257            TokenKind::Override => write!(f, "override"),
258            TokenKind::Let => write!(f, "let"),
259            TokenKind::Var => write!(f, "var"),
260            TokenKind::If => write!(f, "if"),
261            TokenKind::Else => write!(f, "else"),
262            TokenKind::For => write!(f, "for"),
263            TokenKind::In => write!(f, "in"),
264            TokenKind::Match => write!(f, "match"),
265            TokenKind::Retry => write!(f, "retry"),
266            TokenKind::Parallel => write!(f, "parallel"),
267            TokenKind::ParallelMap => write!(f, "parallel_map"),
268            TokenKind::ParallelSettle => write!(f, "parallel_settle"),
269            TokenKind::Return => write!(f, "return"),
270            TokenKind::Import => write!(f, "import"),
271            TokenKind::True => write!(f, "true"),
272            TokenKind::False => write!(f, "false"),
273            TokenKind::Nil => write!(f, "nil"),
274            TokenKind::Try => write!(f, "try"),
275            TokenKind::Catch => write!(f, "catch"),
276            TokenKind::Throw => write!(f, "throw"),
277            TokenKind::Finally => write!(f, "finally"),
278            TokenKind::Fn => write!(f, "fn"),
279            TokenKind::Spawn => write!(f, "spawn"),
280            TokenKind::While => write!(f, "while"),
281            TokenKind::TypeKw => write!(f, "type"),
282            TokenKind::Enum => write!(f, "enum"),
283            TokenKind::Struct => write!(f, "struct"),
284            TokenKind::Interface => write!(f, "interface"),
285            TokenKind::Pub => write!(f, "pub"),
286            TokenKind::From => write!(f, "from"),
287            TokenKind::Thru => write!(f, "thru"),
288            TokenKind::Tool => write!(f, "tool"),
289            TokenKind::Upto => write!(f, "upto"),
290            TokenKind::Guard => write!(f, "guard"),
291            TokenKind::Require => write!(f, "require"),
292            TokenKind::Ask => write!(f, "ask"),
293            TokenKind::Deadline => write!(f, "deadline"),
294            TokenKind::Yield => write!(f, "yield"),
295            TokenKind::Mutex => write!(f, "mutex"),
296            TokenKind::Break => write!(f, "break"),
297            TokenKind::Continue => write!(f, "continue"),
298            TokenKind::Select => write!(f, "select"),
299            TokenKind::Impl => write!(f, "impl"),
300            TokenKind::Identifier(s) => write!(f, "id({s})"),
301            TokenKind::StringLiteral(s) => write!(f, "str({s})"),
302            TokenKind::InterpolatedString(_) => write!(f, "istr(...)"),
303            TokenKind::RawStringLiteral(s) => write!(f, "rstr({s})"),
304            TokenKind::IntLiteral(n) => write!(f, "int({n})"),
305            TokenKind::FloatLiteral(n) => write!(f, "float({n})"),
306            TokenKind::DurationLiteral(ms) => write!(f, "duration({ms}ms)"),
307            TokenKind::Eq => write!(f, "=="),
308            TokenKind::Neq => write!(f, "!="),
309            TokenKind::And => write!(f, "&&"),
310            TokenKind::Or => write!(f, "||"),
311            TokenKind::Pipe => write!(f, "|>"),
312            TokenKind::NilCoal => write!(f, "??"),
313            TokenKind::QuestionDot => write!(f, "?."),
314            TokenKind::Arrow => write!(f, "->"),
315            TokenKind::Lte => write!(f, "<="),
316            TokenKind::Gte => write!(f, ">="),
317            TokenKind::PlusAssign => write!(f, "+="),
318            TokenKind::MinusAssign => write!(f, "-="),
319            TokenKind::StarAssign => write!(f, "*="),
320            TokenKind::SlashAssign => write!(f, "/="),
321            TokenKind::PercentAssign => write!(f, "%="),
322            TokenKind::Assign => write!(f, "="),
323            TokenKind::Not => write!(f, "!"),
324            TokenKind::Dot => write!(f, "."),
325            TokenKind::Plus => write!(f, "+"),
326            TokenKind::Minus => write!(f, "-"),
327            TokenKind::Star => write!(f, "*"),
328            TokenKind::Slash => write!(f, "/"),
329            TokenKind::Percent => write!(f, "%"),
330            TokenKind::Lt => write!(f, "<"),
331            TokenKind::Gt => write!(f, ">"),
332            TokenKind::Question => write!(f, "?"),
333            TokenKind::Bar => write!(f, "|"),
334            TokenKind::LBrace => write!(f, "{{"),
335            TokenKind::RBrace => write!(f, "}}"),
336            TokenKind::LParen => write!(f, "("),
337            TokenKind::RParen => write!(f, ")"),
338            TokenKind::LBracket => write!(f, "["),
339            TokenKind::RBracket => write!(f, "]"),
340            TokenKind::Comma => write!(f, ","),
341            TokenKind::Colon => write!(f, ":"),
342            TokenKind::Semicolon => write!(f, ";"),
343            TokenKind::LineComment(s) => write!(f, "// {s}"),
344            TokenKind::BlockComment(s) => write!(f, "/* {s} */"),
345            TokenKind::Newline => write!(f, "\\n"),
346            TokenKind::Eof => write!(f, "EOF"),
347        }
348    }
349}
350
/// A token with its kind and source location.
///
/// Derives `PartialEq`, so two tokens compare equal only when both their
/// kind (including any payload) and their span match.
#[derive(Debug, Clone, PartialEq)]
pub struct Token {
    /// What the token is, including any payload (identifier text, literal value, ...).
    pub kind: TokenKind,
    /// Where the token is located in the source.
    pub span: Span,
}
357
358impl Token {
359    pub fn with_span(kind: TokenKind, span: Span) -> Self {
360        Self { kind, span }
361    }
362}