Skip to main content

harn_lexer/
token.rs

1use std::fmt;
2
/// One piece of an interpolated string.
///
/// An interpolated string is represented as a sequence of these segments
/// (see `TokenKind::InterpolatedString`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum StringSegment {
    /// Plain text that is emitted as-is.
    Literal(String),
    /// An interpolated expression: its source text, followed by the source
    /// position of the expression as `(line, column)`.
    Expression(String, usize, usize),
}
10
11impl fmt::Display for StringSegment {
12    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
13        match self {
14            StringSegment::Literal(s) => write!(f, "{s}"),
15            StringSegment::Expression(e, _, _) => write!(f, "${{{e}}}"),
16        }
17    }
18}
19
/// A region of source text, used to point diagnostics at the right place.
///
/// The byte range `start..end` is half-open. `line`/`column` describe where
/// the region begins; `end_line` records where it finishes so multi-line
/// spans can be detected.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Span {
    /// Byte offset from the start of the source where the span begins (inclusive).
    pub start: usize,
    /// Byte offset from the start of the source where the span ends (exclusive).
    pub end: usize,
    /// 1-based line number of the start position.
    pub line: usize,
    /// 1-based column number of the start position.
    pub column: usize,
    /// 1-based line number of the end position (for multiline span detection).
    pub end_line: usize,
}
34
35impl Span {
36    pub fn with_offsets(start: usize, end: usize, line: usize, column: usize) -> Self {
37        Self {
38            start,
39            end,
40            line,
41            column,
42            end_line: line,
43        }
44    }
45
46    /// Create a span covering two spans (from start of `a` to end of `b`).
47    pub fn merge(a: Span, b: Span) -> Span {
48        Span {
49            start: a.start,
50            end: b.end,
51            line: a.line,
52            column: a.column,
53            end_line: b.end_line,
54        }
55    }
56
57    /// A dummy span for synthetic/generated nodes.
58    pub fn dummy() -> Self {
59        Self {
60            start: 0,
61            end: 0,
62            line: 0,
63            column: 0,
64            end_line: 0,
65        }
66    }
67}
68
69impl fmt::Display for Span {
70    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
71        write!(f, "{}:{}", self.line, self.column)
72    }
73}
74
75/// A machine-applicable text replacement for autofixing diagnostics.
76#[derive(Debug, Clone)]
77pub struct FixEdit {
78    /// The source span to replace.
79    pub span: Span,
80    /// The replacement text (empty string = deletion).
81    pub replacement: String,
82}
83
/// Canonical list of Harn language keywords, in alphabetical order.
///
/// This is the single source of truth for keyword tokens. The lexer's
/// identifier-to-keyword match in `lexer.rs` must stay in sync with it; the
/// unit test `test_keywords_const_covers_lexer` verifies parity between the two.
///
/// Tooling that needs the keyword set (syntax highlighters, the LSP, etc.)
/// should read `KEYWORDS` rather than hard-coding a duplicate list.
pub const KEYWORDS: &[&str] = &[
    "break",
    "catch",
    "continue",
    "deadline",
    "defer",
    "else",
    "enum",
    "extends",
    "false",
    "finally",
    "fn",
    "for",
    "from",
    "guard",
    "if",
    "impl",
    "import",
    "in",
    "interface",
    "let",
    "match",
    "mutex",
    "nil",
    "override",
    "parallel",
    "pipeline",
    "pub",
    "require",
    "retry",
    "return",
    "select",
    "spawn",
    "struct",
    "throw",
    "thru",
    "tool",
    "true",
    "try",
    "type",
    "upto",
    "var",
    "while",
    "yield",
];
137
138/// Token kinds produced by the lexer.
139#[derive(Debug, Clone, PartialEq)]
140pub enum TokenKind {
141    // Keywords
142    Pipeline,
143    Extends,
144    Override,
145    Let,
146    Var,
147    If,
148    Else,
149    For,
150    In,
151    Match,
152    Retry,
153    Parallel,
154    Return,
155    Import,
156    True,
157    False,
158    Nil,
159    Try,
160    Catch,
161    Throw,
162    Finally,
163    Fn,
164    Spawn,
165    While,
166    TypeKw,
167    Enum,
168    Struct,
169    Interface,
170    Pub,
171    From,
172    Thru,
173    Tool,
174    Upto,
175    Guard,
176    Require,
177    Deadline,
178    Defer,
179    Yield,
180    Mutex,
181    Break,
182    Continue,
183    Select,
184    Impl,
185
186    // Literals
187    Identifier(String),
188    StringLiteral(String),
189    InterpolatedString(Vec<StringSegment>),
190    /// Raw string literal `r"..."` — no escape processing, no interpolation.
191    RawStringLiteral(String),
192    IntLiteral(i64),
193    FloatLiteral(f64),
194    /// Duration literal in milliseconds: 500ms, 5s, 30m, 2h, 1d, 1w
195    DurationLiteral(u64),
196
197    // Two-character operators
198    Eq,            // ==
199    Neq,           // !=
200    And,           // &&
201    Or,            // ||
202    Pipe,          // |>
203    NilCoal,       // ??
204    QuestionDot,   // ?.
205    Arrow,         // ->
206    Lte,           // <=
207    Gte,           // >=
208    PlusAssign,    // +=
209    MinusAssign,   // -=
210    StarAssign,    // *=
211    SlashAssign,   // /=
212    PercentAssign, // %=
213
214    // Single-character operators
215    Assign,   // =
216    Not,      // !
217    Dot,      // .
218    Plus,     // +
219    Minus,    // -
220    Star,     // *
221    Slash,    // /
222    Percent,  // %
223    Lt,       // <
224    Gt,       // >
225    Question, // ?
226    Bar,      // |  (for union types)
227
228    // Delimiters
229    LBrace,    // {
230    RBrace,    // }
231    LParen,    // (
232    RParen,    // )
233    LBracket,  // [
234    RBracket,  // ]
235    Comma,     // ,
236    Colon,     // :
237    Semicolon, // ;
238
239    // Comments
240    LineComment(String),  // // text
241    BlockComment(String), // /* text */
242
243    // Special
244    Newline,
245    Eof,
246}
247
248impl fmt::Display for TokenKind {
249    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
250        match self {
251            TokenKind::Pipeline => write!(f, "pipeline"),
252            TokenKind::Extends => write!(f, "extends"),
253            TokenKind::Override => write!(f, "override"),
254            TokenKind::Let => write!(f, "let"),
255            TokenKind::Var => write!(f, "var"),
256            TokenKind::If => write!(f, "if"),
257            TokenKind::Else => write!(f, "else"),
258            TokenKind::For => write!(f, "for"),
259            TokenKind::In => write!(f, "in"),
260            TokenKind::Match => write!(f, "match"),
261            TokenKind::Retry => write!(f, "retry"),
262            TokenKind::Parallel => write!(f, "parallel"),
263            TokenKind::Return => write!(f, "return"),
264            TokenKind::Import => write!(f, "import"),
265            TokenKind::True => write!(f, "true"),
266            TokenKind::False => write!(f, "false"),
267            TokenKind::Nil => write!(f, "nil"),
268            TokenKind::Try => write!(f, "try"),
269            TokenKind::Catch => write!(f, "catch"),
270            TokenKind::Throw => write!(f, "throw"),
271            TokenKind::Finally => write!(f, "finally"),
272            TokenKind::Fn => write!(f, "fn"),
273            TokenKind::Spawn => write!(f, "spawn"),
274            TokenKind::While => write!(f, "while"),
275            TokenKind::TypeKw => write!(f, "type"),
276            TokenKind::Enum => write!(f, "enum"),
277            TokenKind::Struct => write!(f, "struct"),
278            TokenKind::Interface => write!(f, "interface"),
279            TokenKind::Pub => write!(f, "pub"),
280            TokenKind::From => write!(f, "from"),
281            TokenKind::Thru => write!(f, "thru"),
282            TokenKind::Tool => write!(f, "tool"),
283            TokenKind::Upto => write!(f, "upto"),
284            TokenKind::Guard => write!(f, "guard"),
285            TokenKind::Require => write!(f, "require"),
286            TokenKind::Deadline => write!(f, "deadline"),
287            TokenKind::Defer => write!(f, "defer"),
288            TokenKind::Yield => write!(f, "yield"),
289            TokenKind::Mutex => write!(f, "mutex"),
290            TokenKind::Break => write!(f, "break"),
291            TokenKind::Continue => write!(f, "continue"),
292            TokenKind::Select => write!(f, "select"),
293            TokenKind::Impl => write!(f, "impl"),
294            TokenKind::Identifier(s) => write!(f, "id({s})"),
295            TokenKind::StringLiteral(s) => write!(f, "str({s})"),
296            TokenKind::InterpolatedString(_) => write!(f, "istr(...)"),
297            TokenKind::RawStringLiteral(s) => write!(f, "rstr({s})"),
298            TokenKind::IntLiteral(n) => write!(f, "int({n})"),
299            TokenKind::FloatLiteral(n) => write!(f, "float({n})"),
300            TokenKind::DurationLiteral(ms) => write!(f, "duration({ms}ms)"),
301            TokenKind::Eq => write!(f, "=="),
302            TokenKind::Neq => write!(f, "!="),
303            TokenKind::And => write!(f, "&&"),
304            TokenKind::Or => write!(f, "||"),
305            TokenKind::Pipe => write!(f, "|>"),
306            TokenKind::NilCoal => write!(f, "??"),
307            TokenKind::QuestionDot => write!(f, "?."),
308            TokenKind::Arrow => write!(f, "->"),
309            TokenKind::Lte => write!(f, "<="),
310            TokenKind::Gte => write!(f, ">="),
311            TokenKind::PlusAssign => write!(f, "+="),
312            TokenKind::MinusAssign => write!(f, "-="),
313            TokenKind::StarAssign => write!(f, "*="),
314            TokenKind::SlashAssign => write!(f, "/="),
315            TokenKind::PercentAssign => write!(f, "%="),
316            TokenKind::Assign => write!(f, "="),
317            TokenKind::Not => write!(f, "!"),
318            TokenKind::Dot => write!(f, "."),
319            TokenKind::Plus => write!(f, "+"),
320            TokenKind::Minus => write!(f, "-"),
321            TokenKind::Star => write!(f, "*"),
322            TokenKind::Slash => write!(f, "/"),
323            TokenKind::Percent => write!(f, "%"),
324            TokenKind::Lt => write!(f, "<"),
325            TokenKind::Gt => write!(f, ">"),
326            TokenKind::Question => write!(f, "?"),
327            TokenKind::Bar => write!(f, "|"),
328            TokenKind::LBrace => write!(f, "{{"),
329            TokenKind::RBrace => write!(f, "}}"),
330            TokenKind::LParen => write!(f, "("),
331            TokenKind::RParen => write!(f, ")"),
332            TokenKind::LBracket => write!(f, "["),
333            TokenKind::RBracket => write!(f, "]"),
334            TokenKind::Comma => write!(f, ","),
335            TokenKind::Colon => write!(f, ":"),
336            TokenKind::Semicolon => write!(f, ";"),
337            TokenKind::LineComment(s) => write!(f, "// {s}"),
338            TokenKind::BlockComment(s) => write!(f, "/* {s} */"),
339            TokenKind::Newline => write!(f, "\\n"),
340            TokenKind::Eof => write!(f, "EOF"),
341        }
342    }
343}
344
345/// A token with its kind and source location.
346#[derive(Debug, Clone, PartialEq)]
347pub struct Token {
348    pub kind: TokenKind,
349    pub span: Span,
350}
351
352impl Token {
353    pub fn with_span(kind: TokenKind, span: Span) -> Self {
354        Self { kind, span }
355    }
356}