Skip to main content

harn_lexer/
token.rs

use std::fmt;
2
/// A segment of an interpolated string.
///
/// An interpolated string is represented as a sequence of these segments:
/// runs of literal text and embedded expressions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum StringSegment {
    /// A run of literal text.
    Literal(String),
    /// An interpolated expression with its source position (line, column).
    ///
    /// Fields, in order: the expression text, its line, its column.
    Expression(String, usize, usize),
}
10
11impl fmt::Display for StringSegment {
12    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
13        match self {
14            StringSegment::Literal(s) => write!(f, "{s}"),
15            StringSegment::Expression(e, _, _) => write!(f, "${{{e}}}"),
16        }
17    }
18}
19
/// Source location for error reporting.
///
/// `start..end` is a half-open byte range into the source text; `line`,
/// `column` and `end_line` are the human-readable coordinates. Real spans use
/// 1-based lines and columns, while `Span::dummy()` sets every field to 0.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Span {
    /// Byte offset from start of source (inclusive).
    pub start: usize,
    /// Byte offset from start of source (exclusive).
    pub end: usize,
    /// 1-based line number of start position.
    pub line: usize,
    /// 1-based column number of start position.
    pub column: usize,
    /// 1-based line number of end position (for multiline span detection).
    pub end_line: usize,
}
34
35impl Span {
36    pub fn with_offsets(start: usize, end: usize, line: usize, column: usize) -> Self {
37        Self {
38            start,
39            end,
40            line,
41            column,
42            end_line: line,
43        }
44    }
45
46    /// Create a span covering two spans (from start of `a` to end of `b`).
47    pub fn merge(a: Span, b: Span) -> Span {
48        Span {
49            start: a.start,
50            end: b.end,
51            line: a.line,
52            column: a.column,
53            end_line: b.end_line,
54        }
55    }
56
57    /// A dummy span for synthetic/generated nodes.
58    pub fn dummy() -> Self {
59        Self {
60            start: 0,
61            end: 0,
62            line: 0,
63            column: 0,
64            end_line: 0,
65        }
66    }
67}
68
69impl fmt::Display for Span {
70    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
71        write!(f, "{}:{}", self.line, self.column)
72    }
73}
74
75/// A machine-applicable text replacement for autofixing diagnostics.
76#[derive(Debug, Clone)]
77pub struct FixEdit {
78    /// The source span to replace.
79    pub span: Span,
80    /// The replacement text (empty string = deletion).
81    pub replacement: String,
82}
83
/// Canonical list of Harn language keywords. Single source of truth; the lexer's
/// identifier-to-keyword match must stay in sync (enforced by
/// `test_keywords_const_covers_lexer`). External tooling should consume this
/// rather than duplicate it.
///
/// Entries are listed in ascending alphabetical order, one per keyword
/// variant of `TokenKind`.
pub const KEYWORDS: &[&str] = &[
    "break",
    "catch",
    "continue",
    "deadline",
    "defer",
    "else",
    "enum",
    "exclusive",
    "extends",
    "false",
    "finally",
    "fn",
    "for",
    "from",
    "guard",
    "if",
    "impl",
    "import",
    "in",
    "interface",
    "let",
    "match",
    "mutex",
    "nil",
    "override",
    "parallel",
    "pipeline",
    "pub",
    "require",
    "retry",
    "return",
    "select",
    "spawn",
    "struct",
    "throw",
    "to",
    "tool",
    "true",
    "try",
    "type",
    "var",
    "while",
    "yield",
];
133
134/// Token kinds produced by the lexer.
135#[derive(Debug, Clone, PartialEq)]
136pub enum TokenKind {
137    Pipeline,
138    Extends,
139    Override,
140    Let,
141    Var,
142    If,
143    Else,
144    For,
145    In,
146    Match,
147    Retry,
148    Parallel,
149    Return,
150    Import,
151    True,
152    False,
153    Nil,
154    Try,
155    Catch,
156    Throw,
157    Finally,
158    Fn,
159    Spawn,
160    While,
161    TypeKw,
162    Enum,
163    Struct,
164    Interface,
165    Pub,
166    From,
167    To,
168    Tool,
169    Exclusive,
170    Guard,
171    Require,
172    Deadline,
173    Defer,
174    Yield,
175    Mutex,
176    Break,
177    Continue,
178    Select,
179    Impl,
180
181    Identifier(String),
182    StringLiteral(String),
183    InterpolatedString(Vec<StringSegment>),
184    /// Raw string literal `r"..."` — no escape processing, no interpolation.
185    RawStringLiteral(String),
186    IntLiteral(i64),
187    FloatLiteral(f64),
188    /// Duration literal in milliseconds: 500ms, 5s, 30m, 2h, 1d, 1w
189    DurationLiteral(u64),
190
191    Eq,            // ==
192    Neq,           // !=
193    And,           // &&
194    Or,            // ||
195    Pipe,          // |>
196    NilCoal,       // ??
197    Pow,           // **
198    QuestionDot,   // ?.
199    Arrow,         // ->
200    Lte,           // <=
201    Gte,           // >=
202    PlusAssign,    // +=
203    MinusAssign,   // -=
204    StarAssign,    // *=
205    SlashAssign,   // /=
206    PercentAssign, // %=
207
208    Assign,   // =
209    Not,      // !
210    Dot,      // .
211    Plus,     // +
212    Minus,    // -
213    Star,     // *
214    Slash,    // /
215    Percent,  // %
216    Lt,       // <
217    Gt,       // >
218    Question, // ?
219    Bar,      // |  (for union types)
220
221    LBrace,    // {
222    RBrace,    // }
223    LParen,    // (
224    RParen,    // )
225    LBracket,  // [
226    RBracket,  // ]
227    Comma,     // ,
228    Colon,     // :
229    Semicolon, // ;
230
231    LineComment {
232        text: String,
233        is_doc: bool,
234    }, // // text or /// text
235    BlockComment {
236        text: String,
237        is_doc: bool,
238    }, // /* text */ or /** text */
239
240    Newline,
241    Eof,
242}
243
244impl fmt::Display for TokenKind {
245    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
246        match self {
247            TokenKind::Pipeline => write!(f, "pipeline"),
248            TokenKind::Extends => write!(f, "extends"),
249            TokenKind::Override => write!(f, "override"),
250            TokenKind::Let => write!(f, "let"),
251            TokenKind::Var => write!(f, "var"),
252            TokenKind::If => write!(f, "if"),
253            TokenKind::Else => write!(f, "else"),
254            TokenKind::For => write!(f, "for"),
255            TokenKind::In => write!(f, "in"),
256            TokenKind::Match => write!(f, "match"),
257            TokenKind::Retry => write!(f, "retry"),
258            TokenKind::Parallel => write!(f, "parallel"),
259            TokenKind::Return => write!(f, "return"),
260            TokenKind::Import => write!(f, "import"),
261            TokenKind::True => write!(f, "true"),
262            TokenKind::False => write!(f, "false"),
263            TokenKind::Nil => write!(f, "nil"),
264            TokenKind::Try => write!(f, "try"),
265            TokenKind::Catch => write!(f, "catch"),
266            TokenKind::Throw => write!(f, "throw"),
267            TokenKind::Finally => write!(f, "finally"),
268            TokenKind::Fn => write!(f, "fn"),
269            TokenKind::Spawn => write!(f, "spawn"),
270            TokenKind::While => write!(f, "while"),
271            TokenKind::TypeKw => write!(f, "type"),
272            TokenKind::Enum => write!(f, "enum"),
273            TokenKind::Struct => write!(f, "struct"),
274            TokenKind::Interface => write!(f, "interface"),
275            TokenKind::Pub => write!(f, "pub"),
276            TokenKind::From => write!(f, "from"),
277            TokenKind::To => write!(f, "to"),
278            TokenKind::Tool => write!(f, "tool"),
279            TokenKind::Exclusive => write!(f, "exclusive"),
280            TokenKind::Guard => write!(f, "guard"),
281            TokenKind::Require => write!(f, "require"),
282            TokenKind::Deadline => write!(f, "deadline"),
283            TokenKind::Defer => write!(f, "defer"),
284            TokenKind::Yield => write!(f, "yield"),
285            TokenKind::Mutex => write!(f, "mutex"),
286            TokenKind::Break => write!(f, "break"),
287            TokenKind::Continue => write!(f, "continue"),
288            TokenKind::Select => write!(f, "select"),
289            TokenKind::Impl => write!(f, "impl"),
290            TokenKind::Identifier(s) => write!(f, "id({s})"),
291            TokenKind::StringLiteral(s) => write!(f, "str({s})"),
292            TokenKind::InterpolatedString(_) => write!(f, "istr(...)"),
293            TokenKind::RawStringLiteral(s) => write!(f, "rstr({s})"),
294            TokenKind::IntLiteral(n) => write!(f, "int({n})"),
295            TokenKind::FloatLiteral(n) => write!(f, "float({n})"),
296            TokenKind::DurationLiteral(ms) => write!(f, "duration({ms}ms)"),
297            TokenKind::Eq => write!(f, "=="),
298            TokenKind::Neq => write!(f, "!="),
299            TokenKind::And => write!(f, "&&"),
300            TokenKind::Or => write!(f, "||"),
301            TokenKind::Pipe => write!(f, "|>"),
302            TokenKind::NilCoal => write!(f, "??"),
303            TokenKind::Pow => write!(f, "**"),
304            TokenKind::QuestionDot => write!(f, "?."),
305            TokenKind::Arrow => write!(f, "->"),
306            TokenKind::Lte => write!(f, "<="),
307            TokenKind::Gte => write!(f, ">="),
308            TokenKind::PlusAssign => write!(f, "+="),
309            TokenKind::MinusAssign => write!(f, "-="),
310            TokenKind::StarAssign => write!(f, "*="),
311            TokenKind::SlashAssign => write!(f, "/="),
312            TokenKind::PercentAssign => write!(f, "%="),
313            TokenKind::Assign => write!(f, "="),
314            TokenKind::Not => write!(f, "!"),
315            TokenKind::Dot => write!(f, "."),
316            TokenKind::Plus => write!(f, "+"),
317            TokenKind::Minus => write!(f, "-"),
318            TokenKind::Star => write!(f, "*"),
319            TokenKind::Slash => write!(f, "/"),
320            TokenKind::Percent => write!(f, "%"),
321            TokenKind::Lt => write!(f, "<"),
322            TokenKind::Gt => write!(f, ">"),
323            TokenKind::Question => write!(f, "?"),
324            TokenKind::Bar => write!(f, "|"),
325            TokenKind::LBrace => write!(f, "{{"),
326            TokenKind::RBrace => write!(f, "}}"),
327            TokenKind::LParen => write!(f, "("),
328            TokenKind::RParen => write!(f, ")"),
329            TokenKind::LBracket => write!(f, "["),
330            TokenKind::RBracket => write!(f, "]"),
331            TokenKind::Comma => write!(f, ","),
332            TokenKind::Colon => write!(f, ":"),
333            TokenKind::Semicolon => write!(f, ";"),
334            TokenKind::LineComment { text, is_doc } => {
335                let prefix = if *is_doc { "///" } else { "//" };
336                write!(f, "{prefix} {text}")
337            }
338            TokenKind::BlockComment { text, is_doc } => {
339                let prefix = if *is_doc { "/**" } else { "/*" };
340                write!(f, "{prefix} {text} */")
341            }
342            TokenKind::Newline => write!(f, "\\n"),
343            TokenKind::Eof => write!(f, "EOF"),
344        }
345    }
346}
347
348/// A token with its kind and source location.
349#[derive(Debug, Clone, PartialEq)]
350pub struct Token {
351    pub kind: TokenKind,
352    pub span: Span,
353}
354
355impl Token {
356    pub fn with_span(kind: TokenKind, span: Span) -> Self {
357        Self { kind, span }
358    }
359}