Skip to main content

harn_lexer/
token.rs

1use std::fmt;
2
/// One piece of an interpolated string.
///
/// An interpolated string token is stored as a sequence of these segments
/// (see `TokenKind::InterpolatedString`).
#[derive(Debug, Clone, PartialEq)]
pub enum StringSegment {
    /// Literal text.
    Literal(String),
    /// An interpolated expression, followed by the line and column of its
    /// source position.
    Expression(String, usize, usize),
}
10
11impl fmt::Display for StringSegment {
12    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
13        match self {
14            StringSegment::Literal(s) => write!(f, "{s}"),
15            StringSegment::Expression(e, _, _) => write!(f, "${{{e}}}"),
16        }
17    }
18}
19
/// A location in the source text, used when reporting errors.
///
/// `start..end` is a half-open byte range; `line`/`column` give the position
/// where the span begins and `end_line` the line on which it ends.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Span {
    /// Inclusive byte offset of the span's first byte, counted from the start of the source.
    pub start: usize,
    /// Exclusive byte offset of the span's end, counted from the start of the source.
    pub end: usize,
    /// Line number of the start position (1-based).
    pub line: usize,
    /// Column number of the start position (1-based).
    pub column: usize,
    /// Line number of the end position (1-based); used to detect multiline spans.
    pub end_line: usize,
}
34
35impl Span {
36    pub fn with_offsets(start: usize, end: usize, line: usize, column: usize) -> Self {
37        Self {
38            start,
39            end,
40            line,
41            column,
42            end_line: line,
43        }
44    }
45
46    /// Create a span covering two spans (from start of `a` to end of `b`).
47    pub fn merge(a: Span, b: Span) -> Span {
48        Span {
49            start: a.start,
50            end: b.end,
51            line: a.line,
52            column: a.column,
53            end_line: b.end_line,
54        }
55    }
56
57    /// A dummy span for synthetic/generated nodes.
58    pub fn dummy() -> Self {
59        Self {
60            start: 0,
61            end: 0,
62            line: 0,
63            column: 0,
64            end_line: 0,
65        }
66    }
67}
68
69impl fmt::Display for Span {
70    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
71        write!(f, "{}:{}", self.line, self.column)
72    }
73}
74
75/// A machine-applicable text replacement for autofixing diagnostics.
76#[derive(Debug, Clone)]
77pub struct FixEdit {
78    /// The source span to replace.
79    pub span: Span,
80    /// The replacement text (empty string = deletion).
81    pub replacement: String,
82}
83
/// Canonical list of Harn language keywords. Single source of truth; the lexer's
/// identifier-to-keyword match must stay in sync (enforced by
/// `test_keywords_const_covers_lexer`). External tooling should consume this
/// rather than duplicate it.
///
/// Entries are listed in strictly ascending byte-wise (ASCII) order, so the
/// slice can be searched with `binary_search`. Keep new keywords in sorted
/// position.
pub const KEYWORDS: &[&str] = &[
    "break",
    "catch",
    "continue",
    "deadline",
    "defer",
    "else",
    "emit",
    "enum",
    "exclusive",
    "extends",
    "false",
    "finally",
    "fn",
    "for",
    "from",
    "guard",
    "if",
    "impl",
    "import",
    "in",
    "interface",
    "let",
    "match",
    "mutex",
    "nil",
    "override",
    "parallel",
    "pipeline",
    "pub",
    "require",
    "retry",
    "return",
    "select",
    "skill",
    "spawn",
    "struct",
    "throw",
    "to",
    "tool",
    "true",
    "try",
    "type",
    "var",
    "while",
    "yield",
];
135
136/// Token kinds produced by the lexer.
137#[derive(Debug, Clone, PartialEq)]
138pub enum TokenKind {
139    Pipeline,
140    Extends,
141    Override,
142    Let,
143    Var,
144    If,
145    Else,
146    For,
147    In,
148    Match,
149    Retry,
150    Parallel,
151    Return,
152    Import,
153    True,
154    False,
155    Nil,
156    Try,
157    Catch,
158    Throw,
159    Finally,
160    Fn,
161    Spawn,
162    While,
163    TypeKw,
164    Enum,
165    Struct,
166    Interface,
167    Emit,
168    Pub,
169    From,
170    To,
171    Tool,
172    Exclusive,
173    Guard,
174    Require,
175    Deadline,
176    Defer,
177    Yield,
178    Mutex,
179    Break,
180    Continue,
181    Select,
182    Impl,
183    Skill,
184
185    Identifier(String),
186    StringLiteral(String),
187    InterpolatedString(Vec<StringSegment>),
188    /// Raw string literal `r"..."` — no escape processing, no interpolation.
189    RawStringLiteral(String),
190    IntLiteral(i64),
191    FloatLiteral(f64),
192    /// Duration literal in milliseconds: 500ms, 5s, 30m, 2h, 1d, 1w
193    DurationLiteral(u64),
194
195    Eq,            // ==
196    Neq,           // !=
197    And,           // &&
198    Or,            // ||
199    Pipe,          // |>
200    NilCoal,       // ??
201    Pow,           // **
202    QuestionDot,   // ?.
203    Arrow,         // ->
204    Lte,           // <=
205    Gte,           // >=
206    PlusAssign,    // +=
207    MinusAssign,   // -=
208    StarAssign,    // *=
209    SlashAssign,   // /=
210    PercentAssign, // %=
211
212    Assign,   // =
213    Not,      // !
214    Dot,      // .
215    Plus,     // +
216    Minus,    // -
217    Star,     // *
218    Slash,    // /
219    Percent,  // %
220    Lt,       // <
221    Gt,       // >
222    Question, // ?
223    Bar,      // |  (for union types)
224
225    LBrace,    // {
226    RBrace,    // }
227    LParen,    // (
228    RParen,    // )
229    LBracket,  // [
230    RBracket,  // ]
231    Comma,     // ,
232    Colon,     // :
233    Semicolon, // ;
234    At,        // @ (attribute prefix)
235
236    LineComment {
237        text: String,
238        is_doc: bool,
239    }, // // text or /// text
240    BlockComment {
241        text: String,
242        is_doc: bool,
243    }, // /* text */ or /** text */
244
245    Newline,
246    Eof,
247}
248
249impl fmt::Display for TokenKind {
250    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
251        match self {
252            TokenKind::Pipeline => write!(f, "pipeline"),
253            TokenKind::Extends => write!(f, "extends"),
254            TokenKind::Override => write!(f, "override"),
255            TokenKind::Let => write!(f, "let"),
256            TokenKind::Var => write!(f, "var"),
257            TokenKind::If => write!(f, "if"),
258            TokenKind::Else => write!(f, "else"),
259            TokenKind::For => write!(f, "for"),
260            TokenKind::In => write!(f, "in"),
261            TokenKind::Match => write!(f, "match"),
262            TokenKind::Retry => write!(f, "retry"),
263            TokenKind::Parallel => write!(f, "parallel"),
264            TokenKind::Return => write!(f, "return"),
265            TokenKind::Import => write!(f, "import"),
266            TokenKind::True => write!(f, "true"),
267            TokenKind::False => write!(f, "false"),
268            TokenKind::Nil => write!(f, "nil"),
269            TokenKind::Try => write!(f, "try"),
270            TokenKind::Catch => write!(f, "catch"),
271            TokenKind::Throw => write!(f, "throw"),
272            TokenKind::Finally => write!(f, "finally"),
273            TokenKind::Fn => write!(f, "fn"),
274            TokenKind::Spawn => write!(f, "spawn"),
275            TokenKind::While => write!(f, "while"),
276            TokenKind::TypeKw => write!(f, "type"),
277            TokenKind::Enum => write!(f, "enum"),
278            TokenKind::Struct => write!(f, "struct"),
279            TokenKind::Interface => write!(f, "interface"),
280            TokenKind::Emit => write!(f, "emit"),
281            TokenKind::Pub => write!(f, "pub"),
282            TokenKind::From => write!(f, "from"),
283            TokenKind::To => write!(f, "to"),
284            TokenKind::Tool => write!(f, "tool"),
285            TokenKind::Exclusive => write!(f, "exclusive"),
286            TokenKind::Guard => write!(f, "guard"),
287            TokenKind::Require => write!(f, "require"),
288            TokenKind::Deadline => write!(f, "deadline"),
289            TokenKind::Defer => write!(f, "defer"),
290            TokenKind::Yield => write!(f, "yield"),
291            TokenKind::Mutex => write!(f, "mutex"),
292            TokenKind::Break => write!(f, "break"),
293            TokenKind::Continue => write!(f, "continue"),
294            TokenKind::Select => write!(f, "select"),
295            TokenKind::Impl => write!(f, "impl"),
296            TokenKind::Skill => write!(f, "skill"),
297            TokenKind::Identifier(s) => write!(f, "id({s})"),
298            TokenKind::StringLiteral(s) => write!(f, "str({s})"),
299            TokenKind::InterpolatedString(_) => write!(f, "istr(...)"),
300            TokenKind::RawStringLiteral(s) => write!(f, "rstr({s})"),
301            TokenKind::IntLiteral(n) => write!(f, "int({n})"),
302            TokenKind::FloatLiteral(n) => write!(f, "float({n})"),
303            TokenKind::DurationLiteral(ms) => write!(f, "duration({ms}ms)"),
304            TokenKind::Eq => write!(f, "=="),
305            TokenKind::Neq => write!(f, "!="),
306            TokenKind::And => write!(f, "&&"),
307            TokenKind::Or => write!(f, "||"),
308            TokenKind::Pipe => write!(f, "|>"),
309            TokenKind::NilCoal => write!(f, "??"),
310            TokenKind::Pow => write!(f, "**"),
311            TokenKind::QuestionDot => write!(f, "?."),
312            TokenKind::Arrow => write!(f, "->"),
313            TokenKind::Lte => write!(f, "<="),
314            TokenKind::Gte => write!(f, ">="),
315            TokenKind::PlusAssign => write!(f, "+="),
316            TokenKind::MinusAssign => write!(f, "-="),
317            TokenKind::StarAssign => write!(f, "*="),
318            TokenKind::SlashAssign => write!(f, "/="),
319            TokenKind::PercentAssign => write!(f, "%="),
320            TokenKind::Assign => write!(f, "="),
321            TokenKind::Not => write!(f, "!"),
322            TokenKind::Dot => write!(f, "."),
323            TokenKind::Plus => write!(f, "+"),
324            TokenKind::Minus => write!(f, "-"),
325            TokenKind::Star => write!(f, "*"),
326            TokenKind::Slash => write!(f, "/"),
327            TokenKind::Percent => write!(f, "%"),
328            TokenKind::Lt => write!(f, "<"),
329            TokenKind::Gt => write!(f, ">"),
330            TokenKind::Question => write!(f, "?"),
331            TokenKind::Bar => write!(f, "|"),
332            TokenKind::LBrace => write!(f, "{{"),
333            TokenKind::RBrace => write!(f, "}}"),
334            TokenKind::LParen => write!(f, "("),
335            TokenKind::RParen => write!(f, ")"),
336            TokenKind::LBracket => write!(f, "["),
337            TokenKind::RBracket => write!(f, "]"),
338            TokenKind::Comma => write!(f, ","),
339            TokenKind::Colon => write!(f, ":"),
340            TokenKind::Semicolon => write!(f, ";"),
341            TokenKind::At => write!(f, "@"),
342            TokenKind::LineComment { text, is_doc } => {
343                let prefix = if *is_doc { "///" } else { "//" };
344                write!(f, "{prefix} {text}")
345            }
346            TokenKind::BlockComment { text, is_doc } => {
347                let prefix = if *is_doc { "/**" } else { "/*" };
348                write!(f, "{prefix} {text} */")
349            }
350            TokenKind::Newline => write!(f, "\\n"),
351            TokenKind::Eof => write!(f, "EOF"),
352        }
353    }
354}
355
356/// A token with its kind and source location.
357#[derive(Debug, Clone, PartialEq)]
358pub struct Token {
359    pub kind: TokenKind,
360    pub span: Span,
361}
362
363impl Token {
364    pub fn with_span(kind: TokenKind, span: Span) -> Self {
365        Self { kind, span }
366    }
367}