Skip to main content

harn_lexer/
token.rs

1use std::fmt;
2
/// One piece of an interpolated string.
#[derive(Debug, Clone, PartialEq)]
pub enum StringSegment {
    /// Plain text between interpolations.
    Literal(String),
    /// An interpolated expression, followed by the (line, column) source
    /// position recorded for it.
    Expression(String, usize, usize),
}

/// Renders a segment as text: a literal is written unchanged, an expression
/// is wrapped as `${...}`. The stored position never appears in the output.
impl fmt::Display for StringSegment {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Literal(text) => f.write_str(text),
            Self::Expression(expr, _line, _column) => {
                f.write_str("${")?;
                f.write_str(expr)?;
                f.write_str("}")
            }
        }
    }
}
19
/// A region of source text, used to point error reports at the right place.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Span {
    /// Inclusive byte offset of the first byte, counted from the start of the source.
    pub start: usize,
    /// Exclusive byte offset one past the last byte, counted from the start of the source.
    pub end: usize,
    /// Line of the start position (1-based).
    pub line: usize,
    /// Column of the start position (1-based).
    pub column: usize,
    /// Line of the end position (1-based); differs from `line` for multiline spans.
    pub end_line: usize,
}

impl Span {
    /// Builds a span from its byte offsets and start position.
    ///
    /// `end_line` is set equal to `line`, so the result describes a span that
    /// ends on the line it starts on.
    pub fn with_offsets(start: usize, end: usize, line: usize, column: usize) -> Self {
        let end_line = line;
        Span {
            start,
            end,
            line,
            column,
            end_line,
        }
    }

    /// Joins two spans into one running from the start of `a` to the end of `b`.
    ///
    /// `start`, `line` and `column` are taken from `a`; `end` and `end_line`
    /// are taken from `b`. The arguments are not reordered or validated.
    pub fn merge(a: Span, b: Span) -> Span {
        Span {
            end: b.end,
            end_line: b.end_line,
            ..a
        }
    }

    /// Placeholder span for synthetic/generated nodes; every field is zero.
    pub fn dummy() -> Self {
        Self::with_offsets(0, 0, 0, 0)
    }
}

/// Formats the span as `line:column` of its start position.
impl fmt::Display for Span {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Span { line, column, .. } = self;
        write!(f, "{line}:{column}")
    }
}
74
/// A machine-applicable text replacement for autofixing diagnostics.
///
/// Pairs the region of source to overwrite (a [`Span`]) with the text that
/// should take its place.
#[derive(Debug, Clone)]
pub struct FixEdit {
    /// The source span to replace.
    pub span: Span,
    /// The replacement text (empty string = deletion).
    pub replacement: String,
}
83
/// Canonical list of Harn language keywords. Single source of truth; the lexer's
/// identifier-to-keyword match must stay in sync (enforced by
/// `test_keywords_const_covers_lexer`). External tooling should consume this
/// rather than duplicate it.
///
/// Entries are listed in ascending alphabetical order, and each one is the
/// exact text that the `Display` impl of [`TokenKind`] prints for the
/// corresponding keyword variant.
pub const KEYWORDS: &[&str] = &[
    "break",
    "catch",
    "continue",
    "deadline",
    "defer",
    "else",
    "enum",
    "exclusive",
    "extends",
    "false",
    "finally",
    "fn",
    "for",
    "from",
    "guard",
    "if",
    "impl",
    "import",
    "in",
    "interface",
    "let",
    "match",
    "mutex",
    "nil",
    "override",
    "parallel",
    "pipeline",
    "pub",
    "require",
    "retry",
    "return",
    "select",
    "skill",
    "spawn",
    "struct",
    "throw",
    "to",
    "tool",
    "true",
    "try",
    "type",
    "var",
    "while",
    "yield",
];
134
/// Token kinds produced by the lexer.
///
/// Variants are grouped, in order, as: keywords, identifiers and literals,
/// two-character operators, single-character operators, delimiters, comments,
/// and the `Newline` / `Eof` markers.
///
/// `Eq` cannot be derived here because `FloatLiteral` holds an `f64`.
#[derive(Debug, Clone, PartialEq)]
pub enum TokenKind {
    // --- Keywords: one payload-free variant per entry in `KEYWORDS` ---
    Pipeline,
    Extends,
    Override,
    Let,
    Var,
    If,
    Else,
    For,
    In,
    Match,
    Retry,
    Parallel,
    Return,
    Import,
    True,
    False,
    Nil,
    Try,
    Catch,
    Throw,
    Finally,
    Fn,
    Spawn,
    While,
    /// The `type` keyword.
    TypeKw,
    Enum,
    Struct,
    Interface,
    Pub,
    From,
    To,
    Tool,
    Exclusive,
    Guard,
    Require,
    Deadline,
    Defer,
    Yield,
    Mutex,
    Break,
    Continue,
    Select,
    Impl,
    Skill,

    // --- Identifiers and literals ---
    /// An identifier, carrying its text.
    Identifier(String),
    /// A string literal, carrying its text.
    StringLiteral(String),
    /// A string with interpolations, split into literal and expression segments.
    InterpolatedString(Vec<StringSegment>),
    /// Raw string literal `r"..."` — no escape processing, no interpolation.
    RawStringLiteral(String),
    /// An integer literal.
    IntLiteral(i64),
    /// A floating-point literal.
    FloatLiteral(f64),
    /// Duration literal in milliseconds: 500ms, 5s, 30m, 2h, 1d, 1w
    DurationLiteral(u64),

    // --- Two-character operators ---
    Eq,            // ==
    Neq,           // !=
    And,           // &&
    Or,            // ||
    Pipe,          // |>
    NilCoal,       // ??
    Pow,           // **
    QuestionDot,   // ?.
    Arrow,         // ->
    Lte,           // <=
    Gte,           // >=
    PlusAssign,    // +=
    MinusAssign,   // -=
    StarAssign,    // *=
    SlashAssign,   // /=
    PercentAssign, // %=

    // --- Single-character operators ---
    Assign,   // =
    Not,      // !
    Dot,      // .
    Plus,     // +
    Minus,    // -
    Star,     // *
    Slash,    // /
    Percent,  // %
    Lt,       // <
    Gt,       // >
    Question, // ?
    Bar,      // |  (for union types)

    // --- Delimiters and punctuation ---
    LBrace,    // {
    RBrace,    // }
    LParen,    // (
    RParen,    // )
    LBracket,  // [
    RBracket,  // ]
    Comma,     // ,
    Colon,     // :
    Semicolon, // ;
    At,        // @ (attribute prefix)

    // --- Comments ---
    /// A line comment; `is_doc` distinguishes `/// text` from `// text`.
    LineComment {
        text: String,
        is_doc: bool,
    }, // // text or /// text
    /// A block comment; `is_doc` distinguishes `/** text */` from `/* text */`.
    BlockComment {
        text: String,
        is_doc: bool,
    }, // /* text */ or /** text */

    // --- Structural markers ---
    /// A line break.
    Newline,
    /// End of input.
    Eof,
}
246
247impl fmt::Display for TokenKind {
248    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
249        match self {
250            TokenKind::Pipeline => write!(f, "pipeline"),
251            TokenKind::Extends => write!(f, "extends"),
252            TokenKind::Override => write!(f, "override"),
253            TokenKind::Let => write!(f, "let"),
254            TokenKind::Var => write!(f, "var"),
255            TokenKind::If => write!(f, "if"),
256            TokenKind::Else => write!(f, "else"),
257            TokenKind::For => write!(f, "for"),
258            TokenKind::In => write!(f, "in"),
259            TokenKind::Match => write!(f, "match"),
260            TokenKind::Retry => write!(f, "retry"),
261            TokenKind::Parallel => write!(f, "parallel"),
262            TokenKind::Return => write!(f, "return"),
263            TokenKind::Import => write!(f, "import"),
264            TokenKind::True => write!(f, "true"),
265            TokenKind::False => write!(f, "false"),
266            TokenKind::Nil => write!(f, "nil"),
267            TokenKind::Try => write!(f, "try"),
268            TokenKind::Catch => write!(f, "catch"),
269            TokenKind::Throw => write!(f, "throw"),
270            TokenKind::Finally => write!(f, "finally"),
271            TokenKind::Fn => write!(f, "fn"),
272            TokenKind::Spawn => write!(f, "spawn"),
273            TokenKind::While => write!(f, "while"),
274            TokenKind::TypeKw => write!(f, "type"),
275            TokenKind::Enum => write!(f, "enum"),
276            TokenKind::Struct => write!(f, "struct"),
277            TokenKind::Interface => write!(f, "interface"),
278            TokenKind::Pub => write!(f, "pub"),
279            TokenKind::From => write!(f, "from"),
280            TokenKind::To => write!(f, "to"),
281            TokenKind::Tool => write!(f, "tool"),
282            TokenKind::Exclusive => write!(f, "exclusive"),
283            TokenKind::Guard => write!(f, "guard"),
284            TokenKind::Require => write!(f, "require"),
285            TokenKind::Deadline => write!(f, "deadline"),
286            TokenKind::Defer => write!(f, "defer"),
287            TokenKind::Yield => write!(f, "yield"),
288            TokenKind::Mutex => write!(f, "mutex"),
289            TokenKind::Break => write!(f, "break"),
290            TokenKind::Continue => write!(f, "continue"),
291            TokenKind::Select => write!(f, "select"),
292            TokenKind::Impl => write!(f, "impl"),
293            TokenKind::Skill => write!(f, "skill"),
294            TokenKind::Identifier(s) => write!(f, "id({s})"),
295            TokenKind::StringLiteral(s) => write!(f, "str({s})"),
296            TokenKind::InterpolatedString(_) => write!(f, "istr(...)"),
297            TokenKind::RawStringLiteral(s) => write!(f, "rstr({s})"),
298            TokenKind::IntLiteral(n) => write!(f, "int({n})"),
299            TokenKind::FloatLiteral(n) => write!(f, "float({n})"),
300            TokenKind::DurationLiteral(ms) => write!(f, "duration({ms}ms)"),
301            TokenKind::Eq => write!(f, "=="),
302            TokenKind::Neq => write!(f, "!="),
303            TokenKind::And => write!(f, "&&"),
304            TokenKind::Or => write!(f, "||"),
305            TokenKind::Pipe => write!(f, "|>"),
306            TokenKind::NilCoal => write!(f, "??"),
307            TokenKind::Pow => write!(f, "**"),
308            TokenKind::QuestionDot => write!(f, "?."),
309            TokenKind::Arrow => write!(f, "->"),
310            TokenKind::Lte => write!(f, "<="),
311            TokenKind::Gte => write!(f, ">="),
312            TokenKind::PlusAssign => write!(f, "+="),
313            TokenKind::MinusAssign => write!(f, "-="),
314            TokenKind::StarAssign => write!(f, "*="),
315            TokenKind::SlashAssign => write!(f, "/="),
316            TokenKind::PercentAssign => write!(f, "%="),
317            TokenKind::Assign => write!(f, "="),
318            TokenKind::Not => write!(f, "!"),
319            TokenKind::Dot => write!(f, "."),
320            TokenKind::Plus => write!(f, "+"),
321            TokenKind::Minus => write!(f, "-"),
322            TokenKind::Star => write!(f, "*"),
323            TokenKind::Slash => write!(f, "/"),
324            TokenKind::Percent => write!(f, "%"),
325            TokenKind::Lt => write!(f, "<"),
326            TokenKind::Gt => write!(f, ">"),
327            TokenKind::Question => write!(f, "?"),
328            TokenKind::Bar => write!(f, "|"),
329            TokenKind::LBrace => write!(f, "{{"),
330            TokenKind::RBrace => write!(f, "}}"),
331            TokenKind::LParen => write!(f, "("),
332            TokenKind::RParen => write!(f, ")"),
333            TokenKind::LBracket => write!(f, "["),
334            TokenKind::RBracket => write!(f, "]"),
335            TokenKind::Comma => write!(f, ","),
336            TokenKind::Colon => write!(f, ":"),
337            TokenKind::Semicolon => write!(f, ";"),
338            TokenKind::At => write!(f, "@"),
339            TokenKind::LineComment { text, is_doc } => {
340                let prefix = if *is_doc { "///" } else { "//" };
341                write!(f, "{prefix} {text}")
342            }
343            TokenKind::BlockComment { text, is_doc } => {
344                let prefix = if *is_doc { "/**" } else { "/*" };
345                write!(f, "{prefix} {text} */")
346            }
347            TokenKind::Newline => write!(f, "\\n"),
348            TokenKind::Eof => write!(f, "EOF"),
349        }
350    }
351}
352
353/// A token with its kind and source location.
354#[derive(Debug, Clone, PartialEq)]
355pub struct Token {
356    pub kind: TokenKind,
357    pub span: Span,
358}
359
360impl Token {
361    pub fn with_span(kind: TokenKind, span: Span) -> Self {
362        Self { kind, span }
363    }
364}