// harn_lexer/token.rs

1use std::fmt;
2
/// One piece of an interpolated string.
#[derive(Clone, Debug, PartialEq)]
pub enum StringSegment {
    /// A run of literal text.
    Literal(String),
    /// An interpolated expression, followed by the line and column of its
    /// source position: `Expression(expr, line, column)`.
    Expression(String, usize, usize),
}
10
11impl fmt::Display for StringSegment {
12    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
13        match self {
14            StringSegment::Literal(s) => write!(f, "{s}"),
15            StringSegment::Expression(e, _, _) => write!(f, "${{{e}}}"),
16        }
17    }
18}
19
/// A region of source text, used for error reporting.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct Span {
    /// Inclusive byte offset, measured from the start of the source, where the span begins.
    pub start: usize,
    /// Exclusive byte offset, measured from the start of the source, where the span ends.
    pub end: usize,
    /// 1-based line number of the start position.
    pub line: usize,
    /// 1-based column number of the start position.
    pub column: usize,
    /// 1-based line number of the end position; differs from `line` for multiline spans.
    pub end_line: usize,
}
34
35impl Span {
36    pub fn with_offsets(start: usize, end: usize, line: usize, column: usize) -> Self {
37        Self {
38            start,
39            end,
40            line,
41            column,
42            end_line: line,
43        }
44    }
45
46    /// Create a span covering two spans (from start of `a` to end of `b`).
47    pub fn merge(a: Span, b: Span) -> Span {
48        Span {
49            start: a.start,
50            end: b.end,
51            line: a.line,
52            column: a.column,
53            end_line: b.end_line,
54        }
55    }
56
57    /// A dummy span for synthetic/generated nodes.
58    pub fn dummy() -> Self {
59        Self {
60            start: 0,
61            end: 0,
62            line: 0,
63            column: 0,
64            end_line: 0,
65        }
66    }
67}
68
69impl fmt::Display for Span {
70    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
71        write!(f, "{}:{}", self.line, self.column)
72    }
73}
74
75/// A machine-applicable text replacement for autofixing diagnostics.
76#[derive(Debug, Clone)]
77pub struct FixEdit {
78    /// The source span to replace.
79    pub span: Span,
80    /// The replacement text (empty string = deletion).
81    pub replacement: String,
82}
83
/// The canonical list of Harn language keywords.
///
/// This is the single source of truth: the lexer's identifier-to-keyword
/// match must stay in sync with it (enforced by
/// `test_keywords_const_covers_lexer`), and external tooling should read this
/// list rather than keep its own copy.
///
/// Entries are currently listed in ascending ASCII order.
pub const KEYWORDS: &[&str] = &[
    "ask_user",
    "break",
    "catch",
    "continue",
    "deadline",
    "defer",
    "dual_control",
    "else",
    "emit",
    "enum",
    "escalate_to",
    "eval_pack",
    "exclusive",
    "extends",
    "false",
    "finally",
    "fn",
    "for",
    "from",
    "guard",
    "if",
    "impl",
    "import",
    "in",
    "interface",
    "let",
    "match",
    "mutex",
    "nil",
    "override",
    "parallel",
    "pipeline",
    "pub",
    "request_approval",
    "require",
    "retry",
    "return",
    "select",
    "skill",
    "spawn",
    "struct",
    "throw",
    "to",
    "tool",
    "true",
    "try",
    "type",
    "var",
    "while",
    "yield",
];
140
141/// Token kinds produced by the lexer.
142#[derive(Debug, Clone, PartialEq)]
143pub enum TokenKind {
144    Pipeline,
145    Extends,
146    Override,
147    Let,
148    Var,
149    If,
150    Else,
151    For,
152    In,
153    Match,
154    Retry,
155    Parallel,
156    Return,
157    Import,
158    True,
159    False,
160    Nil,
161    Try,
162    Catch,
163    Throw,
164    Finally,
165    Fn,
166    Spawn,
167    While,
168    TypeKw,
169    Enum,
170    EvalPack,
171    Struct,
172    Interface,
173    Emit,
174    Pub,
175    From,
176    To,
177    Tool,
178    Exclusive,
179    Guard,
180    Require,
181    Deadline,
182    Defer,
183    Yield,
184    Mutex,
185    Break,
186    Continue,
187    Select,
188    Impl,
189    Skill,
190    /// First-class HITL primitive: `request_approval(...)`.
191    RequestApproval,
192    /// First-class HITL primitive: `dual_control(...)`.
193    DualControl,
194    /// First-class HITL primitive: `ask_user(...)`.
195    AskUser,
196    /// First-class HITL primitive: `escalate_to(...)`.
197    EscalateTo,
198
199    Identifier(String),
200    StringLiteral(String),
201    InterpolatedString(Vec<StringSegment>),
202    /// Raw string literal `r"..."` — no escape processing, no interpolation.
203    RawStringLiteral(String),
204    IntLiteral(i64),
205    FloatLiteral(f64),
206    /// Duration literal in milliseconds: 500ms, 5s, 30m, 2h, 1d, 1w
207    DurationLiteral(u64),
208
209    Eq,            // ==
210    Neq,           // !=
211    And,           // &&
212    Or,            // ||
213    Pipe,          // |>
214    NilCoal,       // ??
215    Pow,           // **
216    QuestionDot,   // ?.
217    Arrow,         // ->
218    Lte,           // <=
219    Gte,           // >=
220    PlusAssign,    // +=
221    MinusAssign,   // -=
222    StarAssign,    // *=
223    SlashAssign,   // /=
224    PercentAssign, // %=
225
226    Assign,   // =
227    Not,      // !
228    Dot,      // .
229    Plus,     // +
230    Minus,    // -
231    Star,     // *
232    Slash,    // /
233    Percent,  // %
234    Lt,       // <
235    Gt,       // >
236    Question, // ?
237    Bar,      // |  (for union types)
238    Amp,      // &  (for intersection types)
239
240    LBrace,    // {
241    RBrace,    // }
242    LParen,    // (
243    RParen,    // )
244    LBracket,  // [
245    RBracket,  // ]
246    Comma,     // ,
247    Colon,     // :
248    Semicolon, // ;
249    At,        // @ (attribute prefix)
250
251    LineComment {
252        text: String,
253        is_doc: bool,
254    }, // // text or /// text
255    BlockComment {
256        text: String,
257        is_doc: bool,
258    }, // /* text */ or /** text */
259
260    Newline,
261    Eof,
262}
263
264impl fmt::Display for TokenKind {
265    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
266        match self {
267            TokenKind::Pipeline => write!(f, "pipeline"),
268            TokenKind::Extends => write!(f, "extends"),
269            TokenKind::Override => write!(f, "override"),
270            TokenKind::Let => write!(f, "let"),
271            TokenKind::Var => write!(f, "var"),
272            TokenKind::If => write!(f, "if"),
273            TokenKind::Else => write!(f, "else"),
274            TokenKind::For => write!(f, "for"),
275            TokenKind::In => write!(f, "in"),
276            TokenKind::Match => write!(f, "match"),
277            TokenKind::Retry => write!(f, "retry"),
278            TokenKind::Parallel => write!(f, "parallel"),
279            TokenKind::Return => write!(f, "return"),
280            TokenKind::Import => write!(f, "import"),
281            TokenKind::True => write!(f, "true"),
282            TokenKind::False => write!(f, "false"),
283            TokenKind::Nil => write!(f, "nil"),
284            TokenKind::Try => write!(f, "try"),
285            TokenKind::Catch => write!(f, "catch"),
286            TokenKind::Throw => write!(f, "throw"),
287            TokenKind::Finally => write!(f, "finally"),
288            TokenKind::Fn => write!(f, "fn"),
289            TokenKind::Spawn => write!(f, "spawn"),
290            TokenKind::While => write!(f, "while"),
291            TokenKind::TypeKw => write!(f, "type"),
292            TokenKind::Enum => write!(f, "enum"),
293            TokenKind::EvalPack => write!(f, "eval_pack"),
294            TokenKind::Struct => write!(f, "struct"),
295            TokenKind::Interface => write!(f, "interface"),
296            TokenKind::Emit => write!(f, "emit"),
297            TokenKind::Pub => write!(f, "pub"),
298            TokenKind::From => write!(f, "from"),
299            TokenKind::To => write!(f, "to"),
300            TokenKind::Tool => write!(f, "tool"),
301            TokenKind::Exclusive => write!(f, "exclusive"),
302            TokenKind::Guard => write!(f, "guard"),
303            TokenKind::Require => write!(f, "require"),
304            TokenKind::Deadline => write!(f, "deadline"),
305            TokenKind::Defer => write!(f, "defer"),
306            TokenKind::Yield => write!(f, "yield"),
307            TokenKind::Mutex => write!(f, "mutex"),
308            TokenKind::Break => write!(f, "break"),
309            TokenKind::Continue => write!(f, "continue"),
310            TokenKind::Select => write!(f, "select"),
311            TokenKind::Impl => write!(f, "impl"),
312            TokenKind::Skill => write!(f, "skill"),
313            TokenKind::RequestApproval => write!(f, "request_approval"),
314            TokenKind::DualControl => write!(f, "dual_control"),
315            TokenKind::AskUser => write!(f, "ask_user"),
316            TokenKind::EscalateTo => write!(f, "escalate_to"),
317            TokenKind::Identifier(s) => write!(f, "id({s})"),
318            TokenKind::StringLiteral(s) => write!(f, "str({s})"),
319            TokenKind::InterpolatedString(_) => write!(f, "istr(...)"),
320            TokenKind::RawStringLiteral(s) => write!(f, "rstr({s})"),
321            TokenKind::IntLiteral(n) => write!(f, "int({n})"),
322            TokenKind::FloatLiteral(n) => write!(f, "float({n})"),
323            TokenKind::DurationLiteral(ms) => write!(f, "duration({ms}ms)"),
324            TokenKind::Eq => write!(f, "=="),
325            TokenKind::Neq => write!(f, "!="),
326            TokenKind::And => write!(f, "&&"),
327            TokenKind::Or => write!(f, "||"),
328            TokenKind::Pipe => write!(f, "|>"),
329            TokenKind::NilCoal => write!(f, "??"),
330            TokenKind::Pow => write!(f, "**"),
331            TokenKind::QuestionDot => write!(f, "?."),
332            TokenKind::Arrow => write!(f, "->"),
333            TokenKind::Lte => write!(f, "<="),
334            TokenKind::Gte => write!(f, ">="),
335            TokenKind::PlusAssign => write!(f, "+="),
336            TokenKind::MinusAssign => write!(f, "-="),
337            TokenKind::StarAssign => write!(f, "*="),
338            TokenKind::SlashAssign => write!(f, "/="),
339            TokenKind::PercentAssign => write!(f, "%="),
340            TokenKind::Assign => write!(f, "="),
341            TokenKind::Not => write!(f, "!"),
342            TokenKind::Dot => write!(f, "."),
343            TokenKind::Plus => write!(f, "+"),
344            TokenKind::Minus => write!(f, "-"),
345            TokenKind::Star => write!(f, "*"),
346            TokenKind::Slash => write!(f, "/"),
347            TokenKind::Percent => write!(f, "%"),
348            TokenKind::Lt => write!(f, "<"),
349            TokenKind::Gt => write!(f, ">"),
350            TokenKind::Question => write!(f, "?"),
351            TokenKind::Bar => write!(f, "|"),
352            TokenKind::Amp => write!(f, "&"),
353            TokenKind::LBrace => write!(f, "{{"),
354            TokenKind::RBrace => write!(f, "}}"),
355            TokenKind::LParen => write!(f, "("),
356            TokenKind::RParen => write!(f, ")"),
357            TokenKind::LBracket => write!(f, "["),
358            TokenKind::RBracket => write!(f, "]"),
359            TokenKind::Comma => write!(f, ","),
360            TokenKind::Colon => write!(f, ":"),
361            TokenKind::Semicolon => write!(f, ";"),
362            TokenKind::At => write!(f, "@"),
363            TokenKind::LineComment { text, is_doc } => {
364                let prefix = if *is_doc { "///" } else { "//" };
365                write!(f, "{prefix} {text}")
366            }
367            TokenKind::BlockComment { text, is_doc } => {
368                let prefix = if *is_doc { "/**" } else { "/*" };
369                write!(f, "{prefix} {text} */")
370            }
371            TokenKind::Newline => write!(f, "\\n"),
372            TokenKind::Eof => write!(f, "EOF"),
373        }
374    }
375}
376
377/// A token with its kind and source location.
378#[derive(Debug, Clone, PartialEq)]
379pub struct Token {
380    pub kind: TokenKind,
381    pub span: Span,
382}
383
384impl Token {
385    pub fn with_span(kind: TokenKind, span: Span) -> Self {
386        Self { kind, span }
387    }
388}