koto_parser/
node.rs

1use crate::{StringFormatOptions, StringQuote, ast::AstIndex, constant_pool::ConstantIndex};
2use smallvec::SmallVec;
3use std::fmt;
4
5/// The Vec type used in the AST
6//
7//  Q. Why 4 elements in the small Vec?
8//  A. It's the maximum number of elements that can be used in [Node] without increasing its overall
9//     size.
10pub type AstVec<T> = SmallVec<[T; 4]>;
11
12/// A convenience macro for initializing an [`AstVec`]
13pub use smallvec::smallvec as astvec;
14
15/// A parsed node that can be included in the [AST](crate::Ast).
16///
17/// Nodes refer to each other via [`AstIndex`], see [`AstNode`](crate::AstNode).
18#[derive(Clone, Debug, Default, PartialEq, Eq, derive_name::VariantName)]
19pub enum Node {
20    /// The `null` keyword
21    #[default]
22    Null,
23
24    /// A single expression wrapped in parentheses
25    Nested(AstIndex),
26
27    /// An identifier, and optionally the type hint node
28    Id(ConstantIndex, Option<AstIndex>),
29
30    /// A meta identifier, e.g. `@display` or `@test my_test`
31    Meta(MetaKeyId, Option<ConstantIndex>),
32
33    /// A chained expression, and optionally the node that follows it in the chain
34    Chain((ChainNode, Option<AstIndex>)), // Chain node, next node
35
36    /// The `true` keyword
37    BoolTrue,
38
39    /// The `false` keyword
40    BoolFalse,
41
42    /// An integer in the range -255..=255
43    SmallInt(i16),
44
45    /// An integer outside of the range -255..=255
46    Int(ConstantIndex),
47
48    /// A float literal
49    Float(ConstantIndex),
50
51    /// A string literal
52    Str(AstString),
53
54    /// A list literal
55    ///
56    /// E.g. `[foo, bar, 42]`
57    List(AstVec<AstIndex>),
58
59    /// A tuple literal
60    ///
61    /// E.g. `(foo, bar, 42)`
62    Tuple {
63        /// The tuple's elements
64        elements: AstVec<AstIndex>,
65        /// Whether or not parentheses were used for the tuple
66        parentheses: bool,
67    },
68
69    /// A temporary tuple
70    ///
71    /// Used in contexts where the result won't be exposed directly to the user,
72    /// e.g.
73    /// - `x, y = 1, 2`: x and y are indexed from the temporary tuple.
74    /// - `match foo, bar...`: foo and bar will be stored in a temporary tuple for comparison.
75    TempTuple(AstVec<AstIndex>),
76
77    /// A range with a defined start and end
78    Range {
79        /// The start of the range
80        start: AstIndex,
81        /// The end of the range
82        end: AstIndex,
83        /// Whether or not the end of the range includes the end value itself
84        ///
85        /// E.g. `1..10` - a range from 1 up to but not including 10
86        /// E.g. `1..=10` - a range from 1 up to and including 10
87        inclusive: bool,
88    },
89
90    /// A range without a defined end
91    RangeFrom {
92        /// The start of the range
93        start: AstIndex,
94    },
95
96    /// A range without a defined start
97    RangeTo {
98        /// The end of the range
99        end: AstIndex,
100        /// Whether or not the end of the range includes the end value itself
101        inclusive: bool,
102    },
103
104    /// The range operator without defined start or end
105    ///
106    /// Used when indexing a list or tuple, and the full contents are to be returned.
107    RangeFull,
108
109    /// A map literal, containing a series of key/value entries
110    Map {
111        /// The map's entries.
112        ///
113        /// If the map has braces, then values are optional and the valueless keys will point
114        /// directly to an Id instead of a MapEntry.
115        entries: AstVec<AstIndex>,
116        /// Whether or not the map was defined using braces.
117        braces: bool,
118    },
119
120    /// A key/value pair representing a Map entry.
121    ///
122    /// Keys will either be Id, String, or Meta nodes.
123    MapEntry(AstIndex, AstIndex),
124
125    /// The `self` keyword
126    Self_,
127
128    /// The main block node
129    ///
130    /// Typically all ASTs will have this node at the root.
131    MainBlock {
132        /// The main block's body as a series of expressions
133        body: AstVec<AstIndex>,
134        /// The number of locally assigned values in the main block
135        ///
136        /// This tells the compiler how many registers need to be reserved for locally
137        /// assigned values.
138        local_count: usize,
139    },
140
141    /// A block node
142    ///
143    /// Used for indented blocks that share the context of the frame they're in,
144    /// e.g. if expressions, arms in match or switch expressions, loop bodies.
145    Block(AstVec<AstIndex>),
146
147    /// A function node
148    Function(Function),
149
150    /// A function's arguments
151    FunctionArgs {
152        /// The arguments
153        args: AstVec<AstIndex>,
154        /// A flag that indicates if the function arguments end with a variadic `...` argument
155        variadic: bool,
156        /// The optional output type of the function
157        output_type: Option<AstIndex>,
158    },
159
160    /// An import expression
161    ///
162    /// E.g. `from foo.bar import baz, 'qux'`
163    Import {
164        /// Where the items should be imported from
165        ///
166        /// An empty list here implies that import without `from` has been used.
167        from: AstVec<AstIndex>,
168        /// The series of items to import
169        // The import items are stored in a `Vec` here rather than an `AstVec` to avoid bloating the
170        // overall size of `Node`.
171        ///
172        /// An empty list here implies that a `*` wildcard import was used.
173        items: Vec<ImportItem>,
174    },
175
176    /// An export expression
177    ///
178    /// The export item will be a map literal, with each map entry added to the exports map
179    Export(AstIndex),
180
181    /// An assignment expression
182    ///
183    /// Used for single-assignment, multiple-assignment is represented by [Node::MultiAssign].
184    Assign {
185        /// The target of the assignment
186        target: AstIndex,
187        /// The expression to be assigned
188        expression: AstIndex,
189        /// Whether or not the assignment uses `let`
190        let_assignment: bool,
191    },
192
193    /// A multiple-assignment expression
194    ///
195    /// E.g. `x, y = foo()`, or `foo, bar, baz = 1, 2, 3`
196    MultiAssign {
197        /// The targets of the assignment
198        targets: AstVec<AstIndex>,
199        /// The expression to be assigned
200        expression: AstIndex,
201        /// Whether or not the assignment uses `let`
202        let_assignment: bool,
203    },
204
205    /// A unary operation
206    UnaryOp {
207        /// The operator to use
208        op: AstUnaryOp,
209        /// The value used in the operation
210        value: AstIndex,
211    },
212
213    /// A binary operation
214    BinaryOp {
215        /// The operator to use
216        op: AstBinaryOp,
217        /// The "left hand side" of the operation
218        lhs: AstIndex,
219        /// The "right hand side" of the operation
220        rhs: AstIndex,
221    },
222
223    /// An if expression
224    If(AstIf),
225
226    /// A match expression
227    Match {
228        /// The expression that will be matched against
229        expression: AstIndex,
230        /// The series of arms that match against the provided expression
231        arms: AstVec<AstIndex>,
232    },
233
234    /// An arm of a [Self::Match] expression
235    MatchArm {
236        /// A series of match patterns
237        ///
238        /// If `patterns` is empty then `else` is implied, and should always appear as the last arm.
239        patterns: AstVec<AstIndex>,
240        /// An optional condition for the match arm
241        ///
242        /// e.g.
243        /// match foo
244        ///   bar if check_condition bar then ...
245        condition: Option<AstIndex>,
246        /// The body of the match arm
247        expression: AstIndex,
248    },
249
250    /// A switch expression
251    Switch(AstVec<AstIndex>),
252
253    /// An arm of a [Self::Switch] expression
254    SwitchArm {
255        /// An optional condition for the switch arm
256        ///
257        /// None implies `else`, and should always appear as the last arm.
258        condition: Option<AstIndex>,
259        /// The body of the switch arm
260        expression: AstIndex,
261    },
262
263    /// A `_`-prefixed identifier
264    ///
265    /// Used as a placeholder for unused function arguments or unpacked values,
266    /// or as an ignored match-all in match expressions.
267    ///
268    /// Comes with an optional name (e.g. `_foo` will have `foo` stored as a constant),
269    /// and an optional type hint.
270    Ignored(Option<ConstantIndex>, Option<AstIndex>),
271
272    /// Used when capturing variadic arguments, and when unpacking list or tuple arguments.
273    ///
274    /// The id is optional, e.g. `f = |(..., last)| last`
275    PackedId(Option<ConstantIndex>),
276
277    /// Used when an argument in a function call needs to be unpacked
278    ///
279    /// e.g. `f(args...)`
280    ///
281    /// The argument can be any expression, e.g. `f (1..100).take(3)...`
282    PackedExpression(AstIndex),
283
284    /// A `for` loop
285    For(AstFor),
286
287    /// A `loop` expression
288    Loop {
289        /// The loop's body
290        body: AstIndex,
291    },
292
293    /// A `while` loop
294    While {
295        /// The condition for the while loop
296        condition: AstIndex,
297        /// The body of the while loop
298        body: AstIndex,
299    },
300
301    /// An `until` expression
302    Until {
303        /// The condition for the until loop
304        condition: AstIndex,
305        /// The body of the until loop
306        body: AstIndex,
307    },
308
309    /// The break keyword, with optional break value
310    Break(Option<AstIndex>),
311
312    /// The continue keyword
313    Continue,
314
315    /// A return expression, with optional return value
316    Return(Option<AstIndex>),
317
318    /// A try expression
319    Try(AstTry),
320
321    /// A throw expression
322    Throw(AstIndex),
323
324    /// A yield expression
325    Yield(AstIndex),
326
327    /// A debug expression
328    Debug {
329        /// The stored string of the debugged expression to be used when printing the result
330        expression_string: ConstantIndex,
331        /// The expression that should be debugged
332        expression: AstIndex,
333    },
334
335    /// A type hint
336    ///
337    /// E.g. `let x: Number = 0`
338    ///            ^~~ This is the beginning of the type hint
339    Type {
340        /// The expected type as a string
341        type_index: ConstantIndex,
342        /// True if the type was specified with a `?` suffix
343        allow_null: bool,
344    },
345}
346
347/// A function definition
348#[derive(Clone, Debug, PartialEq, Eq)]
349pub struct Function {
350    /// The function's arguments
351    ///
352    /// See [Node::FunctionArgs].
353    pub args: AstIndex,
354    /// The number of locally assigned values
355    ///
356    /// Used by the compiler when reserving registers for local values at the start of the frame.
357    pub local_count: usize,
358    /// Any non-local values that are accessed in the function
359    ///
360    /// Any ID (or chain root) that's accessed in a function and which wasn't previously assigned
361    /// locally, is either an export or the value needs to be captured. The compiler takes care of
362    /// determining if an access is a capture or not at the moment the function is created.
363    pub accessed_non_locals: AstVec<ConstantIndex>,
364    /// The function's body
365    pub body: AstIndex,
366    /// A flag that indicates if the function is a generator or not
367    ///
368    /// The presence of a `yield` expression in the function body will set this to true.
369    pub is_generator: bool,
370}
371
372/// A string definition
373#[derive(Clone, Debug, PartialEq, Eq)]
374pub struct AstString {
375    /// Indicates if single or double quotation marks were used
376    pub quote: StringQuote,
377    /// The string's contents
378    pub contents: StringContents,
379}
380
381/// The contents of an [AstString]
382#[derive(Clone, Debug, PartialEq, Eq)]
383pub enum StringContents {
384    /// A string literal
385    Literal(ConstantIndex),
386    /// A raw string literal
387    Raw {
388        /// The literal's constant index
389        constant: ConstantIndex,
390        /// The number of hashes associated with the raw string's delimiter
391        hash_count: u8,
392    },
393    /// An interpolated string
394    ///
395    /// An interpolated string is made up of a series of literals and template expressions,
396    /// which are then joined together using a string builder.
397    // The interpolated nodes are stored in a `Vec` here rather than an `AstVec` to avoid bloating
398    // the overall size of `Node`.
399    Interpolated(Vec<StringNode>),
400}
401
402/// A node in a string definition
403#[derive(Copy, Clone, Debug, PartialEq, Eq)]
404pub enum StringNode {
405    /// A string literal
406    Literal(ConstantIndex),
407    /// An expression that should be evaluated and inserted into the string
408    Expression {
409        /// The expressions AST index
410        expression: AstIndex,
411        /// Formatting options for the rendered expression
412        format: StringFormatOptions,
413    },
414}
415
416/// A for loop definition
417#[derive(Clone, Debug, PartialEq, Eq)]
418pub struct AstFor {
419    /// The ids that capture each iteration's output values
420    pub args: AstVec<AstIndex>,
421    /// The expression that produces an iterable value
422    pub iterable: AstIndex,
423    /// The body of the for loop
424    pub body: AstIndex,
425}
426
427/// An if expression definition
428#[derive(Clone, Debug, PartialEq, Eq)]
429pub struct AstIf {
430    /// The if expression's condition
431    pub condition: AstIndex,
432    /// The if expression's `then` branch
433    pub then_node: AstIndex,
434    /// An optional series of `else if` conditions and branches
435    pub else_if_blocks: AstVec<(AstIndex, AstIndex)>,
436    /// An optional `else` branch
437    pub else_node: Option<AstIndex>,
438    /// Whether or not the if expression was defined using inline syntax
439    pub inline: bool,
440}
441
442/// An operation used in UnaryOp expressions
443#[derive(Clone, Copy, Debug, PartialEq, Eq)]
444#[allow(missing_docs)]
445pub enum AstUnaryOp {
446    Negate,
447    Not,
448}
449
450impl AstUnaryOp {
451    /// The binary op as a str
452    pub fn as_str(&self) -> &'static str {
453        match self {
454            AstUnaryOp::Negate => "-",
455            AstUnaryOp::Not => "not",
456        }
457    }
458}
459
460impl fmt::Display for AstUnaryOp {
461    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
462        f.write_str(self.as_str())
463    }
464}
465
466/// An operation used in BinaryOp expressions
467#[derive(Clone, Copy, Debug, PartialEq, Eq)]
468#[allow(missing_docs)]
469pub enum AstBinaryOp {
470    Add,
471    Subtract,
472    Multiply,
473    Divide,
474    Remainder,
475    Power,
476    AddAssign,
477    SubtractAssign,
478    MultiplyAssign,
479    DivideAssign,
480    RemainderAssign,
481    PowerAssign,
482    Equal,
483    NotEqual,
484    Less,
485    LessOrEqual,
486    Greater,
487    GreaterOrEqual,
488    And,
489    Or,
490    Pipe,
491}
492
493impl AstBinaryOp {
494    /// The binary op as a str
495    pub fn as_str(&self) -> &'static str {
496        match self {
497            AstBinaryOp::Add => "+",
498            AstBinaryOp::Subtract => "-",
499            AstBinaryOp::Multiply => "*",
500            AstBinaryOp::Divide => "/",
501            AstBinaryOp::Remainder => "%",
502            AstBinaryOp::Power => "^",
503            AstBinaryOp::AddAssign => "+=",
504            AstBinaryOp::SubtractAssign => "-=",
505            AstBinaryOp::MultiplyAssign => "*=",
506            AstBinaryOp::DivideAssign => "/=",
507            AstBinaryOp::RemainderAssign => "%=",
508            AstBinaryOp::PowerAssign => "^=",
509            AstBinaryOp::Equal => "==",
510            AstBinaryOp::NotEqual => "!=",
511            AstBinaryOp::Less => "<",
512            AstBinaryOp::LessOrEqual => "<=",
513            AstBinaryOp::Greater => ">",
514            AstBinaryOp::GreaterOrEqual => ">=",
515            AstBinaryOp::And => "and",
516            AstBinaryOp::Or => "or",
517            AstBinaryOp::Pipe => "->",
518        }
519    }
520}
521
522impl fmt::Display for AstBinaryOp {
523    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
524        f.write_str(self.as_str())
525    }
526}
527
528/// A try expression definition
529#[derive(Clone, Debug, PartialEq, Eq)]
530pub struct AstTry {
531    /// The block that's wrapped by the try
532    pub try_block: AstIndex,
533    /// The catch blocks associated with the try expression
534    pub catch_blocks: AstVec<AstCatch>,
535    /// An optional `finally` block
536    pub finally_block: Option<AstIndex>,
537}
538
539/// A catch block definition
540#[derive(Clone, Debug, PartialEq, Eq)]
541pub struct AstCatch {
542    /// The identifier that will receive a caught error
543    pub arg: AstIndex,
544    /// The catch block
545    pub block: AstIndex,
546}
547
548/// A node in a chained expression
549///
550/// Chains are any expressions that contain two or more nodes in a sequence.
551///
552/// In other words, some series of operations involving indexing, `.` accesses, and function calls.
553///
554/// e.g.
555/// `foo.bar."baz"[0]?(42)`
556///  |  |   |     |  |^ Call {args: 42, with_parens: true}
557///  |  |   |     |  ^ NullCheck
558///  |  |   |     ^ Index (0)
559///  |  |   ^ Str (baz)
560///  |  ^ Id (bar)
561///  ^ Root (foo)
562#[derive(Clone, Debug, PartialEq, Eq)]
563pub enum ChainNode {
564    /// The root of the chain
565    Root(AstIndex),
566    /// A `.` access using an identifier
567    Id(ConstantIndex),
568    /// A `.` access using a string
569    Str(AstString),
570    /// An index operation using square `[]` brackets.
571    Index(AstIndex),
572    /// A function call
573    Call {
574        /// The arguments used in the function call
575        args: AstVec<AstIndex>,
576        /// Whether or not parentheses are present in the function call
577        ///
578        /// This is not cosmetic, as parentheses represent a 'closed call', which has an impact on
579        /// function piping:
580        /// e.g.
581        ///   `99 -> foo.bar 42` is equivalent to `foo.bar(42, 99)`
582        /// but:
583        ///   `99 -> foo.bar(42)` is equivalent to `foo.bar(42)(99)`.
584        with_parens: bool,
585    },
586    /// A `?` short-circuiting null check
587    NullCheck,
588}
589
590/// A meta key
591#[derive(Clone, Copy, Debug, PartialEq, Eq)]
592#[repr(u8)]
593pub enum MetaKeyId {
594    /// @+
595    Add,
596    /// @-
597    Subtract,
598    /// @*
599    Multiply,
600    /// @/
601    Divide,
602    /// @%
603    Remainder,
604    /// @^
605    Power,
606    /// @r+
607    AddRhs,
608    /// @r-
609    SubtractRhs,
610    /// @r*
611    MultiplyRhs,
612    /// @r/
613    DivideRhs,
614    /// @r%
615    RemainderRhs,
616    /// @r^
617    PowerRhs,
618    /// @+=
619    AddAssign,
620    /// @-=
621    SubtractAssign,
622    /// @*=
623    MultiplyAssign,
624    /// @/=
625    DivideAssign,
626    /// @%=
627    RemainderAssign,
628    /// @^=
629    PowerAssign,
630    /// @<
631    Less,
632    /// @<=
633    LessOrEqual,
634    /// @>
635    Greater,
636    /// @>=
637    GreaterOrEqual,
638    /// @==
639    Equal,
640    /// @!=
641    NotEqual,
642
643    /// @index
644    Index,
645    /// @index_mut
646    IndexMut,
647
648    /// @debug
649    Debug,
650    /// @display
651    Display,
652    /// @iterator
653    Iterator,
654    /// @next
655    Next,
656    /// @next_back
657    NextBack,
658    /// @negate
659    Negate,
660    /// @size
661    Size,
662    /// @type
663    Type,
664    /// @base
665    Base,
666
667    /// @call
668    Call,
669
670    /// @test test_name
671    Test,
672    /// @pre_test
673    PreTest,
674    /// @post_test
675    PostTest,
676
677    /// @main
678    Main,
679
680    /// @meta name
681    Named,
682
683    /// Unused
684    ///
685    /// This entry must be last, see `TryFrom<u7>` for [MetaKeyId]
686    Invalid,
687}
688
689impl MetaKeyId {
690    /// Returns the key id as a static str
691    pub fn as_str(&self) -> &'static str {
692        use MetaKeyId::*;
693        match self {
694            Add => "@+",
695            Subtract => "@-",
696            Multiply => "@*",
697            Divide => "@/",
698            Remainder => "@%",
699            Power => "@^",
700            AddRhs => "@r+",
701            SubtractRhs => "@r-",
702            MultiplyRhs => "@r*",
703            DivideRhs => "@r/",
704            RemainderRhs => "@r%",
705            PowerRhs => "@r^",
706            AddAssign => "@+=",
707            SubtractAssign => "@-=",
708            MultiplyAssign => "@*=",
709            DivideAssign => "@/=",
710            RemainderAssign => "@%=",
711            PowerAssign => "@^=",
712            Less => "@<",
713            LessOrEqual => "@<=",
714            Greater => "@>",
715            GreaterOrEqual => "@>=",
716            Equal => "@==",
717            NotEqual => "@!=",
718            Index => "@index",
719            IndexMut => "@index_mut",
720            Debug => "@debug",
721            Display => "@display",
722            Iterator => "@iterator",
723            Next => "@next",
724            NextBack => "@next_back",
725            Negate => "@negate",
726            Size => "@size",
727            Type => "@type",
728            Base => "@base",
729            Call => "@call",
730            Test => "@test",
731            PreTest => "@pre_test",
732            PostTest => "@post_test",
733            Main => "@main",
734            Named => "@meta",
735            Invalid => unreachable!(),
736        }
737    }
738}
739
740impl TryFrom<u8> for MetaKeyId {
741    type Error = u8;
742
743    fn try_from(byte: u8) -> Result<Self, Self::Error> {
744        if byte < Self::Invalid as u8 {
745            // Safety: Any value less than Invalid is safe to transmute.
746            Ok(unsafe { std::mem::transmute::<u8, Self>(byte) })
747        } else {
748            Err(byte)
749        }
750    }
751}
752
753// Display impl used by koto-ls
754impl fmt::Display for MetaKeyId {
755    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
756        f.write_str(self.as_str())
757    }
758}
759
760/// A node in an import item, see [Node::Import]
761#[derive(Clone, Debug, PartialEq, Eq)]
762pub struct ImportItem {
763    /// The imported item
764    pub item: AstIndex,
765    /// An optional 'as' name for the imported item
766    pub name: Option<AstIndex>,
767}
768
769#[cfg(test)]
770mod tests {
771    use super::*;
772
773    #[test]
774    fn node_size() {
775        // Think carefully before allowing Node to increase in size
776        let size = std::mem::size_of::<Node>();
777        let maximum_size = 72;
778        assert!(
779            size <= maximum_size,
780            "Node has a size of {size} bytes, the allowed maximum is {maximum_size} bytes"
781        );
782    }
783}