koto_parser/node.rs
1use crate::{StringFormatOptions, StringQuote, ast::AstIndex, constant_pool::ConstantIndex};
2use smallvec::SmallVec;
3use std::fmt;
4
5/// The Vec type used in the AST
6//
7// Q. Why 4 elements in the small Vec?
8// A. It's the maximum number of elements that can be used in [Node] without increasing its overall
9// size.
10pub type AstVec<T> = SmallVec<[T; 4]>;
11
12/// A convenience macro for initializing an [`AstVec`]
13pub use smallvec::smallvec as astvec;
14
15/// A parsed node that can be included in the [AST](crate::Ast).
16///
17/// Nodes refer to each other via [`AstIndex`], see [`AstNode`](crate::AstNode).
18#[derive(Clone, Debug, Default, PartialEq, Eq, derive_name::VariantName)]
19pub enum Node {
20 /// The `null` keyword
21 #[default]
22 Null,
23
24 /// A single expression wrapped in parentheses
25 Nested(AstIndex),
26
27 /// An identifier, and optionally the type hint node
28 Id(ConstantIndex, Option<AstIndex>),
29
30 /// A meta identifier, e.g. `@display` or `@test my_test`
31 Meta(MetaKeyId, Option<ConstantIndex>),
32
33 /// A chained expression, and optionally the node that follows it in the chain
34 Chain((ChainNode, Option<AstIndex>)), // Chain node, next node
35
36 /// The `true` keyword
37 BoolTrue,
38
39 /// The `false` keyword
40 BoolFalse,
41
42 /// An integer in the range -255..=255
43 SmallInt(i16),
44
45 /// An integer outside of the range -255..=255
46 Int(ConstantIndex),
47
48 /// A float literal
49 Float(ConstantIndex),
50
51 /// A string literal
52 Str(AstString),
53
54 /// A list literal
55 ///
56 /// E.g. `[foo, bar, 42]`
57 List(AstVec<AstIndex>),
58
59 /// A tuple literal
60 ///
61 /// E.g. `(foo, bar, 42)`
62 Tuple {
63 /// The tuple's elements
64 elements: AstVec<AstIndex>,
65 /// Whether or not parentheses were used for the tuple
66 parentheses: bool,
67 },
68
69 /// A temporary tuple
70 ///
71 /// Used in contexts where the result won't be exposed directly to the user,
72 /// e.g.
73 /// - `x, y = 1, 2`: x and y are indexed from the temporary tuple.
74 /// - `match foo, bar...`: foo and bar will be stored in a temporary tuple for comparison.
75 TempTuple(AstVec<AstIndex>),
76
77 /// A range with a defined start and end
78 Range {
79 /// The start of the range
80 start: AstIndex,
81 /// The end of the range
82 end: AstIndex,
83 /// Whether or not the end of the range includes the end value itself
84 ///
85 /// E.g. `1..10` - a range from 1 up to but not including 10
86 /// E.g. `1..=10` - a range from 1 up to and including 10
87 inclusive: bool,
88 },
89
90 /// A range without a defined end
91 RangeFrom {
92 /// The start of the range
93 start: AstIndex,
94 },
95
96 /// A range without a defined start
97 RangeTo {
98 /// The end of the range
99 end: AstIndex,
100 /// Whether or not the end of the range includes the end value itself
101 inclusive: bool,
102 },
103
104 /// The range operator without defined start or end
105 ///
106 /// Used when indexing a list or tuple, and the full contents are to be returned.
107 RangeFull,
108
109 /// A map literal, containing a series of key/value entries
110 Map {
111 /// The map's entries.
112 ///
113 /// If the map has braces, then values are optional and the valueless keys will point
114 /// directly to an Id instead of a MapEntry.
115 entries: AstVec<AstIndex>,
116 /// Whether or not the map was defined using braces.
117 braces: bool,
118 },
119
120 /// A key/value pair representing a Map entry.
121 ///
122 /// Keys will either be Id, String, or Meta nodes.
123 MapEntry(AstIndex, AstIndex),
124
125 /// The `self` keyword
126 Self_,
127
128 /// The main block node
129 ///
130 /// Typically all ASTs will have this node at the root.
131 MainBlock {
132 /// The main block's body as a series of expressions
133 body: AstVec<AstIndex>,
134 /// The number of locally assigned values in the main block
135 ///
136 /// This tells the compiler how many registers need to be reserved for locally
137 /// assigned values.
138 local_count: usize,
139 },
140
141 /// A block node
142 ///
143 /// Used for indented blocks that share the context of the frame they're in,
144 /// e.g. if expressions, arms in match or switch expressions, loop bodies.
145 Block(AstVec<AstIndex>),
146
147 /// A function node
148 Function(Function),
149
150 /// A function's arguments
151 FunctionArgs {
152 /// The arguments
153 args: AstVec<AstIndex>,
154 /// A flag that indicates if the function arguments end with a variadic `...` argument
155 variadic: bool,
156 /// The optional output type of the function
157 output_type: Option<AstIndex>,
158 },
159
160 /// An import expression
161 ///
162 /// E.g. `from foo.bar import baz, 'qux'`
163 Import {
164 /// Where the items should be imported from
165 ///
166 /// An empty list here implies that import without `from` has been used.
167 from: AstVec<AstIndex>,
168 /// The series of items to import
169 // The import items are stored in a `Vec` here rather than an `AstVec` to avoid bloating the
170 // overall size of `Node`.
171 ///
172 /// An empty list here implies that a `*` wildcard import was used.
173 items: Vec<ImportItem>,
174 },
175
176 /// An export expression
177 ///
178 /// The export item will be a map literal, with each map entry added to the exports map
179 Export(AstIndex),
180
181 /// An assignment expression
182 ///
183 /// Used for single-assignment, multiple-assignment is represented by [Node::MultiAssign].
184 Assign {
185 /// The target of the assignment
186 target: AstIndex,
187 /// The expression to be assigned
188 expression: AstIndex,
189 /// Whether or not the assignment uses `let`
190 let_assignment: bool,
191 },
192
193 /// A multiple-assignment expression
194 ///
195 /// E.g. `x, y = foo()`, or `foo, bar, baz = 1, 2, 3`
196 MultiAssign {
197 /// The targets of the assignment
198 targets: AstVec<AstIndex>,
199 /// The expression to be assigned
200 expression: AstIndex,
201 /// Whether or not the assignment uses `let`
202 let_assignment: bool,
203 },
204
205 /// A unary operation
206 UnaryOp {
207 /// The operator to use
208 op: AstUnaryOp,
209 /// The value used in the operation
210 value: AstIndex,
211 },
212
213 /// A binary operation
214 BinaryOp {
215 /// The operator to use
216 op: AstBinaryOp,
217 /// The "left hand side" of the operation
218 lhs: AstIndex,
219 /// The "right hand side" of the operation
220 rhs: AstIndex,
221 },
222
223 /// An if expression
224 If(AstIf),
225
226 /// A match expression
227 Match {
228 /// The expression that will be matched against
229 expression: AstIndex,
230 /// The series of arms that match against the provided expression
231 arms: AstVec<AstIndex>,
232 },
233
234 /// An arm of a [Self::Match] expression
235 MatchArm {
236 /// A series of match patterns
237 ///
238 /// If `patterns` is empty then `else` is implied, and should always appear as the last arm.
239 patterns: AstVec<AstIndex>,
240 /// An optional condition for the match arm
241 ///
242 /// e.g.
243 /// match foo
244 /// bar if check_condition bar then ...
245 condition: Option<AstIndex>,
246 /// The body of the match arm
247 expression: AstIndex,
248 },
249
250 /// A switch expression
251 Switch(AstVec<AstIndex>),
252
253 /// An arm of a [Self::Switch] expression
254 SwitchArm {
255 /// An optional condition for the switch arm
256 ///
257 /// None implies `else`, and should always appear as the last arm.
258 condition: Option<AstIndex>,
259 /// The body of the switch arm
260 expression: AstIndex,
261 },
262
263 /// A `_`-prefixed identifier
264 ///
265 /// Used as a placeholder for unused function arguments or unpacked values,
266 /// or as an ignored match-all in match expressions.
267 ///
268 /// Comes with an optional name (e.g. `_foo` will have `foo` stored as a constant),
269 /// and an optional type hint.
270 Ignored(Option<ConstantIndex>, Option<AstIndex>),
271
272 /// Used when capturing variadic arguments, and when unpacking list or tuple arguments.
273 ///
274 /// The id is optional, e.g. `f = |(..., last)| last`
275 PackedId(Option<ConstantIndex>),
276
277 /// Used when an argument in a function call needs to be unpacked
278 ///
279 /// e.g. `f(args...)`
280 ///
281 /// The argument can be any expression, e.g. `f (1..100).take(3)...`
282 PackedExpression(AstIndex),
283
284 /// A `for` loop
285 For(AstFor),
286
287 /// A `loop` expression
288 Loop {
289 /// The loop's body
290 body: AstIndex,
291 },
292
293 /// A `while` loop
294 While {
295 /// The condition for the while loop
296 condition: AstIndex,
297 /// The body of the while loop
298 body: AstIndex,
299 },
300
301 /// An `until` expression
302 Until {
303 /// The condition for the until loop
304 condition: AstIndex,
305 /// The body of the until loop
306 body: AstIndex,
307 },
308
309 /// The break keyword, with optional break value
310 Break(Option<AstIndex>),
311
312 /// The continue keyword
313 Continue,
314
315 /// A return expression, with optional return value
316 Return(Option<AstIndex>),
317
318 /// A try expression
319 Try(AstTry),
320
321 /// A throw expression
322 Throw(AstIndex),
323
324 /// A yield expression
325 Yield(AstIndex),
326
327 /// A debug expression
328 Debug {
329 /// The stored string of the debugged expression to be used when printing the result
330 expression_string: ConstantIndex,
331 /// The expression that should be debugged
332 expression: AstIndex,
333 },
334
335 /// A type hint
336 ///
337 /// E.g. `let x: Number = 0`
338 /// ^~~ This is the beginning of the type hint
339 Type {
340 /// The expected type as a string
341 type_index: ConstantIndex,
342 /// True if the type was specified with a `?` suffix
343 allow_null: bool,
344 },
345}
346
347/// A function definition
348#[derive(Clone, Debug, PartialEq, Eq)]
349pub struct Function {
350 /// The function's arguments
351 ///
352 /// See [Node::FunctionArgs].
353 pub args: AstIndex,
354 /// The number of locally assigned values
355 ///
356 /// Used by the compiler when reserving registers for local values at the start of the frame.
357 pub local_count: usize,
358 /// Any non-local values that are accessed in the function
359 ///
360 /// Any ID (or chain root) that's accessed in a function and which wasn't previously assigned
361 /// locally, is either an export or the value needs to be captured. The compiler takes care of
362 /// determining if an access is a capture or not at the moment the function is created.
363 pub accessed_non_locals: AstVec<ConstantIndex>,
364 /// The function's body
365 pub body: AstIndex,
366 /// A flag that indicates if the function is a generator or not
367 ///
368 /// The presence of a `yield` expression in the function body will set this to true.
369 pub is_generator: bool,
370}
371
372/// A string definition
373#[derive(Clone, Debug, PartialEq, Eq)]
374pub struct AstString {
375 /// Indicates if single or double quotation marks were used
376 pub quote: StringQuote,
377 /// The string's contents
378 pub contents: StringContents,
379}
380
381/// The contents of an [AstString]
382#[derive(Clone, Debug, PartialEq, Eq)]
383pub enum StringContents {
384 /// A string literal
385 Literal(ConstantIndex),
386 /// A raw string literal
387 Raw {
388 /// The literal's constant index
389 constant: ConstantIndex,
390 /// The number of hashes associated with the raw string's delimiter
391 hash_count: u8,
392 },
393 /// An interpolated string
394 ///
395 /// An interpolated string is made up of a series of literals and template expressions,
396 /// which are then joined together using a string builder.
397 // The interpolated nodes are stored in a `Vec` here rather than an `AstVec` to avoid bloating
398 // the overall size of `Node`.
399 Interpolated(Vec<StringNode>),
400}
401
402/// A node in a string definition
403#[derive(Copy, Clone, Debug, PartialEq, Eq)]
404pub enum StringNode {
405 /// A string literal
406 Literal(ConstantIndex),
407 /// An expression that should be evaluated and inserted into the string
408 Expression {
409 /// The expressions AST index
410 expression: AstIndex,
411 /// Formatting options for the rendered expression
412 format: StringFormatOptions,
413 },
414}
415
416/// A for loop definition
417#[derive(Clone, Debug, PartialEq, Eq)]
418pub struct AstFor {
419 /// The ids that capture each iteration's output values
420 pub args: AstVec<AstIndex>,
421 /// The expression that produces an iterable value
422 pub iterable: AstIndex,
423 /// The body of the for loop
424 pub body: AstIndex,
425}
426
427/// An if expression definition
428#[derive(Clone, Debug, PartialEq, Eq)]
429pub struct AstIf {
430 /// The if expression's condition
431 pub condition: AstIndex,
432 /// The if expression's `then` branch
433 pub then_node: AstIndex,
434 /// An optional series of `else if` conditions and branches
435 pub else_if_blocks: AstVec<(AstIndex, AstIndex)>,
436 /// An optional `else` branch
437 pub else_node: Option<AstIndex>,
438 /// Whether or not the if expression was defined using inline syntax
439 pub inline: bool,
440}
441
442/// An operation used in UnaryOp expressions
443#[derive(Clone, Copy, Debug, PartialEq, Eq)]
444#[allow(missing_docs)]
445pub enum AstUnaryOp {
446 Negate,
447 Not,
448}
449
450impl AstUnaryOp {
451 /// The binary op as a str
452 pub fn as_str(&self) -> &'static str {
453 match self {
454 AstUnaryOp::Negate => "-",
455 AstUnaryOp::Not => "not",
456 }
457 }
458}
459
460impl fmt::Display for AstUnaryOp {
461 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
462 f.write_str(self.as_str())
463 }
464}
465
466/// An operation used in BinaryOp expressions
467#[derive(Clone, Copy, Debug, PartialEq, Eq)]
468#[allow(missing_docs)]
469pub enum AstBinaryOp {
470 Add,
471 Subtract,
472 Multiply,
473 Divide,
474 Remainder,
475 Power,
476 AddAssign,
477 SubtractAssign,
478 MultiplyAssign,
479 DivideAssign,
480 RemainderAssign,
481 PowerAssign,
482 Equal,
483 NotEqual,
484 Less,
485 LessOrEqual,
486 Greater,
487 GreaterOrEqual,
488 And,
489 Or,
490 Pipe,
491}
492
493impl AstBinaryOp {
494 /// The binary op as a str
495 pub fn as_str(&self) -> &'static str {
496 match self {
497 AstBinaryOp::Add => "+",
498 AstBinaryOp::Subtract => "-",
499 AstBinaryOp::Multiply => "*",
500 AstBinaryOp::Divide => "/",
501 AstBinaryOp::Remainder => "%",
502 AstBinaryOp::Power => "^",
503 AstBinaryOp::AddAssign => "+=",
504 AstBinaryOp::SubtractAssign => "-=",
505 AstBinaryOp::MultiplyAssign => "*=",
506 AstBinaryOp::DivideAssign => "/=",
507 AstBinaryOp::RemainderAssign => "%=",
508 AstBinaryOp::PowerAssign => "^=",
509 AstBinaryOp::Equal => "==",
510 AstBinaryOp::NotEqual => "!=",
511 AstBinaryOp::Less => "<",
512 AstBinaryOp::LessOrEqual => "<=",
513 AstBinaryOp::Greater => ">",
514 AstBinaryOp::GreaterOrEqual => ">=",
515 AstBinaryOp::And => "and",
516 AstBinaryOp::Or => "or",
517 AstBinaryOp::Pipe => "->",
518 }
519 }
520}
521
522impl fmt::Display for AstBinaryOp {
523 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
524 f.write_str(self.as_str())
525 }
526}
527
528/// A try expression definition
529#[derive(Clone, Debug, PartialEq, Eq)]
530pub struct AstTry {
531 /// The block that's wrapped by the try
532 pub try_block: AstIndex,
533 /// The catch blocks associated with the try expression
534 pub catch_blocks: AstVec<AstCatch>,
535 /// An optional `finally` block
536 pub finally_block: Option<AstIndex>,
537}
538
539/// A catch block definition
540#[derive(Clone, Debug, PartialEq, Eq)]
541pub struct AstCatch {
542 /// The identifier that will receive a caught error
543 pub arg: AstIndex,
544 /// The catch block
545 pub block: AstIndex,
546}
547
548/// A node in a chained expression
549///
550/// Chains are any expressions that contain two or more nodes in a sequence.
551///
552/// In other words, some series of operations involving indexing, `.` accesses, and function calls.
553///
554/// e.g.
555/// `foo.bar."baz"[0]?(42)`
556/// | | | | |^ Call {args: 42, with_parens: true}
557/// | | | | ^ NullCheck
558/// | | | ^ Index (0)
559/// | | ^ Str (baz)
560/// | ^ Id (bar)
561/// ^ Root (foo)
562#[derive(Clone, Debug, PartialEq, Eq)]
563pub enum ChainNode {
564 /// The root of the chain
565 Root(AstIndex),
566 /// A `.` access using an identifier
567 Id(ConstantIndex),
568 /// A `.` access using a string
569 Str(AstString),
570 /// An index operation using square `[]` brackets.
571 Index(AstIndex),
572 /// A function call
573 Call {
574 /// The arguments used in the function call
575 args: AstVec<AstIndex>,
576 /// Whether or not parentheses are present in the function call
577 ///
578 /// This is not cosmetic, as parentheses represent a 'closed call', which has an impact on
579 /// function piping:
580 /// e.g.
581 /// `99 -> foo.bar 42` is equivalent to `foo.bar(42, 99)`
582 /// but:
583 /// `99 -> foo.bar(42)` is equivalent to `foo.bar(42)(99)`.
584 with_parens: bool,
585 },
586 /// A `?` short-circuiting null check
587 NullCheck,
588}
589
590/// A meta key
591#[derive(Clone, Copy, Debug, PartialEq, Eq)]
592#[repr(u8)]
593pub enum MetaKeyId {
594 /// @+
595 Add,
596 /// @-
597 Subtract,
598 /// @*
599 Multiply,
600 /// @/
601 Divide,
602 /// @%
603 Remainder,
604 /// @^
605 Power,
606 /// @r+
607 AddRhs,
608 /// @r-
609 SubtractRhs,
610 /// @r*
611 MultiplyRhs,
612 /// @r/
613 DivideRhs,
614 /// @r%
615 RemainderRhs,
616 /// @r^
617 PowerRhs,
618 /// @+=
619 AddAssign,
620 /// @-=
621 SubtractAssign,
622 /// @*=
623 MultiplyAssign,
624 /// @/=
625 DivideAssign,
626 /// @%=
627 RemainderAssign,
628 /// @^=
629 PowerAssign,
630 /// @<
631 Less,
632 /// @<=
633 LessOrEqual,
634 /// @>
635 Greater,
636 /// @>=
637 GreaterOrEqual,
638 /// @==
639 Equal,
640 /// @!=
641 NotEqual,
642
643 /// @index
644 Index,
645 /// @index_mut
646 IndexMut,
647
648 /// @debug
649 Debug,
650 /// @display
651 Display,
652 /// @iterator
653 Iterator,
654 /// @next
655 Next,
656 /// @next_back
657 NextBack,
658 /// @negate
659 Negate,
660 /// @size
661 Size,
662 /// @type
663 Type,
664 /// @base
665 Base,
666
667 /// @call
668 Call,
669
670 /// @test test_name
671 Test,
672 /// @pre_test
673 PreTest,
674 /// @post_test
675 PostTest,
676
677 /// @main
678 Main,
679
680 /// @meta name
681 Named,
682
683 /// Unused
684 ///
685 /// This entry must be last, see `TryFrom<u7>` for [MetaKeyId]
686 Invalid,
687}
688
689impl MetaKeyId {
690 /// Returns the key id as a static str
691 pub fn as_str(&self) -> &'static str {
692 use MetaKeyId::*;
693 match self {
694 Add => "@+",
695 Subtract => "@-",
696 Multiply => "@*",
697 Divide => "@/",
698 Remainder => "@%",
699 Power => "@^",
700 AddRhs => "@r+",
701 SubtractRhs => "@r-",
702 MultiplyRhs => "@r*",
703 DivideRhs => "@r/",
704 RemainderRhs => "@r%",
705 PowerRhs => "@r^",
706 AddAssign => "@+=",
707 SubtractAssign => "@-=",
708 MultiplyAssign => "@*=",
709 DivideAssign => "@/=",
710 RemainderAssign => "@%=",
711 PowerAssign => "@^=",
712 Less => "@<",
713 LessOrEqual => "@<=",
714 Greater => "@>",
715 GreaterOrEqual => "@>=",
716 Equal => "@==",
717 NotEqual => "@!=",
718 Index => "@index",
719 IndexMut => "@index_mut",
720 Debug => "@debug",
721 Display => "@display",
722 Iterator => "@iterator",
723 Next => "@next",
724 NextBack => "@next_back",
725 Negate => "@negate",
726 Size => "@size",
727 Type => "@type",
728 Base => "@base",
729 Call => "@call",
730 Test => "@test",
731 PreTest => "@pre_test",
732 PostTest => "@post_test",
733 Main => "@main",
734 Named => "@meta",
735 Invalid => unreachable!(),
736 }
737 }
738}
739
740impl TryFrom<u8> for MetaKeyId {
741 type Error = u8;
742
743 fn try_from(byte: u8) -> Result<Self, Self::Error> {
744 if byte < Self::Invalid as u8 {
745 // Safety: Any value less than Invalid is safe to transmute.
746 Ok(unsafe { std::mem::transmute::<u8, Self>(byte) })
747 } else {
748 Err(byte)
749 }
750 }
751}
752
753// Display impl used by koto-ls
754impl fmt::Display for MetaKeyId {
755 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
756 f.write_str(self.as_str())
757 }
758}
759
760/// A node in an import item, see [Node::Import]
761#[derive(Clone, Debug, PartialEq, Eq)]
762pub struct ImportItem {
763 /// The imported item
764 pub item: AstIndex,
765 /// An optional 'as' name for the imported item
766 pub name: Option<AstIndex>,
767}
768
769#[cfg(test)]
770mod tests {
771 use super::*;
772
773 #[test]
774 fn node_size() {
775 // Think carefully before allowing Node to increase in size
776 let size = std::mem::size_of::<Node>();
777 let maximum_size = 72;
778 assert!(
779 size <= maximum_size,
780 "Node has a size of {size} bytes, the allowed maximum is {maximum_size} bytes"
781 );
782 }
783}