kcl_lib/parsing/token/mod.rs

// Clippy does not agree with rustc here for some reason.
#![allow(clippy::needless_lifetimes)]

use std::{fmt, iter::Enumerate, num::NonZeroUsize, str::FromStr};

use anyhow::Result;
use parse_display::Display;
use serde::{Deserialize, Serialize};
use tokeniser::Input;
use tower_lsp::lsp_types::SemanticTokenType;
use winnow::{
    self,
    error::ParseError,
    stream::{ContainsToken, Stream},
};

use crate::{
    CompilationError, ModuleId, SourceRange,
    errors::KclError,
    parsing::ast::types::{ItemVisibility, VariableKind},
};

mod tokeniser;

pub(crate) use tokeniser::RESERVED_WORDS;

// Note the ordering: `m` must come after `mm` and `cm`, because suffixes are
// matched with `ends_with`, so `m` would otherwise shadow both of them.
pub const NUM_SUFFIXES: [&str; 10] = ["mm", "cm", "m", "inch", "in", "ft", "yd", "deg", "rad", "?"];
29
30#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize, ts_rs::TS)]
31#[repr(u32)]
32pub enum NumericSuffix {
33    None,
34    Count,
35    Length,
36    Angle,
37    Mm,
38    Cm,
39    M,
40    Inch,
41    Ft,
42    Yd,
43    Deg,
44    Rad,
45    Unknown,
46}
47
48impl NumericSuffix {
49    #[allow(dead_code)]
50    pub fn is_none(self) -> bool {
51        self == Self::None
52    }
53
54    pub fn is_some(self) -> bool {
55        self != Self::None
56    }
57
58    pub fn digestable_id(&self) -> &[u8] {
59        match self {
60            NumericSuffix::None => &[],
61            NumericSuffix::Count => b"_",
62            NumericSuffix::Unknown => b"?",
63            NumericSuffix::Length => b"Length",
64            NumericSuffix::Angle => b"Angle",
65            NumericSuffix::Mm => b"mm",
66            NumericSuffix::Cm => b"cm",
67            NumericSuffix::M => b"m",
68            NumericSuffix::Inch => b"in",
69            NumericSuffix::Ft => b"ft",
70            NumericSuffix::Yd => b"yd",
71            NumericSuffix::Deg => b"deg",
72            NumericSuffix::Rad => b"rad",
73        }
74    }
75}
76
77impl FromStr for NumericSuffix {
78    type Err = CompilationError;
79
80    fn from_str(s: &str) -> Result<Self, Self::Err> {
81        match s {
82            "_" | "Count" => Ok(NumericSuffix::Count),
83            "Length" => Ok(NumericSuffix::Length),
84            "Angle" => Ok(NumericSuffix::Angle),
85            "mm" | "millimeters" => Ok(NumericSuffix::Mm),
86            "cm" | "centimeters" => Ok(NumericSuffix::Cm),
87            "m" | "meters" => Ok(NumericSuffix::M),
88            "inch" | "in" => Ok(NumericSuffix::Inch),
89            "ft" | "feet" => Ok(NumericSuffix::Ft),
90            "yd" | "yards" => Ok(NumericSuffix::Yd),
91            "deg" | "degrees" => Ok(NumericSuffix::Deg),
92            "rad" | "radians" => Ok(NumericSuffix::Rad),
93            "?" => Ok(NumericSuffix::Unknown),
94            _ => Err(CompilationError::err(SourceRange::default(), "invalid unit of measure")),
95        }
96    }
97}
98
99impl fmt::Display for NumericSuffix {
100    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
101        match self {
102            NumericSuffix::None => Ok(()),
103            NumericSuffix::Count => write!(f, "_"),
104            NumericSuffix::Unknown => write!(f, "_?"),
105            NumericSuffix::Length => write!(f, "Length"),
106            NumericSuffix::Angle => write!(f, "Angle"),
107            NumericSuffix::Mm => write!(f, "mm"),
108            NumericSuffix::Cm => write!(f, "cm"),
109            NumericSuffix::M => write!(f, "m"),
110            NumericSuffix::Inch => write!(f, "in"),
111            NumericSuffix::Ft => write!(f, "ft"),
112            NumericSuffix::Yd => write!(f, "yd"),
113            NumericSuffix::Deg => write!(f, "deg"),
114            NumericSuffix::Rad => write!(f, "rad"),
115        }
116    }
117}
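
// Illustrative sketch, not part of the original module: how `FromStr` and
// `Display` for `NumericSuffix` relate. Most suffixes round-trip through
// parse/format, but `Unknown` does not: it parses from "?" yet prints "_?".
#[cfg(test)]
mod numeric_suffix_examples {
    use super::*;

    #[test]
    fn parse_and_display() {
        assert_eq!("mm".parse::<NumericSuffix>().ok(), Some(NumericSuffix::Mm));
        assert_eq!("degrees".parse::<NumericSuffix>().ok(), Some(NumericSuffix::Deg));
        assert_eq!(NumericSuffix::Mm.to_string(), "mm");
        // Not a round trip: `Unknown` parses from "?" but displays as "_?".
        assert_eq!("?".parse::<NumericSuffix>().ok(), Some(NumericSuffix::Unknown));
        assert_eq!(NumericSuffix::Unknown.to_string(), "_?");
    }
}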

#[derive(Clone, Debug, PartialEq)]
pub(crate) struct TokenStream {
    tokens: Vec<Token>,
}

impl TokenStream {
    fn new(tokens: Vec<Token>) -> Self {
        Self { tokens }
    }

    pub(super) fn remove_unknown(&mut self) -> Vec<Token> {
        let tokens = std::mem::take(&mut self.tokens);
        let (tokens, unknown_tokens): (Vec<Token>, Vec<Token>) = tokens
            .into_iter()
            .partition(|token| token.token_type != TokenType::Unknown);
        self.tokens = tokens;
        unknown_tokens
    }

    pub fn iter(&self) -> impl Iterator<Item = &Token> {
        self.tokens.iter()
    }

    pub fn is_empty(&self) -> bool {
        self.tokens.is_empty()
    }

    pub fn as_slice(&self) -> TokenSlice<'_> {
        TokenSlice::from(self)
    }
}
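
// Illustrative sketch, not part of the original module: `remove_unknown`
// drains `Unknown` tokens out of the stream and returns them, leaving the
// rest in place. Token values/ranges are arbitrary here, and `ModuleId`
// implementing `Default` is an assumption.
#[cfg(test)]
mod token_stream_examples {
    use super::*;

    #[test]
    fn remove_unknown_partitions_the_stream() {
        let word = Token::from_range(0..1, ModuleId::default(), TokenType::Word, "a".to_owned());
        let junk = Token::from_range(1..2, ModuleId::default(), TokenType::Unknown, "~".to_owned());
        let mut stream = TokenStream::new(vec![word.clone(), junk.clone()]);

        let unknown = stream.remove_unknown();
        assert_eq!(unknown, vec![junk]);
        assert_eq!(stream.iter().collect::<Vec<_>>(), vec![&word]);
    }
}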

impl<'a> From<&'a TokenStream> for TokenSlice<'a> {
    fn from(stream: &'a TokenStream) -> Self {
        TokenSlice {
            start: 0,
            end: stream.tokens.len(),
            stream,
        }
    }
}

impl IntoIterator for TokenStream {
    type Item = Token;

    type IntoIter = std::vec::IntoIter<Token>;

    fn into_iter(self) -> Self::IntoIter {
        self.tokens.into_iter()
    }
}

#[derive(Debug, Clone)]
pub(crate) struct TokenSlice<'a> {
    stream: &'a TokenStream,
    /// Index in the stream of the first token in this slice.
    start: usize,
    /// Index in the stream one past the last token in this slice.
    end: usize,
}

impl<'a> std::ops::Deref for TokenSlice<'a> {
    type Target = [Token];

    fn deref(&self) -> &Self::Target {
        &self.stream.tokens[self.start..self.end]
    }
}

impl<'a> TokenSlice<'a> {
    pub fn token(&self, i: usize) -> &Token {
        &self.stream.tokens[i + self.start]
    }

    pub fn iter(&self) -> impl Iterator<Item = &Token> {
        (**self).iter()
    }

    pub fn without_ends(&self) -> Self {
        Self {
            start: self.start + 1,
            end: self.end - 1,
            stream: self.stream,
        }
    }

    pub fn as_source_range(&self) -> SourceRange {
        let stream_len = self.stream.tokens.len();
        let first_token = if stream_len == self.start {
            &self.stream.tokens[self.start - 1]
        } else {
            self.token(0)
        };
        let last_token = if stream_len == self.end {
            &self.stream.tokens[stream_len - 1]
        } else {
            self.token(self.end - self.start)
        };
        SourceRange::new(first_token.start, last_token.end, last_token.module_id)
    }
}
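
// Illustrative sketch, not part of the original module: `without_ends`
// shrinks a slice by one token on each side, which is how surrounding
// delimiters (e.g. braces) can be stripped before parsing the interior.
// `ModuleId::default()` is an assumption.
#[cfg(test)]
mod token_slice_examples {
    use super::*;

    #[test]
    fn without_ends_drops_the_delimiters() {
        let m = ModuleId::default();
        let stream = TokenStream::new(vec![
            Token::from_range(0..1, m, TokenType::Brace, "(".to_owned()),
            Token::from_range(1..2, m, TokenType::Word, "a".to_owned()),
            Token::from_range(2..3, m, TokenType::Brace, ")".to_owned()),
        ]);
        let inner = stream.as_slice().without_ends();
        // Via `Deref<Target = [Token]>`, the slice acts like `&[Token]`.
        assert_eq!(inner.len(), 1);
        assert_eq!(inner[0].value, "a");
    }
}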

impl<'a> IntoIterator for TokenSlice<'a> {
    type Item = &'a Token;

    type IntoIter = std::slice::Iter<'a, Token>;

    fn into_iter(self) -> Self::IntoIter {
        self.stream.tokens[self.start..self.end].iter()
    }
}

impl<'a> Stream for TokenSlice<'a> {
    type Token = Token;
    type Slice = Self;
    type IterOffsets = Enumerate<std::vec::IntoIter<Token>>;
    type Checkpoint = Checkpoint;

    fn iter_offsets(&self) -> Self::IterOffsets {
        #[allow(clippy::unnecessary_to_owned)]
        self.to_vec().into_iter().enumerate()
    }

    fn eof_offset(&self) -> usize {
        self.len()
    }

    fn next_token(&mut self) -> Option<Self::Token> {
        let token = self.first()?.clone();
        self.start += 1;
        Some(token)
    }

    /// Peek at the next token without consuming it.
    fn peek_token(&self) -> Option<Self::Token> {
        Some(self.first()?.clone())
    }

    fn offset_for<P>(&self, predicate: P) -> Option<usize>
    where
        P: Fn(Self::Token) -> bool,
    {
        self.iter().position(|b| predicate(b.clone()))
    }

    fn offset_at(&self, tokens: usize) -> Result<usize, winnow::error::Needed> {
        if let Some(needed) = tokens.checked_sub(self.len()).and_then(NonZeroUsize::new) {
            Err(winnow::error::Needed::Size(needed))
        } else {
            Ok(tokens)
        }
    }

    fn next_slice(&mut self, offset: usize) -> Self::Slice {
        assert!(self.start + offset <= self.end);

        let next = TokenSlice {
            stream: self.stream,
            start: self.start,
            end: self.start + offset,
        };
        self.start += offset;
        next
    }

    /// Peek at the next `offset` tokens without consuming them.
    fn peek_slice(&self, offset: usize) -> Self::Slice {
        assert!(self.start + offset <= self.end);

        TokenSlice {
            stream: self.stream,
            start: self.start,
            end: self.start + offset,
        }
    }

    fn checkpoint(&self) -> Self::Checkpoint {
        Checkpoint(self.start, self.end)
    }

    fn reset(&mut self, checkpoint: &Self::Checkpoint) {
        self.start = checkpoint.0;
        self.end = checkpoint.1;
    }

    fn raw(&self) -> &dyn fmt::Debug {
        self
    }
}
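
// Illustrative sketch, not part of the original module: the winnow `Stream`
// impl lets parsers consume tokens and backtrack. `next_token` advances the
// slice's start index; `reset` restores a saved checkpoint.
// `ModuleId::default()` is an assumption.
#[cfg(test)]
mod stream_impl_examples {
    use super::*;

    #[test]
    fn checkpoint_and_reset_backtrack() {
        let m = ModuleId::default();
        let stream = TokenStream::new(vec![
            Token::from_range(0..1, m, TokenType::Word, "a".to_owned()),
            Token::from_range(1..2, m, TokenType::Comma, ",".to_owned()),
        ]);
        let mut slice = stream.as_slice();

        let before = slice.checkpoint();
        assert_eq!(slice.next_token().map(|t| t.value), Some("a".to_owned()));
        assert_eq!(slice.eof_offset(), 1);

        // Backtrack to the saved position; both tokens are available again.
        slice.reset(&before);
        assert_eq!(slice.eof_offset(), 2);
    }
}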

impl<'a> winnow::stream::Offset for TokenSlice<'a> {
    fn offset_from(&self, start: &Self) -> usize {
        self.start - start.start
    }
}

impl<'a> winnow::stream::Offset<Checkpoint> for TokenSlice<'a> {
    fn offset_from(&self, start: &Checkpoint) -> usize {
        self.start - start.0
    }
}

impl winnow::stream::Offset for Checkpoint {
    fn offset_from(&self, start: &Self) -> usize {
        self.0 - start.0
    }
}

impl<'a> winnow::stream::StreamIsPartial for TokenSlice<'a> {
    type PartialState = ();

    fn complete(&mut self) -> Self::PartialState {}

    fn restore_partial(&mut self, _: Self::PartialState) {}

    fn is_partial_supported() -> bool {
        false
    }
}

impl<'a> winnow::stream::FindSlice<&str> for TokenSlice<'a> {
    fn find_slice(&self, substr: &str) -> Option<std::ops::Range<usize>> {
        self.iter()
            .enumerate()
            .find_map(|(i, b)| if b.value == substr { Some(i..self.end) } else { None })
    }
}

#[derive(Clone, Debug)]
pub struct Checkpoint(usize, usize);

/// The types of tokens.
#[derive(Debug, PartialEq, Eq, Copy, Clone, Display)]
#[display(style = "camelCase")]
pub enum TokenType {
    /// A number.
    Number,
    /// A word.
    Word,
    /// An operator.
    Operator,
    /// A string.
    String,
    /// A keyword.
    Keyword,
    /// A type.
    Type,
    /// A brace.
    Brace,
    /// A hash.
    Hash,
    /// A bang.
    Bang,
    /// A dollar sign.
    Dollar,
    /// Whitespace.
    Whitespace,
    /// A comma.
    Comma,
    /// A colon.
    Colon,
    /// A double colon: `::`
    DoubleColon,
    /// A period.
    Period,
    /// A double period: `..`.
    DoublePeriod,
    /// A double period and a less than: `..<`.
    DoublePeriodLessThan,
    /// A line comment.
    LineComment,
    /// A block comment.
    BlockComment,
    /// A function name.
    Function,
    /// Unknown lexemes.
    Unknown,
    /// The ? symbol, used for optional values.
    QuestionMark,
    /// The @ symbol.
    At,
    /// `;`
    SemiColon,
}

/// Most KCL tokens correspond to LSP semantic tokens (but not all).
impl TryFrom<TokenType> for SemanticTokenType {
    type Error = anyhow::Error;
    fn try_from(token_type: TokenType) -> Result<Self> {
        // If you return a new kind of `SemanticTokenType`, make sure to update `SEMANTIC_TOKEN_TYPES`
        // in the LSP implementation.
        Ok(match token_type {
            TokenType::Number => Self::NUMBER,
            TokenType::Word => Self::VARIABLE,
            TokenType::Keyword => Self::KEYWORD,
            TokenType::Type => Self::TYPE,
            TokenType::Operator => Self::OPERATOR,
            TokenType::QuestionMark => Self::OPERATOR,
            TokenType::String => Self::STRING,
            TokenType::Bang => Self::OPERATOR,
            TokenType::LineComment => Self::COMMENT,
            TokenType::BlockComment => Self::COMMENT,
            TokenType::Function => Self::FUNCTION,
            TokenType::Whitespace
            | TokenType::Brace
            | TokenType::Comma
            | TokenType::Colon
            | TokenType::DoubleColon
            | TokenType::Period
            | TokenType::DoublePeriod
            | TokenType::DoublePeriodLessThan
            | TokenType::Hash
            | TokenType::Dollar
            | TokenType::At
            | TokenType::SemiColon
            | TokenType::Unknown => {
                anyhow::bail!("unsupported token type: {:?}", token_type)
            }
        })
    }
}
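
// Illustrative sketch, not part of the original module: the conversion to
// LSP semantic token types is fallible because purely structural tokens
// (whitespace, braces, commas, ...) have no semantic highlighting.
#[cfg(test)]
mod semantic_token_examples {
    use super::*;

    #[test]
    fn conversion_is_fallible() {
        assert_eq!(
            SemanticTokenType::try_from(TokenType::Number).unwrap(),
            SemanticTokenType::NUMBER
        );
        // Structural tokens bail with an error instead.
        assert!(SemanticTokenType::try_from(TokenType::Whitespace).is_err());
    }
}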

impl TokenType {
    pub fn is_whitespace(&self) -> bool {
        matches!(self, Self::Whitespace)
    }

    pub fn is_comment(&self) -> bool {
        matches!(self, Self::LineComment | Self::BlockComment)
    }
}

#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Token {
    pub token_type: TokenType,
    /// Offset in the source code where this token begins.
    pub start: usize,
    /// Offset in the source code where this token ends.
    pub end: usize,
    pub(super) module_id: ModuleId,
    pub(super) value: String,
}

impl ContainsToken<Token> for (TokenType, &str) {
    fn contains_token(&self, token: Token) -> bool {
        self.0 == token.token_type && self.1 == token.value
    }
}

impl ContainsToken<Token> for TokenType {
    fn contains_token(&self, token: Token) -> bool {
        *self == token.token_type
    }
}

impl Token {
    pub fn from_range(
        range: std::ops::Range<usize>,
        module_id: ModuleId,
        token_type: TokenType,
        value: String,
    ) -> Self {
        Self {
            start: range.start,
            end: range.end,
            module_id,
            value,
            token_type,
        }
    }

    pub fn is_code_token(&self) -> bool {
        !matches!(
            self.token_type,
            TokenType::Whitespace | TokenType::LineComment | TokenType::BlockComment
        )
    }

    pub fn as_source_range(&self) -> SourceRange {
        SourceRange::new(self.start, self.end, self.module_id)
    }

    pub fn as_source_ranges(&self) -> Vec<SourceRange> {
        vec![self.as_source_range()]
    }

    pub fn visibility_keyword(&self) -> Option<ItemVisibility> {
        if !matches!(self.token_type, TokenType::Keyword) {
            return None;
        }
        match self.value.as_str() {
            "export" => Some(ItemVisibility::Export),
            _ => None,
        }
    }

    pub fn numeric_value(&self) -> Option<f64> {
        if self.token_type != TokenType::Number {
            return None;
        }
        let value = &self.value;
        let value = value
            .split_once(|c: char| c == '_' || c.is_ascii_alphabetic())
            .map(|(s, _)| s)
            .unwrap_or(value);
        value.parse().ok()
    }

    pub fn uint_value(&self) -> Option<u32> {
        if self.token_type != TokenType::Number {
            return None;
        }
        let value = &self.value;
        let value = value
            .split_once(|c: char| c == '_' || c.is_ascii_alphabetic())
            .map(|(s, _)| s)
            .unwrap_or(value);
        value.parse().ok()
    }

    pub fn numeric_suffix(&self) -> NumericSuffix {
        if self.token_type != TokenType::Number {
            return NumericSuffix::None;
        }

        if self.value.ends_with('_') {
            return NumericSuffix::Count;
        }

        for suffix in NUM_SUFFIXES {
            if self.value.ends_with(suffix) {
                return suffix.parse().unwrap();
            }
        }

        NumericSuffix::None
    }

    /// Is this token the beginning of a variable/function declaration?
    /// If so, what kind?
    /// If not, returns None.
    pub fn declaration_keyword(&self) -> Option<VariableKind> {
        if !matches!(self.token_type, TokenType::Keyword) {
            return None;
        }
        Some(match self.value.as_str() {
            "fn" => VariableKind::Fn,
            "var" | "let" | "const" => VariableKind::Const,
            _ => return None,
        })
    }
}
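
// Illustrative sketch, not part of the original module: the numeric helpers
// split a literal like "42mm" into value and suffix. The value is everything
// before the first `_` or ASCII letter; the suffix is matched against
// `NUM_SUFFIXES` with `ends_with`. `ModuleId::default()` is an assumption.
#[cfg(test)]
mod numeric_token_examples {
    use super::*;

    #[test]
    fn value_and_suffix_of_a_literal() {
        let m = ModuleId::default();
        let t = Token::from_range(0..4, m, TokenType::Number, "42mm".to_owned());
        assert_eq!(t.numeric_value(), Some(42.0));
        assert_eq!(t.uint_value(), Some(42));
        assert_eq!(t.numeric_suffix(), NumericSuffix::Mm);

        // A trailing `_` marks an explicit count, e.g. `3_`.
        let c = Token::from_range(0..2, m, TokenType::Number, "3_".to_owned());
        assert_eq!(c.numeric_suffix(), NumericSuffix::Count);
    }
}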

impl From<Token> for SourceRange {
    fn from(token: Token) -> Self {
        Self::new(token.start, token.end, token.module_id)
    }
}

impl From<&Token> for SourceRange {
    fn from(token: &Token) -> Self {
        Self::new(token.start, token.end, token.module_id)
    }
}

pub fn lex(s: &str, module_id: ModuleId) -> Result<TokenStream, KclError> {
    tokeniser::lex(s, module_id).map_err(From::from)
}
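
// Illustrative sketch, not part of the original module: entry-point usage.
// `lex` tokenises KCL source into a `TokenStream`. The exact token sequence
// depends on the tokeniser, so this only checks coarse properties;
// `ModuleId::default()` is an assumption.
#[cfg(test)]
mod lex_examples {
    use super::*;

    #[test]
    fn lex_keeps_whitespace_in_the_stream() {
        let stream = lex("x = 1mm", ModuleId::default()).unwrap();
        assert!(!stream.is_empty());
        // Whitespace stays in the stream; `is_code_token` filters it out.
        assert!(stream.iter().any(|t| t.is_code_token()));
        assert!(stream.iter().any(|t| !t.is_code_token()));
    }
}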

impl From<ParseError<Input<'_>, winnow::error::ContextError>> for KclError {
    fn from(err: ParseError<Input<'_>, winnow::error::ContextError>) -> Self {
        let (input, offset): (Vec<char>, usize) = (err.input().chars().collect(), err.offset());
        let module_id = err.input().state.module_id;

        if offset >= input.len() {
            // From the winnow docs:
            //
            // This is an offset, not an index, and may point to
            // the end of input (input.len()) on eof errors.

            return KclError::new_lexical(crate::errors::KclErrorDetails::new(
                "unexpected EOF while parsing".to_owned(),
                vec![SourceRange::new(offset, offset, module_id)],
            ));
        }

        // TODO: Add the Winnow tokenizer context to the error.
        // See https://github.com/KittyCAD/modeling-app/issues/784
        let bad_token = &input[offset];
        // TODO: Add the Winnow parser context to the error.
        // See https://github.com/KittyCAD/modeling-app/issues/784
        KclError::new_lexical(crate::errors::KclErrorDetails::new(
            format!("found unknown token '{bad_token}'"),
            vec![SourceRange::new(offset, offset + 1, module_id)],
        ))
    }
}