kcl_lib/parsing/token/
mod.rs

1// Clippy does not agree with rustc here for some reason.
2#![allow(clippy::needless_lifetimes)]
3
4use std::{fmt, iter::Enumerate, num::NonZeroUsize, str::FromStr};
5
6use anyhow::Result;
7use parse_display::Display;
8use schemars::JsonSchema;
9use serde::{Deserialize, Serialize};
10use tokeniser::Input;
11use tower_lsp::lsp_types::SemanticTokenType;
12use winnow::{
13    self,
14    error::ParseError,
15    stream::{ContainsToken, Stream},
16};
17
18use crate::{
19    errors::KclError,
20    parsing::ast::types::{ItemVisibility, VariableKind},
21    source_range::SourceRange,
22    CompilationError, ModuleId,
23};
24
25mod tokeniser;
26
27pub(crate) use tokeniser::RESERVED_WORDS;
28
/// All unit-of-measure suffixes recognized at the end of a numeric literal.
/// Matching is done with `ends_with`, so longer suffixes sharing a final
/// letter must come first.
// Note the ordering, it's important that `m` comes after `mm` and `cm`.
pub const NUM_SUFFIXES: [&str; 9] = ["mm", "cm", "m", "inch", "in", "ft", "yd", "deg", "rad"];
31
/// The suffix of a numeric literal, indicating its unit of measure
/// (e.g. `42mm`) or that it is a unitless count (`42_`).
#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize, ts_rs::TS, JsonSchema)]
#[repr(u32)]
pub enum NumericSuffix {
    /// No suffix was written.
    None,
    /// `_` — a dimensionless count.
    Count,
    /// `Length` — a length of unspecified unit.
    Length,
    /// `Angle` — an angle of unspecified unit.
    Angle,
    /// Millimeters (`mm`).
    Mm,
    /// Centimeters (`cm`).
    Cm,
    /// Meters (`m`).
    M,
    /// Inches (`in` or `inch`).
    Inch,
    /// Feet (`ft`).
    Ft,
    /// Yards (`yd`).
    Yd,
    /// Degrees (`deg`).
    Deg,
    /// Radians (`rad`).
    Rad,
}
48
49impl NumericSuffix {
50    #[allow(dead_code)]
51    pub fn is_none(self) -> bool {
52        self == Self::None
53    }
54
55    pub fn is_some(self) -> bool {
56        self != Self::None
57    }
58
59    pub fn digestable_id(&self) -> &[u8] {
60        match self {
61            NumericSuffix::None => &[],
62            NumericSuffix::Count => b"_",
63            NumericSuffix::Length => b"Length",
64            NumericSuffix::Angle => b"Angle",
65            NumericSuffix::Mm => b"mm",
66            NumericSuffix::Cm => b"cm",
67            NumericSuffix::M => b"m",
68            NumericSuffix::Inch => b"in",
69            NumericSuffix::Ft => b"ft",
70            NumericSuffix::Yd => b"yd",
71            NumericSuffix::Deg => b"deg",
72            NumericSuffix::Rad => b"rad",
73        }
74    }
75}
76
77impl FromStr for NumericSuffix {
78    type Err = CompilationError;
79
80    fn from_str(s: &str) -> Result<Self, Self::Err> {
81        match s {
82            "_" | "Count" => Ok(NumericSuffix::Count),
83            "Length" => Ok(NumericSuffix::Length),
84            "Angle" => Ok(NumericSuffix::Angle),
85            "mm" | "millimeters" => Ok(NumericSuffix::Mm),
86            "cm" | "centimeters" => Ok(NumericSuffix::Cm),
87            "m" | "meters" => Ok(NumericSuffix::M),
88            "inch" | "in" => Ok(NumericSuffix::Inch),
89            "ft" | "feet" => Ok(NumericSuffix::Ft),
90            "yd" | "yards" => Ok(NumericSuffix::Yd),
91            "deg" | "degrees" => Ok(NumericSuffix::Deg),
92            "rad" | "radians" => Ok(NumericSuffix::Rad),
93            _ => Err(CompilationError::err(SourceRange::default(), "invalid unit of measure")),
94        }
95    }
96}
97
98impl fmt::Display for NumericSuffix {
99    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
100        match self {
101            NumericSuffix::None => Ok(()),
102            NumericSuffix::Count => write!(f, "_"),
103            NumericSuffix::Length => write!(f, "Length"),
104            NumericSuffix::Angle => write!(f, "Angle"),
105            NumericSuffix::Mm => write!(f, "mm"),
106            NumericSuffix::Cm => write!(f, "cm"),
107            NumericSuffix::M => write!(f, "m"),
108            NumericSuffix::Inch => write!(f, "in"),
109            NumericSuffix::Ft => write!(f, "ft"),
110            NumericSuffix::Yd => write!(f, "yd"),
111            NumericSuffix::Deg => write!(f, "deg"),
112            NumericSuffix::Rad => write!(f, "rad"),
113        }
114    }
115}
116
/// The complete sequence of tokens lexed from one KCL source module.
#[derive(Clone, Debug, PartialEq)]
pub(crate) struct TokenStream {
    tokens: Vec<Token>,
}
121
122impl TokenStream {
123    fn new(tokens: Vec<Token>) -> Self {
124        Self { tokens }
125    }
126
127    pub(super) fn remove_unknown(&mut self) -> Vec<Token> {
128        let tokens = std::mem::take(&mut self.tokens);
129        let (tokens, unknown_tokens): (Vec<Token>, Vec<Token>) = tokens
130            .into_iter()
131            .partition(|token| token.token_type != TokenType::Unknown);
132        self.tokens = tokens;
133        unknown_tokens
134    }
135
136    pub fn iter(&self) -> impl Iterator<Item = &Token> {
137        self.tokens.iter()
138    }
139
140    pub fn is_empty(&self) -> bool {
141        self.tokens.is_empty()
142    }
143
144    pub fn as_slice(&self) -> TokenSlice {
145        TokenSlice::from(self)
146    }
147}
148
149impl<'a> From<&'a TokenStream> for TokenSlice<'a> {
150    fn from(stream: &'a TokenStream) -> Self {
151        TokenSlice {
152            start: 0,
153            end: stream.tokens.len(),
154            stream,
155        }
156    }
157}
158
/// Consume the stream, yielding owned tokens.
impl IntoIterator for TokenStream {
    type Item = Token;

    type IntoIter = std::vec::IntoIter<Token>;

    fn into_iter(self) -> Self::IntoIter {
        self.tokens.into_iter()
    }
}
168
/// A borrowed view of a contiguous range of tokens within a [`TokenStream`].
/// This is the input type consumed by the winnow-based parser.
#[derive(Debug, Clone)]
pub(crate) struct TokenSlice<'a> {
    stream: &'a TokenStream,
    /// Current position of the leading Token in the stream
    start: usize,
    /// Exclusive end index of this slice within the stream (one past the last
    /// token of the slice — not a token count).
    end: usize,
}
177
/// Deref to the underlying `[Token]` sub-slice so slice methods
/// (`len`, `first`, `to_vec`, ...) work directly on a `TokenSlice`.
impl<'a> std::ops::Deref for TokenSlice<'a> {
    type Target = [Token];

    fn deref(&self) -> &Self::Target {
        &self.stream.tokens[self.start..self.end]
    }
}
185
impl<'a> TokenSlice<'a> {
    /// The token at position `i`, relative to the start of this slice.
    /// Panics if the resulting index is out of bounds of the whole stream.
    pub fn token(&self, i: usize) -> &Token {
        &self.stream.tokens[i + self.start]
    }

    /// Iterate over the tokens within this slice.
    pub fn iter(&self) -> impl Iterator<Item = &Token> {
        (**self).iter()
    }

    /// A sub-slice with the first and last tokens dropped (e.g. to strip
    /// surrounding delimiters). Underflows (panics in debug) if the slice is
    /// empty — callers must ensure at least two tokens.
    pub fn without_ends(&self) -> Self {
        Self {
            start: self.start + 1,
            end: self.end - 1,
            stream: self.stream,
        }
    }

    /// The source range covered by this slice, clamped to the stream's bounds.
    ///
    /// NOTE(review): when `end` is not at the stream's end, the "last" token
    /// used is `self.token(self.end - self.start)`, i.e. the token one past
    /// the slice — confirm this apparent off-by-one is intentional (it looks
    /// like it extends the range up to the next unconsumed token).
    pub fn as_source_range(&self) -> SourceRange {
        let stream_len = self.stream.tokens.len();
        let first_token = if stream_len == self.start {
            // Slice starts at EOF; fall back to the final token.
            &self.stream.tokens[self.start - 1]
        } else {
            self.token(0)
        };
        let last_token = if stream_len == self.end {
            &self.stream.tokens[stream_len - 1]
        } else {
            self.token(self.end - self.start)
        };
        SourceRange::new(first_token.start, last_token.end, last_token.module_id)
    }
}
218
/// Iterate the slice by reference; tokens borrow from the underlying stream.
impl<'a> IntoIterator for TokenSlice<'a> {
    type Item = &'a Token;

    type IntoIter = std::slice::Iter<'a, Token>;

    fn into_iter(self) -> Self::IntoIter {
        self.stream.tokens[self.start..self.end].iter()
    }
}
228
/// winnow `Stream` implementation: lets the parser consume tokens by
/// advancing `start`, and backtrack via [`Checkpoint`]s.
impl<'a> Stream for TokenSlice<'a> {
    type Token = Token;
    type Slice = Self;
    type IterOffsets = Enumerate<std::vec::IntoIter<Token>>;
    type Checkpoint = Checkpoint;

    /// Yield (offset, token) pairs. Clones the remaining tokens into a `Vec`
    /// because the offsets iterator must own its items.
    fn iter_offsets(&self) -> Self::IterOffsets {
        #[allow(clippy::unnecessary_to_owned)]
        self.to_vec().into_iter().enumerate()
    }

    /// Number of tokens remaining until end of input.
    fn eof_offset(&self) -> usize {
        self.len()
    }

    /// Pop the first token (cloned) and advance the slice past it.
    fn next_token(&mut self) -> Option<Self::Token> {
        let token = self.first()?.clone();
        self.start += 1;
        Some(token)
    }

    /// Offset of the first token satisfying `predicate`, if any.
    fn offset_for<P>(&self, predicate: P) -> Option<usize>
    where
        P: Fn(Self::Token) -> bool,
    {
        self.iter().position(|b| predicate(b.clone()))
    }

    /// Verify at least `tokens` tokens remain, else report how many more are
    /// needed (winnow's partial-input protocol).
    fn offset_at(&self, tokens: usize) -> Result<usize, winnow::error::Needed> {
        if let Some(needed) = tokens.checked_sub(self.len()).and_then(NonZeroUsize::new) {
            Err(winnow::error::Needed::Size(needed))
        } else {
            Ok(tokens)
        }
    }

    /// Split off the next `offset` tokens as a new slice and advance past them.
    fn next_slice(&mut self, offset: usize) -> Self::Slice {
        assert!(self.start + offset <= self.end);

        let next = TokenSlice {
            stream: self.stream,
            start: self.start,
            end: self.start + offset,
        };
        self.start += offset;
        next
    }

    /// Record the current (start, end) so the parser can backtrack here.
    fn checkpoint(&self) -> Self::Checkpoint {
        Checkpoint(self.start, self.end)
    }

    /// Restore a previously recorded position.
    fn reset(&mut self, checkpoint: &Self::Checkpoint) {
        self.start = checkpoint.0;
        self.end = checkpoint.1;
    }

    fn raw(&self) -> &dyn fmt::Debug {
        self
    }
}
290
/// Distance (in tokens) this slice has advanced past `start`.
impl<'a> winnow::stream::Offset for TokenSlice<'a> {
    fn offset_from(&self, start: &Self) -> usize {
        self.start - start.start
    }
}
296
/// Distance (in tokens) this slice has advanced past a checkpoint.
impl<'a> winnow::stream::Offset<Checkpoint> for TokenSlice<'a> {
    fn offset_from(&self, start: &Checkpoint) -> usize {
        self.start - start.0
    }
}
302
/// Distance (in tokens) between two checkpoints' start positions.
impl winnow::stream::Offset for Checkpoint {
    fn offset_from(&self, start: &Self) -> usize {
        self.0 - start.0
    }
}
308
/// Token input is always complete (the whole file is lexed up front), so
/// partial/streaming parsing is unsupported.
impl<'a> winnow::stream::StreamIsPartial for TokenSlice<'a> {
    type PartialState = ();

    fn complete(&mut self) -> Self::PartialState {}

    fn restore_partial(&mut self, _: Self::PartialState) {}

    fn is_partial_supported() -> bool {
        false
    }
}
320
/// Find the first token whose source text equals `substr`.
impl<'a> winnow::stream::FindSlice<&str> for TokenSlice<'a> {
    fn find_slice(&self, substr: &str) -> Option<std::ops::Range<usize>> {
        // NOTE(review): `i` is an index relative to this slice, but `self.end`
        // is an absolute index into the underlying stream — confirm winnow
        // only uses the range's start here, or this mixes coordinate systems.
        self.iter()
            .enumerate()
            .find_map(|(i, b)| if b.value == substr { Some(i..self.end) } else { None })
    }
}
328
/// A saved `(start, end)` position of a [`TokenSlice`], used by winnow to
/// backtrack after a failed parse branch.
#[derive(Clone, Debug)]
pub struct Checkpoint(usize, usize);
331
/// The types of tokens.
// Displayed in camelCase (e.g. `lineComment`) via `parse_display`.
#[derive(Debug, PartialEq, Eq, Copy, Clone, Display)]
#[display(style = "camelCase")]
pub enum TokenType {
    /// A number.
    Number,
    /// A word.
    Word,
    /// An operator.
    Operator,
    /// A string.
    String,
    /// A keyword.
    Keyword,
    /// A type.
    Type,
    /// A brace.
    Brace,
    /// A hash: `#`.
    Hash,
    /// A bang: `!`.
    Bang,
    /// A dollar sign: `$`.
    Dollar,
    /// Whitespace.
    Whitespace,
    /// A comma: `,`.
    Comma,
    /// A colon: `:`.
    Colon,
    /// A double colon: `::`
    DoubleColon,
    /// A period: `.`.
    Period,
    /// A double period: `..`.
    DoublePeriod,
    /// A line comment.
    LineComment,
    /// A block comment.
    BlockComment,
    /// A function name.
    Function,
    /// Unknown lexemes.
    Unknown,
    /// The ? symbol, used for optional values.
    QuestionMark,
    /// The @ symbol.
    At,
    /// `;`
    SemiColon,
}
383
384/// Most KCL tokens correspond to LSP semantic tokens (but not all).
385impl TryFrom<TokenType> for SemanticTokenType {
386    type Error = anyhow::Error;
387    fn try_from(token_type: TokenType) -> Result<Self> {
388        // If you return a new kind of `SemanticTokenType`, make sure to update `SEMANTIC_TOKEN_TYPES`
389        // in the LSP implementation.
390        Ok(match token_type {
391            TokenType::Number => Self::NUMBER,
392            TokenType::Word => Self::VARIABLE,
393            TokenType::Keyword => Self::KEYWORD,
394            TokenType::Type => Self::TYPE,
395            TokenType::Operator => Self::OPERATOR,
396            TokenType::QuestionMark => Self::OPERATOR,
397            TokenType::String => Self::STRING,
398            TokenType::Bang => Self::OPERATOR,
399            TokenType::LineComment => Self::COMMENT,
400            TokenType::BlockComment => Self::COMMENT,
401            TokenType::Function => Self::FUNCTION,
402            TokenType::Whitespace
403            | TokenType::Brace
404            | TokenType::Comma
405            | TokenType::Colon
406            | TokenType::DoubleColon
407            | TokenType::Period
408            | TokenType::DoublePeriod
409            | TokenType::Hash
410            | TokenType::Dollar
411            | TokenType::At
412            | TokenType::SemiColon
413            | TokenType::Unknown => {
414                anyhow::bail!("unsupported token type: {:?}", token_type)
415            }
416        })
417    }
418}
419
420impl TokenType {
421    pub fn is_whitespace(&self) -> bool {
422        matches!(self, Self::Whitespace)
423    }
424
425    pub fn is_comment(&self) -> bool {
426        matches!(self, Self::LineComment | Self::BlockComment)
427    }
428}
429
/// A single lexed token: its kind, position, and source text.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Token {
    pub token_type: TokenType,
    /// Offset in the source code where this token begins.
    pub start: usize,
    /// Offset in the source code where this token ends.
    pub end: usize,
    /// Which module this token was lexed from.
    pub(super) module_id: ModuleId,
    /// The token's source text, exactly as written.
    pub(super) value: String,
}
440
/// Match a token by both its type and its exact source text.
impl ContainsToken<Token> for (TokenType, &str) {
    fn contains_token(&self, token: Token) -> bool {
        self.0 == token.token_type && self.1 == token.value
    }
}
446
/// Match a token by its type alone, regardless of source text.
impl ContainsToken<Token> for TokenType {
    fn contains_token(&self, token: Token) -> bool {
        *self == token.token_type
    }
}
452
453impl Token {
454    pub fn from_range(
455        range: std::ops::Range<usize>,
456        module_id: ModuleId,
457        token_type: TokenType,
458        value: String,
459    ) -> Self {
460        Self {
461            start: range.start,
462            end: range.end,
463            module_id,
464            value,
465            token_type,
466        }
467    }
468    pub fn is_code_token(&self) -> bool {
469        !matches!(
470            self.token_type,
471            TokenType::Whitespace | TokenType::LineComment | TokenType::BlockComment
472        )
473    }
474
475    pub fn as_source_range(&self) -> SourceRange {
476        SourceRange::new(self.start, self.end, self.module_id)
477    }
478
479    pub fn as_source_ranges(&self) -> Vec<SourceRange> {
480        vec![self.as_source_range()]
481    }
482
483    pub fn visibility_keyword(&self) -> Option<ItemVisibility> {
484        if !matches!(self.token_type, TokenType::Keyword) {
485            return None;
486        }
487        match self.value.as_str() {
488            "export" => Some(ItemVisibility::Export),
489            _ => None,
490        }
491    }
492
493    pub fn numeric_value(&self) -> Option<f64> {
494        if self.token_type != TokenType::Number {
495            return None;
496        }
497        let value = &self.value;
498        let value = value
499            .split_once(|c: char| c == '_' || c.is_ascii_alphabetic())
500            .map(|(s, _)| s)
501            .unwrap_or(value);
502        value.parse().ok()
503    }
504
505    pub fn uint_value(&self) -> Option<u32> {
506        if self.token_type != TokenType::Number {
507            return None;
508        }
509        let value = &self.value;
510        let value = value
511            .split_once(|c: char| c == '_' || c.is_ascii_alphabetic())
512            .map(|(s, _)| s)
513            .unwrap_or(value);
514        value.parse().ok()
515    }
516
517    pub fn numeric_suffix(&self) -> NumericSuffix {
518        if self.token_type != TokenType::Number {
519            return NumericSuffix::None;
520        }
521
522        if self.value.ends_with('_') {
523            return NumericSuffix::Count;
524        }
525
526        for suffix in NUM_SUFFIXES {
527            if self.value.ends_with(suffix) {
528                return suffix.parse().unwrap();
529            }
530        }
531
532        NumericSuffix::None
533    }
534
535    /// Is this token the beginning of a variable/function declaration?
536    /// If so, what kind?
537    /// If not, returns None.
538    pub fn declaration_keyword(&self) -> Option<VariableKind> {
539        if !matches!(self.token_type, TokenType::Keyword) {
540            return None;
541        }
542        Some(match self.value.as_str() {
543            "fn" => VariableKind::Fn,
544            "var" | "let" | "const" => VariableKind::Const,
545            _ => return None,
546        })
547    }
548}
549
550impl From<Token> for SourceRange {
551    fn from(token: Token) -> Self {
552        Self::new(token.start, token.end, token.module_id)
553    }
554}
555
/// The source range a token covers, by reference.
impl From<&Token> for SourceRange {
    fn from(token: &Token) -> Self {
        Self::new(token.start, token.end, token.module_id)
    }
}
561
/// Lex the KCL source `s` (belonging to module `module_id`) into a token
/// stream, converting any low-level winnow parse error into a `KclError`.
pub fn lex(s: &str, module_id: ModuleId) -> Result<TokenStream, KclError> {
    tokeniser::lex(s, module_id).map_err(From::from)
}
565
566impl From<ParseError<Input<'_>, winnow::error::ContextError>> for KclError {
567    fn from(err: ParseError<Input<'_>, winnow::error::ContextError>) -> Self {
568        let (input, offset): (Vec<char>, usize) = (err.input().chars().collect(), err.offset());
569        let module_id = err.input().state.module_id;
570
571        if offset >= input.len() {
572            // From the winnow docs:
573            //
574            // This is an offset, not an index, and may point to
575            // the end of input (input.len()) on eof errors.
576
577            return KclError::Lexical(crate::errors::KclErrorDetails {
578                source_ranges: vec![SourceRange::new(offset, offset, module_id)],
579                message: "unexpected EOF while parsing".to_string(),
580            });
581        }
582
583        // TODO: Add the Winnow tokenizer context to the error.
584        // See https://github.com/KittyCAD/modeling-app/issues/784
585        let bad_token = &input[offset];
586        // TODO: Add the Winnow parser context to the error.
587        // See https://github.com/KittyCAD/modeling-app/issues/784
588        KclError::Lexical(crate::errors::KclErrorDetails {
589            source_ranges: vec![SourceRange::new(offset, offset + 1, module_id)],
590            message: format!("found unknown token '{}'", bad_token),
591        })
592    }
593}