// kcl_lib/parsing/token/mod.rs — token types and token-stream plumbing for the KCL parser.
1// Clippy does not agree with rustc here for some reason.
2#![allow(clippy::needless_lifetimes)]
3
4use std::fmt;
5use std::iter::Enumerate;
6use std::num::NonZeroUsize;
7use std::str::FromStr;
8
9use anyhow::Result;
10use parse_display::Display;
11use serde::Deserialize;
12use serde::Serialize;
13use tokeniser::Input;
14use tower_lsp::lsp_types::SemanticTokenType;
15use winnow::error::ParseError;
16use winnow::stream::ContainsToken;
17use winnow::stream::Stream;
18use winnow::{self};
19
20use crate::CompilationError;
21use crate::ModuleId;
22use crate::SourceRange;
23use crate::errors::KclError;
24use crate::parsing::ast::types::ItemVisibility;
25use crate::parsing::ast::types::VariableKind;
26
27mod tokeniser;
28
29pub(crate) use tokeniser::RESERVED_SKETCH_BLOCK_WORDS;
30pub(crate) use tokeniser::RESERVED_WORDS;
31
// Unit suffixes recognized at the end of numeric literals, checked in order by
// `Token::numeric_suffix` via `ends_with`.
// Note the ordering, it's important that `m` comes after `mm` and `cm`.
// (Likewise `in` comes after `inch`, so `4inch` is not matched as `in`.)
pub const NUM_SUFFIXES: [&str; 10] = ["mm", "cm", "m", "inch", "in", "ft", "yd", "deg", "rad", "?"];
34
/// A unit-of-measure (or count) suffix attached to a numeric literal.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize, ts_rs::TS)]
#[repr(u32)]
pub enum NumericSuffix {
    /// No suffix was written on the literal.
    None,
    /// `_` — a dimensionless count.
    Count,
    /// Abstract length type (no specific unit).
    Length,
    /// Abstract angle type (no specific unit).
    Angle,
    Mm,
    Cm,
    M,
    Inch,
    Ft,
    Yd,
    Deg,
    Rad,
    /// `?` — explicitly unknown/unspecified units.
    Unknown,
}
52
53impl NumericSuffix {
54    #[allow(dead_code)]
55    pub fn is_none(self) -> bool {
56        self == Self::None
57    }
58
59    pub fn is_some(self) -> bool {
60        self != Self::None
61    }
62
63    pub fn digestable_id(&self) -> &[u8] {
64        match self {
65            NumericSuffix::None => &[],
66            NumericSuffix::Count => b"_",
67            NumericSuffix::Unknown => b"?",
68            NumericSuffix::Length => b"Length",
69            NumericSuffix::Angle => b"Angle",
70            NumericSuffix::Mm => b"mm",
71            NumericSuffix::Cm => b"cm",
72            NumericSuffix::M => b"m",
73            NumericSuffix::Inch => b"in",
74            NumericSuffix::Ft => b"ft",
75            NumericSuffix::Yd => b"yd",
76            NumericSuffix::Deg => b"deg",
77            NumericSuffix::Rad => b"rad",
78        }
79    }
80}
81
82impl FromStr for NumericSuffix {
83    type Err = CompilationError;
84
85    fn from_str(s: &str) -> Result<Self, Self::Err> {
86        match s {
87            "_" | "Count" => Ok(NumericSuffix::Count),
88            "Length" => Ok(NumericSuffix::Length),
89            "Angle" => Ok(NumericSuffix::Angle),
90            "mm" | "millimeters" => Ok(NumericSuffix::Mm),
91            "cm" | "centimeters" => Ok(NumericSuffix::Cm),
92            "m" | "meters" => Ok(NumericSuffix::M),
93            "inch" | "in" => Ok(NumericSuffix::Inch),
94            "ft" | "feet" => Ok(NumericSuffix::Ft),
95            "yd" | "yards" => Ok(NumericSuffix::Yd),
96            "deg" | "degrees" => Ok(NumericSuffix::Deg),
97            "rad" | "radians" => Ok(NumericSuffix::Rad),
98            "?" => Ok(NumericSuffix::Unknown),
99            _ => Err(CompilationError::err(SourceRange::default(), "invalid unit of measure")),
100        }
101    }
102}
103
impl fmt::Display for NumericSuffix {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            // `None` prints nothing — the literal appears bare.
            NumericSuffix::None => Ok(()),
            NumericSuffix::Count => write!(f, "_"),
            // NOTE(review): prints `_?`, but `FromStr` only accepts `?` and
            // `NUM_SUFFIXES` lists `?` — the Display/parse round-trip is
            // asymmetric for this variant. Confirm this is intentional.
            NumericSuffix::Unknown => write!(f, "_?"),
            NumericSuffix::Length => write!(f, "Length"),
            NumericSuffix::Angle => write!(f, "Angle"),
            NumericSuffix::Mm => write!(f, "mm"),
            NumericSuffix::Cm => write!(f, "cm"),
            NumericSuffix::M => write!(f, "m"),
            NumericSuffix::Inch => write!(f, "in"),
            NumericSuffix::Ft => write!(f, "ft"),
            NumericSuffix::Yd => write!(f, "yd"),
            NumericSuffix::Deg => write!(f, "deg"),
            NumericSuffix::Rad => write!(f, "rad"),
        }
    }
}
123
/// The ordered sequence of tokens produced by lexing one module's source.
#[derive(Clone, Debug, PartialEq)]
pub(crate) struct TokenStream {
    tokens: Vec<Token>,
}
128
129impl TokenStream {
130    fn new(tokens: Vec<Token>) -> Self {
131        Self { tokens }
132    }
133
134    pub(super) fn remove_unknown(&mut self) -> Vec<Token> {
135        let tokens = std::mem::take(&mut self.tokens);
136        let (tokens, unknown_tokens): (Vec<Token>, Vec<Token>) = tokens
137            .into_iter()
138            .partition(|token| token.token_type != TokenType::Unknown);
139        self.tokens = tokens;
140        unknown_tokens
141    }
142
143    pub fn iter(&self) -> impl Iterator<Item = &Token> {
144        self.tokens.iter()
145    }
146
147    pub fn is_empty(&self) -> bool {
148        self.tokens.is_empty()
149    }
150
151    pub fn as_slice(&self) -> TokenSlice<'_> {
152        TokenSlice::from(self)
153    }
154}
155
impl<'a> From<&'a TokenStream> for TokenSlice<'a> {
    /// A slice spanning the entire stream.
    fn from(stream: &'a TokenStream) -> Self {
        TokenSlice {
            start: 0,
            end: stream.tokens.len(),
            stream,
        }
    }
}
165
166impl IntoIterator for TokenStream {
167    type Item = Token;
168
169    type IntoIter = std::vec::IntoIter<Token>;
170
171    fn into_iter(self) -> Self::IntoIter {
172        self.tokens.into_iter()
173    }
174}
175
/// A borrowed window into a `TokenStream`, used as the winnow parser input.
#[derive(Debug, Clone)]
pub(crate) struct TokenSlice<'a> {
    stream: &'a TokenStream,
    /// Index into `stream.tokens` of the first token in this slice.
    start: usize,
    /// Exclusive end index into `stream.tokens` (one past the last token of
    /// this slice). Equals the stream length for a full-stream slice, but
    /// shrinks under `without_ends`/`next_slice`.
    end: usize,
}
184
impl<'a> std::ops::Deref for TokenSlice<'a> {
    type Target = [Token];

    /// View the slice's current `[start, end)` window of the underlying tokens.
    fn deref(&self) -> &Self::Target {
        &self.stream.tokens[self.start..self.end]
    }
}
192
impl<'a> TokenSlice<'a> {
    /// The `i`th token of this slice (panics if `i` is past the stream's end).
    pub fn token(&self, i: usize) -> &Token {
        &self.stream.tokens[i + self.start]
    }

    pub fn iter(&self) -> impl Iterator<Item = &Token> {
        (**self).iter()
    }

    /// This slice minus its first and last tokens.
    /// NOTE(review): underflows on an empty slice — presumably only called on
    /// delimited spans (brackets, parens); confirm at call sites.
    pub fn without_ends(&self) -> Self {
        Self {
            start: self.start + 1,
            end: self.end - 1,
            stream: self.stream,
        }
    }

    /// The source range covered by this slice's tokens.
    pub fn as_source_range(&self) -> SourceRange {
        let stream_len = self.stream.tokens.len();
        // If the slice starts at end-of-stream, fall back to the final token.
        let first_token = if stream_len == self.start {
            &self.stream.tokens[self.start - 1]
        } else {
            self.token(0)
        };
        // NOTE(review): when the slice does not reach end-of-stream,
        // `self.token(self.end - self.start)` resolves to index `self.end`,
        // i.e. the first token *after* the slice, so the range's end comes
        // from past the slice — confirm this off-by-one is intentional.
        let last_token = if stream_len == self.end {
            &self.stream.tokens[stream_len - 1]
        } else {
            self.token(self.end - self.start)
        };
        SourceRange::new(first_token.start, last_token.end, last_token.module_id)
    }
}
225
226impl<'a> IntoIterator for TokenSlice<'a> {
227    type Item = &'a Token;
228
229    type IntoIter = std::slice::Iter<'a, Token>;
230
231    fn into_iter(self) -> Self::IntoIter {
232        self.stream.tokens[self.start..self.end].iter()
233    }
234}
235
/// winnow input-stream implementation, letting parsers consume a `TokenSlice`
/// token by token. Advancing only moves `start`/`end`; the underlying
/// `TokenStream` is never mutated.
impl<'a> Stream for TokenSlice<'a> {
    type Token = Token;
    type Slice = Self;
    type IterOffsets = Enumerate<std::vec::IntoIter<Token>>;
    type Checkpoint = Checkpoint;

    fn iter_offsets(&self) -> Self::IterOffsets {
        // `IterOffsets` is an owning iterator, so the visible window must be
        // cloned into a Vec first.
        #[allow(clippy::unnecessary_to_owned)]
        self.to_vec().into_iter().enumerate()
    }

    /// Number of tokens remaining.
    fn eof_offset(&self) -> usize {
        self.len()
    }

    /// Pop the leading token (clone it and advance `start`).
    fn next_token(&mut self) -> Option<Self::Token> {
        let token = self.first()?.clone();
        self.start += 1;
        Some(token)
    }

    /// Split off the next token from the input
    fn peek_token(&self) -> Option<Self::Token> {
        Some(self.first()?.clone())
    }

    fn offset_for<P>(&self, predicate: P) -> Option<usize>
    where
        P: Fn(Self::Token) -> bool,
    {
        // The predicate takes tokens by value, hence the clone per element.
        self.iter().position(|b| predicate(b.clone()))
    }

    fn offset_at(&self, tokens: usize) -> Result<usize, winnow::error::Needed> {
        // If more tokens are requested than remain, report how many are missing.
        if let Some(needed) = tokens.checked_sub(self.len()).and_then(NonZeroUsize::new) {
            Err(winnow::error::Needed::Size(needed))
        } else {
            Ok(tokens)
        }
    }

    /// Hand back the first `offset` tokens as a sub-slice and advance past them.
    fn next_slice(&mut self, offset: usize) -> Self::Slice {
        assert!(self.start + offset <= self.end);

        let next = TokenSlice {
            stream: self.stream,
            start: self.start,
            end: self.start + offset,
        };
        self.start += offset;
        next
    }

    /// Split off a slice of tokens from the input
    fn peek_slice(&self, offset: usize) -> Self::Slice {
        assert!(self.start + offset <= self.end);

        TokenSlice {
            stream: self.stream,
            start: self.start,
            end: self.start + offset,
        }
    }

    /// Save the current window so winnow can backtrack on parse failure.
    fn checkpoint(&self) -> Self::Checkpoint {
        Checkpoint(self.start, self.end)
    }

    fn reset(&mut self, checkpoint: &Self::Checkpoint) {
        self.start = checkpoint.0;
        self.end = checkpoint.1;
    }

    fn raw(&self) -> &dyn fmt::Debug {
        self
    }
}
313
/// Distance, in tokens, that this slice's start has advanced past `start`'s.
impl<'a> winnow::stream::Offset for TokenSlice<'a> {
    fn offset_from(&self, start: &Self) -> usize {
        self.start - start.start
    }
}

/// Distance, in tokens, from a saved checkpoint to the current position.
impl<'a> winnow::stream::Offset<Checkpoint> for TokenSlice<'a> {
    fn offset_from(&self, start: &Checkpoint) -> usize {
        self.start - start.0
    }
}

/// Distance, in tokens, between two checkpoints.
impl winnow::stream::Offset for Checkpoint {
    fn offset_from(&self, start: &Self) -> usize {
        self.0 - start.0
    }
}
331
/// Token slices are always complete input — there is no streaming/partial mode.
impl<'a> winnow::stream::StreamIsPartial for TokenSlice<'a> {
    type PartialState = ();

    fn complete(&mut self) -> Self::PartialState {}

    fn restore_partial(&mut self, _: Self::PartialState) {}

    fn is_partial_supported() -> bool {
        false
    }
}
343
impl<'a> winnow::stream::FindSlice<&str> for TokenSlice<'a> {
    /// Find the first token whose text equals `substr`.
    /// NOTE(review): the returned range mixes coordinate spaces — `i` is an
    /// offset relative to this slice while `self.end` is an absolute index into
    /// the stream. winnow appears to only consume up to the range start, but
    /// confirm before relying on the range's end.
    fn find_slice(&self, substr: &str) -> Option<std::ops::Range<usize>> {
        self.iter()
            .enumerate()
            .find_map(|(i, b)| if b.value == substr { Some(i..self.end) } else { None })
    }
}
351
/// A saved `(start, end)` window of a `TokenSlice`, used by winnow to backtrack.
#[derive(Clone, Debug)]
pub struct Checkpoint(usize, usize);
354
/// The types of tokens.
/// The `Display` derive renders variant names in camelCase (e.g. `lineComment`).
#[derive(Debug, PartialEq, Eq, Copy, Clone, Display)]
#[display(style = "camelCase")]
pub enum TokenType {
    /// A number.
    Number,
    /// A word.
    Word,
    /// An operator.
    Operator,
    /// A string.
    String,
    /// A keyword.
    Keyword,
    /// A type.
    Type,
    /// A brace.
    Brace,
    /// A hash.
    Hash,
    /// A bang.
    Bang,
    /// A dollar sign.
    Dollar,
    /// Whitespace.
    Whitespace,
    /// A comma.
    Comma,
    /// A colon.
    Colon,
    /// A double colon: `::`
    DoubleColon,
    /// A period.
    Period,
    /// A double period: `..`.
    DoublePeriod,
    /// A double period and a less than: `..<`.
    DoublePeriodLessThan,
    /// A line comment.
    LineComment,
    /// A block comment.
    BlockComment,
    /// A function name.
    Function,
    /// Unknown lexemes.
    Unknown,
    /// The ? symbol, used for optional values.
    QuestionMark,
    /// The @ symbol.
    At,
    /// `;`
    SemiColon,
}
408
409/// Most KCL tokens correspond to LSP semantic tokens (but not all).
410impl TryFrom<TokenType> for SemanticTokenType {
411    type Error = anyhow::Error;
412    fn try_from(token_type: TokenType) -> Result<Self> {
413        // If you return a new kind of `SemanticTokenType`, make sure to update `SEMANTIC_TOKEN_TYPES`
414        // in the LSP implementation.
415        Ok(match token_type {
416            TokenType::Number => Self::NUMBER,
417            TokenType::Word => Self::VARIABLE,
418            TokenType::Keyword => Self::KEYWORD,
419            TokenType::Type => Self::TYPE,
420            TokenType::Operator => Self::OPERATOR,
421            TokenType::QuestionMark => Self::OPERATOR,
422            TokenType::String => Self::STRING,
423            TokenType::Bang => Self::OPERATOR,
424            TokenType::LineComment => Self::COMMENT,
425            TokenType::BlockComment => Self::COMMENT,
426            TokenType::Function => Self::FUNCTION,
427            TokenType::Whitespace
428            | TokenType::Brace
429            | TokenType::Comma
430            | TokenType::Colon
431            | TokenType::DoubleColon
432            | TokenType::Period
433            | TokenType::DoublePeriod
434            | TokenType::DoublePeriodLessThan
435            | TokenType::Hash
436            | TokenType::Dollar
437            | TokenType::At
438            | TokenType::SemiColon
439            | TokenType::Unknown => {
440                anyhow::bail!("unsupported token type: {:?}", token_type)
441            }
442        })
443    }
444}
445
446impl TokenType {
447    pub fn is_whitespace(&self) -> bool {
448        matches!(self, Self::Whitespace)
449    }
450
451    pub fn is_comment(&self) -> bool {
452        matches!(self, Self::LineComment | Self::BlockComment)
453    }
454}
455
/// A single lexeme plus its location in the source.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Token {
    pub token_type: TokenType,
    /// Offset in the source code where this token begins.
    pub start: usize,
    /// Offset in the source code where this token ends.
    pub end: usize,
    // Which module (file) this token was lexed from.
    pub(super) module_id: ModuleId,
    // The token's raw source text.
    pub(super) value: String,
}
466
/// Lets winnow parsers match a token by (type, exact text) pair.
impl ContainsToken<Token> for (TokenType, &str) {
    fn contains_token(&self, token: Token) -> bool {
        self.0 == token.token_type && self.1 == token.value
    }
}

/// Lets winnow parsers match a token by type alone.
impl ContainsToken<Token> for TokenType {
    fn contains_token(&self, token: Token) -> bool {
        *self == token.token_type
    }
}
478
479impl Token {
480    pub fn from_range(
481        range: std::ops::Range<usize>,
482        module_id: ModuleId,
483        token_type: TokenType,
484        value: String,
485    ) -> Self {
486        Self {
487            start: range.start,
488            end: range.end,
489            module_id,
490            value,
491            token_type,
492        }
493    }
494    pub fn is_code_token(&self) -> bool {
495        !matches!(
496            self.token_type,
497            TokenType::Whitespace | TokenType::LineComment | TokenType::BlockComment
498        )
499    }
500
501    pub fn as_source_range(&self) -> SourceRange {
502        SourceRange::new(self.start, self.end, self.module_id)
503    }
504
505    pub fn as_source_ranges(&self) -> Vec<SourceRange> {
506        vec![self.as_source_range()]
507    }
508
509    pub fn visibility_keyword(&self) -> Option<ItemVisibility> {
510        if !matches!(self.token_type, TokenType::Keyword) {
511            return None;
512        }
513        match self.value.as_str() {
514            "export" => Some(ItemVisibility::Export),
515            _ => None,
516        }
517    }
518
519    pub fn numeric_value(&self) -> Option<f64> {
520        if self.token_type != TokenType::Number {
521            return None;
522        }
523        let value = &self.value;
524        let value = value
525            .split_once(|c: char| c == '_' || c.is_ascii_alphabetic())
526            .map(|(s, _)| s)
527            .unwrap_or(value);
528        value.parse().ok()
529    }
530
531    pub fn uint_value(&self) -> Option<u32> {
532        if self.token_type != TokenType::Number {
533            return None;
534        }
535        let value = &self.value;
536        let value = value
537            .split_once(|c: char| c == '_' || c.is_ascii_alphabetic())
538            .map(|(s, _)| s)
539            .unwrap_or(value);
540        value.parse().ok()
541    }
542
543    pub fn numeric_suffix(&self) -> NumericSuffix {
544        if self.token_type != TokenType::Number {
545            return NumericSuffix::None;
546        }
547
548        if self.value.ends_with('_') {
549            return NumericSuffix::Count;
550        }
551
552        for suffix in NUM_SUFFIXES {
553            if self.value.ends_with(suffix) {
554                return suffix.parse().unwrap();
555            }
556        }
557
558        NumericSuffix::None
559    }
560
561    /// Is this token the beginning of a variable/function declaration?
562    /// If so, what kind?
563    /// If not, returns None.
564    pub fn declaration_keyword(&self) -> Option<VariableKind> {
565        if !matches!(self.token_type, TokenType::Keyword) {
566            return None;
567        }
568        Some(match self.value.as_str() {
569            "fn" => VariableKind::Fn,
570            "var" | "let" | "const" => VariableKind::Const,
571            _ => return None,
572        })
573    }
574}
575
/// A token's source range is just its start/end offsets plus its module.
impl From<Token> for SourceRange {
    fn from(token: Token) -> Self {
        Self::new(token.start, token.end, token.module_id)
    }
}

impl From<&Token> for SourceRange {
    fn from(token: &Token) -> Self {
        Self::new(token.start, token.end, token.module_id)
    }
}
587
/// Tokenize KCL source text into a `TokenStream`, or a lexical `KclError`.
pub fn lex(s: &str, module_id: ModuleId) -> Result<TokenStream, KclError> {
    tokeniser::lex(s, module_id).map_err(From::from)
}
591
/// Convert a winnow tokenizer failure into a lexical `KclError` carrying a
/// source range for the offending position.
impl From<ParseError<Input<'_>, winnow::error::ContextError>> for KclError {
    fn from(err: ParseError<Input<'_>, winnow::error::ContextError>) -> Self {
        // NOTE(review): `err.offset()` is used below to index a Vec<char>,
        // which assumes the offset counts chars of the `Input` stream (not
        // bytes) — confirm against the tokeniser's `Input` definition.
        let (input, offset): (Vec<char>, usize) = (err.input().chars().collect(), err.offset());
        let module_id = err.input().state.module_id;

        if offset >= input.len() {
            // From the winnow docs:
            //
            // This is an offset, not an index, and may point to
            // the end of input (input.len()) on eof errors.

            return KclError::new_lexical(crate::errors::KclErrorDetails::new(
                "unexpected EOF while parsing".to_owned(),
                vec![SourceRange::new(offset, offset, module_id)],
            ));
        }

        // TODO: Add the Winnow tokenizer context to the error.
        // See https://github.com/KittyCAD/modeling-app/issues/784
        let bad_token = &input[offset];
        // TODO: Add the Winnow parser context to the error.
        // See https://github.com/KittyCAD/modeling-app/issues/784
        KclError::new_lexical(crate::errors::KclErrorDetails::new(
            format!("found unknown token '{bad_token}'"),
            vec![SourceRange::new(offset, offset + 1, module_id)],
        ))
    }
}
619}