kcl_lib/parsing/token/mod.rs

// Clippy does not agree with rustc here for some reason.
#![allow(clippy::needless_lifetimes)]

use std::{fmt, iter::Enumerate, num::NonZeroUsize, str::FromStr};

use anyhow::Result;
use parse_display::Display;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use tokeniser::Input;
use tower_lsp::lsp_types::SemanticTokenType;
use winnow::{
    self,
    error::ParseError,
    stream::{ContainsToken, Stream},
};

use crate::{
    errors::KclError,
    parsing::ast::types::{ItemVisibility, VariableKind},
    source_range::SourceRange,
    CompilationError, ModuleId,
};

mod tokeniser;

pub(crate) use tokeniser::RESERVED_WORDS;

// Note the ordering: it's important that `m` comes after `mm` and `cm`,
// since suffix matching checks the array in order.
pub const NUM_SUFFIXES: [&str; 9] = ["mm", "cm", "m", "inch", "in", "ft", "yd", "deg", "rad"];

#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize, ts_rs::TS, JsonSchema)]
#[repr(u32)]
pub enum NumericSuffix {
    None,
    Count,
    Mm,
    Cm,
    M,
    Inch,
    Ft,
    Yd,
    Deg,
    Rad,
}

impl NumericSuffix {
    #[allow(dead_code)]
    pub fn is_none(self) -> bool {
        self == Self::None
    }

    pub fn is_some(self) -> bool {
        self != Self::None
    }

    pub fn digestable_id(&self) -> &[u8] {
        match self {
            NumericSuffix::None => &[],
            NumericSuffix::Count => b"_",
            NumericSuffix::Mm => b"mm",
            NumericSuffix::Cm => b"cm",
            NumericSuffix::M => b"m",
            NumericSuffix::Inch => b"in",
            NumericSuffix::Ft => b"ft",
            NumericSuffix::Yd => b"yd",
            NumericSuffix::Deg => b"deg",
            NumericSuffix::Rad => b"rad",
        }
    }
}

impl FromStr for NumericSuffix {
    type Err = CompilationError;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
            "_" => Ok(NumericSuffix::Count),
            "mm" | "millimeters" => Ok(NumericSuffix::Mm),
            "cm" | "centimeters" => Ok(NumericSuffix::Cm),
            "m" | "meters" => Ok(NumericSuffix::M),
            "inch" | "in" => Ok(NumericSuffix::Inch),
            "ft" | "feet" => Ok(NumericSuffix::Ft),
            "yd" | "yards" => Ok(NumericSuffix::Yd),
            "deg" | "degrees" => Ok(NumericSuffix::Deg),
            "rad" | "radians" => Ok(NumericSuffix::Rad),
            _ => Err(CompilationError::err(SourceRange::default(), "invalid unit of measure")),
        }
    }
}

impl fmt::Display for NumericSuffix {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            NumericSuffix::None => Ok(()),
            NumericSuffix::Count => write!(f, "_"),
            NumericSuffix::Mm => write!(f, "mm"),
            NumericSuffix::Cm => write!(f, "cm"),
            NumericSuffix::M => write!(f, "m"),
            NumericSuffix::Inch => write!(f, "in"),
            NumericSuffix::Ft => write!(f, "ft"),
            NumericSuffix::Yd => write!(f, "yd"),
            NumericSuffix::Deg => write!(f, "deg"),
            NumericSuffix::Rad => write!(f, "rad"),
        }
    }
}
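
// A minimal round-trip sketch (illustrative, not part of the original test
// suite): `FromStr` accepts both short and long unit spellings, while
// `Display` always prints the short form.
#[cfg(test)]
mod numeric_suffix_examples {
    use super::*;

    #[test]
    fn parse_and_display_round_trip() {
        // Long and short spellings parse to the same variant.
        assert_eq!("mm".parse::<NumericSuffix>().unwrap(), NumericSuffix::Mm);
        assert_eq!("millimeters".parse::<NumericSuffix>().unwrap(), NumericSuffix::Mm);
        // `Display` normalizes to the short spelling.
        assert_eq!(NumericSuffix::Mm.to_string(), "mm");
        // Unrecognized units are rejected with a `CompilationError`.
        assert!("furlong".parse::<NumericSuffix>().is_err());
    }
}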

#[derive(Clone, Debug, PartialEq)]
pub(crate) struct TokenStream {
    tokens: Vec<Token>,
}

impl TokenStream {
    fn new(tokens: Vec<Token>) -> Self {
        Self { tokens }
    }

    pub(super) fn remove_unknown(&mut self) -> Vec<Token> {
        let tokens = std::mem::take(&mut self.tokens);
        let (tokens, unknown_tokens): (Vec<Token>, Vec<Token>) = tokens
            .into_iter()
            .partition(|token| token.token_type != TokenType::Unknown);
        self.tokens = tokens;
        unknown_tokens
    }

    pub fn iter(&self) -> impl Iterator<Item = &Token> {
        self.tokens.iter()
    }

    pub fn is_empty(&self) -> bool {
        self.tokens.is_empty()
    }

    pub fn as_slice(&self) -> TokenSlice {
        TokenSlice::from(self)
    }
}
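
// A small sketch of `remove_unknown` (illustrative only): unknown tokens are
// drained out of the stream and returned, leaving recognized tokens behind.
// `ModuleId::default()` is assumed here purely for construction.
#[cfg(test)]
mod token_stream_examples {
    use super::*;

    #[test]
    fn remove_unknown_partitions_tokens() {
        let module_id = ModuleId::default();
        let good = Token::from_range(0..1, module_id, TokenType::Number, "1".to_owned());
        let bad = Token::from_range(1..2, module_id, TokenType::Unknown, "?".to_owned());
        let mut stream = TokenStream::new(vec![good.clone(), bad.clone()]);
        // The unknown token is returned; the stream keeps only the number.
        assert_eq!(stream.remove_unknown(), vec![bad]);
        assert_eq!(stream.iter().collect::<Vec<_>>(), vec![&good]);
    }
}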

impl<'a> From<&'a TokenStream> for TokenSlice<'a> {
    fn from(stream: &'a TokenStream) -> Self {
        TokenSlice {
            start: 0,
            end: stream.tokens.len(),
            stream,
        }
    }
}

impl IntoIterator for TokenStream {
    type Item = Token;

    type IntoIter = std::vec::IntoIter<Token>;

    fn into_iter(self) -> Self::IntoIter {
        self.tokens.into_iter()
    }
}

#[derive(Debug, Clone)]
pub(crate) struct TokenSlice<'a> {
    stream: &'a TokenStream,
    /// The position in the stream of this slice's first token.
    start: usize,
    /// The position in the stream just past this slice's last token (exclusive).
    end: usize,
}

impl<'a> std::ops::Deref for TokenSlice<'a> {
    type Target = [Token];

    fn deref(&self) -> &Self::Target {
        &self.stream.tokens[self.start..self.end]
    }
}

impl<'a> TokenSlice<'a> {
    pub fn token(&self, i: usize) -> &Token {
        &self.stream.tokens[i + self.start]
    }

    pub fn iter(&self) -> impl Iterator<Item = &Token> {
        (**self).iter()
    }

    pub fn without_ends(&self) -> Self {
        Self {
            start: self.start + 1,
            end: self.end - 1,
            stream: self.stream,
        }
    }

    pub fn as_source_range(&self) -> SourceRange {
        let stream_len = self.stream.tokens.len();
        let first_token = if stream_len == self.start {
            &self.stream.tokens[self.start - 1]
        } else {
            self.token(0)
        };
        let last_token = if stream_len == self.end {
            &self.stream.tokens[stream_len - 1]
        } else {
            self.token(self.end - self.start)
        };
        SourceRange::new(first_token.start, last_token.end, last_token.module_id)
    }
}
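
// A sketch of slicing behavior (illustrative only): `without_ends` trims one
// token from each side, e.g. to drop a pair of surrounding braces. Assumes the
// lexer emits `(`, `1`, and `)` as three tokens for this input, and uses
// `ModuleId::default()` for illustration.
#[cfg(test)]
mod token_slice_examples {
    use super::*;

    #[test]
    fn without_ends_trims_one_token_each_side() {
        let stream = lex("(1)", ModuleId::default()).unwrap();
        let slice = stream.as_slice();
        let inner = slice.without_ends();
        assert_eq!(inner.len(), slice.len() - 2);
        assert_eq!(inner.token(0).value, "1");
    }
}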

impl<'a> IntoIterator for TokenSlice<'a> {
    type Item = &'a Token;

    type IntoIter = std::slice::Iter<'a, Token>;

    fn into_iter(self) -> Self::IntoIter {
        self.stream.tokens[self.start..self.end].iter()
    }
}

impl<'a> Stream for TokenSlice<'a> {
    type Token = Token;
    type Slice = Self;
    type IterOffsets = Enumerate<std::vec::IntoIter<Token>>;
    type Checkpoint = Checkpoint;

    fn iter_offsets(&self) -> Self::IterOffsets {
        #[allow(clippy::unnecessary_to_owned)]
        self.to_vec().into_iter().enumerate()
    }

    fn eof_offset(&self) -> usize {
        self.len()
    }

    fn next_token(&mut self) -> Option<Self::Token> {
        let token = self.first()?.clone();
        self.start += 1;
        Some(token)
    }

    fn offset_for<P>(&self, predicate: P) -> Option<usize>
    where
        P: Fn(Self::Token) -> bool,
    {
        self.iter().position(|b| predicate(b.clone()))
    }

    fn offset_at(&self, tokens: usize) -> Result<usize, winnow::error::Needed> {
        if let Some(needed) = tokens.checked_sub(self.len()).and_then(NonZeroUsize::new) {
            Err(winnow::error::Needed::Size(needed))
        } else {
            Ok(tokens)
        }
    }

    fn next_slice(&mut self, offset: usize) -> Self::Slice {
        assert!(self.start + offset <= self.end);

        let next = TokenSlice {
            stream: self.stream,
            start: self.start,
            end: self.start + offset,
        };
        self.start += offset;
        next
    }

    fn checkpoint(&self) -> Self::Checkpoint {
        Checkpoint(self.start, self.end)
    }

    fn reset(&mut self, checkpoint: &Self::Checkpoint) {
        self.start = checkpoint.0;
        self.end = checkpoint.1;
    }

    fn raw(&self) -> &dyn fmt::Debug {
        self
    }
}
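
// A sketch of how winnow drives `TokenSlice` as a `Stream` (illustrative only):
// `next_token` advances the slice, and `checkpoint`/`reset` rewind it so
// parsers can backtrack. Assumes `ModuleId::default()` and that `1` lexes to a
// single number token.
#[cfg(test)]
mod stream_impl_examples {
    use super::*;

    #[test]
    fn next_token_and_checkpoint_reset() {
        let stream = lex("1 + 2", ModuleId::default()).unwrap();
        let mut slice = stream.as_slice();
        let checkpoint = slice.checkpoint();
        assert_eq!(slice.next_token().unwrap().value, "1");
        // Resetting restores the slice to the checkpointed position.
        slice.reset(&checkpoint);
        assert_eq!(slice.next_token().unwrap().value, "1");
    }
}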

impl<'a> winnow::stream::Offset for TokenSlice<'a> {
    fn offset_from(&self, start: &Self) -> usize {
        self.start - start.start
    }
}

impl<'a> winnow::stream::Offset<Checkpoint> for TokenSlice<'a> {
    fn offset_from(&self, start: &Checkpoint) -> usize {
        self.start - start.0
    }
}

impl winnow::stream::Offset for Checkpoint {
    fn offset_from(&self, start: &Self) -> usize {
        self.0 - start.0
    }
}

impl<'a> winnow::stream::StreamIsPartial for TokenSlice<'a> {
    type PartialState = ();

    fn complete(&mut self) -> Self::PartialState {}

    fn restore_partial(&mut self, _: Self::PartialState) {}

    fn is_partial_supported() -> bool {
        false
    }
}

impl<'a> winnow::stream::FindSlice<&str> for TokenSlice<'a> {
    fn find_slice(&self, substr: &str) -> Option<std::ops::Range<usize>> {
        self.iter()
            .enumerate()
            .find_map(|(i, b)| if b.value == substr { Some(i..self.end) } else { None })
    }
}

#[derive(Clone, Debug)]
pub struct Checkpoint(usize, usize);

/// The types of tokens.
#[derive(Debug, PartialEq, Eq, Copy, Clone, Display)]
#[display(style = "camelCase")]
pub enum TokenType {
    /// A number.
    Number,
    /// A word.
    Word,
    /// An operator.
    Operator,
    /// A string.
    String,
    /// A keyword.
    Keyword,
    /// A type.
    Type,
    /// A brace.
    Brace,
    /// A hash.
    Hash,
    /// A bang.
    Bang,
    /// A dollar sign.
    Dollar,
    /// Whitespace.
    Whitespace,
    /// A comma.
    Comma,
    /// A colon.
    Colon,
    /// A period.
    Period,
    /// A double period: `..`.
    DoublePeriod,
    /// A line comment.
    LineComment,
    /// A block comment.
    BlockComment,
    /// A function name.
    Function,
    /// Unknown lexemes.
    Unknown,
    /// The ? symbol, used for optional values.
    QuestionMark,
    /// The @ symbol.
    At,
}

/// Most KCL tokens correspond to LSP semantic tokens (but not all).
impl TryFrom<TokenType> for SemanticTokenType {
    type Error = anyhow::Error;
    fn try_from(token_type: TokenType) -> Result<Self> {
        // If you return a new kind of `SemanticTokenType`, make sure to update `SEMANTIC_TOKEN_TYPES`
        // in the LSP implementation.
        Ok(match token_type {
            TokenType::Number => Self::NUMBER,
            TokenType::Word => Self::VARIABLE,
            TokenType::Keyword => Self::KEYWORD,
            TokenType::Type => Self::TYPE,
            TokenType::Operator => Self::OPERATOR,
            TokenType::QuestionMark => Self::OPERATOR,
            TokenType::String => Self::STRING,
            TokenType::Bang => Self::OPERATOR,
            TokenType::LineComment => Self::COMMENT,
            TokenType::BlockComment => Self::COMMENT,
            TokenType::Function => Self::FUNCTION,
            TokenType::Whitespace
            | TokenType::Brace
            | TokenType::Comma
            | TokenType::Colon
            | TokenType::Period
            | TokenType::DoublePeriod
            | TokenType::Hash
            | TokenType::Dollar
            | TokenType::At
            | TokenType::Unknown => {
                anyhow::bail!("unsupported token type: {:?}", token_type)
            }
        })
    }
}
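
// A sketch of the LSP mapping (illustrative only): code-ish token types
// convert to semantic token types, while punctuation-like types are rejected.
// Assumes `SemanticTokenType` implements `PartialEq`/`Debug`, as it does in
// `lsp_types`.
#[cfg(test)]
mod semantic_token_examples {
    use super::*;

    #[test]
    fn semantic_token_mapping() {
        assert_eq!(
            SemanticTokenType::try_from(TokenType::Number).unwrap(),
            SemanticTokenType::NUMBER
        );
        // Punctuation has no semantic token equivalent.
        assert!(SemanticTokenType::try_from(TokenType::Comma).is_err());
    }
}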

impl TokenType {
    pub fn is_whitespace(&self) -> bool {
        matches!(self, Self::Whitespace)
    }

    pub fn is_comment(&self) -> bool {
        matches!(self, Self::LineComment | Self::BlockComment)
    }
}

#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Token {
    pub token_type: TokenType,
    /// Offset in the source code where this token begins.
    pub start: usize,
    /// Offset in the source code where this token ends.
    pub end: usize,
    pub(super) module_id: ModuleId,
    pub(super) value: String,
}

impl ContainsToken<Token> for (TokenType, &str) {
    fn contains_token(&self, token: Token) -> bool {
        self.0 == token.token_type && self.1 == token.value
    }
}

impl ContainsToken<Token> for TokenType {
    fn contains_token(&self, token: Token) -> bool {
        *self == token.token_type
    }
}

impl Token {
    pub fn from_range(
        range: std::ops::Range<usize>,
        module_id: ModuleId,
        token_type: TokenType,
        value: String,
    ) -> Self {
        Self {
            start: range.start,
            end: range.end,
            module_id,
            value,
            token_type,
        }
    }

    pub fn is_code_token(&self) -> bool {
        !matches!(
            self.token_type,
            TokenType::Whitespace | TokenType::LineComment | TokenType::BlockComment
        )
    }

    pub fn as_source_range(&self) -> SourceRange {
        SourceRange::new(self.start, self.end, self.module_id)
    }

    pub fn as_source_ranges(&self) -> Vec<SourceRange> {
        vec![self.as_source_range()]
    }

    pub fn visibility_keyword(&self) -> Option<ItemVisibility> {
        if !matches!(self.token_type, TokenType::Keyword) {
            return None;
        }
        match self.value.as_str() {
            "export" => Some(ItemVisibility::Export),
            _ => None,
        }
    }

    pub fn numeric_value(&self) -> Option<f64> {
        if self.token_type != TokenType::Number {
            return None;
        }
        let value = &self.value;
        let value = value
            .split_once(|c: char| c == '_' || c.is_ascii_alphabetic())
            .map(|(s, _)| s)
            .unwrap_or(value);
        value.parse().ok()
    }

    pub fn numeric_suffix(&self) -> NumericSuffix {
        if self.token_type != TokenType::Number {
            return NumericSuffix::None;
        }

        if self.value.ends_with('_') {
            return NumericSuffix::Count;
        }

        for suffix in NUM_SUFFIXES {
            if self.value.ends_with(suffix) {
                return suffix.parse().unwrap();
            }
        }

        NumericSuffix::None
    }

    /// Is this token the beginning of a variable/function declaration?
    /// If so, what kind?
    /// If not, returns None.
    pub fn declaration_keyword(&self) -> Option<VariableKind> {
        if !matches!(self.token_type, TokenType::Keyword) {
            return None;
        }
        Some(match self.value.as_str() {
            "fn" => VariableKind::Fn,
            "var" | "let" | "const" => VariableKind::Const,
            _ => return None,
        })
    }
}
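
// A sketch of the number-token helpers (illustrative only): `numeric_value`
// strips any unit suffix before parsing, and `numeric_suffix` recovers the
// unit. `ModuleId::default()` is assumed purely for construction.
#[cfg(test)]
mod token_helper_examples {
    use super::*;

    #[test]
    fn numeric_value_and_suffix() {
        let module_id = ModuleId::default();
        let number = Token::from_range(0..4, module_id, TokenType::Number, "42mm".to_owned());
        assert_eq!(number.numeric_value(), Some(42.0));
        assert_eq!(number.numeric_suffix(), NumericSuffix::Mm);
        // Non-number tokens yield neither a value nor a suffix.
        let word = Token::from_range(0..1, module_id, TokenType::Word, "x".to_owned());
        assert_eq!(word.numeric_value(), None);
        assert_eq!(word.numeric_suffix(), NumericSuffix::None);
    }
}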

impl From<Token> for SourceRange {
    fn from(token: Token) -> Self {
        Self::new(token.start, token.end, token.module_id)
    }
}

impl From<&Token> for SourceRange {
    fn from(token: &Token) -> Self {
        Self::new(token.start, token.end, token.module_id)
    }
}

pub fn lex(s: &str, module_id: ModuleId) -> Result<TokenStream, KclError> {
    tokeniser::lex(s, module_id).map_err(From::from)
}
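
// A minimal usage sketch for `lex` (illustrative only): tokenize a source
// string and separate code tokens from whitespace. Assumes `ModuleId::default()`.
#[cfg(test)]
mod lex_examples {
    use super::*;

    #[test]
    fn lex_simple_source() {
        let stream = lex("x = 1", ModuleId::default()).unwrap();
        assert!(!stream.is_empty());
        // The stream contains both code tokens and whitespace tokens.
        assert!(stream.iter().any(|t| t.is_code_token()));
        assert!(stream.iter().any(|t| !t.is_code_token()));
    }
}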

impl From<ParseError<Input<'_>, winnow::error::ContextError>> for KclError {
    fn from(err: ParseError<Input<'_>, winnow::error::ContextError>) -> Self {
        let (input, offset): (Vec<char>, usize) = (err.input().chars().collect(), err.offset());
        let module_id = err.input().state.module_id;

        if offset >= input.len() {
            // From the winnow docs:
            //
            // This is an offset, not an index, and may point to
            // the end of input (input.len()) on eof errors.

            return KclError::Lexical(crate::errors::KclErrorDetails {
                source_ranges: vec![SourceRange::new(offset, offset, module_id)],
                message: "unexpected EOF while parsing".to_string(),
            });
        }

        // TODO: Add the Winnow tokenizer context to the error.
        // See https://github.com/KittyCAD/modeling-app/issues/784
        let bad_token = &input[offset];
        // TODO: Add the Winnow parser context to the error.
        // See https://github.com/KittyCAD/modeling-app/issues/784
        KclError::Lexical(crate::errors::KclErrorDetails {
            source_ranges: vec![SourceRange::new(offset, offset + 1, module_id)],
            message: format!("found unknown token '{}'", bad_token),
        })
    }
}