// kcl_lib/parsing/token/mod.rs

1// Clippy does not agree with rustc here for some reason.
2#![allow(clippy::needless_lifetimes)]
3
4use std::fmt;
5use std::iter::Enumerate;
6use std::num::NonZeroUsize;
7use std::str::FromStr;
8
9use anyhow::Result;
10use parse_display::Display;
11use serde::Deserialize;
12use serde::Serialize;
13use tokeniser::Input;
14use tower_lsp::lsp_types::SemanticTokenType;
15use winnow::error::ParseError;
16use winnow::stream::ContainsToken;
17use winnow::stream::Stream;
18use winnow::{self};
19
20use crate::CompilationError;
21use crate::ModuleId;
22use crate::SourceRange;
23use crate::errors::KclError;
24use crate::parsing::ast::types::ItemVisibility;
25use crate::parsing::ast::types::VariableKind;
26
27mod tokeniser;
28
29pub(crate) use tokeniser::RESERVED_WORDS;
30
// All recognized numeric-literal suffixes, tried in order by `Token::numeric_suffix`
// via `ends_with`. Note the ordering, it's important that `m` comes after `mm` and
// `cm` (and `in` after `inch`): a shorter suffix must not match before a longer one
// that ends with it.
pub const NUM_SUFFIXES: [&str; 10] = ["mm", "cm", "m", "inch", "in", "ft", "yd", "deg", "rad", "?"];
33
/// The unit-of-measure suffix attached to a numeric literal, if any.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize, ts_rs::TS)]
#[repr(u32)]
pub enum NumericSuffix {
    /// No suffix was written on the literal.
    None,
    /// A dimensionless count, written `_`.
    Count,
    /// A generic length, with no specific unit.
    Length,
    /// A generic angle, with no specific unit.
    Angle,
    /// Millimeters.
    Mm,
    /// Centimeters.
    Cm,
    /// Meters.
    M,
    /// Inches.
    Inch,
    /// Feet.
    Ft,
    /// Yards.
    Yd,
    /// Degrees.
    Deg,
    /// Radians.
    Rad,
    /// An explicitly-unknown unit, written `?`.
    Unknown,
}
51
52impl NumericSuffix {
53    #[allow(dead_code)]
54    pub fn is_none(self) -> bool {
55        self == Self::None
56    }
57
58    pub fn is_some(self) -> bool {
59        self != Self::None
60    }
61
62    pub fn digestable_id(&self) -> &[u8] {
63        match self {
64            NumericSuffix::None => &[],
65            NumericSuffix::Count => b"_",
66            NumericSuffix::Unknown => b"?",
67            NumericSuffix::Length => b"Length",
68            NumericSuffix::Angle => b"Angle",
69            NumericSuffix::Mm => b"mm",
70            NumericSuffix::Cm => b"cm",
71            NumericSuffix::M => b"m",
72            NumericSuffix::Inch => b"in",
73            NumericSuffix::Ft => b"ft",
74            NumericSuffix::Yd => b"yd",
75            NumericSuffix::Deg => b"deg",
76            NumericSuffix::Rad => b"rad",
77        }
78    }
79}
80
81impl FromStr for NumericSuffix {
82    type Err = CompilationError;
83
84    fn from_str(s: &str) -> Result<Self, Self::Err> {
85        match s {
86            "_" | "Count" => Ok(NumericSuffix::Count),
87            "Length" => Ok(NumericSuffix::Length),
88            "Angle" => Ok(NumericSuffix::Angle),
89            "mm" | "millimeters" => Ok(NumericSuffix::Mm),
90            "cm" | "centimeters" => Ok(NumericSuffix::Cm),
91            "m" | "meters" => Ok(NumericSuffix::M),
92            "inch" | "in" => Ok(NumericSuffix::Inch),
93            "ft" | "feet" => Ok(NumericSuffix::Ft),
94            "yd" | "yards" => Ok(NumericSuffix::Yd),
95            "deg" | "degrees" => Ok(NumericSuffix::Deg),
96            "rad" | "radians" => Ok(NumericSuffix::Rad),
97            "?" => Ok(NumericSuffix::Unknown),
98            _ => Err(CompilationError::err(SourceRange::default(), "invalid unit of measure")),
99        }
100    }
101}
102
103impl fmt::Display for NumericSuffix {
104    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
105        match self {
106            NumericSuffix::None => Ok(()),
107            NumericSuffix::Count => write!(f, "_"),
108            NumericSuffix::Unknown => write!(f, "_?"),
109            NumericSuffix::Length => write!(f, "Length"),
110            NumericSuffix::Angle => write!(f, "Angle"),
111            NumericSuffix::Mm => write!(f, "mm"),
112            NumericSuffix::Cm => write!(f, "cm"),
113            NumericSuffix::M => write!(f, "m"),
114            NumericSuffix::Inch => write!(f, "in"),
115            NumericSuffix::Ft => write!(f, "ft"),
116            NumericSuffix::Yd => write!(f, "yd"),
117            NumericSuffix::Deg => write!(f, "deg"),
118            NumericSuffix::Rad => write!(f, "rad"),
119        }
120    }
121}
122
/// An owned, ordered sequence of tokens produced by lexing one module.
#[derive(Clone, Debug, PartialEq)]
pub(crate) struct TokenStream {
    tokens: Vec<Token>,
}
127
128impl TokenStream {
129    fn new(tokens: Vec<Token>) -> Self {
130        Self { tokens }
131    }
132
133    pub(super) fn remove_unknown(&mut self) -> Vec<Token> {
134        let tokens = std::mem::take(&mut self.tokens);
135        let (tokens, unknown_tokens): (Vec<Token>, Vec<Token>) = tokens
136            .into_iter()
137            .partition(|token| token.token_type != TokenType::Unknown);
138        self.tokens = tokens;
139        unknown_tokens
140    }
141
142    pub fn iter(&self) -> impl Iterator<Item = &Token> {
143        self.tokens.iter()
144    }
145
146    pub fn is_empty(&self) -> bool {
147        self.tokens.is_empty()
148    }
149
150    pub fn as_slice(&self) -> TokenSlice<'_> {
151        TokenSlice::from(self)
152    }
153}
154
155impl<'a> From<&'a TokenStream> for TokenSlice<'a> {
156    fn from(stream: &'a TokenStream) -> Self {
157        TokenSlice {
158            start: 0,
159            end: stream.tokens.len(),
160            stream,
161        }
162    }
163}
164
/// Consume the stream, yielding owned tokens.
impl IntoIterator for TokenStream {
    type Item = Token;

    type IntoIter = std::vec::IntoIter<Token>;

    fn into_iter(self) -> Self::IntoIter {
        self.tokens.into_iter()
    }
}
174
/// A borrowed view of a sub-range of a [`TokenStream`]; this is the input
/// type consumed by the winnow-based parser.
#[derive(Debug, Clone)]
pub(crate) struct TokenSlice<'a> {
    stream: &'a TokenStream,
    /// Index into the stream of the first token of this slice (inclusive).
    start: usize,
    /// Index into the stream one past the last token of this slice (exclusive),
    /// i.e. the slice covers `stream.tokens[start..end]`.
    end: usize,
}
183
/// Expose the viewed tokens as a plain `[Token]` slice.
impl<'a> std::ops::Deref for TokenSlice<'a> {
    type Target = [Token];

    fn deref(&self) -> &Self::Target {
        &self.stream.tokens[self.start..self.end]
    }
}
191
impl<'a> TokenSlice<'a> {
    /// The `i`th token of this slice (`0` is the slice's first token, not the
    /// stream's).
    pub fn token(&self, i: usize) -> &Token {
        &self.stream.tokens[i + self.start]
    }

    /// Iterate this slice's tokens by reference.
    pub fn iter(&self) -> impl Iterator<Item = &Token> {
        (**self).iter()
    }

    /// The same slice minus its first and last tokens.
    /// NOTE(review): `end - 1` underflows on an empty slice — presumably only
    /// called on slices with at least two tokens (e.g. bracketed groups); confirm.
    pub fn without_ends(&self) -> Self {
        Self {
            start: self.start + 1,
            end: self.end - 1,
            stream: self.stream,
        }
    }

    /// The source range spanned by this slice.
    pub fn as_source_range(&self) -> SourceRange {
        let stream_len = self.stream.tokens.len();
        // If the slice begins exactly at end-of-stream (an empty tail), anchor on
        // the stream's final token instead of indexing out of bounds.
        let first_token = if stream_len == self.start {
            &self.stream.tokens[self.start - 1]
        } else {
            self.token(0)
        };
        // NOTE(review): in the `else` branch `token(end - start)` resolves to
        // `stream.tokens[self.end]`, the first token *after* this slice, rather
        // than the slice's own last token. This may be intentional (mirroring the
        // EOF clamp above), but verify against how the parser reports ranges.
        let last_token = if stream_len == self.end {
            &self.stream.tokens[stream_len - 1]
        } else {
            self.token(self.end - self.start)
        };
        SourceRange::new(first_token.start, last_token.end, last_token.module_id)
    }
}
224
/// Iterate the slice's tokens by reference, borrowing from the backing stream.
impl<'a> IntoIterator for TokenSlice<'a> {
    type Item = &'a Token;

    type IntoIter = std::slice::Iter<'a, Token>;

    fn into_iter(self) -> Self::IntoIter {
        self.stream.tokens[self.start..self.end].iter()
    }
}
234
/// winnow input-stream implementation: parsing a `TokenSlice` advances `start`
/// toward `end`; checkpoints save and restore both bounds.
impl<'a> Stream for TokenSlice<'a> {
    type Token = Token;
    type Slice = Self;
    type IterOffsets = Enumerate<std::vec::IntoIter<Token>>;
    type Checkpoint = Checkpoint;

    /// Offsets paired with owned tokens. `IterOffsets` is an owning iterator,
    /// so the remaining tokens must be cloned into a `Vec` first.
    fn iter_offsets(&self) -> Self::IterOffsets {
        #[allow(clippy::unnecessary_to_owned)]
        self.to_vec().into_iter().enumerate()
    }

    /// Number of tokens remaining until end of input.
    fn eof_offset(&self) -> usize {
        self.len()
    }

    /// Pop the slice's first token (cloned) and advance past it.
    fn next_token(&mut self) -> Option<Self::Token> {
        let token = self.first()?.clone();
        self.start += 1;
        Some(token)
    }

    /// Split off the next token from the input
    fn peek_token(&self) -> Option<Self::Token> {
        Some(self.first()?.clone())
    }

    /// Offset of the first token satisfying `predicate`, if any.
    fn offset_for<P>(&self, predicate: P) -> Option<usize>
    where
        P: Fn(Self::Token) -> bool,
    {
        self.iter().position(|b| predicate(b.clone()))
    }

    /// `Ok(tokens)` when that many tokens remain; otherwise `Err` with how
    /// many more would be needed (per winnow's partial-input contract).
    fn offset_at(&self, tokens: usize) -> Result<usize, winnow::error::Needed> {
        if let Some(needed) = tokens.checked_sub(self.len()).and_then(NonZeroUsize::new) {
            Err(winnow::error::Needed::Size(needed))
        } else {
            Ok(tokens)
        }
    }

    /// Split off the first `offset` tokens as a new slice, advancing self past them.
    fn next_slice(&mut self, offset: usize) -> Self::Slice {
        assert!(self.start + offset <= self.end);

        let next = TokenSlice {
            stream: self.stream,
            start: self.start,
            end: self.start + offset,
        };
        self.start += offset;
        next
    }

    /// Split off a slice of tokens from the input
    fn peek_slice(&self, offset: usize) -> Self::Slice {
        assert!(self.start + offset <= self.end);

        TokenSlice {
            stream: self.stream,
            start: self.start,
            end: self.start + offset,
        }
    }

    /// Save the current bounds so a failed alternative can rewind.
    fn checkpoint(&self) -> Self::Checkpoint {
        Checkpoint(self.start, self.end)
    }

    /// Restore bounds previously saved by `checkpoint`.
    fn reset(&mut self, checkpoint: &Self::Checkpoint) {
        self.start = checkpoint.0;
        self.end = checkpoint.1;
    }

    /// Debug view of the remaining input, used in winnow trace output.
    fn raw(&self) -> &dyn fmt::Debug {
        self
    }
}
312
/// Distance (in tokens) from `start`'s position to this slice's position.
impl<'a> winnow::stream::Offset for TokenSlice<'a> {
    fn offset_from(&self, start: &Self) -> usize {
        self.start - start.start
    }
}

/// Distance (in tokens) from a saved checkpoint to this slice's position.
impl<'a> winnow::stream::Offset<Checkpoint> for TokenSlice<'a> {
    fn offset_from(&self, start: &Checkpoint) -> usize {
        self.start - start.0
    }
}

/// Distance (in tokens) between two checkpoints' start positions.
impl winnow::stream::Offset for Checkpoint {
    fn offset_from(&self, start: &Self) -> usize {
        self.0 - start.0
    }
}
330
/// Token input is always complete — it is never fed to the parser incrementally,
/// so partial-parse state is a no-op.
impl<'a> winnow::stream::StreamIsPartial for TokenSlice<'a> {
    type PartialState = ();

    fn complete(&mut self) -> Self::PartialState {}

    fn restore_partial(&mut self, _: Self::PartialState) {}

    fn is_partial_supported() -> bool {
        false
    }
}
342
impl<'a> winnow::stream::FindSlice<&str> for TokenSlice<'a> {
    /// Find the first token whose text equals `substr`.
    /// NOTE(review): the returned range pairs a slice-relative start (`i`) with
    /// the stream-absolute `self.end`; winnow's `FindSlice` contract expects
    /// offsets relative to the input, so the end bound looks suspect — confirm
    /// callers only rely on the start of the range.
    fn find_slice(&self, substr: &str) -> Option<std::ops::Range<usize>> {
        self.iter()
            .enumerate()
            .find_map(|(i, b)| if b.value == substr { Some(i..self.end) } else { None })
    }
}
350
/// A saved `(start, end)` position of a [`TokenSlice`], used to rewind the
/// parser after a failed alternative.
#[derive(Clone, Debug)]
pub struct Checkpoint(usize, usize);
353
/// The types of tokens.
///
/// `Display` renders each variant name in camelCase (via `parse_display`).
#[derive(Debug, PartialEq, Eq, Copy, Clone, Display)]
#[display(style = "camelCase")]
pub enum TokenType {
    /// A number.
    Number,
    /// A word.
    Word,
    /// An operator.
    Operator,
    /// A string.
    String,
    /// A keyword.
    Keyword,
    /// A type.
    Type,
    /// A brace.
    Brace,
    /// A hash.
    Hash,
    /// A bang.
    Bang,
    /// A dollar sign.
    Dollar,
    /// Whitespace.
    Whitespace,
    /// A comma.
    Comma,
    /// A colon.
    Colon,
    /// A double colon: `::`
    DoubleColon,
    /// A period.
    Period,
    /// A double period: `..`.
    DoublePeriod,
    /// A double period and a less than: `..<`.
    DoublePeriodLessThan,
    /// A line comment.
    LineComment,
    /// A block comment.
    BlockComment,
    /// A function name.
    Function,
    /// Unknown lexemes.
    Unknown,
    /// The ? symbol, used for optional values.
    QuestionMark,
    /// The @ symbol.
    At,
    /// `;`
    SemiColon,
}
407
408/// Most KCL tokens correspond to LSP semantic tokens (but not all).
409impl TryFrom<TokenType> for SemanticTokenType {
410    type Error = anyhow::Error;
411    fn try_from(token_type: TokenType) -> Result<Self> {
412        // If you return a new kind of `SemanticTokenType`, make sure to update `SEMANTIC_TOKEN_TYPES`
413        // in the LSP implementation.
414        Ok(match token_type {
415            TokenType::Number => Self::NUMBER,
416            TokenType::Word => Self::VARIABLE,
417            TokenType::Keyword => Self::KEYWORD,
418            TokenType::Type => Self::TYPE,
419            TokenType::Operator => Self::OPERATOR,
420            TokenType::QuestionMark => Self::OPERATOR,
421            TokenType::String => Self::STRING,
422            TokenType::Bang => Self::OPERATOR,
423            TokenType::LineComment => Self::COMMENT,
424            TokenType::BlockComment => Self::COMMENT,
425            TokenType::Function => Self::FUNCTION,
426            TokenType::Whitespace
427            | TokenType::Brace
428            | TokenType::Comma
429            | TokenType::Colon
430            | TokenType::DoubleColon
431            | TokenType::Period
432            | TokenType::DoublePeriod
433            | TokenType::DoublePeriodLessThan
434            | TokenType::Hash
435            | TokenType::Dollar
436            | TokenType::At
437            | TokenType::SemiColon
438            | TokenType::Unknown => {
439                anyhow::bail!("unsupported token type: {:?}", token_type)
440            }
441        })
442    }
443}
444
445impl TokenType {
446    pub fn is_whitespace(&self) -> bool {
447        matches!(self, Self::Whitespace)
448    }
449
450    pub fn is_comment(&self) -> bool {
451        matches!(self, Self::LineComment | Self::BlockComment)
452    }
453}
454
/// A single lexed token: its type, the byte range of source it covers, and its text.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Token {
    pub token_type: TokenType,
    /// Offset in the source code where this token begins.
    pub start: usize,
    /// Offset in the source code where this token ends.
    pub end: usize,
    /// The module this token was lexed from.
    pub(super) module_id: ModuleId,
    /// The token's source text.
    pub(super) value: String,
}
465
466impl ContainsToken<Token> for (TokenType, &str) {
467    fn contains_token(&self, token: Token) -> bool {
468        self.0 == token.token_type && self.1 == token.value
469    }
470}
471
472impl ContainsToken<Token> for TokenType {
473    fn contains_token(&self, token: Token) -> bool {
474        *self == token.token_type
475    }
476}
477
478impl Token {
479    pub fn from_range(
480        range: std::ops::Range<usize>,
481        module_id: ModuleId,
482        token_type: TokenType,
483        value: String,
484    ) -> Self {
485        Self {
486            start: range.start,
487            end: range.end,
488            module_id,
489            value,
490            token_type,
491        }
492    }
493    pub fn is_code_token(&self) -> bool {
494        !matches!(
495            self.token_type,
496            TokenType::Whitespace | TokenType::LineComment | TokenType::BlockComment
497        )
498    }
499
500    pub fn as_source_range(&self) -> SourceRange {
501        SourceRange::new(self.start, self.end, self.module_id)
502    }
503
504    pub fn as_source_ranges(&self) -> Vec<SourceRange> {
505        vec![self.as_source_range()]
506    }
507
508    pub fn visibility_keyword(&self) -> Option<ItemVisibility> {
509        if !matches!(self.token_type, TokenType::Keyword) {
510            return None;
511        }
512        match self.value.as_str() {
513            "export" => Some(ItemVisibility::Export),
514            _ => None,
515        }
516    }
517
518    pub fn numeric_value(&self) -> Option<f64> {
519        if self.token_type != TokenType::Number {
520            return None;
521        }
522        let value = &self.value;
523        let value = value
524            .split_once(|c: char| c == '_' || c.is_ascii_alphabetic())
525            .map(|(s, _)| s)
526            .unwrap_or(value);
527        value.parse().ok()
528    }
529
530    pub fn uint_value(&self) -> Option<u32> {
531        if self.token_type != TokenType::Number {
532            return None;
533        }
534        let value = &self.value;
535        let value = value
536            .split_once(|c: char| c == '_' || c.is_ascii_alphabetic())
537            .map(|(s, _)| s)
538            .unwrap_or(value);
539        value.parse().ok()
540    }
541
542    pub fn numeric_suffix(&self) -> NumericSuffix {
543        if self.token_type != TokenType::Number {
544            return NumericSuffix::None;
545        }
546
547        if self.value.ends_with('_') {
548            return NumericSuffix::Count;
549        }
550
551        for suffix in NUM_SUFFIXES {
552            if self.value.ends_with(suffix) {
553                return suffix.parse().unwrap();
554            }
555        }
556
557        NumericSuffix::None
558    }
559
560    /// Is this token the beginning of a variable/function declaration?
561    /// If so, what kind?
562    /// If not, returns None.
563    pub fn declaration_keyword(&self) -> Option<VariableKind> {
564        if !matches!(self.token_type, TokenType::Keyword) {
565            return None;
566        }
567        Some(match self.value.as_str() {
568            "fn" => VariableKind::Fn,
569            "var" | "let" | "const" => VariableKind::Const,
570            _ => return None,
571        })
572    }
573}
574
575impl From<Token> for SourceRange {
576    fn from(token: Token) -> Self {
577        Self::new(token.start, token.end, token.module_id)
578    }
579}
580
581impl From<&Token> for SourceRange {
582    fn from(token: &Token) -> Self {
583        Self::new(token.start, token.end, token.module_id)
584    }
585}
586
587pub fn lex(s: &str, module_id: ModuleId) -> Result<TokenStream, KclError> {
588    tokeniser::lex(s, module_id).map_err(From::from)
589}
590
591impl From<ParseError<Input<'_>, winnow::error::ContextError>> for KclError {
592    fn from(err: ParseError<Input<'_>, winnow::error::ContextError>) -> Self {
593        let (input, offset): (Vec<char>, usize) = (err.input().chars().collect(), err.offset());
594        let module_id = err.input().state.module_id;
595
596        if offset >= input.len() {
597            // From the winnow docs:
598            //
599            // This is an offset, not an index, and may point to
600            // the end of input (input.len()) on eof errors.
601
602            return KclError::new_lexical(crate::errors::KclErrorDetails::new(
603                "unexpected EOF while parsing".to_owned(),
604                vec![SourceRange::new(offset, offset, module_id)],
605            ));
606        }
607
608        // TODO: Add the Winnow tokenizer context to the error.
609        // See https://github.com/KittyCAD/modeling-app/issues/784
610        let bad_token = &input[offset];
611        // TODO: Add the Winnow parser context to the error.
612        // See https://github.com/KittyCAD/modeling-app/issues/784
613        KclError::new_lexical(crate::errors::KclErrorDetails::new(
614            format!("found unknown token '{bad_token}'"),
615            vec![SourceRange::new(offset, offset + 1, module_id)],
616        ))
617    }
618}