kcl_lib/parsing/token/mod.rs

// Clippy does not agree with rustc here for some reason.
#![allow(clippy::needless_lifetimes)]

use std::{fmt, iter::Enumerate, num::NonZeroUsize, str::FromStr};

use anyhow::Result;
use parse_display::Display;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use tokeniser::Input;
use tower_lsp::lsp_types::SemanticTokenType;
use winnow::{
    self,
    error::ParseError,
    stream::{ContainsToken, Stream},
};

use crate::{
    errors::KclError,
    parsing::ast::types::{ItemVisibility, VariableKind},
    source_range::SourceRange,
    CompilationError, ModuleId,
};

mod tokeniser;

#[cfg(test)]
pub(crate) use tokeniser::RESERVED_WORDS;

// Note the ordering: `m` must come after `mm` and `cm`, because suffix
// matching below uses `ends_with` and `m` would otherwise shadow both of them.
pub const NUM_SUFFIXES: [&str; 9] = ["mm", "cm", "m", "inch", "in", "ft", "yd", "deg", "rad"];

/// The suffix of a number literal, e.g. the `mm` in `5mm`. `None` means the
/// literal had no suffix; `Count` is the explicit unitless suffix `_`.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize, ts_rs::TS, JsonSchema)]
#[repr(u32)]
pub enum NumericSuffix {
    None,
    Count,
    Mm,
    Cm,
    M,
    Inch,
    Ft,
    Yd,
    Deg,
    Rad,
}

impl NumericSuffix {
    #[allow(dead_code)]
    pub fn is_none(self) -> bool {
        self == Self::None
    }

    pub fn is_some(self) -> bool {
        self != Self::None
    }

    /// A stable byte representation of this suffix, suitable for feeding into
    /// a digest.
    pub fn digestable_id(&self) -> &[u8] {
        match self {
            NumericSuffix::None => &[],
            NumericSuffix::Count => b"_",
            NumericSuffix::Mm => b"mm",
            NumericSuffix::Cm => b"cm",
            NumericSuffix::M => b"m",
            NumericSuffix::Inch => b"in",
            NumericSuffix::Ft => b"ft",
            NumericSuffix::Yd => b"yd",
            NumericSuffix::Deg => b"deg",
            NumericSuffix::Rad => b"rad",
        }
    }
}

impl FromStr for NumericSuffix {
    type Err = CompilationError;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
            "_" => Ok(NumericSuffix::Count),
            "mm" | "millimeters" => Ok(NumericSuffix::Mm),
            "cm" | "centimeters" => Ok(NumericSuffix::Cm),
            "m" | "meters" => Ok(NumericSuffix::M),
            "inch" | "in" => Ok(NumericSuffix::Inch),
            "ft" | "feet" => Ok(NumericSuffix::Ft),
            "yd" | "yards" => Ok(NumericSuffix::Yd),
            "deg" | "degrees" => Ok(NumericSuffix::Deg),
            "rad" | "radians" => Ok(NumericSuffix::Rad),
            _ => Err(CompilationError::err(SourceRange::default(), "invalid unit of measure")),
        }
    }
}
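
// A minimal usage sketch (illustrative, not a doctest): `FromStr` accepts both
// the short and the long unit names.
//
//     assert_eq!("mm".parse::<NumericSuffix>().unwrap(), NumericSuffix::Mm);
//     assert_eq!("millimeters".parse::<NumericSuffix>().unwrap(), NumericSuffix::Mm);
//     assert!("furlongs".parse::<NumericSuffix>().is_err());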

impl fmt::Display for NumericSuffix {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            NumericSuffix::None => Ok(()),
            NumericSuffix::Count => write!(f, "_"),
            NumericSuffix::Mm => write!(f, "mm"),
            NumericSuffix::Cm => write!(f, "cm"),
            NumericSuffix::M => write!(f, "m"),
            NumericSuffix::Inch => write!(f, "in"),
            NumericSuffix::Ft => write!(f, "ft"),
            NumericSuffix::Yd => write!(f, "yd"),
            NumericSuffix::Deg => write!(f, "deg"),
            NumericSuffix::Rad => write!(f, "rad"),
        }
    }
}
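
// Note that `Display` always renders the canonical short form, so round-tripping
// through `parse` normalizes long names (a sketch, not a doctest):
//
//     let s: NumericSuffix = "degrees".parse().unwrap();
//     assert_eq!(s.to_string(), "deg");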

/// A sequence of tokens lexed from one module's source code.
#[derive(Clone, Debug, PartialEq)]
pub(crate) struct TokenStream {
    tokens: Vec<Token>,
}

impl TokenStream {
    fn new(tokens: Vec<Token>) -> Self {
        Self { tokens }
    }

    /// Remove any `Unknown` tokens from the stream and return them, so the
    /// caller can report them.
    pub(super) fn remove_unknown(&mut self) -> Vec<Token> {
        let tokens = std::mem::take(&mut self.tokens);
        let (tokens, unknown_tokens): (Vec<Token>, Vec<Token>) = tokens
            .into_iter()
            .partition(|token| token.token_type != TokenType::Unknown);
        self.tokens = tokens;
        unknown_tokens
    }

    pub fn iter(&self) -> impl Iterator<Item = &Token> {
        self.tokens.iter()
    }

    pub fn is_empty(&self) -> bool {
        self.tokens.is_empty()
    }

    pub fn as_slice(&self) -> TokenSlice {
        TokenSlice::from(self)
    }
}
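
// A sketch of the stream-to-slice flow (illustrative, not a doctest): parsers
// operate on cheap `TokenSlice` views rather than on the owned stream.
//
//     let stream = lex("x = 1 + 2", module_id)?;
//     let slice = stream.as_slice();
//     assert_eq!(slice.len(), stream.iter().count());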

impl<'a> From<&'a TokenStream> for TokenSlice<'a> {
    fn from(stream: &'a TokenStream) -> Self {
        TokenSlice {
            start: 0,
            end: stream.tokens.len(),
            stream,
        }
    }
}

impl IntoIterator for TokenStream {
    type Item = Token;

    type IntoIter = std::vec::IntoIter<Token>;

    fn into_iter(self) -> Self::IntoIter {
        self.tokens.into_iter()
    }
}

/// A view into a contiguous range of a `TokenStream`. Cloning is cheap: only
/// the `start`/`end` indices and a reference to the stream are copied.
#[derive(Debug, Clone)]
pub(crate) struct TokenSlice<'a> {
    stream: &'a TokenStream,
    start: usize,
    end: usize,
}

impl<'a> std::ops::Deref for TokenSlice<'a> {
    type Target = [Token];

    fn deref(&self) -> &Self::Target {
        &self.stream.tokens[self.start..self.end]
    }
}

impl<'a> TokenSlice<'a> {
    /// The token at index `i`, relative to the start of this slice.
    pub fn token(&self, i: usize) -> &Token {
        &self.stream.tokens[i + self.start]
    }

    pub fn iter(&self) -> impl Iterator<Item = &Token> {
        (**self).iter()
    }

    /// This slice without its first and last tokens. Underflows if the slice
    /// is empty.
    pub fn without_ends(&self) -> Self {
        Self {
            start: self.start + 1,
            end: self.end - 1,
            stream: self.stream,
        }
    }
}
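
// For example (a sketch, not a doctest): given a slice covering the tokens of
// `(a + b)`, `slice.without_ends()` yields the tokens of `a + b`, dropping the
// two parentheses.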

impl<'a> IntoIterator for TokenSlice<'a> {
    type Item = &'a Token;

    type IntoIter = std::slice::Iter<'a, Token>;

    fn into_iter(self) -> Self::IntoIter {
        self.stream.tokens[self.start..self.end].iter()
    }
}

impl<'a> Stream for TokenSlice<'a> {
    type Token = Token;
    type Slice = Self;
    type IterOffsets = Enumerate<std::vec::IntoIter<Token>>;
    type Checkpoint = Checkpoint;

    fn iter_offsets(&self) -> Self::IterOffsets {
        #[allow(clippy::unnecessary_to_owned)]
        self.to_vec().into_iter().enumerate()
    }

    fn eof_offset(&self) -> usize {
        self.len()
    }

    fn next_token(&mut self) -> Option<Self::Token> {
        let token = self.first()?.clone();
        self.start += 1;
        Some(token)
    }

    fn offset_for<P>(&self, predicate: P) -> Option<usize>
    where
        P: Fn(Self::Token) -> bool,
    {
        self.iter().position(|b| predicate(b.clone()))
    }

    fn offset_at(&self, tokens: usize) -> Result<usize, winnow::error::Needed> {
        if let Some(needed) = tokens.checked_sub(self.len()).and_then(NonZeroUsize::new) {
            Err(winnow::error::Needed::Size(needed))
        } else {
            Ok(tokens)
        }
    }

    fn next_slice(&mut self, offset: usize) -> Self::Slice {
        assert!(self.start + offset <= self.end);

        let next = TokenSlice {
            stream: self.stream,
            start: self.start,
            end: self.start + offset,
        };
        self.start += offset;
        next
    }

    fn checkpoint(&self) -> Self::Checkpoint {
        Checkpoint(self.start, self.end)
    }

    fn reset(&mut self, checkpoint: &Self::Checkpoint) {
        self.start = checkpoint.0;
        self.end = checkpoint.1;
    }

    fn raw(&self) -> &dyn fmt::Debug {
        self
    }
}

impl<'a> winnow::stream::Offset for TokenSlice<'a> {
    fn offset_from(&self, start: &Self) -> usize {
        self.start - start.start
    }
}

impl<'a> winnow::stream::Offset<Checkpoint> for TokenSlice<'a> {
    fn offset_from(&self, start: &Checkpoint) -> usize {
        self.start - start.0
    }
}

impl winnow::stream::Offset for Checkpoint {
    fn offset_from(&self, start: &Self) -> usize {
        self.0 - start.0
    }
}

impl<'a> winnow::stream::StreamIsPartial for TokenSlice<'a> {
    type PartialState = ();

    fn complete(&mut self) -> Self::PartialState {}

    fn restore_partial(&mut self, _: Self::PartialState) {}

    fn is_partial_supported() -> bool {
        false
    }
}

/// A saved position in a `TokenSlice`, i.e. its `(start, end)` indices, used
/// by winnow for backtracking.
#[derive(Clone, Debug)]
pub struct Checkpoint(usize, usize);

/// The types of tokens.
#[derive(Debug, PartialEq, Eq, Copy, Clone, Display)]
#[display(style = "camelCase")]
pub enum TokenType {
    /// A number.
    Number,
    /// A word.
    Word,
    /// An operator.
    Operator,
    /// A string.
    String,
    /// A keyword.
    Keyword,
    /// A type.
    Type,
    /// A brace.
    Brace,
    /// A hash.
    Hash,
    /// A bang.
    Bang,
    /// A dollar sign.
    Dollar,
    /// Whitespace.
    Whitespace,
    /// A comma.
    Comma,
    /// A colon.
    Colon,
    /// A period.
    Period,
    /// A double period: `..`.
    DoublePeriod,
    /// A line comment.
    LineComment,
    /// A block comment.
    BlockComment,
    /// A function name.
    Function,
    /// Unknown lexemes.
    Unknown,
    /// The `?` symbol, used for optional values.
    QuestionMark,
    /// The `@` symbol.
    At,
}

/// Most KCL tokens correspond to LSP semantic tokens (but not all).
impl TryFrom<TokenType> for SemanticTokenType {
    type Error = anyhow::Error;
    fn try_from(token_type: TokenType) -> Result<Self> {
        // If you return a new kind of `SemanticTokenType`, make sure to update `SEMANTIC_TOKEN_TYPES`
        // in the LSP implementation.
        Ok(match token_type {
            TokenType::Number => Self::NUMBER,
            TokenType::Word => Self::VARIABLE,
            TokenType::Keyword => Self::KEYWORD,
            TokenType::Type => Self::TYPE,
            TokenType::Operator => Self::OPERATOR,
            TokenType::QuestionMark => Self::OPERATOR,
            TokenType::String => Self::STRING,
            TokenType::Bang => Self::OPERATOR,
            TokenType::LineComment => Self::COMMENT,
            TokenType::BlockComment => Self::COMMENT,
            TokenType::Function => Self::FUNCTION,
            TokenType::Whitespace
            | TokenType::Brace
            | TokenType::Comma
            | TokenType::Colon
            | TokenType::Period
            | TokenType::DoublePeriod
            | TokenType::Hash
            | TokenType::Dollar
            | TokenType::At
            | TokenType::Unknown => {
                anyhow::bail!("unsupported token type: {:?}", token_type)
            }
        })
    }
}
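
// Punctuation and unknown lexemes have no LSP equivalent, so the conversion is
// fallible (a sketch, not a doctest):
//
//     assert!(SemanticTokenType::try_from(TokenType::Comma).is_err());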

impl TokenType {
    pub fn is_whitespace(&self) -> bool {
        matches!(self, Self::Whitespace)
    }

    pub fn is_comment(&self) -> bool {
        matches!(self, Self::LineComment | Self::BlockComment)
    }
}

/// A lexical token, spanning `start..end` in its module's source code.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Token {
    pub token_type: TokenType,
    /// Offset in the source code where this token begins.
    pub start: usize,
    /// Offset in the source code where this token ends.
    pub end: usize,
    pub(super) module_id: ModuleId,
    /// The source text this token covers.
    pub(super) value: String,
}

impl ContainsToken<Token> for (TokenType, &str) {
    fn contains_token(&self, token: Token) -> bool {
        self.0 == token.token_type && self.1 == token.value
    }
}

impl ContainsToken<Token> for TokenType {
    fn contains_token(&self, token: Token) -> bool {
        *self == token.token_type
    }
}
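
// Together with the `Stream` impl above, these `ContainsToken` impls let
// winnow's token combinators match on a `TokenSlice` (a sketch, not a doctest):
//
//     use winnow::token::one_of;
//
//     // Match any keyword token:
//     let keyword = one_of(TokenType::Keyword);
//     // Match the specific `fn` keyword:
//     let fn_kw = one_of((TokenType::Keyword, "fn"));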

impl Token {
    pub fn from_range(
        range: std::ops::Range<usize>,
        module_id: ModuleId,
        token_type: TokenType,
        value: String,
    ) -> Self {
        Self {
            start: range.start,
            end: range.end,
            module_id,
            value,
            token_type,
        }
    }

    /// Is this token actual code, rather than whitespace or a comment?
    pub fn is_code_token(&self) -> bool {
        !matches!(
            self.token_type,
            TokenType::Whitespace | TokenType::LineComment | TokenType::BlockComment
        )
    }

    pub fn as_source_range(&self) -> SourceRange {
        SourceRange::new(self.start, self.end, self.module_id)
    }

    pub fn as_source_ranges(&self) -> Vec<SourceRange> {
        vec![self.as_source_range()]
    }

    /// If this token is a visibility keyword (e.g. `export`), which visibility
    /// does it denote? Returns `None` otherwise.
    pub fn visibility_keyword(&self) -> Option<ItemVisibility> {
        if !matches!(self.token_type, TokenType::Keyword) {
            return None;
        }
        match self.value.as_str() {
            "export" => Some(ItemVisibility::Export),
            _ => None,
        }
    }

    /// The numeric value of this token, with any suffix stripped. Returns
    /// `None` if this is not a number token or its value doesn't parse.
    pub fn numeric_value(&self) -> Option<f64> {
        if self.token_type != TokenType::Number {
            return None;
        }
        let value = &self.value;
        let value = value
            .split_once(|c: char| c == '_' || c.is_ascii_alphabetic())
            .map(|(s, _)| s)
            .unwrap_or(value);
        value.parse().ok()
    }

    /// The numeric suffix of this token, e.g. `Mm` for `5mm`. Returns
    /// `NumericSuffix::None` for unsuffixed numbers and non-number tokens.
    pub fn numeric_suffix(&self) -> NumericSuffix {
        if self.token_type != TokenType::Number {
            return NumericSuffix::None;
        }

        if self.value.ends_with('_') {
            return NumericSuffix::Count;
        }

        for suffix in NUM_SUFFIXES {
            if self.value.ends_with(suffix) {
                return suffix.parse().unwrap();
            }
        }

        NumericSuffix::None
    }

    /// Is this token the beginning of a variable/function declaration?
    /// If so, what kind? If not, returns `None`.
    pub fn declaration_keyword(&self) -> Option<VariableKind> {
        if !matches!(self.token_type, TokenType::Keyword) {
            return None;
        }
        Some(match self.value.as_str() {
            "fn" => VariableKind::Fn,
            "var" | "let" | "const" => VariableKind::Const,
            _ => return None,
        })
    }
}
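
// A sketch of how the numeric helpers decompose a literal (illustrative, not a
// doctest), assuming `token` was lexed from the literal `5mm`:
//
//     assert_eq!(token.numeric_value(), Some(5.0));
//     assert_eq!(token.numeric_suffix(), NumericSuffix::Mm);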

impl From<Token> for SourceRange {
    fn from(token: Token) -> Self {
        Self::new(token.start, token.end, token.module_id)
    }
}

impl From<&Token> for SourceRange {
    fn from(token: &Token) -> Self {
        Self::new(token.start, token.end, token.module_id)
    }
}

/// Lex the given source code into a token stream.
pub fn lex(s: &str, module_id: ModuleId) -> Result<TokenStream, KclError> {
    tokeniser::lex(s, module_id).map_err(From::from)
}
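
// Entry-point sketch (illustrative, not a doctest): lex a module's source and
// surface any unknown lexemes to the user.
//
//     let mut stream = lex(source, module_id)?;
//     let unknown = stream.remove_unknown();
//     if !unknown.is_empty() {
//         // report each `unknown[i].as_source_range()` as an error
//     }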

impl From<ParseError<Input<'_>, winnow::error::ContextError>> for KclError {
    fn from(err: ParseError<Input<'_>, winnow::error::ContextError>) -> Self {
        let (input, offset): (Vec<char>, usize) = (err.input().chars().collect(), err.offset());
        let module_id = err.input().state.module_id;

        if offset >= input.len() {
            // From the winnow docs:
            //
            // This is an offset, not an index, and may point to
            // the end of input (input.len()) on eof errors.

            return KclError::Lexical(crate::errors::KclErrorDetails {
                source_ranges: vec![SourceRange::new(offset, offset, module_id)],
                message: "unexpected EOF while parsing".to_string(),
            });
        }

        // TODO: Add the Winnow tokenizer context to the error.
        // See https://github.com/KittyCAD/modeling-app/issues/784
        let bad_token = &input[offset];
        // TODO: Add the Winnow parser context to the error.
        // See https://github.com/KittyCAD/modeling-app/issues/784
        KclError::Lexical(crate::errors::KclErrorDetails {
            source_ranges: vec![SourceRange::new(offset, offset + 1, module_id)],
            message: format!("found unknown token '{}'", bad_token),
        })
    }
}