#![allow(clippy::needless_lifetimes)]

use std::{fmt, iter::Enumerate, num::NonZeroUsize, str::FromStr};

use anyhow::Result;
use parse_display::Display;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use tokeniser::Input;
use tower_lsp::lsp_types::SemanticTokenType;
use winnow::{
    self,
    error::ParseError,
    stream::{ContainsToken, Stream},
};

use crate::{
    errors::KclError,
    parsing::ast::types::{ItemVisibility, VariableKind},
    source_range::SourceRange,
    CompilationError, ModuleId,
};

mod tokeniser;

pub(crate) use tokeniser::RESERVED_WORDS;

/// Suffixes that may terminate a number literal: units of measure, plus `?`
/// for an explicitly unknown unit.
pub const NUM_SUFFIXES: [&str; 10] = ["mm", "cm", "m", "inch", "in", "ft", "yd", "deg", "rad", "?"];

/// The suffix of a number literal: a specific unit, a unit dimension
/// (`Length`/`Angle`), `_` for a unitless count, or `?` for an unknown unit.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize, ts_rs::TS, JsonSchema)]
#[repr(u32)]
pub enum NumericSuffix {
    None,
    Count,
    Length,
    Angle,
    Mm,
    Cm,
    M,
    Inch,
    Ft,
    Yd,
    Deg,
    Rad,
    Unknown,
}

impl NumericSuffix {
    #[allow(dead_code)]
    pub fn is_none(self) -> bool {
        self == Self::None
    }

    pub fn is_some(self) -> bool {
        self != Self::None
    }

    pub fn digestable_id(&self) -> &[u8] {
        match self {
            NumericSuffix::None => &[],
            NumericSuffix::Count => b"_",
            NumericSuffix::Unknown => b"?",
            NumericSuffix::Length => b"Length",
            NumericSuffix::Angle => b"Angle",
            NumericSuffix::Mm => b"mm",
            NumericSuffix::Cm => b"cm",
            NumericSuffix::M => b"m",
            NumericSuffix::Inch => b"in",
            NumericSuffix::Ft => b"ft",
            NumericSuffix::Yd => b"yd",
            NumericSuffix::Deg => b"deg",
            NumericSuffix::Rad => b"rad",
        }
    }
}

impl FromStr for NumericSuffix {
    type Err = CompilationError;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
            "_" | "Count" => Ok(NumericSuffix::Count),
            "Length" => Ok(NumericSuffix::Length),
            "Angle" => Ok(NumericSuffix::Angle),
            "mm" | "millimeters" => Ok(NumericSuffix::Mm),
            "cm" | "centimeters" => Ok(NumericSuffix::Cm),
            "m" | "meters" => Ok(NumericSuffix::M),
            "inch" | "in" => Ok(NumericSuffix::Inch),
            "ft" | "feet" => Ok(NumericSuffix::Ft),
            "yd" | "yards" => Ok(NumericSuffix::Yd),
            "deg" | "degrees" => Ok(NumericSuffix::Deg),
            "rad" | "radians" => Ok(NumericSuffix::Rad),
            "?" => Ok(NumericSuffix::Unknown),
            _ => Err(CompilationError::err(SourceRange::default(), "invalid unit of measure")),
        }
    }
}

impl fmt::Display for NumericSuffix {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            NumericSuffix::None => Ok(()),
            NumericSuffix::Count => write!(f, "_"),
            // NB: asymmetric with `FromStr`, which parses plain `?`.
            NumericSuffix::Unknown => write!(f, "_?"),
            NumericSuffix::Length => write!(f, "Length"),
            NumericSuffix::Angle => write!(f, "Angle"),
            NumericSuffix::Mm => write!(f, "mm"),
            NumericSuffix::Cm => write!(f, "cm"),
            NumericSuffix::M => write!(f, "m"),
            NumericSuffix::Inch => write!(f, "in"),
            NumericSuffix::Ft => write!(f, "ft"),
            NumericSuffix::Yd => write!(f, "yd"),
            NumericSuffix::Deg => write!(f, "deg"),
            NumericSuffix::Rad => write!(f, "rad"),
        }
    }
}
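
// A minimal sketch (not part of the original module) of the round trip
// between `FromStr` and `Display` for `NumericSuffix`. Long unit names
// normalise to their short display form; `Count` and `Unknown` are
// asymmetric, as noted above.
#[cfg(test)]
mod numeric_suffix_examples {
    use super::*;

    #[test]
    fn suffix_round_trip() {
        for s in ["mm", "cm", "m", "in", "ft", "yd", "deg", "rad"] {
            let suffix: NumericSuffix = s.parse().unwrap();
            assert_eq!(suffix.to_string(), s);
        }
        // "inch" and "in" both parse to `Inch`, which displays as "in".
        assert_eq!("inch".parse::<NumericSuffix>().unwrap().to_string(), "in");
    }
}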

#[derive(Clone, Debug, PartialEq)]
pub(crate) struct TokenStream {
    tokens: Vec<Token>,
}

impl TokenStream {
    fn new(tokens: Vec<Token>) -> Self {
        Self { tokens }
    }

    /// Remove all `Unknown` tokens from the stream, returning them.
    pub(super) fn remove_unknown(&mut self) -> Vec<Token> {
        let tokens = std::mem::take(&mut self.tokens);
        let (tokens, unknown_tokens): (Vec<Token>, Vec<Token>) = tokens
            .into_iter()
            .partition(|token| token.token_type != TokenType::Unknown);
        self.tokens = tokens;
        unknown_tokens
    }

    pub fn iter(&self) -> impl Iterator<Item = &Token> {
        self.tokens.iter()
    }

    pub fn is_empty(&self) -> bool {
        self.tokens.is_empty()
    }

    pub fn as_slice(&self) -> TokenSlice {
        TokenSlice::from(self)
    }
}
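
// A sketch (not from the original module) of `remove_unknown`: the stream
// keeps its classified tokens and hands back the `Unknown` ones. The tokens
// are built by hand, so nothing here depends on the tokeniser; it does
// assume `ModuleId::default()` denotes the top-level module, as elsewhere
// in this crate's tests.
#[cfg(test)]
mod token_stream_examples {
    use super::*;

    #[test]
    fn remove_unknown_partitions_the_stream() {
        let word = Token::from_range(0..1, ModuleId::default(), TokenType::Word, "a".to_owned());
        let junk = Token::from_range(1..2, ModuleId::default(), TokenType::Unknown, "~".to_owned());
        let mut stream = TokenStream::new(vec![word.clone(), junk.clone()]);
        let removed = stream.remove_unknown();
        assert_eq!(removed, vec![junk]);
        assert_eq!(stream.iter().collect::<Vec<_>>(), vec![&word]);
    }
}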

impl<'a> From<&'a TokenStream> for TokenSlice<'a> {
    fn from(stream: &'a TokenStream) -> Self {
        TokenSlice {
            start: 0,
            end: stream.tokens.len(),
            stream,
        }
    }
}

impl IntoIterator for TokenStream {
    type Item = Token;

    type IntoIter = std::vec::IntoIter<Token>;

    fn into_iter(self) -> Self::IntoIter {
        self.tokens.into_iter()
    }
}

#[derive(Debug, Clone)]
pub(crate) struct TokenSlice<'a> {
    stream: &'a TokenStream,
    /// Index into `stream` of the first token in this slice.
    start: usize,
    /// Index into `stream` one past the last token in this slice.
    end: usize,
}

impl<'a> std::ops::Deref for TokenSlice<'a> {
    type Target = [Token];

    fn deref(&self) -> &Self::Target {
        &self.stream.tokens[self.start..self.end]
    }
}

impl<'a> TokenSlice<'a> {
    pub fn token(&self, i: usize) -> &Token {
        &self.stream.tokens[i + self.start]
    }

    pub fn iter(&self) -> impl Iterator<Item = &Token> {
        (**self).iter()
    }

    /// This slice, minus its first and last tokens.
    pub fn without_ends(&self) -> Self {
        Self {
            start: self.start + 1,
            end: self.end - 1,
            stream: self.stream,
        }
    }

    pub fn as_source_range(&self) -> SourceRange {
        let stream_len = self.stream.tokens.len();
        // If the slice starts or ends at EOF, fall back to the stream's last
        // token so the range still points into the source.
        let first_token = if stream_len == self.start {
            &self.stream.tokens[self.start - 1]
        } else {
            self.token(0)
        };
        let last_token = if stream_len == self.end {
            &self.stream.tokens[stream_len - 1]
        } else {
            self.token(self.end - self.start)
        };
        SourceRange::new(first_token.start, last_token.end, last_token.module_id)
    }
}
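
// A sketch (not from the original module) of `without_ends`, which is how a
// caller strips a pair of delimiters. It assumes the tokeniser lexes "(a)"
// as exactly three tokens: brace, word, brace.
#[cfg(test)]
mod token_slice_examples {
    use super::*;

    #[test]
    fn without_ends_drops_the_delimiters() {
        let stream = lex("(a)", ModuleId::default()).unwrap();
        let inner = stream.as_slice().without_ends();
        assert_eq!(inner.len(), 1); // `len` comes via `Deref<Target = [Token]>`
        assert_eq!(inner.token(0).value, "a");
    }
}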

impl<'a> IntoIterator for TokenSlice<'a> {
    type Item = &'a Token;

    type IntoIter = std::slice::Iter<'a, Token>;

    fn into_iter(self) -> Self::IntoIter {
        self.stream.tokens[self.start..self.end].iter()
    }
}

impl<'a> Stream for TokenSlice<'a> {
    type Token = Token;
    type Slice = Self;
    type IterOffsets = Enumerate<std::vec::IntoIter<Token>>;
    type Checkpoint = Checkpoint;

    fn iter_offsets(&self) -> Self::IterOffsets {
        #[allow(clippy::unnecessary_to_owned)]
        self.to_vec().into_iter().enumerate()
    }

    fn eof_offset(&self) -> usize {
        self.len()
    }

    fn next_token(&mut self) -> Option<Self::Token> {
        let token = self.first()?.clone();
        self.start += 1;
        Some(token)
    }

    fn offset_for<P>(&self, predicate: P) -> Option<usize>
    where
        P: Fn(Self::Token) -> bool,
    {
        self.iter().position(|b| predicate(b.clone()))
    }

    fn offset_at(&self, tokens: usize) -> Result<usize, winnow::error::Needed> {
        if let Some(needed) = tokens.checked_sub(self.len()).and_then(NonZeroUsize::new) {
            Err(winnow::error::Needed::Size(needed))
        } else {
            Ok(tokens)
        }
    }

    fn next_slice(&mut self, offset: usize) -> Self::Slice {
        assert!(self.start + offset <= self.end);

        let next = TokenSlice {
            stream: self.stream,
            start: self.start,
            end: self.start + offset,
        };
        self.start += offset;
        next
    }

    fn checkpoint(&self) -> Self::Checkpoint {
        Checkpoint(self.start, self.end)
    }

    fn reset(&mut self, checkpoint: &Self::Checkpoint) {
        self.start = checkpoint.0;
        self.end = checkpoint.1;
    }

    fn raw(&self) -> &dyn fmt::Debug {
        self
    }
}
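
// A sketch (not from the original module) of how winnow backtracks over the
// `Stream` impl above: `checkpoint` captures the current (start, end) window
// and `reset` restores it, so a failed parse consumes nothing.
#[cfg(test)]
mod stream_examples {
    use super::*;

    #[test]
    fn checkpoint_and_reset_restore_the_window() {
        let stream = lex("a + b", ModuleId::default()).unwrap();
        let mut slice = stream.as_slice();
        let checkpoint = slice.checkpoint();
        assert_eq!(slice.next_token().unwrap().value, "a"); // consumes a token
        slice.reset(&checkpoint); // rewind to the saved window
        assert_eq!(slice.next_token().unwrap().value, "a");
    }
}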

impl<'a> winnow::stream::Offset for TokenSlice<'a> {
    fn offset_from(&self, start: &Self) -> usize {
        self.start - start.start
    }
}

impl<'a> winnow::stream::Offset<Checkpoint> for TokenSlice<'a> {
    fn offset_from(&self, start: &Checkpoint) -> usize {
        self.start - start.0
    }
}

impl winnow::stream::Offset for Checkpoint {
    fn offset_from(&self, start: &Self) -> usize {
        self.0 - start.0
    }
}

impl<'a> winnow::stream::StreamIsPartial for TokenSlice<'a> {
    type PartialState = ();

    fn complete(&mut self) -> Self::PartialState {}

    fn restore_partial(&mut self, _: Self::PartialState) {}

    fn is_partial_supported() -> bool {
        false
    }
}

impl<'a> winnow::stream::FindSlice<&str> for TokenSlice<'a> {
    fn find_slice(&self, substr: &str) -> Option<std::ops::Range<usize>> {
        self.iter()
            .enumerate()
            .find_map(|(i, b)| if b.value == substr { Some(i..self.end) } else { None })
    }
}

/// A saved position in a `TokenSlice`: its (start, end) indices.
#[derive(Clone, Debug)]
pub struct Checkpoint(usize, usize);

#[derive(Debug, PartialEq, Eq, Copy, Clone, Display)]
#[display(style = "camelCase")]
pub enum TokenType {
    /// A number.
    Number,
    /// A word.
    Word,
    /// An operator.
    Operator,
    /// A string literal.
    String,
    /// A keyword.
    Keyword,
    /// A type.
    Type,
    /// A brace or bracket.
    Brace,
    /// A hash (`#`).
    Hash,
    /// A bang (`!`).
    Bang,
    /// A dollar sign (`$`).
    Dollar,
    /// Whitespace.
    Whitespace,
    /// A comma.
    Comma,
    /// A colon.
    Colon,
    /// A double colon (`::`).
    DoubleColon,
    /// A period.
    Period,
    /// A double period (`..`).
    DoublePeriod,
    /// A line comment.
    LineComment,
    /// A block comment.
    BlockComment,
    /// A function name.
    Function,
    /// A lexeme the tokeniser could not classify.
    Unknown,
    /// A question mark (`?`).
    QuestionMark,
    /// An at sign (`@`).
    At,
    /// A semicolon (`;`).
    SemiColon,
}

impl TryFrom<TokenType> for SemanticTokenType {
    type Error = anyhow::Error;
    fn try_from(token_type: TokenType) -> Result<Self> {
        Ok(match token_type {
            TokenType::Number => Self::NUMBER,
            TokenType::Word => Self::VARIABLE,
            TokenType::Keyword => Self::KEYWORD,
            TokenType::Type => Self::TYPE,
            TokenType::Operator => Self::OPERATOR,
            TokenType::QuestionMark => Self::OPERATOR,
            TokenType::String => Self::STRING,
            TokenType::Bang => Self::OPERATOR,
            TokenType::LineComment => Self::COMMENT,
            TokenType::BlockComment => Self::COMMENT,
            TokenType::Function => Self::FUNCTION,
            TokenType::Whitespace
            | TokenType::Brace
            | TokenType::Comma
            | TokenType::Colon
            | TokenType::DoubleColon
            | TokenType::Period
            | TokenType::DoublePeriod
            | TokenType::Hash
            | TokenType::Dollar
            | TokenType::At
            | TokenType::SemiColon
            | TokenType::Unknown => {
                anyhow::bail!("unsupported token type: {:?}", token_type)
            }
        })
    }
}
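
// A sketch (not from the original module): lexical token types map onto LSP
// semantic token types for the language server, and purely structural tokens
// (whitespace, punctuation, `Unknown`) have no LSP analogue, so the
// conversion errors out for them.
#[cfg(test)]
mod semantic_token_examples {
    use super::*;

    #[test]
    fn keyword_maps_to_lsp_keyword() {
        assert_eq!(
            SemanticTokenType::try_from(TokenType::Keyword).unwrap(),
            SemanticTokenType::KEYWORD
        );
        assert!(SemanticTokenType::try_from(TokenType::Comma).is_err());
    }
}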

impl TokenType {
    pub fn is_whitespace(&self) -> bool {
        matches!(self, Self::Whitespace)
    }

    pub fn is_comment(&self) -> bool {
        matches!(self, Self::LineComment | Self::BlockComment)
    }
}

#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Token {
    pub token_type: TokenType,
    /// Offset in the source code where this token begins.
    pub start: usize,
    /// Offset in the source code where this token ends.
    pub end: usize,
    pub(super) module_id: ModuleId,
    pub(super) value: String,
}

impl ContainsToken<Token> for (TokenType, &str) {
    fn contains_token(&self, token: Token) -> bool {
        self.0 == token.token_type && self.1 == token.value
    }
}

impl ContainsToken<Token> for TokenType {
    fn contains_token(&self, token: Token) -> bool {
        *self == token.token_type
    }
}

impl Token {
    pub fn from_range(
        range: std::ops::Range<usize>,
        module_id: ModuleId,
        token_type: TokenType,
        value: String,
    ) -> Self {
        Self {
            start: range.start,
            end: range.end,
            module_id,
            value,
            token_type,
        }
    }

    pub fn is_code_token(&self) -> bool {
        !matches!(
            self.token_type,
            TokenType::Whitespace | TokenType::LineComment | TokenType::BlockComment
        )
    }

    pub fn as_source_range(&self) -> SourceRange {
        SourceRange::new(self.start, self.end, self.module_id)
    }

    pub fn as_source_ranges(&self) -> Vec<SourceRange> {
        vec![self.as_source_range()]
    }

    pub fn visibility_keyword(&self) -> Option<ItemVisibility> {
        if !matches!(self.token_type, TokenType::Keyword) {
            return None;
        }
        match self.value.as_str() {
            "export" => Some(ItemVisibility::Export),
            _ => None,
        }
    }

    /// The numeric value of a number token, ignoring any suffix.
    /// Returns `None` for non-number tokens and unparseable values.
    pub fn numeric_value(&self) -> Option<f64> {
        if self.token_type != TokenType::Number {
            return None;
        }
        let value = &self.value;
        // Strip the suffix: everything from the first `_` or letter onwards.
        let value = value
            .split_once(|c: char| c == '_' || c.is_ascii_alphabetic())
            .map(|(s, _)| s)
            .unwrap_or(value);
        value.parse().ok()
    }

    /// Like `numeric_value`, but parses the value as a `u32`.
    pub fn uint_value(&self) -> Option<u32> {
        if self.token_type != TokenType::Number {
            return None;
        }
        let value = &self.value;
        let value = value
            .split_once(|c: char| c == '_' || c.is_ascii_alphabetic())
            .map(|(s, _)| s)
            .unwrap_or(value);
        value.parse().ok()
    }

    /// The suffix of a number token, or `NumericSuffix::None` if the token
    /// is not a number or carries no suffix.
    pub fn numeric_suffix(&self) -> NumericSuffix {
        if self.token_type != TokenType::Number {
            return NumericSuffix::None;
        }

        if self.value.ends_with('_') {
            return NumericSuffix::Count;
        }

        for suffix in NUM_SUFFIXES {
            if self.value.ends_with(suffix) {
                return suffix.parse().unwrap();
            }
        }

        NumericSuffix::None
    }

    /// If this token begins a variable or function declaration, which kind?
    pub fn declaration_keyword(&self) -> Option<VariableKind> {
        if !matches!(self.token_type, TokenType::Keyword) {
            return None;
        }
        Some(match self.value.as_str() {
            "fn" => VariableKind::Fn,
            "var" | "let" | "const" => VariableKind::Const,
            _ => return None,
        })
    }
}
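
// A sketch (not from the original module) of how a number token decomposes:
// `numeric_value` drops the suffix, `numeric_suffix` names it. The tokens
// are built by hand via `from_range`, so nothing depends on the tokeniser.
#[cfg(test)]
mod token_number_examples {
    use super::*;

    #[test]
    fn number_token_value_and_suffix() {
        let token = Token::from_range(0..5, ModuleId::default(), TokenType::Number, "4.5mm".to_owned());
        assert_eq!(token.numeric_value(), Some(4.5));
        assert_eq!(token.numeric_suffix(), NumericSuffix::Mm);

        // A bare `_` suffix marks a unitless count.
        let count = Token::from_range(0..3, ModuleId::default(), TokenType::Number, "12_".to_owned());
        assert_eq!(count.numeric_suffix(), NumericSuffix::Count);
        assert_eq!(count.numeric_value(), Some(12.0));
    }
}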

impl From<Token> for SourceRange {
    fn from(token: Token) -> Self {
        Self::new(token.start, token.end, token.module_id)
    }
}

impl From<&Token> for SourceRange {
    fn from(token: &Token) -> Self {
        Self::new(token.start, token.end, token.module_id)
    }
}

/// Lex the given source into a token stream.
pub fn lex(s: &str, module_id: ModuleId) -> Result<TokenStream, KclError> {
    tokeniser::lex(s, module_id).map_err(From::from)
}
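
// A minimal usage sketch (not from the original module). It assumes
// `ModuleId::default()` denotes the top-level module, that the tokeniser
// folds a unit suffix into its number token (as `Token::numeric_suffix`
// expects), and that whitespace is emitted as tokens (as `is_code_token`
// implies).
#[cfg(test)]
mod lex_examples {
    use super::*;

    #[test]
    fn lex_a_binary_expression() {
        let tokens = lex("x + 42mm", ModuleId::default()).unwrap();
        let types: Vec<TokenType> = tokens.iter().map(|t| t.token_type).collect();
        assert_eq!(
            types,
            vec![
                TokenType::Word,
                TokenType::Whitespace,
                TokenType::Operator,
                TokenType::Whitespace,
                TokenType::Number,
            ]
        );
    }
}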

impl From<ParseError<Input<'_>, winnow::error::ContextError>> for KclError {
    fn from(err: ParseError<Input<'_>, winnow::error::ContextError>) -> Self {
        let (input, offset): (Vec<char>, usize) = (err.input().chars().collect(), err.offset());
        let module_id = err.input().state.module_id;

        if offset >= input.len() {
            // The winnow error offset may point one past the end of input on
            // EOF errors, so there is no token to report here.
            return KclError::Lexical(crate::errors::KclErrorDetails {
                source_ranges: vec![SourceRange::new(offset, offset, module_id)],
                message: "unexpected EOF while parsing".to_string(),
            });
        }

        let bad_token = &input[offset];
        KclError::Lexical(crate::errors::KclErrorDetails {
            source_ranges: vec![SourceRange::new(offset, offset + 1, module_id)],
            message: format!("found unknown token '{}'", bad_token),
        })
    }
}