#![allow(clippy::needless_lifetimes)]

use std::{fmt, iter::Enumerate, num::NonZeroUsize, str::FromStr};

use anyhow::Result;
use parse_display::Display;
use serde::{Deserialize, Serialize};
use tokeniser::Input;
use tower_lsp::lsp_types::SemanticTokenType;
use winnow::{
    self,
    error::ParseError,
    stream::{ContainsToken, Stream},
};

use crate::{
    CompilationError, ModuleId, SourceRange,
    errors::KclError,
    parsing::ast::types::{ItemVisibility, VariableKind},
};

mod tokeniser;

pub(crate) use tokeniser::RESERVED_WORDS;

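/// Every suffix that may terminate a numeric literal. Longer suffixes appear
/// before their prefixes (`mm` before `m`, `inch` before `in`) so that
/// suffix matching tries them first.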
pub const NUM_SUFFIXES: [&str; 10] = ["mm", "cm", "m", "inch", "in", "ft", "yd", "deg", "rad", "?"];

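/// The unit-of-measure suffix attached to a numeric literal, if any.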
#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize, ts_rs::TS)]
#[repr(u32)]
pub enum NumericSuffix {
    None,
    Count,
    Length,
    Angle,
    Mm,
    Cm,
    M,
    Inch,
    Ft,
    Yd,
    Deg,
    Rad,
    Unknown,
}

impl NumericSuffix {
    #[allow(dead_code)]
    pub fn is_none(self) -> bool {
        self == Self::None
    }

    pub fn is_some(self) -> bool {
        self != Self::None
    }

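    /// A stable byte identifier for this suffix, suitable for hashing into a
    /// digest.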
    pub fn digestable_id(&self) -> &[u8] {
        match self {
            NumericSuffix::None => &[],
            NumericSuffix::Count => b"_",
            NumericSuffix::Unknown => b"?",
            NumericSuffix::Length => b"Length",
            NumericSuffix::Angle => b"Angle",
            NumericSuffix::Mm => b"mm",
            NumericSuffix::Cm => b"cm",
            NumericSuffix::M => b"m",
            NumericSuffix::Inch => b"in",
            NumericSuffix::Ft => b"ft",
            NumericSuffix::Yd => b"yd",
            NumericSuffix::Deg => b"deg",
            NumericSuffix::Rad => b"rad",
        }
    }
}

impl FromStr for NumericSuffix {
    type Err = CompilationError;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
            "_" | "Count" => Ok(NumericSuffix::Count),
            "Length" => Ok(NumericSuffix::Length),
            "Angle" => Ok(NumericSuffix::Angle),
            "mm" | "millimeters" => Ok(NumericSuffix::Mm),
            "cm" | "centimeters" => Ok(NumericSuffix::Cm),
            "m" | "meters" => Ok(NumericSuffix::M),
            "inch" | "in" => Ok(NumericSuffix::Inch),
            "ft" | "feet" => Ok(NumericSuffix::Ft),
            "yd" | "yards" => Ok(NumericSuffix::Yd),
            "deg" | "degrees" => Ok(NumericSuffix::Deg),
            "rad" | "radians" => Ok(NumericSuffix::Rad),
            "?" => Ok(NumericSuffix::Unknown),
            _ => Err(CompilationError::err(SourceRange::default(), "invalid unit of measure")),
        }
    }
}

impl fmt::Display for NumericSuffix {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            NumericSuffix::None => Ok(()),
            NumericSuffix::Count => write!(f, "_"),
            NumericSuffix::Unknown => write!(f, "_?"),
            NumericSuffix::Length => write!(f, "Length"),
            NumericSuffix::Angle => write!(f, "Angle"),
            NumericSuffix::Mm => write!(f, "mm"),
            NumericSuffix::Cm => write!(f, "cm"),
            NumericSuffix::M => write!(f, "m"),
            NumericSuffix::Inch => write!(f, "in"),
            NumericSuffix::Ft => write!(f, "ft"),
            NumericSuffix::Yd => write!(f, "yd"),
            NumericSuffix::Deg => write!(f, "deg"),
            NumericSuffix::Rad => write!(f, "rad"),
        }
    }
}

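/// The complete sequence of tokens lexed from a module's source code,
/// including whitespace and comments.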
#[derive(Clone, Debug, PartialEq)]
pub(crate) struct TokenStream {
    tokens: Vec<Token>,
}

impl TokenStream {
    fn new(tokens: Vec<Token>) -> Self {
        Self { tokens }
    }

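    /// Removes every `Unknown` token from the stream and returns them,
    /// leaving only recognised tokens behind.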
    pub(super) fn remove_unknown(&mut self) -> Vec<Token> {
        let tokens = std::mem::take(&mut self.tokens);
        let (tokens, unknown_tokens): (Vec<Token>, Vec<Token>) = tokens
            .into_iter()
            .partition(|token| token.token_type != TokenType::Unknown);
        self.tokens = tokens;
        unknown_tokens
    }

    pub fn iter(&self) -> impl Iterator<Item = &Token> {
        self.tokens.iter()
    }

    pub fn is_empty(&self) -> bool {
        self.tokens.is_empty()
    }

    pub fn as_slice(&self) -> TokenSlice<'_> {
        TokenSlice::from(self)
    }
}

impl<'a> From<&'a TokenStream> for TokenSlice<'a> {
    fn from(stream: &'a TokenStream) -> Self {
        TokenSlice {
            start: 0,
            end: stream.tokens.len(),
            stream,
        }
    }
}

impl IntoIterator for TokenStream {
    type Item = Token;

    type IntoIter = std::vec::IntoIter<Token>;

    fn into_iter(self) -> Self::IntoIter {
        self.tokens.into_iter()
    }
}

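/// A cheap, cloneable view into a contiguous range of a `TokenStream`.
/// This is the input type consumed by the parser's winnow combinators.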
#[derive(Debug, Clone)]
pub(crate) struct TokenSlice<'a> {
    stream: &'a TokenStream,
    /// Index into the stream of the first token in this slice.
    start: usize,
    /// Index into the stream one past the last token in this slice.
    end: usize,
}

impl<'a> std::ops::Deref for TokenSlice<'a> {
    type Target = [Token];

    fn deref(&self) -> &Self::Target {
        &self.stream.tokens[self.start..self.end]
    }
}

impl<'a> TokenSlice<'a> {
    pub fn token(&self, i: usize) -> &Token {
        &self.stream.tokens[i + self.start]
    }

    pub fn iter(&self) -> impl Iterator<Item = &Token> {
        (**self).iter()
    }

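    /// The same slice with its first and last tokens dropped, e.g. to strip
    /// a pair of enclosing delimiters.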
    pub fn without_ends(&self) -> Self {
        Self {
            start: self.start + 1,
            end: self.end - 1,
            stream: self.stream,
        }
    }

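    /// The source range covered by this slice. When the slice begins or ends
    /// at the very end of the stream, the final token is used instead, so a
    /// real range can still be reported.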
    pub fn as_source_range(&self) -> SourceRange {
        let stream_len = self.stream.tokens.len();
        let first_token = if stream_len == self.start {
            &self.stream.tokens[self.start - 1]
        } else {
            self.token(0)
        };
        let last_token = if stream_len == self.end {
            &self.stream.tokens[stream_len - 1]
        } else {
            self.token(self.end - self.start)
        };
        SourceRange::new(first_token.start, last_token.end, last_token.module_id)
    }
}

impl<'a> IntoIterator for TokenSlice<'a> {
    type Item = &'a Token;

    type IntoIter = std::slice::Iter<'a, Token>;

    fn into_iter(self) -> Self::IntoIter {
        self.stream.tokens[self.start..self.end].iter()
    }
}

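// Implementing winnow's `Stream` trait is what lets parser combinators pull
// `Token`s out of a `TokenSlice` directly, with checkpoint/reset support for
// backtracking after a failed parse.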
impl<'a> Stream for TokenSlice<'a> {
    type Token = Token;
    type Slice = Self;
    type IterOffsets = Enumerate<std::vec::IntoIter<Token>>;
    type Checkpoint = Checkpoint;

    fn iter_offsets(&self) -> Self::IterOffsets {
        #[allow(clippy::unnecessary_to_owned)]
        self.to_vec().into_iter().enumerate()
    }

    fn eof_offset(&self) -> usize {
        self.len()
    }

    fn next_token(&mut self) -> Option<Self::Token> {
        let token = self.first()?.clone();
        self.start += 1;
        Some(token)
    }

    fn peek_token(&self) -> Option<Self::Token> {
        Some(self.first()?.clone())
    }

    fn offset_for<P>(&self, predicate: P) -> Option<usize>
    where
        P: Fn(Self::Token) -> bool,
    {
        self.iter().position(|b| predicate(b.clone()))
    }

    fn offset_at(&self, tokens: usize) -> Result<usize, winnow::error::Needed> {
        if let Some(needed) = tokens.checked_sub(self.len()).and_then(NonZeroUsize::new) {
            Err(winnow::error::Needed::Size(needed))
        } else {
            Ok(tokens)
        }
    }

    fn next_slice(&mut self, offset: usize) -> Self::Slice {
        assert!(self.start + offset <= self.end);

        let next = TokenSlice {
            stream: self.stream,
            start: self.start,
            end: self.start + offset,
        };
        self.start += offset;
        next
    }

    fn peek_slice(&self, offset: usize) -> Self::Slice {
        assert!(self.start + offset <= self.end);

        TokenSlice {
            stream: self.stream,
            start: self.start,
            end: self.start + offset,
        }
    }

    fn checkpoint(&self) -> Self::Checkpoint {
        Checkpoint(self.start, self.end)
    }

    fn reset(&mut self, checkpoint: &Self::Checkpoint) {
        self.start = checkpoint.0;
        self.end = checkpoint.1;
    }

    fn raw(&self) -> &dyn fmt::Debug {
        self
    }
}

impl<'a> winnow::stream::Offset for TokenSlice<'a> {
    fn offset_from(&self, start: &Self) -> usize {
        self.start - start.start
    }
}

impl<'a> winnow::stream::Offset<Checkpoint> for TokenSlice<'a> {
    fn offset_from(&self, start: &Checkpoint) -> usize {
        self.start - start.0
    }
}

impl winnow::stream::Offset for Checkpoint {
    fn offset_from(&self, start: &Self) -> usize {
        self.0 - start.0
    }
}

impl<'a> winnow::stream::StreamIsPartial for TokenSlice<'a> {
    type PartialState = ();

    fn complete(&mut self) -> Self::PartialState {}

    fn restore_partial(&mut self, _: Self::PartialState) {}

    fn is_partial_supported() -> bool {
        false
    }
}

impl<'a> winnow::stream::FindSlice<&str> for TokenSlice<'a> {
    fn find_slice(&self, substr: &str) -> Option<std::ops::Range<usize>> {
        self.iter()
            .enumerate()
            .find_map(|(i, b)| if b.value == substr { Some(i..self.end) } else { None })
    }
}

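/// A saved `(start, end)` position within a `TokenSlice`, letting winnow
/// rewind the slice after a failed parse.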
#[derive(Clone, Debug)]
pub struct Checkpoint(usize, usize);

/// The types of tokens.
#[derive(Debug, PartialEq, Eq, Copy, Clone, Display)]
#[display(style = "camelCase")]
pub enum TokenType {
    /// A number.
    Number,
    /// A word.
    Word,
    /// An operator.
    Operator,
    /// A string.
    String,
    /// A keyword.
    Keyword,
    /// A type.
    Type,
    /// A brace.
    Brace,
    /// A hash: `#`.
    Hash,
    /// A bang: `!`.
    Bang,
    /// A dollar sign: `$`.
    Dollar,
    /// Whitespace.
    Whitespace,
    /// A comma: `,`.
    Comma,
    /// A colon: `:`.
    Colon,
    /// A double colon: `::`.
    DoubleColon,
    /// A period: `.`.
    Period,
    /// A double period: `..`.
    DoublePeriod,
    /// An end-exclusive range operator: `..<`.
    DoublePeriodLessThan,
    /// A line comment.
    LineComment,
    /// A block comment.
    BlockComment,
    /// A function name.
    Function,
    /// A token the lexer could not classify.
    Unknown,
    /// A question mark: `?`.
    QuestionMark,
    /// An at sign: `@`.
    At,
    /// A semicolon: `;`.
    SemiColon,
}

impl TryFrom<TokenType> for SemanticTokenType {
    type Error = anyhow::Error;
    fn try_from(token_type: TokenType) -> Result<Self> {
        Ok(match token_type {
            TokenType::Number => Self::NUMBER,
            TokenType::Word => Self::VARIABLE,
            TokenType::Keyword => Self::KEYWORD,
            TokenType::Type => Self::TYPE,
            TokenType::Operator => Self::OPERATOR,
            TokenType::QuestionMark => Self::OPERATOR,
            TokenType::String => Self::STRING,
            TokenType::Bang => Self::OPERATOR,
            TokenType::LineComment => Self::COMMENT,
            TokenType::BlockComment => Self::COMMENT,
            TokenType::Function => Self::FUNCTION,
            TokenType::Whitespace
            | TokenType::Brace
            | TokenType::Comma
            | TokenType::Colon
            | TokenType::DoubleColon
            | TokenType::Period
            | TokenType::DoublePeriod
            | TokenType::DoublePeriodLessThan
            | TokenType::Hash
            | TokenType::Dollar
            | TokenType::At
            | TokenType::SemiColon
            | TokenType::Unknown => {
                anyhow::bail!("unsupported token type: {:?}", token_type)
            }
        })
    }
}

impl TokenType {
    pub fn is_whitespace(&self) -> bool {
        matches!(self, Self::Whitespace)
    }

    pub fn is_comment(&self) -> bool {
        matches!(self, Self::LineComment | Self::BlockComment)
    }
}

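/// A lexed token: its type, its source text, and where in the source it
/// came from.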
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Token {
    pub token_type: TokenType,
    /// Offset in the source code where this token begins.
    pub start: usize,
    /// Offset in the source code where this token ends.
    pub end: usize,
    pub(super) module_id: ModuleId,
    pub(super) value: String,
}

impl ContainsToken<Token> for (TokenType, &str) {
    fn contains_token(&self, token: Token) -> bool {
        self.0 == token.token_type && self.1 == token.value
    }
}

impl ContainsToken<Token> for TokenType {
    fn contains_token(&self, token: Token) -> bool {
        *self == token.token_type
    }
}

impl Token {
    pub fn from_range(
        range: std::ops::Range<usize>,
        module_id: ModuleId,
        token_type: TokenType,
        value: String,
    ) -> Self {
        Self {
            start: range.start,
            end: range.end,
            module_id,
            value,
            token_type,
        }
    }

    pub fn is_code_token(&self) -> bool {
        !matches!(
            self.token_type,
            TokenType::Whitespace | TokenType::LineComment | TokenType::BlockComment
        )
    }

    pub fn as_source_range(&self) -> SourceRange {
        SourceRange::new(self.start, self.end, self.module_id)
    }

    pub fn as_source_ranges(&self) -> Vec<SourceRange> {
        vec![self.as_source_range()]
    }

    pub fn visibility_keyword(&self) -> Option<ItemVisibility> {
        if !matches!(self.token_type, TokenType::Keyword) {
            return None;
        }
        match self.value.as_str() {
            "export" => Some(ItemVisibility::Export),
            _ => None,
        }
    }

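    /// The numeric value of this token with any unit suffix stripped, or
    /// `None` if this is not a number token.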
    pub fn numeric_value(&self) -> Option<f64> {
        if self.token_type != TokenType::Number {
            return None;
        }
        let value = &self.value;
        let value = value
            .split_once(|c: char| c == '_' || c.is_ascii_alphabetic())
            .map(|(s, _)| s)
            .unwrap_or(value);
        value.parse().ok()
    }

    pub fn uint_value(&self) -> Option<u32> {
        if self.token_type != TokenType::Number {
            return None;
        }
        let value = &self.value;
        let value = value
            .split_once(|c: char| c == '_' || c.is_ascii_alphabetic())
            .map(|(s, _)| s)
            .unwrap_or(value);
        value.parse().ok()
    }

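    /// The unit suffix of this number token, or `NumericSuffix::None` if the
    /// literal has no suffix (or this is not a number token).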
    pub fn numeric_suffix(&self) -> NumericSuffix {
        if self.token_type != TokenType::Number {
            return NumericSuffix::None;
        }

        if self.value.ends_with('_') {
            return NumericSuffix::Count;
        }

        for suffix in NUM_SUFFIXES {
            if self.value.ends_with(suffix) {
                return suffix.parse().unwrap();
            }
        }

        NumericSuffix::None
    }

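    /// If this token is a keyword that begins a declaration, the kind of
    /// declaration it starts.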
    pub fn declaration_keyword(&self) -> Option<VariableKind> {
        if !matches!(self.token_type, TokenType::Keyword) {
            return None;
        }
        Some(match self.value.as_str() {
            "fn" => VariableKind::Fn,
            "var" | "let" | "const" => VariableKind::Const,
            _ => return None,
        })
    }
}

impl From<Token> for SourceRange {
    fn from(token: Token) -> Self {
        Self::new(token.start, token.end, token.module_id)
    }
}

impl From<&Token> for SourceRange {
    fn from(token: &Token) -> Self {
        Self::new(token.start, token.end, token.module_id)
    }
}

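/// Lex the given source code into a token stream.
///
/// A minimal sketch of typical usage (this assumes `ModuleId` implements
/// `Default` for the top-level module; the source snippet is hypothetical):
///
/// ```ignore
/// let tokens = lex("radius = 5mm", ModuleId::default())?;
/// for token in tokens.iter().filter(|t| t.is_code_token()) {
///     println!("{:?}: {}", token.token_type, token.value);
/// }
/// ```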
pub fn lex(s: &str, module_id: ModuleId) -> Result<TokenStream, KclError> {
    tokeniser::lex(s, module_id).map_err(From::from)
}

impl From<ParseError<Input<'_>, winnow::error::ContextError>> for KclError {
    fn from(err: ParseError<Input<'_>, winnow::error::ContextError>) -> Self {
        let (input, offset): (Vec<char>, usize) = (err.input().chars().collect(), err.offset());
        let module_id = err.input().state.module_id;

        if offset >= input.len() {
            // From the winnow docs: the offset is not an index, and may point
            // to the end of input (input.len()) on eof errors.
            return KclError::new_lexical(crate::errors::KclErrorDetails::new(
                "unexpected EOF while parsing".to_owned(),
                vec![SourceRange::new(offset, offset, module_id)],
            ));
        }

        // The lexer hit a character it could not recognise; report the span
        // of just that one character.
        let bad_token = &input[offset];
        KclError::new_lexical(crate::errors::KclErrorDetails::new(
            format!("found unknown token '{bad_token}'"),
            vec![SourceRange::new(offset, offset + 1, module_id)],
        ))
    }
}