#![allow(clippy::needless_lifetimes)]

use std::{fmt, iter::Enumerate, num::NonZeroUsize, str::FromStr};

use anyhow::Result;
use parse_display::Display;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use tokeniser::Input;
use tower_lsp::lsp_types::SemanticTokenType;
use winnow::{
    self,
    error::ParseError,
    stream::{ContainsToken, Stream},
};

use crate::{
    CompilationError, ModuleId,
    errors::KclError,
    parsing::ast::types::{ItemVisibility, VariableKind},
    source_range::SourceRange,
};

mod tokeniser;

pub(crate) use tokeniser::RESERVED_WORDS;

/// Suffixes which can terminate a number literal. Order matters: a longer
/// suffix must precede any shorter suffix it ends with (e.g. `mm` before `m`),
/// because `Token::numeric_suffix` matches with `ends_with`.
pub const NUM_SUFFIXES: [&str; 10] = ["mm", "cm", "m", "inch", "in", "ft", "yd", "deg", "rad", "?"];

#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize, ts_rs::TS, JsonSchema)]
#[repr(u32)]
pub enum NumericSuffix {
    None,
    Count,
    Length,
    Angle,
    Mm,
    Cm,
    M,
    Inch,
    Ft,
    Yd,
    Deg,
    Rad,
    Unknown,
}

impl NumericSuffix {
    #[allow(dead_code)]
    pub fn is_none(self) -> bool {
        self == Self::None
    }

    pub fn is_some(self) -> bool {
        self != Self::None
    }

    /// The bytes identifying this suffix when computing a digest of the AST.
    pub fn digestable_id(&self) -> &[u8] {
        match self {
            NumericSuffix::None => &[],
            NumericSuffix::Count => b"_",
            NumericSuffix::Unknown => b"?",
            NumericSuffix::Length => b"Length",
            NumericSuffix::Angle => b"Angle",
            NumericSuffix::Mm => b"mm",
            NumericSuffix::Cm => b"cm",
            NumericSuffix::M => b"m",
            NumericSuffix::Inch => b"in",
            NumericSuffix::Ft => b"ft",
            NumericSuffix::Yd => b"yd",
            NumericSuffix::Deg => b"deg",
            NumericSuffix::Rad => b"rad",
        }
    }
}

impl FromStr for NumericSuffix {
    type Err = CompilationError;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
            "_" | "Count" => Ok(NumericSuffix::Count),
            "Length" => Ok(NumericSuffix::Length),
            "Angle" => Ok(NumericSuffix::Angle),
            "mm" | "millimeters" => Ok(NumericSuffix::Mm),
            "cm" | "centimeters" => Ok(NumericSuffix::Cm),
            "m" | "meters" => Ok(NumericSuffix::M),
            "inch" | "in" => Ok(NumericSuffix::Inch),
            "ft" | "feet" => Ok(NumericSuffix::Ft),
            "yd" | "yards" => Ok(NumericSuffix::Yd),
            "deg" | "degrees" => Ok(NumericSuffix::Deg),
            "rad" | "radians" => Ok(NumericSuffix::Rad),
            "?" => Ok(NumericSuffix::Unknown),
            _ => Err(CompilationError::err(SourceRange::default(), "invalid unit of measure")),
        }
    }
}
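
// A sketch of the parsing behavior above, written as a test. It assumes only
// items declared in this file, plus `CompilationError: Debug` (needed by
// `unwrap`).
#[cfg(test)]
mod numeric_suffix_parse_examples {
    use super::*;

    #[test]
    fn parses_short_and_long_unit_names() {
        // Short and long spellings map to the same suffix.
        assert_eq!("mm".parse::<NumericSuffix>().unwrap(), NumericSuffix::Mm);
        assert_eq!("millimeters".parse::<NumericSuffix>().unwrap(), NumericSuffix::Mm);
        // Anything unrecognized is a compilation error.
        assert!("furlong".parse::<NumericSuffix>().is_err());
    }
}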

impl fmt::Display for NumericSuffix {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            NumericSuffix::None => Ok(()),
            NumericSuffix::Count => write!(f, "_"),
            NumericSuffix::Unknown => write!(f, "_?"),
            NumericSuffix::Length => write!(f, "Length"),
            NumericSuffix::Angle => write!(f, "Angle"),
            NumericSuffix::Mm => write!(f, "mm"),
            NumericSuffix::Cm => write!(f, "cm"),
            NumericSuffix::M => write!(f, "m"),
            NumericSuffix::Inch => write!(f, "in"),
            NumericSuffix::Ft => write!(f, "ft"),
            NumericSuffix::Yd => write!(f, "yd"),
            NumericSuffix::Deg => write!(f, "deg"),
            NumericSuffix::Rad => write!(f, "rad"),
        }
    }
}

/// The output of the lexer: a flat list of tokens.
#[derive(Clone, Debug, PartialEq)]
pub(crate) struct TokenStream {
    tokens: Vec<Token>,
}

impl TokenStream {
    fn new(tokens: Vec<Token>) -> Self {
        Self { tokens }
    }

    /// Drain every `TokenType::Unknown` token out of the stream, returning
    /// them and keeping the remaining tokens in their original order.
    pub(super) fn remove_unknown(&mut self) -> Vec<Token> {
        let tokens = std::mem::take(&mut self.tokens);
        let (tokens, unknown_tokens): (Vec<Token>, Vec<Token>) = tokens
            .into_iter()
            .partition(|token| token.token_type != TokenType::Unknown);
        self.tokens = tokens;
        unknown_tokens
    }

    pub fn iter(&self) -> impl Iterator<Item = &Token> {
        self.tokens.iter()
    }

    pub fn is_empty(&self) -> bool {
        self.tokens.is_empty()
    }

    pub fn as_slice(&self) -> TokenSlice {
        TokenSlice::from(self)
    }
}
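
// Illustrative test for `remove_unknown`: unknown tokens are drained out and
// the rest keep their order. Assumes `ModuleId` implements `Default` and
// `Copy`, which the constructions below rely on.
#[cfg(test)]
mod token_stream_examples {
    use super::*;

    #[test]
    fn remove_unknown_partitions_the_stream() {
        let module_id = ModuleId::default();
        let good = Token::from_range(0..1, module_id, TokenType::Number, "1".to_owned());
        let bad = Token::from_range(1..2, module_id, TokenType::Unknown, "~".to_owned());
        let mut stream = TokenStream::new(vec![good.clone(), bad.clone()]);

        let unknown = stream.remove_unknown();
        assert_eq!(unknown, vec![bad]);
        assert_eq!(stream.iter().collect::<Vec<_>>(), vec![&good]);
    }
}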

impl<'a> From<&'a TokenStream> for TokenSlice<'a> {
    fn from(stream: &'a TokenStream) -> Self {
        TokenSlice {
            start: 0,
            end: stream.tokens.len(),
            stream,
        }
    }
}

impl IntoIterator for TokenStream {
    type Item = Token;

    type IntoIter = std::vec::IntoIter<Token>;

    fn into_iter(self) -> Self::IntoIter {
        self.tokens.into_iter()
    }
}

/// A half-open window `[start, end)` onto a `TokenStream`. This is the input
/// type consumed by the winnow parser.
#[derive(Debug, Clone)]
pub(crate) struct TokenSlice<'a> {
    stream: &'a TokenStream,
    /// Index of the first token in the window.
    start: usize,
    /// Index just past the last token in the window.
    end: usize,
}

impl<'a> std::ops::Deref for TokenSlice<'a> {
    type Target = [Token];

    fn deref(&self) -> &Self::Target {
        &self.stream.tokens[self.start..self.end]
    }
}

impl<'a> TokenSlice<'a> {
    pub fn token(&self, i: usize) -> &Token {
        &self.stream.tokens[i + self.start]
    }

    pub fn iter(&self) -> impl Iterator<Item = &Token> {
        (**self).iter()
    }

    /// Shrink the window by one token at each end, e.g. to drop a pair of
    /// delimiters.
    pub fn without_ends(&self) -> Self {
        Self {
            start: self.start + 1,
            end: self.end - 1,
            stream: self.stream,
        }
    }

    pub fn as_source_range(&self) -> SourceRange {
        let stream_len = self.stream.tokens.len();
        // An empty slice sitting at EOF has no first token, so fall back to
        // the token just before it.
        let first_token = if stream_len == self.start {
            &self.stream.tokens[self.start - 1]
        } else {
            self.token(0)
        };
        let last_token = if stream_len == self.end {
            &self.stream.tokens[stream_len - 1]
        } else {
            // Note `self.token(self.end - self.start)` is the token just past
            // the window, so the range extends to that token's end.
            self.token(self.end - self.start)
        };
        SourceRange::new(first_token.start, last_token.end, last_token.module_id)
    }
}
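
// Sketch of the slicing behavior: `without_ends` narrows the window by one
// token on each side. Assumes "(1)" lexes to exactly three tokens
// (brace, number, brace), which this file's `TokenType::Brace` suggests.
#[cfg(test)]
mod token_slice_examples {
    use super::*;

    #[test]
    fn without_ends_drops_the_delimiters() {
        let stream = lex("(1)", ModuleId::default()).unwrap();
        let slice = stream.as_slice();
        let inner = slice.without_ends();
        assert_eq!(inner.len(), slice.len() - 2);
        assert_eq!(inner.token(0).token_type, TokenType::Number);
    }
}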

impl<'a> IntoIterator for TokenSlice<'a> {
    type Item = &'a Token;

    type IntoIter = std::slice::Iter<'a, Token>;

    fn into_iter(self) -> Self::IntoIter {
        self.stream.tokens[self.start..self.end].iter()
    }
}

impl<'a> Stream for TokenSlice<'a> {
    type Token = Token;
    type Slice = Self;
    type IterOffsets = Enumerate<std::vec::IntoIter<Token>>;
    type Checkpoint = Checkpoint;

    fn iter_offsets(&self) -> Self::IterOffsets {
        #[allow(clippy::unnecessary_to_owned)]
        self.to_vec().into_iter().enumerate()
    }

    fn eof_offset(&self) -> usize {
        self.len()
    }

    fn next_token(&mut self) -> Option<Self::Token> {
        let token = self.first()?.clone();
        self.start += 1;
        Some(token)
    }

    fn peek_token(&self) -> Option<Self::Token> {
        Some(self.first()?.clone())
    }

    fn offset_for<P>(&self, predicate: P) -> Option<usize>
    where
        P: Fn(Self::Token) -> bool,
    {
        self.iter().position(|b| predicate(b.clone()))
    }

    fn offset_at(&self, tokens: usize) -> Result<usize, winnow::error::Needed> {
        if let Some(needed) = tokens.checked_sub(self.len()).and_then(NonZeroUsize::new) {
            Err(winnow::error::Needed::Size(needed))
        } else {
            Ok(tokens)
        }
    }

    fn next_slice(&mut self, offset: usize) -> Self::Slice {
        assert!(self.start + offset <= self.end);

        let next = TokenSlice {
            stream: self.stream,
            start: self.start,
            end: self.start + offset,
        };
        self.start += offset;
        next
    }

    fn peek_slice(&self, offset: usize) -> Self::Slice {
        assert!(self.start + offset <= self.end);

        TokenSlice {
            stream: self.stream,
            start: self.start,
            end: self.start + offset,
        }
    }

    fn checkpoint(&self) -> Self::Checkpoint {
        Checkpoint(self.start, self.end)
    }

    fn reset(&mut self, checkpoint: &Self::Checkpoint) {
        self.start = checkpoint.0;
        self.end = checkpoint.1;
    }

    fn raw(&self) -> &dyn fmt::Debug {
        self
    }
}
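
// How winnow drives this stream: take a checkpoint, consume a token, then
// rewind and replay it. An illustrative sketch only; it assumes
// `ModuleId::default()` as in the other examples in this file.
#[cfg(test)]
mod token_slice_stream_examples {
    use super::*;

    #[test]
    fn checkpoint_and_reset_rewind_the_slice() {
        let stream = lex("x = 1", ModuleId::default()).unwrap();
        let mut slice = stream.as_slice();

        let before = slice.checkpoint();
        let first = slice.next_token().unwrap();
        slice.reset(&before);
        // After rewinding, the same token is produced again.
        assert_eq!(slice.next_token().unwrap(), first);
    }
}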

impl<'a> winnow::stream::Offset for TokenSlice<'a> {
    fn offset_from(&self, start: &Self) -> usize {
        self.start - start.start
    }
}

impl<'a> winnow::stream::Offset<Checkpoint> for TokenSlice<'a> {
    fn offset_from(&self, start: &Checkpoint) -> usize {
        self.start - start.0
    }
}

impl winnow::stream::Offset for Checkpoint {
    fn offset_from(&self, start: &Self) -> usize {
        self.0 - start.0
    }
}

impl<'a> winnow::stream::StreamIsPartial for TokenSlice<'a> {
    type PartialState = ();

    fn complete(&mut self) -> Self::PartialState {}

    fn restore_partial(&mut self, _: Self::PartialState) {}

    fn is_partial_supported() -> bool {
        false
    }
}

impl<'a> winnow::stream::FindSlice<&str> for TokenSlice<'a> {
    fn find_slice(&self, substr: &str) -> Option<std::ops::Range<usize>> {
        self.iter()
            .enumerate()
            .find_map(|(i, b)| if b.value == substr { Some(i..self.end) } else { None })
    }
}

/// A saved `(start, end)` position in a `TokenSlice`, used by winnow to
/// backtrack after a failed parse.
#[derive(Clone, Debug)]
pub struct Checkpoint(usize, usize);

/// The types of tokens.
#[derive(Debug, PartialEq, Eq, Copy, Clone, Display)]
#[display(style = "camelCase")]
pub enum TokenType {
    /// A number.
    Number,
    /// A word.
    Word,
    /// An operator.
    Operator,
    /// A string.
    String,
    /// A keyword.
    Keyword,
    /// A type.
    Type,
    /// A brace.
    Brace,
    /// `#`.
    Hash,
    /// `!`.
    Bang,
    /// `$`.
    Dollar,
    /// Whitespace.
    Whitespace,
    /// `,`.
    Comma,
    /// `:`.
    Colon,
    /// `::`.
    DoubleColon,
    /// `.`.
    Period,
    /// `..`.
    DoublePeriod,
    /// `..<`.
    DoublePeriodLessThan,
    /// A line comment.
    LineComment,
    /// A block comment.
    BlockComment,
    /// A function name.
    Function,
    /// A token the lexer could not classify.
    Unknown,
    /// `?`.
    QuestionMark,
    /// `@`.
    At,
    /// `;`.
    SemiColon,
}

impl TryFrom<TokenType> for SemanticTokenType {
    type Error = anyhow::Error;
    fn try_from(token_type: TokenType) -> Result<Self> {
        Ok(match token_type {
            TokenType::Number => Self::NUMBER,
            TokenType::Word => Self::VARIABLE,
            TokenType::Keyword => Self::KEYWORD,
            TokenType::Type => Self::TYPE,
            TokenType::Operator => Self::OPERATOR,
            TokenType::QuestionMark => Self::OPERATOR,
            TokenType::String => Self::STRING,
            TokenType::Bang => Self::OPERATOR,
            TokenType::LineComment => Self::COMMENT,
            TokenType::BlockComment => Self::COMMENT,
            TokenType::Function => Self::FUNCTION,
            TokenType::Whitespace
            | TokenType::Brace
            | TokenType::Comma
            | TokenType::Colon
            | TokenType::DoubleColon
            | TokenType::Period
            | TokenType::DoublePeriod
            | TokenType::DoublePeriodLessThan
            | TokenType::Hash
            | TokenType::Dollar
            | TokenType::At
            | TokenType::SemiColon
            | TokenType::Unknown => {
                anyhow::bail!("unsupported token type: {:?}", token_type)
            }
        })
    }
}
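
// Illustrative mapping check: lexical token types with an LSP equivalent
// convert, purely structural ones are rejected. Relies on `SemanticTokenType`
// from `tower_lsp::lsp_types` implementing `PartialEq`.
#[cfg(test)]
mod semantic_token_examples {
    use super::*;

    #[test]
    fn maps_lexical_tokens_to_lsp_semantic_tokens() {
        assert_eq!(
            SemanticTokenType::try_from(TokenType::Number).unwrap(),
            SemanticTokenType::NUMBER
        );
        assert!(SemanticTokenType::try_from(TokenType::Comma).is_err());
    }
}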

impl TokenType {
    pub fn is_whitespace(&self) -> bool {
        matches!(self, Self::Whitespace)
    }

    pub fn is_comment(&self) -> bool {
        matches!(self, Self::LineComment | Self::BlockComment)
    }
}

/// A lexical token, spanning `start..end` in the source of `module_id`.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Token {
    pub token_type: TokenType,
    /// Offset in the source code where this token begins.
    pub start: usize,
    /// Offset in the source code where this token ends.
    pub end: usize,
    pub(super) module_id: ModuleId,
    pub(super) value: String,
}

impl ContainsToken<Token> for (TokenType, &str) {
    fn contains_token(&self, token: Token) -> bool {
        self.0 == token.token_type && self.1 == token.value
    }
}

impl ContainsToken<Token> for TokenType {
    fn contains_token(&self, token: Token) -> bool {
        *self == token.token_type
    }
}

impl Token {
    pub fn from_range(
        range: std::ops::Range<usize>,
        module_id: ModuleId,
        token_type: TokenType,
        value: String,
    ) -> Self {
        Self {
            start: range.start,
            end: range.end,
            module_id,
            value,
            token_type,
        }
    }

    /// Is this token part of the code itself, as opposed to whitespace or a
    /// comment?
    pub fn is_code_token(&self) -> bool {
        !matches!(
            self.token_type,
            TokenType::Whitespace | TokenType::LineComment | TokenType::BlockComment
        )
    }

    pub fn as_source_range(&self) -> SourceRange {
        SourceRange::new(self.start, self.end, self.module_id)
    }

    pub fn as_source_ranges(&self) -> Vec<SourceRange> {
        vec![self.as_source_range()]
    }

    pub fn visibility_keyword(&self) -> Option<ItemVisibility> {
        if !matches!(self.token_type, TokenType::Keyword) {
            return None;
        }
        match self.value.as_str() {
            "export" => Some(ItemVisibility::Export),
            _ => None,
        }
    }

    /// The numeric value of a number token: everything before the first `_`
    /// or letter, i.e. before any suffix.
    pub fn numeric_value(&self) -> Option<f64> {
        if self.token_type != TokenType::Number {
            return None;
        }
        let value = &self.value;
        let value = value
            .split_once(|c: char| c == '_' || c.is_ascii_alphabetic())
            .map(|(s, _)| s)
            .unwrap_or(value);
        value.parse().ok()
    }

    /// Like [`Self::numeric_value`], but parsed as an unsigned integer.
    pub fn uint_value(&self) -> Option<u32> {
        if self.token_type != TokenType::Number {
            return None;
        }
        let value = &self.value;
        let value = value
            .split_once(|c: char| c == '_' || c.is_ascii_alphabetic())
            .map(|(s, _)| s)
            .unwrap_or(value);
        value.parse().ok()
    }

    /// The units suffix of a number token, if any.
    pub fn numeric_suffix(&self) -> NumericSuffix {
        if self.token_type != TokenType::Number {
            return NumericSuffix::None;
        }

        if self.value.ends_with('_') {
            return NumericSuffix::Count;
        }

        for suffix in NUM_SUFFIXES {
            if self.value.ends_with(suffix) {
                return suffix.parse().unwrap();
            }
        }

        NumericSuffix::None
    }

    /// If this token is a keyword which declares a variable (`fn`, `var`,
    /// `let`, or `const`), the kind of variable it declares.
    pub fn declaration_keyword(&self) -> Option<VariableKind> {
        if !matches!(self.token_type, TokenType::Keyword) {
            return None;
        }
        Some(match self.value.as_str() {
            "fn" => VariableKind::Fn,
            "var" | "let" | "const" => VariableKind::Const,
            _ => return None,
        })
    }
}
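
// Sketch of the numeric helpers above: the value parses without its suffix,
// and the suffix is recovered separately. Token construction assumes
// `ModuleId: Default + Copy`, as in the earlier examples.
#[cfg(test)]
mod token_numeric_examples {
    use super::*;

    #[test]
    fn splits_a_number_from_its_suffix() {
        let module_id = ModuleId::default();
        let token = Token::from_range(0..4, module_id, TokenType::Number, "42mm".to_owned());
        assert_eq!(token.numeric_value(), Some(42.0));
        assert_eq!(token.numeric_suffix(), NumericSuffix::Mm);

        // Non-number tokens have neither a value nor a suffix.
        let word = Token::from_range(0..1, module_id, TokenType::Word, "x".to_owned());
        assert_eq!(word.numeric_value(), None);
        assert_eq!(word.numeric_suffix(), NumericSuffix::None);
    }
}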

impl From<Token> for SourceRange {
    fn from(token: Token) -> Self {
        Self::new(token.start, token.end, token.module_id)
    }
}

impl From<&Token> for SourceRange {
    fn from(token: &Token) -> Self {
        Self::new(token.start, token.end, token.module_id)
    }
}

/// Lex the given source into a token stream.
pub fn lex(s: &str, module_id: ModuleId) -> Result<TokenStream, KclError> {
    tokeniser::lex(s, module_id).map_err(From::from)
}
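
// End-to-end sketch of `lex`. It assumes the tokeniser emits whitespace as
// tokens (which `TokenType::Whitespace` and `Token::is_code_token` suggest),
// so the example filters down to just the code tokens.
#[cfg(test)]
mod lex_examples {
    use super::*;

    #[test]
    fn lexes_a_simple_assignment() {
        let stream = lex("x = 1", ModuleId::default()).unwrap();
        let code: Vec<_> = stream.iter().filter(|t| t.is_code_token()).collect();
        assert_eq!(code.len(), 3);
        assert_eq!(code[0].token_type, TokenType::Word);
        assert_eq!(code[1].token_type, TokenType::Operator);
        assert_eq!(code[2].token_type, TokenType::Number);
    }
}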

impl From<ParseError<Input<'_>, winnow::error::ContextError>> for KclError {
    fn from(err: ParseError<Input<'_>, winnow::error::ContextError>) -> Self {
        let (input, offset): (Vec<char>, usize) = (err.input().chars().collect(), err.offset());
        let module_id = err.input().state.module_id;

        if offset >= input.len() {
            // The lexer wanted more input but hit the end of it, so report the
            // error at EOF with an empty source range.
            return KclError::new_lexical(crate::errors::KclErrorDetails::new(
                "unexpected EOF while parsing".to_owned(),
                vec![SourceRange::new(offset, offset, module_id)],
            ));
        }

        // Otherwise point at the single character which could not be tokenized.
        let bad_token = &input[offset];
        KclError::new_lexical(crate::errors::KclErrorDetails::new(
            format!("found unknown token '{bad_token}'"),
            vec![SourceRange::new(offset, offset + 1, module_id)],
        ))
    }
}