1#![allow(clippy::needless_lifetimes)]
3
4use std::fmt;
5use std::iter::Enumerate;
6use std::num::NonZeroUsize;
7use std::str::FromStr;
8
9use anyhow::Result;
10use parse_display::Display;
11use serde::Deserialize;
12use serde::Serialize;
13use tokeniser::Input;
14use tower_lsp::lsp_types::SemanticTokenType;
15use winnow::error::ParseError;
16use winnow::stream::ContainsToken;
17use winnow::stream::Stream;
18use winnow::{self};
19
20use crate::CompilationError;
21use crate::ModuleId;
22use crate::SourceRange;
23use crate::errors::KclError;
24use crate::parsing::ast::types::ItemVisibility;
25use crate::parsing::ast::types::VariableKind;
26
27mod tokeniser;
28
29pub(crate) use tokeniser::RESERVED_SKETCH_BLOCK_WORDS;
30pub(crate) use tokeniser::RESERVED_WORDS;
31
/// Every numeric suffix recognised on number literals, in match order.
/// `Token::numeric_suffix` returns the first entry that the token's text
/// ends with, so suffixes that share an ending must list the longer form
/// first (e.g. "mm" before "m", "cm" before "m").
pub const NUM_SUFFIXES: [&str; 10] = ["mm", "cm", "m", "inch", "in", "ft", "yd", "deg", "rad", "?"];
34
/// The unit (or unit family) suffix attached to a numeric literal.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize, ts_rs::TS)]
#[repr(u32)]
pub enum NumericSuffix {
    /// No suffix written on the literal.
    None,
    /// `_` — a dimensionless count.
    Count,
    /// The abstract `Length` unit family (no specific unit).
    Length,
    /// The abstract `Angle` unit family (no specific unit).
    Angle,
    /// Millimeters (`mm`).
    Mm,
    /// Centimeters (`cm`).
    Cm,
    /// Meters (`m`).
    M,
    /// Inches (`in` / `inch`).
    Inch,
    /// Feet (`ft`).
    Ft,
    /// Yards (`yd`).
    Yd,
    /// Degrees (`deg`).
    Deg,
    /// Radians (`rad`).
    Rad,
    /// `?` — an explicitly unknown unit.
    Unknown,
}
52
53impl NumericSuffix {
54 #[allow(dead_code)]
55 pub fn is_none(self) -> bool {
56 self == Self::None
57 }
58
59 pub fn is_some(self) -> bool {
60 self != Self::None
61 }
62
63 pub fn digestable_id(&self) -> &[u8] {
64 match self {
65 NumericSuffix::None => &[],
66 NumericSuffix::Count => b"_",
67 NumericSuffix::Unknown => b"?",
68 NumericSuffix::Length => b"Length",
69 NumericSuffix::Angle => b"Angle",
70 NumericSuffix::Mm => b"mm",
71 NumericSuffix::Cm => b"cm",
72 NumericSuffix::M => b"m",
73 NumericSuffix::Inch => b"in",
74 NumericSuffix::Ft => b"ft",
75 NumericSuffix::Yd => b"yd",
76 NumericSuffix::Deg => b"deg",
77 NumericSuffix::Rad => b"rad",
78 }
79 }
80}
81
82impl FromStr for NumericSuffix {
83 type Err = CompilationError;
84
85 fn from_str(s: &str) -> Result<Self, Self::Err> {
86 match s {
87 "_" | "Count" => Ok(NumericSuffix::Count),
88 "Length" => Ok(NumericSuffix::Length),
89 "Angle" => Ok(NumericSuffix::Angle),
90 "mm" | "millimeters" => Ok(NumericSuffix::Mm),
91 "cm" | "centimeters" => Ok(NumericSuffix::Cm),
92 "m" | "meters" => Ok(NumericSuffix::M),
93 "inch" | "in" => Ok(NumericSuffix::Inch),
94 "ft" | "feet" => Ok(NumericSuffix::Ft),
95 "yd" | "yards" => Ok(NumericSuffix::Yd),
96 "deg" | "degrees" => Ok(NumericSuffix::Deg),
97 "rad" | "radians" => Ok(NumericSuffix::Rad),
98 "?" => Ok(NumericSuffix::Unknown),
99 _ => Err(CompilationError::err(SourceRange::default(), "invalid unit of measure")),
100 }
101 }
102}
103
104impl fmt::Display for NumericSuffix {
105 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
106 match self {
107 NumericSuffix::None => Ok(()),
108 NumericSuffix::Count => write!(f, "_"),
109 NumericSuffix::Unknown => write!(f, "_?"),
110 NumericSuffix::Length => write!(f, "Length"),
111 NumericSuffix::Angle => write!(f, "Angle"),
112 NumericSuffix::Mm => write!(f, "mm"),
113 NumericSuffix::Cm => write!(f, "cm"),
114 NumericSuffix::M => write!(f, "m"),
115 NumericSuffix::Inch => write!(f, "in"),
116 NumericSuffix::Ft => write!(f, "ft"),
117 NumericSuffix::Yd => write!(f, "yd"),
118 NumericSuffix::Deg => write!(f, "deg"),
119 NumericSuffix::Rad => write!(f, "rad"),
120 }
121 }
122}
123
/// The full sequence of tokens produced by lexing one module's source text.
#[derive(Clone, Debug, PartialEq)]
pub(crate) struct TokenStream {
    tokens: Vec<Token>,
}
128
129impl TokenStream {
130 fn new(tokens: Vec<Token>) -> Self {
131 Self { tokens }
132 }
133
134 pub(super) fn remove_unknown(&mut self) -> Vec<Token> {
135 let tokens = std::mem::take(&mut self.tokens);
136 let (tokens, unknown_tokens): (Vec<Token>, Vec<Token>) = tokens
137 .into_iter()
138 .partition(|token| token.token_type != TokenType::Unknown);
139 self.tokens = tokens;
140 unknown_tokens
141 }
142
143 pub fn iter(&self) -> impl Iterator<Item = &Token> {
144 self.tokens.iter()
145 }
146
147 pub fn is_empty(&self) -> bool {
148 self.tokens.is_empty()
149 }
150
151 pub fn as_slice(&self) -> TokenSlice<'_> {
152 TokenSlice::from(self)
153 }
154}
155
156impl<'a> From<&'a TokenStream> for TokenSlice<'a> {
157 fn from(stream: &'a TokenStream) -> Self {
158 TokenSlice {
159 start: 0,
160 end: stream.tokens.len(),
161 stream,
162 }
163 }
164}
165
166impl IntoIterator for TokenStream {
167 type Item = Token;
168
169 type IntoIter = std::vec::IntoIter<Token>;
170
171 fn into_iter(self) -> Self::IntoIter {
172 self.tokens.into_iter()
173 }
174}
175
/// A lightweight view of a sub-range of a [`TokenStream`].
///
/// `start` and `end` are absolute indices into `stream.tokens`, forming
/// the half-open range `start..end`. Cloning is cheap (copies two indices
/// and a reference).
#[derive(Debug, Clone)]
pub(crate) struct TokenSlice<'a> {
    stream: &'a TokenStream,
    // Absolute index of the first token in view.
    start: usize,
    // Absolute index one past the last token in view.
    end: usize,
}
184
185impl<'a> std::ops::Deref for TokenSlice<'a> {
186 type Target = [Token];
187
188 fn deref(&self) -> &Self::Target {
189 &self.stream.tokens[self.start..self.end]
190 }
191}
192
impl<'a> TokenSlice<'a> {
    /// The `i`-th token of this slice; `i` is relative to the slice start.
    /// Panics if the resulting index is out of bounds of the whole stream
    /// (note: it is NOT clamped to this slice's `end`).
    pub fn token(&self, i: usize) -> &Token {
        &self.stream.tokens[i + self.start]
    }

    /// Iterate over the tokens in view (delegates to the `Deref` slice).
    pub fn iter(&self) -> impl Iterator<Item = &Token> {
        (**self).iter()
    }

    /// The same view minus its first and last token.
    /// NOTE(review): underflows (and panics in debug) when the slice is
    /// empty, since `end - 1` wraps — callers must pass a non-empty slice.
    pub fn without_ends(&self) -> Self {
        Self {
            start: self.start + 1,
            end: self.end - 1,
            stream: self.stream,
        }
    }

    /// The source range covered by this slice, for error reporting.
    ///
    /// When the slice starts or ends exactly at the end of the stream,
    /// falls back to the last real token so indexing stays in bounds.
    /// NOTE(review): in the non-clamped branch the "last" token is fetched
    /// at relative index `end - start`, i.e. the token just PAST the
    /// slice, so the reported range extends one token beyond the view —
    /// confirm this is intentional before changing it.
    pub fn as_source_range(&self) -> SourceRange {
        let stream_len = self.stream.tokens.len();
        let first_token = if stream_len == self.start {
            &self.stream.tokens[self.start - 1]
        } else {
            self.token(0)
        };
        let last_token = if stream_len == self.end {
            &self.stream.tokens[stream_len - 1]
        } else {
            self.token(self.end - self.start)
        };
        SourceRange::new(first_token.start, last_token.end, last_token.module_id)
    }
}
225
226impl<'a> IntoIterator for TokenSlice<'a> {
227 type Item = &'a Token;
228
229 type IntoIter = std::slice::Iter<'a, Token>;
230
231 fn into_iter(self) -> Self::IntoIter {
232 self.stream.tokens[self.start..self.end].iter()
233 }
234}
235
/// winnow parsing support: lets parsers consume a `TokenSlice` token by
/// token, with checkpoint/rollback for backtracking.
impl<'a> Stream for TokenSlice<'a> {
    type Token = Token;
    type Slice = Self;
    type IterOffsets = Enumerate<std::vec::IntoIter<Token>>;
    type Checkpoint = Checkpoint;

    /// Iterate `(offset, token)` pairs. Clones the viewed tokens into a
    /// fresh `Vec` because the associated iterator type must own its items.
    fn iter_offsets(&self) -> Self::IterOffsets {
        #[allow(clippy::unnecessary_to_owned)]
        self.to_vec().into_iter().enumerate()
    }

    /// Number of tokens remaining in view.
    fn eof_offset(&self) -> usize {
        self.len()
    }

    /// Consume and return the first token, advancing the view by one.
    fn next_token(&mut self) -> Option<Self::Token> {
        let token = self.first()?.clone();
        self.start += 1;
        Some(token)
    }

    /// Like `next_token` but without advancing.
    fn peek_token(&self) -> Option<Self::Token> {
        Some(self.first()?.clone())
    }

    /// Offset (relative to the view start) of the first token matching
    /// `predicate`, if any. Clones each token to satisfy the by-value
    /// predicate signature.
    fn offset_for<P>(&self, predicate: P) -> Option<usize>
    where
        P: Fn(Self::Token) -> bool,
    {
        self.iter().position(|b| predicate(b.clone()))
    }

    /// Validate that `tokens` more tokens are available; report how many
    /// are missing via `Needed::Size` otherwise.
    fn offset_at(&self, tokens: usize) -> Result<usize, winnow::error::Needed> {
        if let Some(needed) = tokens.checked_sub(self.len()).and_then(NonZeroUsize::new) {
            Err(winnow::error::Needed::Size(needed))
        } else {
            Ok(tokens)
        }
    }

    /// Split off the first `offset` tokens as a new slice and advance this
    /// view past them. Panics (assert) if `offset` exceeds the view.
    fn next_slice(&mut self, offset: usize) -> Self::Slice {
        assert!(self.start + offset <= self.end);

        let next = TokenSlice {
            stream: self.stream,
            start: self.start,
            end: self.start + offset,
        };
        self.start += offset;
        next
    }

    /// Like `next_slice` but without advancing this view.
    fn peek_slice(&self, offset: usize) -> Self::Slice {
        assert!(self.start + offset <= self.end);

        TokenSlice {
            stream: self.stream,
            start: self.start,
            end: self.start + offset,
        }
    }

    /// Snapshot the current view bounds for later rollback.
    fn checkpoint(&self) -> Self::Checkpoint {
        Checkpoint(self.start, self.end)
    }

    /// Roll the view back to a previously taken checkpoint.
    fn reset(&mut self, checkpoint: &Self::Checkpoint) {
        self.start = checkpoint.0;
        self.end = checkpoint.1;
    }

    /// Debug view of the remaining input, used in winnow trace output.
    fn raw(&self) -> &dyn fmt::Debug {
        self
    }
}
313
314impl<'a> winnow::stream::Offset for TokenSlice<'a> {
315 fn offset_from(&self, start: &Self) -> usize {
316 self.start - start.start
317 }
318}
319
320impl<'a> winnow::stream::Offset<Checkpoint> for TokenSlice<'a> {
321 fn offset_from(&self, start: &Checkpoint) -> usize {
322 self.start - start.0
323 }
324}
325
326impl winnow::stream::Offset for Checkpoint {
327 fn offset_from(&self, start: &Self) -> usize {
328 self.0 - start.0
329 }
330}
331
/// Token slices always represent complete input: the lexer has already
/// seen the whole source, so partial/streaming parsing is disabled.
impl<'a> winnow::stream::StreamIsPartial for TokenSlice<'a> {
    type PartialState = ();

    // Nothing to record — there is no partial state.
    fn complete(&mut self) -> Self::PartialState {}

    // No-op for the same reason.
    fn restore_partial(&mut self, _: Self::PartialState) {}

    fn is_partial_supported() -> bool {
        false
    }
}
343
impl<'a> winnow::stream::FindSlice<&str> for TokenSlice<'a> {
    /// Find the first token whose text equals `substr`.
    ///
    /// NOTE(review): the returned range mixes coordinate systems — `i` is
    /// an offset relative to the slice start, while `self.end` is an
    /// absolute index into the underlying stream. This is only correct if
    /// winnow consumers use the range's start alone; verify against the
    /// `FindSlice` contract before relying on the range end.
    fn find_slice(&self, substr: &str) -> Option<std::ops::Range<usize>> {
        self.iter()
            .enumerate()
            .find_map(|(i, b)| if b.value == substr { Some(i..self.end) } else { None })
    }
}
351
/// A saved `(start, end)` position of a [`TokenSlice`], produced by
/// `Stream::checkpoint` and consumed by `Stream::reset` for backtracking.
#[derive(Clone, Debug)]
pub struct Checkpoint(usize, usize);
354
/// The lexical category of a [`Token`]. The `Display` impl renders the
/// variant name in camelCase (via `parse_display`).
#[derive(Debug, PartialEq, Eq, Copy, Clone, Display)]
#[display(style = "camelCase")]
pub enum TokenType {
    /// A number literal (possibly carrying a unit suffix).
    Number,
    /// An identifier-like word.
    Word,
    /// An operator.
    Operator,
    /// A string literal.
    String,
    /// A language keyword (e.g. `fn`, `export`).
    Keyword,
    /// A type name.
    Type,
    /// A brace/bracket/paren.
    Brace,
    /// `#`.
    Hash,
    /// `!`.
    Bang,
    /// `$`.
    Dollar,
    /// Whitespace of any kind.
    Whitespace,
    /// `,`.
    Comma,
    /// `:`.
    Colon,
    /// `::`.
    DoubleColon,
    /// `.`.
    Period,
    /// `..`.
    DoublePeriod,
    /// `..<`.
    DoublePeriodLessThan,
    /// A `//`-style comment running to end of line.
    LineComment,
    /// A block comment.
    BlockComment,
    /// A function name.
    Function,
    /// Anything the tokeniser could not classify.
    Unknown,
    /// `?`.
    QuestionMark,
    /// `@`.
    At,
    /// `;`.
    SemiColon,
}
408
409impl TryFrom<TokenType> for SemanticTokenType {
411 type Error = anyhow::Error;
412 fn try_from(token_type: TokenType) -> Result<Self> {
413 Ok(match token_type {
416 TokenType::Number => Self::NUMBER,
417 TokenType::Word => Self::VARIABLE,
418 TokenType::Keyword => Self::KEYWORD,
419 TokenType::Type => Self::TYPE,
420 TokenType::Operator => Self::OPERATOR,
421 TokenType::QuestionMark => Self::OPERATOR,
422 TokenType::String => Self::STRING,
423 TokenType::Bang => Self::OPERATOR,
424 TokenType::LineComment => Self::COMMENT,
425 TokenType::BlockComment => Self::COMMENT,
426 TokenType::Function => Self::FUNCTION,
427 TokenType::Whitespace
428 | TokenType::Brace
429 | TokenType::Comma
430 | TokenType::Colon
431 | TokenType::DoubleColon
432 | TokenType::Period
433 | TokenType::DoublePeriod
434 | TokenType::DoublePeriodLessThan
435 | TokenType::Hash
436 | TokenType::Dollar
437 | TokenType::At
438 | TokenType::SemiColon
439 | TokenType::Unknown => {
440 anyhow::bail!("unsupported token type: {:?}", token_type)
441 }
442 })
443 }
444}
445
446impl TokenType {
447 pub fn is_whitespace(&self) -> bool {
448 matches!(self, Self::Whitespace)
449 }
450
451 pub fn is_comment(&self) -> bool {
452 matches!(self, Self::LineComment | Self::BlockComment)
453 }
454}
455
/// A single lexed token: its category, the source offsets it covers, the
/// module it came from, and its raw text.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Token {
    pub token_type: TokenType,
    // Offset in the source where this token starts.
    pub start: usize,
    // Offset in the source where this token ends.
    pub end: usize,
    pub(super) module_id: ModuleId,
    // The raw source text of the token.
    pub(super) value: String,
}
466
467impl ContainsToken<Token> for (TokenType, &str) {
468 fn contains_token(&self, token: Token) -> bool {
469 self.0 == token.token_type && self.1 == token.value
470 }
471}
472
473impl ContainsToken<Token> for TokenType {
474 fn contains_token(&self, token: Token) -> bool {
475 *self == token.token_type
476 }
477}
478
479impl Token {
480 pub fn from_range(
481 range: std::ops::Range<usize>,
482 module_id: ModuleId,
483 token_type: TokenType,
484 value: String,
485 ) -> Self {
486 Self {
487 start: range.start,
488 end: range.end,
489 module_id,
490 value,
491 token_type,
492 }
493 }
494 pub fn is_code_token(&self) -> bool {
495 !matches!(
496 self.token_type,
497 TokenType::Whitespace | TokenType::LineComment | TokenType::BlockComment
498 )
499 }
500
501 pub fn as_source_range(&self) -> SourceRange {
502 SourceRange::new(self.start, self.end, self.module_id)
503 }
504
505 pub fn as_source_ranges(&self) -> Vec<SourceRange> {
506 vec![self.as_source_range()]
507 }
508
509 pub fn visibility_keyword(&self) -> Option<ItemVisibility> {
510 if !matches!(self.token_type, TokenType::Keyword) {
511 return None;
512 }
513 match self.value.as_str() {
514 "export" => Some(ItemVisibility::Export),
515 _ => None,
516 }
517 }
518
519 pub fn numeric_value(&self) -> Option<f64> {
520 if self.token_type != TokenType::Number {
521 return None;
522 }
523 let value = &self.value;
524 let value = value
525 .split_once(|c: char| c == '_' || c.is_ascii_alphabetic())
526 .map(|(s, _)| s)
527 .unwrap_or(value);
528 value.parse().ok()
529 }
530
531 pub fn uint_value(&self) -> Option<u32> {
532 if self.token_type != TokenType::Number {
533 return None;
534 }
535 let value = &self.value;
536 let value = value
537 .split_once(|c: char| c == '_' || c.is_ascii_alphabetic())
538 .map(|(s, _)| s)
539 .unwrap_or(value);
540 value.parse().ok()
541 }
542
543 pub fn numeric_suffix(&self) -> NumericSuffix {
544 if self.token_type != TokenType::Number {
545 return NumericSuffix::None;
546 }
547
548 if self.value.ends_with('_') {
549 return NumericSuffix::Count;
550 }
551
552 for suffix in NUM_SUFFIXES {
553 if self.value.ends_with(suffix) {
554 return suffix.parse().unwrap();
555 }
556 }
557
558 NumericSuffix::None
559 }
560
561 pub fn declaration_keyword(&self) -> Option<VariableKind> {
565 if !matches!(self.token_type, TokenType::Keyword) {
566 return None;
567 }
568 Some(match self.value.as_str() {
569 "fn" => VariableKind::Fn,
570 "var" | "let" | "const" => VariableKind::Const,
571 _ => return None,
572 })
573 }
574}
575
576impl From<Token> for SourceRange {
577 fn from(token: Token) -> Self {
578 Self::new(token.start, token.end, token.module_id)
579 }
580}
581
582impl From<&Token> for SourceRange {
583 fn from(token: &Token) -> Self {
584 Self::new(token.start, token.end, token.module_id)
585 }
586}
587
588pub fn lex(s: &str, module_id: ModuleId) -> Result<TokenStream, KclError> {
589 tokeniser::lex(s, module_id).map_err(From::from)
590}
591
impl From<ParseError<Input<'_>, winnow::error::ContextError>> for KclError {
    /// Convert a winnow lexer error into a lexical `KclError` pointing at
    /// the offending character.
    fn from(err: ParseError<Input<'_>, winnow::error::ContextError>) -> Self {
        // Materialize the input as chars so the error offset can be used
        // as an index. NOTE(review): this assumes `err.offset()` counts
        // chars rather than bytes — confirm against the `Input` stream's
        // offset semantics.
        let (input, offset): (Vec<char>, usize) = (err.input().chars().collect(), err.offset());
        let module_id = err.input().state.module_id;

        if offset >= input.len() {
            // The error points past the end of the input: report EOF with
            // a zero-width range at that offset.
            return KclError::new_lexical(crate::errors::KclErrorDetails::new(
                "unexpected EOF while parsing".to_owned(),
                vec![SourceRange::new(offset, offset, module_id)],
            ));
        }

        // Otherwise report the single character the tokeniser choked on.
        let bad_token = &input[offset];
        KclError::new_lexical(crate::errors::KclErrorDetails::new(
            format!("found unknown token '{bad_token}'"),
            vec![SourceRange::new(offset, offset + 1, module_id)],
        ))
    }
}