#![allow(clippy::needless_lifetimes)]

use std::{fmt, iter::Enumerate, num::NonZeroUsize, str::FromStr};

use anyhow::Result;
use parse_display::Display;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use tokeniser::Input;
use tower_lsp::lsp_types::SemanticTokenType;
use winnow::{
    self,
    error::ParseError,
    stream::{ContainsToken, Stream},
};

use crate::{
    errors::KclError,
    parsing::ast::types::{ItemVisibility, VariableKind},
    source_range::SourceRange,
    CompilationError, ModuleId,
};

mod tokeniser;

pub(crate) use tokeniser::RESERVED_WORDS;

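/// Every numeric suffix which can terminate a number literal, as matched by
/// [`Token::numeric_suffix`]. The `?` entry marks an explicitly unknown unit.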
pub const NUM_SUFFIXES: [&str; 10] = ["mm", "cm", "m", "inch", "in", "ft", "yd", "deg", "rad", "?"];

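/// The unit suffix of a numeric literal, e.g. the `mm` in `5mm`. `None` means
/// the literal carried no suffix; `Unknown` corresponds to the `?` suffix.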
#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize, ts_rs::TS, JsonSchema)]
#[repr(u32)]
pub enum NumericSuffix {
    None,
    Count,
    Length,
    Angle,
    Mm,
    Cm,
    M,
    Inch,
    Ft,
    Yd,
    Deg,
    Rad,
    Unknown,
}

impl NumericSuffix {
    #[allow(dead_code)]
    pub fn is_none(self) -> bool {
        self == Self::None
    }

    pub fn is_some(self) -> bool {
        self != Self::None
    }

    /// A stable byte representation of this suffix, suitable for hashing.
    pub fn digestable_id(&self) -> &[u8] {
        match self {
            NumericSuffix::None => &[],
            NumericSuffix::Count => b"_",
            NumericSuffix::Unknown => b"?",
            NumericSuffix::Length => b"Length",
            NumericSuffix::Angle => b"Angle",
            NumericSuffix::Mm => b"mm",
            NumericSuffix::Cm => b"cm",
            NumericSuffix::M => b"m",
            NumericSuffix::Inch => b"in",
            NumericSuffix::Ft => b"ft",
            NumericSuffix::Yd => b"yd",
            NumericSuffix::Deg => b"deg",
            NumericSuffix::Rad => b"rad",
        }
    }
}

impl FromStr for NumericSuffix {
    type Err = CompilationError;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
            "_" | "Count" => Ok(NumericSuffix::Count),
            "Length" => Ok(NumericSuffix::Length),
            "Angle" => Ok(NumericSuffix::Angle),
            "mm" | "millimeters" => Ok(NumericSuffix::Mm),
            "cm" | "centimeters" => Ok(NumericSuffix::Cm),
            "m" | "meters" => Ok(NumericSuffix::M),
            "inch" | "in" => Ok(NumericSuffix::Inch),
            "ft" | "feet" => Ok(NumericSuffix::Ft),
            "yd" | "yards" => Ok(NumericSuffix::Yd),
            "deg" | "degrees" => Ok(NumericSuffix::Deg),
            "rad" | "radians" => Ok(NumericSuffix::Rad),
            "?" => Ok(NumericSuffix::Unknown),
            _ => Err(CompilationError::err(SourceRange::default(), "invalid unit of measure")),
        }
    }
}

impl fmt::Display for NumericSuffix {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            NumericSuffix::None => Ok(()),
            NumericSuffix::Count => write!(f, "_"),
            NumericSuffix::Unknown => write!(f, "_?"),
            NumericSuffix::Length => write!(f, "Length"),
            NumericSuffix::Angle => write!(f, "Angle"),
            NumericSuffix::Mm => write!(f, "mm"),
            NumericSuffix::Cm => write!(f, "cm"),
            NumericSuffix::M => write!(f, "m"),
            NumericSuffix::Inch => write!(f, "in"),
            NumericSuffix::Ft => write!(f, "ft"),
            NumericSuffix::Yd => write!(f, "yd"),
            NumericSuffix::Deg => write!(f, "deg"),
            NumericSuffix::Rad => write!(f, "rad"),
        }
    }
}

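/// The sequence of tokens produced by lexing one module's source code.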
#[derive(Clone, Debug, PartialEq)]
pub(crate) struct TokenStream {
    tokens: Vec<Token>,
}

impl TokenStream {
    fn new(tokens: Vec<Token>) -> Self {
        Self { tokens }
    }

    /// Remove all `Unknown` tokens from the stream and return them.
    pub(super) fn remove_unknown(&mut self) -> Vec<Token> {
        let tokens = std::mem::take(&mut self.tokens);
        let (tokens, unknown_tokens): (Vec<Token>, Vec<Token>) = tokens
            .into_iter()
            .partition(|token| token.token_type != TokenType::Unknown);
        self.tokens = tokens;
        unknown_tokens
    }

    pub fn iter(&self) -> impl Iterator<Item = &Token> {
        self.tokens.iter()
    }

    pub fn is_empty(&self) -> bool {
        self.tokens.is_empty()
    }

    pub fn as_slice(&self) -> TokenSlice {
        TokenSlice::from(self)
    }
}

impl<'a> From<&'a TokenStream> for TokenSlice<'a> {
    fn from(stream: &'a TokenStream) -> Self {
        TokenSlice {
            start: 0,
            end: stream.tokens.len(),
            stream,
        }
    }
}

impl IntoIterator for TokenStream {
    type Item = Token;

    type IntoIter = std::vec::IntoIter<Token>;

    fn into_iter(self) -> Self::IntoIter {
        self.tokens.into_iter()
    }
}

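/// A borrowed, contiguous window into a [`TokenStream`], used as the input
/// type for the winnow-based parser. Cloning or checkpointing a slice only
/// copies its `start`/`end` bounds, never the underlying tokens.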
#[derive(Debug, Clone)]
pub(crate) struct TokenSlice<'a> {
    stream: &'a TokenStream,
    /// Index of the first token of this slice within `stream`.
    start: usize,
    /// Index one past the last token of this slice within `stream`.
    end: usize,
}

impl<'a> std::ops::Deref for TokenSlice<'a> {
    type Target = [Token];

    fn deref(&self) -> &Self::Target {
        &self.stream.tokens[self.start..self.end]
    }
}

impl<'a> TokenSlice<'a> {
    pub fn token(&self, i: usize) -> &Token {
        &self.stream.tokens[i + self.start]
    }

    pub fn iter(&self) -> impl Iterator<Item = &Token> {
        (**self).iter()
    }

    /// This slice, with its first and last tokens removed.
    pub fn without_ends(&self) -> Self {
        Self {
            start: self.start + 1,
            end: self.end - 1,
            stream: self.stream,
        }
    }

    /// The source range covered by this slice.
    pub fn as_source_range(&self) -> SourceRange {
        let stream_len = self.stream.tokens.len();
        let first_token = if stream_len == self.start {
            &self.stream.tokens[self.start - 1]
        } else {
            self.token(0)
        };
        let last_token = if stream_len == self.end {
            &self.stream.tokens[stream_len - 1]
        } else {
            self.token(self.end - self.start)
        };
        SourceRange::new(first_token.start, last_token.end, last_token.module_id)
    }
}

impl<'a> IntoIterator for TokenSlice<'a> {
    type Item = &'a Token;

    type IntoIter = std::slice::Iter<'a, Token>;

    fn into_iter(self) -> Self::IntoIter {
        self.stream.tokens[self.start..self.end].iter()
    }
}

impl<'a> Stream for TokenSlice<'a> {
    type Token = Token;
    type Slice = Self;
    type IterOffsets = Enumerate<std::vec::IntoIter<Token>>;
    type Checkpoint = Checkpoint;

    fn iter_offsets(&self) -> Self::IterOffsets {
        #[allow(clippy::unnecessary_to_owned)]
        self.to_vec().into_iter().enumerate()
    }

    fn eof_offset(&self) -> usize {
        self.len()
    }

    fn next_token(&mut self) -> Option<Self::Token> {
        let token = self.first()?.clone();
        self.start += 1;
        Some(token)
    }

    fn offset_for<P>(&self, predicate: P) -> Option<usize>
    where
        P: Fn(Self::Token) -> bool,
    {
        self.iter().position(|b| predicate(b.clone()))
    }

    fn offset_at(&self, tokens: usize) -> Result<usize, winnow::error::Needed> {
        if let Some(needed) = tokens.checked_sub(self.len()).and_then(NonZeroUsize::new) {
            Err(winnow::error::Needed::Size(needed))
        } else {
            Ok(tokens)
        }
    }

    fn next_slice(&mut self, offset: usize) -> Self::Slice {
        assert!(self.start + offset <= self.end);

        let next = TokenSlice {
            stream: self.stream,
            start: self.start,
            end: self.start + offset,
        };
        self.start += offset;
        next
    }

    fn checkpoint(&self) -> Self::Checkpoint {
        Checkpoint(self.start, self.end)
    }

    fn reset(&mut self, checkpoint: &Self::Checkpoint) {
        self.start = checkpoint.0;
        self.end = checkpoint.1;
    }

    fn raw(&self) -> &dyn fmt::Debug {
        self
    }
}

impl<'a> winnow::stream::Offset for TokenSlice<'a> {
    fn offset_from(&self, start: &Self) -> usize {
        self.start - start.start
    }
}

impl<'a> winnow::stream::Offset<Checkpoint> for TokenSlice<'a> {
    fn offset_from(&self, start: &Checkpoint) -> usize {
        self.start - start.0
    }
}

impl winnow::stream::Offset for Checkpoint {
    fn offset_from(&self, start: &Self) -> usize {
        self.0 - start.0
    }
}

impl<'a> winnow::stream::StreamIsPartial for TokenSlice<'a> {
    type PartialState = ();

    fn complete(&mut self) -> Self::PartialState {}

    fn restore_partial(&mut self, _: Self::PartialState) {}

    fn is_partial_supported() -> bool {
        false
    }
}

impl<'a> winnow::stream::FindSlice<&str> for TokenSlice<'a> {
    fn find_slice(&self, substr: &str) -> Option<std::ops::Range<usize>> {
        self.iter()
            .enumerate()
            .find_map(|(i, b)| if b.value == substr { Some(i..self.end) } else { None })
    }
}

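/// A saved `(start, end)` position of a [`TokenSlice`], used by winnow to
/// rewind the stream when a parser backtracks.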
#[derive(Clone, Debug)]
pub struct Checkpoint(usize, usize);

#[derive(Debug, PartialEq, Eq, Copy, Clone, Display)]
#[display(style = "camelCase")]
pub enum TokenType {
    /// A number.
    Number,
    /// A word.
    Word,
    /// An operator.
    Operator,
    /// A string.
    String,
    /// A keyword.
    Keyword,
    /// A type.
    Type,
    /// A brace.
    Brace,
    /// A hash.
    Hash,
    /// A bang.
    Bang,
    /// A dollar sign.
    Dollar,
    /// Whitespace.
    Whitespace,
    /// A comma.
    Comma,
    /// A colon.
    Colon,
    /// A double colon: `::`.
    DoubleColon,
    /// A period: `.`.
    Period,
    /// A double period: `..`.
    DoublePeriod,
    /// A line comment.
    LineComment,
    /// A block comment.
    BlockComment,
    /// A function name.
    Function,
    /// A token which could not be lexed.
    Unknown,
    /// The `?` symbol.
    QuestionMark,
    /// The `@` symbol.
    At,
    /// The `;` symbol.
    SemiColon,
}

impl TryFrom<TokenType> for SemanticTokenType {
    type Error = anyhow::Error;

    fn try_from(token_type: TokenType) -> Result<Self> {
        Ok(match token_type {
            TokenType::Number => Self::NUMBER,
            TokenType::Word => Self::VARIABLE,
            TokenType::Keyword => Self::KEYWORD,
            TokenType::Type => Self::TYPE,
            TokenType::Operator => Self::OPERATOR,
            TokenType::QuestionMark => Self::OPERATOR,
            TokenType::String => Self::STRING,
            TokenType::Bang => Self::OPERATOR,
            TokenType::LineComment => Self::COMMENT,
            TokenType::BlockComment => Self::COMMENT,
            TokenType::Function => Self::FUNCTION,
            TokenType::Whitespace
            | TokenType::Brace
            | TokenType::Comma
            | TokenType::Colon
            | TokenType::DoubleColon
            | TokenType::Period
            | TokenType::DoublePeriod
            | TokenType::Hash
            | TokenType::Dollar
            | TokenType::At
            | TokenType::SemiColon
            | TokenType::Unknown => {
                anyhow::bail!("unsupported token type: {:?}", token_type)
            }
        })
    }
}

impl TokenType {
    pub fn is_whitespace(&self) -> bool {
        matches!(self, Self::Whitespace)
    }

    pub fn is_comment(&self) -> bool {
        matches!(self, Self::LineComment | Self::BlockComment)
    }
}

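/// A single lexed token: its type, its text, and where it lives in the source.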
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Token {
    pub token_type: TokenType,
    /// Offset in the source code where this token begins.
    pub start: usize,
    /// Offset in the source code where this token ends.
    pub end: usize,
    pub(super) module_id: ModuleId,
    pub(super) value: String,
}

impl ContainsToken<Token> for (TokenType, &str) {
    fn contains_token(&self, token: Token) -> bool {
        self.0 == token.token_type && self.1 == token.value
    }
}

impl ContainsToken<Token> for TokenType {
    fn contains_token(&self, token: Token) -> bool {
        *self == token.token_type
    }
}

impl Token {
    pub fn from_range(
        range: std::ops::Range<usize>,
        module_id: ModuleId,
        token_type: TokenType,
        value: String,
    ) -> Self {
        Self {
            start: range.start,
            end: range.end,
            module_id,
            value,
            token_type,
        }
    }

    pub fn is_code_token(&self) -> bool {
        !matches!(
            self.token_type,
            TokenType::Whitespace | TokenType::LineComment | TokenType::BlockComment
        )
    }

    pub fn as_source_range(&self) -> SourceRange {
        SourceRange::new(self.start, self.end, self.module_id)
    }

    pub fn as_source_ranges(&self) -> Vec<SourceRange> {
        vec![self.as_source_range()]
    }

    pub fn visibility_keyword(&self) -> Option<ItemVisibility> {
        if !matches!(self.token_type, TokenType::Keyword) {
            return None;
        }
        match self.value.as_str() {
            "export" => Some(ItemVisibility::Export),
            _ => None,
        }
    }

    /// The numeric value of this token, if it is a number token. Everything
    /// from the first `_` or letter onwards (i.e. the suffix) is ignored.
    pub fn numeric_value(&self) -> Option<f64> {
        if self.token_type != TokenType::Number {
            return None;
        }
        let value = &self.value;
        let value = value
            .split_once(|c: char| c == '_' || c.is_ascii_alphabetic())
            .map(|(s, _)| s)
            .unwrap_or(value);
        value.parse().ok()
    }

    pub fn uint_value(&self) -> Option<u32> {
        if self.token_type != TokenType::Number {
            return None;
        }
        let value = &self.value;
        let value = value
            .split_once(|c: char| c == '_' || c.is_ascii_alphabetic())
            .map(|(s, _)| s)
            .unwrap_or(value);
        value.parse().ok()
    }

    /// The unit suffix of this token, if it is a number token.
    pub fn numeric_suffix(&self) -> NumericSuffix {
        if self.token_type != TokenType::Number {
            return NumericSuffix::None;
        }

        if self.value.ends_with('_') {
            return NumericSuffix::Count;
        }

        for suffix in NUM_SUFFIXES {
            if self.value.ends_with(suffix) {
                return suffix.parse().unwrap();
            }
        }

        NumericSuffix::None
    }

    /// Is this token the beginning of a variable/function declaration?
    /// If so, which kind of declaration? If not, returns None.
    pub fn declaration_keyword(&self) -> Option<VariableKind> {
        if !matches!(self.token_type, TokenType::Keyword) {
            return None;
        }
        Some(match self.value.as_str() {
            "fn" => VariableKind::Fn,
            "var" | "let" | "const" => VariableKind::Const,
            _ => return None,
        })
    }
}

impl From<Token> for SourceRange {
    fn from(token: Token) -> Self {
        Self::new(token.start, token.end, token.module_id)
    }
}

impl From<&Token> for SourceRange {
    fn from(token: &Token) -> Self {
        Self::new(token.start, token.end, token.module_id)
    }
}

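/// Lex the given source code, attributing every token to the given module.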
pub fn lex(s: &str, module_id: ModuleId) -> Result<TokenStream, KclError> {
    tokeniser::lex(s, module_id).map_err(From::from)
}

impl From<ParseError<Input<'_>, winnow::error::ContextError>> for KclError {
    fn from(err: ParseError<Input<'_>, winnow::error::ContextError>) -> Self {
        let (input, offset): (Vec<char>, usize) = (err.input().chars().collect(), err.offset());
        let module_id = err.input().state.module_id;

        if offset >= input.len() {
            // The error offset may point one past the end of input on EOF
            // errors, in which case there is no bad token to report.
            return KclError::Lexical(crate::errors::KclErrorDetails::new(
                "unexpected EOF while parsing".to_owned(),
                vec![SourceRange::new(offset, offset, module_id)],
            ));
        }

        let bad_token = &input[offset];
        KclError::Lexical(crate::errors::KclErrorDetails::new(
            format!("found unknown token '{}'", bad_token),
            vec![SourceRange::new(offset, offset + 1, module_id)],
        ))
    }
}
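
// A hedged sketch of the numeric-token behaviour above; it assumes only that
// `ModuleId` implements `Default` (the id is treated as an opaque value here).
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn number_token_value_and_suffix() {
        let token = Token::from_range(0..5, ModuleId::default(), TokenType::Number, "4.5mm".to_owned());
        // The numeric part is everything before the first `_` or letter.
        assert_eq!(token.numeric_value(), Some(4.5));
        // The suffix is matched against `NUM_SUFFIXES`.
        assert_eq!(token.numeric_suffix(), NumericSuffix::Mm);
    }

    #[test]
    fn suffix_accepts_short_and_long_unit_names() {
        assert_eq!("mm".parse::<NumericSuffix>().ok(), Some(NumericSuffix::Mm));
        assert_eq!("millimeters".parse::<NumericSuffix>().ok(), Some(NumericSuffix::Mm));
        assert!("furlong".parse::<NumericSuffix>().is_err());
    }
}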