#![allow(clippy::needless_lifetimes)]

use std::{fmt, iter::Enumerate, num::NonZeroUsize, str::FromStr};

use anyhow::Result;
use parse_display::Display;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use tokeniser::Input;
use tower_lsp::lsp_types::SemanticTokenType;
use winnow::{
    self,
    error::ParseError,
    stream::{ContainsToken, Stream},
};

use crate::{
    errors::KclError,
    parsing::ast::types::{ItemVisibility, VariableKind},
    source_range::SourceRange,
    CompilationError, ModuleId,
};

mod tokeniser;

pub(crate) use tokeniser::RESERVED_WORDS;

pub const NUM_SUFFIXES: [&str; 10] = ["mm", "cm", "m", "inch", "in", "ft", "yd", "deg", "rad", "?"];

#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize, ts_rs::TS, JsonSchema)]
#[repr(u32)]
pub enum NumericSuffix {
    None,
    Count,
    Length,
    Angle,
    Mm,
    Cm,
    M,
    Inch,
    Ft,
    Yd,
    Deg,
    Rad,
    Unknown,
}

impl NumericSuffix {
    #[allow(dead_code)]
    pub fn is_none(self) -> bool {
        self == Self::None
    }

    pub fn is_some(self) -> bool {
        self != Self::None
    }

    pub fn digestable_id(&self) -> &[u8] {
        match self {
            NumericSuffix::None => &[],
            NumericSuffix::Count => b"_",
            NumericSuffix::Unknown => b"?",
            NumericSuffix::Length => b"Length",
            NumericSuffix::Angle => b"Angle",
            NumericSuffix::Mm => b"mm",
            NumericSuffix::Cm => b"cm",
            NumericSuffix::M => b"m",
            NumericSuffix::Inch => b"in",
            NumericSuffix::Ft => b"ft",
            NumericSuffix::Yd => b"yd",
            NumericSuffix::Deg => b"deg",
            NumericSuffix::Rad => b"rad",
        }
    }
}

impl FromStr for NumericSuffix {
    type Err = CompilationError;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
            "_" | "Count" => Ok(NumericSuffix::Count),
            "Length" => Ok(NumericSuffix::Length),
            "Angle" => Ok(NumericSuffix::Angle),
            "mm" | "millimeters" => Ok(NumericSuffix::Mm),
            "cm" | "centimeters" => Ok(NumericSuffix::Cm),
            "m" | "meters" => Ok(NumericSuffix::M),
            "inch" | "in" => Ok(NumericSuffix::Inch),
            "ft" | "feet" => Ok(NumericSuffix::Ft),
            "yd" | "yards" => Ok(NumericSuffix::Yd),
            "deg" | "degrees" => Ok(NumericSuffix::Deg),
            "rad" | "radians" => Ok(NumericSuffix::Rad),
            "?" => Ok(NumericSuffix::Unknown),
            _ => Err(CompilationError::err(SourceRange::default(), "invalid unit of measure")),
        }
    }
}

impl fmt::Display for NumericSuffix {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            NumericSuffix::None => Ok(()),
            NumericSuffix::Count => write!(f, "_"),
            NumericSuffix::Unknown => write!(f, "_?"),
            NumericSuffix::Length => write!(f, "Length"),
            NumericSuffix::Angle => write!(f, "Angle"),
            NumericSuffix::Mm => write!(f, "mm"),
            NumericSuffix::Cm => write!(f, "cm"),
            NumericSuffix::M => write!(f, "m"),
            NumericSuffix::Inch => write!(f, "in"),
            NumericSuffix::Ft => write!(f, "ft"),
            NumericSuffix::Yd => write!(f, "yd"),
            NumericSuffix::Deg => write!(f, "deg"),
            NumericSuffix::Rad => write!(f, "rad"),
        }
    }
}
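
// A minimal usage sketch for the conversions above (illustrative only, not
// from the original source). Note that `FromStr` and `Display` are not exact
// inverses: e.g. `Count` parses from "_" or "Count" but always displays as "_".
//
//     let suffix: NumericSuffix = "mm".parse().unwrap();
//     assert_eq!(suffix, NumericSuffix::Mm);
//     assert_eq!(suffix.to_string(), "mm");
//     assert!("furlong".parse::<NumericSuffix>().is_err());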

#[derive(Clone, Debug, PartialEq)]
pub(crate) struct TokenStream {
    tokens: Vec<Token>,
}

impl TokenStream {
    fn new(tokens: Vec<Token>) -> Self {
        Self { tokens }
    }

    /// Remove all `TokenType::Unknown` tokens from the stream and return them.
    pub(super) fn remove_unknown(&mut self) -> Vec<Token> {
        let tokens = std::mem::take(&mut self.tokens);
        let (tokens, unknown_tokens): (Vec<Token>, Vec<Token>) = tokens
            .into_iter()
            .partition(|token| token.token_type != TokenType::Unknown);
        self.tokens = tokens;
        unknown_tokens
    }

    pub fn iter(&self) -> impl Iterator<Item = &Token> {
        self.tokens.iter()
    }

    pub fn is_empty(&self) -> bool {
        self.tokens.is_empty()
    }

    pub fn as_slice(&self) -> TokenSlice {
        TokenSlice::from(self)
    }
}
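
// Sketch of how a caller might surface unknown tokens as errors before
// parsing (hypothetical call site; `lex` is defined at the bottom of this
// module):
//
//     let mut stream = lex(source, module_id)?;
//     let unknown = stream.remove_unknown();
//     if !unknown.is_empty() {
//         // report each unknown token's `as_source_range()` as an error
//     }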

impl<'a> From<&'a TokenStream> for TokenSlice<'a> {
    fn from(stream: &'a TokenStream) -> Self {
        TokenSlice {
            start: 0,
            end: stream.tokens.len(),
            stream,
        }
    }
}

impl IntoIterator for TokenStream {
    type Item = Token;

    type IntoIter = std::vec::IntoIter<Token>;

    fn into_iter(self) -> Self::IntoIter {
        self.tokens.into_iter()
    }
}

#[derive(Debug, Clone)]
pub(crate) struct TokenSlice<'a> {
    stream: &'a TokenStream,
    /// Index of the first token of this slice within `stream`.
    start: usize,
    /// One past the index of the last token of this slice within `stream`.
    end: usize,
}

impl<'a> std::ops::Deref for TokenSlice<'a> {
    type Target = [Token];

    fn deref(&self) -> &Self::Target {
        &self.stream.tokens[self.start..self.end]
    }
}

impl<'a> TokenSlice<'a> {
    pub fn token(&self, i: usize) -> &Token {
        &self.stream.tokens[i + self.start]
    }

    pub fn iter(&self) -> impl Iterator<Item = &Token> {
        (**self).iter()
    }

    /// Shrink the slice by one token on each end, e.g. to drop a pair of
    /// enclosing delimiters.
    pub fn without_ends(&self) -> Self {
        Self {
            start: self.start + 1,
            end: self.end - 1,
            stream: self.stream,
        }
    }

    /// Compute a source range for this slice, clamping at the boundaries of
    /// the underlying stream.
    pub fn as_source_range(&self) -> SourceRange {
        let stream_len = self.stream.tokens.len();
        let first_token = if stream_len == self.start {
            &self.stream.tokens[self.start - 1]
        } else {
            self.token(0)
        };
        let last_token = if stream_len == self.end {
            &self.stream.tokens[stream_len - 1]
        } else {
            self.token(self.end - self.start)
        };
        SourceRange::new(first_token.start, last_token.end, last_token.module_id)
    }
}
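
// Illustrative sketch (not from the original source): trimming delimiter
// tokens from a slice before handing the interior to a sub-parser.
//
//     // tokens for: ( 1 + 2 )
//     let inner = braced_slice.without_ends(); // tokens for: 1 + 2
//
// Note that `without_ends` assumes the slice holds at least two tokens; on an
// empty slice, `end - 1` would underflow.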

impl<'a> IntoIterator for TokenSlice<'a> {
    type Item = &'a Token;

    type IntoIter = std::slice::Iter<'a, Token>;

    fn into_iter(self) -> Self::IntoIter {
        self.stream.tokens[self.start..self.end].iter()
    }
}

impl<'a> Stream for TokenSlice<'a> {
    type Token = Token;
    type Slice = Self;
    type IterOffsets = Enumerate<std::vec::IntoIter<Token>>;
    type Checkpoint = Checkpoint;

    fn iter_offsets(&self) -> Self::IterOffsets {
        #[allow(clippy::unnecessary_to_owned)]
        self.to_vec().into_iter().enumerate()
    }

    fn eof_offset(&self) -> usize {
        self.len()
    }

    fn next_token(&mut self) -> Option<Self::Token> {
        let token = self.first()?.clone();
        self.start += 1;
        Some(token)
    }

    fn offset_for<P>(&self, predicate: P) -> Option<usize>
    where
        P: Fn(Self::Token) -> bool,
    {
        self.iter().position(|b| predicate(b.clone()))
    }

    fn offset_at(&self, tokens: usize) -> Result<usize, winnow::error::Needed> {
        if let Some(needed) = tokens.checked_sub(self.len()).and_then(NonZeroUsize::new) {
            Err(winnow::error::Needed::Size(needed))
        } else {
            Ok(tokens)
        }
    }

    fn next_slice(&mut self, offset: usize) -> Self::Slice {
        assert!(self.start + offset <= self.end);

        let next = TokenSlice {
            stream: self.stream,
            start: self.start,
            end: self.start + offset,
        };
        self.start += offset;
        next
    }

    fn checkpoint(&self) -> Self::Checkpoint {
        Checkpoint(self.start, self.end)
    }

    fn reset(&mut self, checkpoint: &Self::Checkpoint) {
        self.start = checkpoint.0;
        self.end = checkpoint.1;
    }

    fn raw(&self) -> &dyn fmt::Debug {
        self
    }
}
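
// With this `Stream` impl, winnow parsers can consume a `TokenSlice`
// directly. A minimal sketch (hypothetical parser, not from this module):
// `any` advances the slice by one token via `next_token`, and failed
// alternatives rewind through `checkpoint`/`reset`.
//
//     use winnow::{token::any, Parser};
//
//     fn word(i: &mut TokenSlice) -> winnow::PResult<Token> {
//         any.verify(|t: &Token| t.token_type == TokenType::Word)
//             .parse_next(i)
//     }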

impl<'a> winnow::stream::Offset for TokenSlice<'a> {
    fn offset_from(&self, start: &Self) -> usize {
        self.start - start.start
    }
}

impl<'a> winnow::stream::Offset<Checkpoint> for TokenSlice<'a> {
    fn offset_from(&self, start: &Checkpoint) -> usize {
        self.start - start.0
    }
}

impl winnow::stream::Offset for Checkpoint {
    fn offset_from(&self, start: &Self) -> usize {
        self.0 - start.0
    }
}

impl<'a> winnow::stream::StreamIsPartial for TokenSlice<'a> {
    type PartialState = ();

    fn complete(&mut self) -> Self::PartialState {}

    fn restore_partial(&mut self, _: Self::PartialState) {}

    fn is_partial_supported() -> bool {
        false
    }
}

impl<'a> winnow::stream::FindSlice<&str> for TokenSlice<'a> {
    fn find_slice(&self, substr: &str) -> Option<std::ops::Range<usize>> {
        self.iter()
            .enumerate()
            .find_map(|(i, b)| if b.value == substr { Some(i..self.end) } else { None })
    }
}

/// A saved position in a `TokenSlice` (its `start` and `end`), used by winnow
/// to rewind after a failed parse.
#[derive(Clone, Debug)]
pub struct Checkpoint(usize, usize);

/// The types of tokens.
#[derive(Debug, PartialEq, Eq, Copy, Clone, Display)]
#[display(style = "camelCase")]
pub enum TokenType {
    /// A number.
    Number,
    /// A word.
    Word,
    /// An operator.
    Operator,
    /// A string.
    String,
    /// A keyword.
    Keyword,
    /// A type.
    Type,
    /// A brace.
    Brace,
    /// A hash.
    Hash,
    /// A bang.
    Bang,
    /// A dollar sign.
    Dollar,
    /// Whitespace.
    Whitespace,
    /// A comma.
    Comma,
    /// A colon.
    Colon,
    /// A double colon: `::`.
    DoubleColon,
    /// A period.
    Period,
    /// A double period: `..`.
    DoublePeriod,
    /// A double period and less-than: `..<`.
    DoublePeriodLessThan,
    /// A line comment.
    LineComment,
    /// A block comment.
    BlockComment,
    /// A function name.
    Function,
    /// A token the lexer does not recognize.
    Unknown,
    /// A question mark: `?`.
    QuestionMark,
    /// An at sign: `@`.
    At,
    /// A semicolon: `;`.
    SemiColon,
}

impl TryFrom<TokenType> for SemanticTokenType {
    type Error = anyhow::Error;

    fn try_from(token_type: TokenType) -> Result<Self> {
        Ok(match token_type {
            TokenType::Number => Self::NUMBER,
            TokenType::Word => Self::VARIABLE,
            TokenType::Keyword => Self::KEYWORD,
            TokenType::Type => Self::TYPE,
            TokenType::Operator => Self::OPERATOR,
            TokenType::QuestionMark => Self::OPERATOR,
            TokenType::String => Self::STRING,
            TokenType::Bang => Self::OPERATOR,
            TokenType::LineComment => Self::COMMENT,
            TokenType::BlockComment => Self::COMMENT,
            TokenType::Function => Self::FUNCTION,
            TokenType::Whitespace
            | TokenType::Brace
            | TokenType::Comma
            | TokenType::Colon
            | TokenType::DoubleColon
            | TokenType::Period
            | TokenType::DoublePeriod
            | TokenType::DoublePeriodLessThan
            | TokenType::Hash
            | TokenType::Dollar
            | TokenType::At
            | TokenType::SemiColon
            | TokenType::Unknown => {
                anyhow::bail!("unsupported token type: {:?}", token_type)
            }
        })
    }
}
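
// Illustrative call (not from the original source): mapping a lexed token's
// type to an LSP semantic token type, skipping the unsupported ones.
//
//     if let Ok(semantic) = SemanticTokenType::try_from(token.token_type) {
//         // emit `semantic` for this token's source range
//     }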

impl TokenType {
    pub fn is_whitespace(&self) -> bool {
        matches!(self, Self::Whitespace)
    }

    pub fn is_comment(&self) -> bool {
        matches!(self, Self::LineComment | Self::BlockComment)
    }
}

#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Token {
    pub token_type: TokenType,
    /// Offset in the source code where this token begins.
    pub start: usize,
    /// Offset in the source code where this token ends.
    pub end: usize,
    pub(super) module_id: ModuleId,
    pub(super) value: String,
}

impl ContainsToken<Token> for (TokenType, &str) {
    fn contains_token(&self, token: Token) -> bool {
        self.0 == token.token_type && self.1 == token.value
    }
}

impl ContainsToken<Token> for TokenType {
    fn contains_token(&self, token: Token) -> bool {
        *self == token.token_type
    }
}
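
// These `ContainsToken` impls let winnow's token combinators match on a bare
// `TokenType`, or on a `(TokenType, value)` pair. A hypothetical sketch:
//
//     use winnow::{token::one_of, Parser};
//
//     fn comma(i: &mut TokenSlice) -> winnow::PResult<Token> {
//         one_of(TokenType::Comma).parse_next(i)
//     }
//
//     fn plus(i: &mut TokenSlice) -> winnow::PResult<Token> {
//         one_of((TokenType::Operator, "+")).parse_next(i)
//     }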

impl Token {
    pub fn from_range(
        range: std::ops::Range<usize>,
        module_id: ModuleId,
        token_type: TokenType,
        value: String,
    ) -> Self {
        Self {
            start: range.start,
            end: range.end,
            module_id,
            value,
            token_type,
        }
    }

    pub fn is_code_token(&self) -> bool {
        !matches!(
            self.token_type,
            TokenType::Whitespace | TokenType::LineComment | TokenType::BlockComment
        )
    }

    pub fn as_source_range(&self) -> SourceRange {
        SourceRange::new(self.start, self.end, self.module_id)
    }

    pub fn as_source_ranges(&self) -> Vec<SourceRange> {
        vec![self.as_source_range()]
    }

    pub fn visibility_keyword(&self) -> Option<ItemVisibility> {
        if !matches!(self.token_type, TokenType::Keyword) {
            return None;
        }
        match self.value.as_str() {
            "export" => Some(ItemVisibility::Export),
            _ => None,
        }
    }

    /// The numeric part of a number token, i.e. its value with any suffix
    /// (e.g. `mm`, `deg`) stripped.
    pub fn numeric_value(&self) -> Option<f64> {
        if self.token_type != TokenType::Number {
            return None;
        }
        let value = &self.value;
        let value = value
            .split_once(|c: char| c == '_' || c.is_ascii_alphabetic())
            .map(|(s, _)| s)
            .unwrap_or(value);
        value.parse().ok()
    }

    pub fn uint_value(&self) -> Option<u32> {
        if self.token_type != TokenType::Number {
            return None;
        }
        let value = &self.value;
        let value = value
            .split_once(|c: char| c == '_' || c.is_ascii_alphabetic())
            .map(|(s, _)| s)
            .unwrap_or(value);
        value.parse().ok()
    }

    /// The unit suffix of a number token, if any.
    pub fn numeric_suffix(&self) -> NumericSuffix {
        if self.token_type != TokenType::Number {
            return NumericSuffix::None;
        }

        if self.value.ends_with('_') {
            return NumericSuffix::Count;
        }

        for suffix in NUM_SUFFIXES {
            if self.value.ends_with(suffix) {
                return suffix.parse().unwrap();
            }
        }

        NumericSuffix::None
    }

    /// If this token is a keyword that starts a declaration, which kind of
    /// declaration does it start?
    pub fn declaration_keyword(&self) -> Option<VariableKind> {
        if !matches!(self.token_type, TokenType::Keyword) {
            return None;
        }
        Some(match self.value.as_str() {
            "fn" => VariableKind::Fn,
            "var" | "let" | "const" => VariableKind::Const,
            _ => return None,
        })
    }
}
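
// Worked example (illustrative, not from the original source): for a number
// token whose value is "45.5deg",
//
//     token.numeric_value()  == Some(45.5)          // "45.5" before the first letter, parsed as f64
//     token.numeric_suffix() == NumericSuffix::Deg  // "deg" matched from NUM_SUFFIXES
//     token.uint_value()     == None                // "45.5" does not parse as u32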

impl From<Token> for SourceRange {
    fn from(token: Token) -> Self {
        Self::new(token.start, token.end, token.module_id)
    }
}

impl From<&Token> for SourceRange {
    fn from(token: &Token) -> Self {
        Self::new(token.start, token.end, token.module_id)
    }
}

/// Lex the given source code into a token stream.
pub fn lex(s: &str, module_id: ModuleId) -> Result<TokenStream, KclError> {
    tokeniser::lex(s, module_id).map_err(From::from)
}
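
// Hypothetical usage sketch (the module id and source text are made up):
//
//     let module_id = ModuleId::default();
//     let stream = lex("x = 45.5deg", module_id)?;
//     for token in stream.iter().filter(|t| t.is_code_token()) {
//         // whitespace and comments are skipped here
//     }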

impl From<ParseError<Input<'_>, winnow::error::ContextError>> for KclError {
    fn from(err: ParseError<Input<'_>, winnow::error::ContextError>) -> Self {
        let (input, offset): (Vec<char>, usize) = (err.input().chars().collect(), err.offset());
        let module_id = err.input().state.module_id;

        if offset >= input.len() {
            // winnow reports an offset, not an index, so it can equal
            // `input.len()` when the failure is at end of input.
            return KclError::Lexical(crate::errors::KclErrorDetails::new(
                "unexpected EOF while parsing".to_owned(),
                vec![SourceRange::new(offset, offset, module_id)],
            ));
        }

        // The lexer rejected the character at `offset`; point the error at it.
        let bad_token = &input[offset];
        KclError::Lexical(crate::errors::KclErrorDetails::new(
            format!("found unknown token '{}'", bad_token),
            vec![SourceRange::new(offset, offset + 1, module_id)],
        ))
    }
}