#![allow(clippy::needless_lifetimes)]

use std::{fmt, iter::Enumerate, num::NonZeroUsize, str::FromStr};

use anyhow::Result;
use parse_display::Display;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use tokeniser::Input;
use tower_lsp::lsp_types::SemanticTokenType;
use winnow::{
    self,
    error::ParseError,
    stream::{ContainsToken, Stream},
};

use crate::{
    errors::KclError,
    parsing::ast::types::{ItemVisibility, VariableKind},
    source_range::SourceRange,
    CompilationError, ModuleId,
};

mod tokeniser;

pub(crate) use tokeniser::RESERVED_WORDS;

pub const NUM_SUFFIXES: [&str; 9] = ["mm", "cm", "m", "inch", "in", "ft", "yd", "deg", "rad"];

#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize, ts_rs::TS, JsonSchema)]
#[repr(u32)]
pub enum NumericSuffix {
    None,
    Count,
    Mm,
    Cm,
    M,
    Inch,
    Ft,
    Yd,
    Deg,
    Rad,
}

impl NumericSuffix {
    #[allow(dead_code)]
    pub fn is_none(self) -> bool {
        self == Self::None
    }

    pub fn is_some(self) -> bool {
        self != Self::None
    }

    pub fn digestable_id(&self) -> &[u8] {
        match self {
            NumericSuffix::None => &[],
            NumericSuffix::Count => b"_",
            NumericSuffix::Mm => b"mm",
            NumericSuffix::Cm => b"cm",
            NumericSuffix::M => b"m",
            NumericSuffix::Inch => b"in",
            NumericSuffix::Ft => b"ft",
            NumericSuffix::Yd => b"yd",
            NumericSuffix::Deg => b"deg",
            NumericSuffix::Rad => b"rad",
        }
    }
}

impl FromStr for NumericSuffix {
    type Err = CompilationError;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
            "_" => Ok(NumericSuffix::Count),
            "mm" | "millimeters" => Ok(NumericSuffix::Mm),
            "cm" | "centimeters" => Ok(NumericSuffix::Cm),
            "m" | "meters" => Ok(NumericSuffix::M),
            "inch" | "in" => Ok(NumericSuffix::Inch),
            "ft" | "feet" => Ok(NumericSuffix::Ft),
            "yd" | "yards" => Ok(NumericSuffix::Yd),
            "deg" | "degrees" => Ok(NumericSuffix::Deg),
            "rad" | "radians" => Ok(NumericSuffix::Rad),
            _ => Err(CompilationError::err(SourceRange::default(), "invalid unit of measure")),
        }
    }
}

impl fmt::Display for NumericSuffix {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            NumericSuffix::None => Ok(()),
            NumericSuffix::Count => write!(f, "_"),
            NumericSuffix::Mm => write!(f, "mm"),
            NumericSuffix::Cm => write!(f, "cm"),
            NumericSuffix::M => write!(f, "m"),
            NumericSuffix::Inch => write!(f, "in"),
            NumericSuffix::Ft => write!(f, "ft"),
            NumericSuffix::Yd => write!(f, "yd"),
            NumericSuffix::Deg => write!(f, "deg"),
            NumericSuffix::Rad => write!(f, "rad"),
        }
    }
}
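
// A minimal sketch (not part of the original file) of how `FromStr` and
// `Display` relate: parsing accepts both short and long unit names, while
// `Display` always renders the canonical short form.
#[cfg(test)]
mod numeric_suffix_examples {
    use super::*;

    #[test]
    fn parse_then_display_normalizes_unit_names() {
        let suffix: NumericSuffix = "millimeters".parse().unwrap();
        assert_eq!(suffix, NumericSuffix::Mm);
        // The canonical short form comes back out.
        assert_eq!(suffix.to_string(), "mm");
        // Unrecognized units are compilation errors.
        assert!("furlongs".parse::<NumericSuffix>().is_err());
    }
}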

#[derive(Clone, Debug, PartialEq)]
pub(crate) struct TokenStream {
    tokens: Vec<Token>,
}

impl TokenStream {
    fn new(tokens: Vec<Token>) -> Self {
        Self { tokens }
    }

    pub(super) fn remove_unknown(&mut self) -> Vec<Token> {
        let tokens = std::mem::take(&mut self.tokens);
        let (tokens, unknown_tokens): (Vec<Token>, Vec<Token>) = tokens
            .into_iter()
            .partition(|token| token.token_type != TokenType::Unknown);
        self.tokens = tokens;
        unknown_tokens
    }

    pub fn iter(&self) -> impl Iterator<Item = &Token> {
        self.tokens.iter()
    }

    pub fn is_empty(&self) -> bool {
        self.tokens.is_empty()
    }

    pub fn as_slice(&self) -> TokenSlice {
        TokenSlice::from(self)
    }
}
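
// Illustrative sketch of `remove_unknown`: lex a program, strip any
// `Unknown` tokens out of the stream, and keep the rest for parsing.
// Assumes `ModuleId` implements `Default` (as it does elsewhere in this
// crate); the input string is arbitrary.
#[cfg(test)]
mod token_stream_examples {
    use super::*;

    #[test]
    fn remove_unknown_partitions_tokens() {
        let mut stream = lex("x = 1", ModuleId::default()).unwrap();
        let unknown = stream.remove_unknown();
        // A well-formed input produces no unknown tokens...
        assert!(unknown.is_empty());
        // ...and everything left in the stream is a known token type.
        assert!(stream.iter().all(|t| t.token_type != TokenType::Unknown));
    }
}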

impl<'a> From<&'a TokenStream> for TokenSlice<'a> {
    fn from(stream: &'a TokenStream) -> Self {
        TokenSlice {
            start: 0,
            end: stream.tokens.len(),
            stream,
        }
    }
}

impl IntoIterator for TokenStream {
    type Item = Token;

    type IntoIter = std::vec::IntoIter<Token>;

    fn into_iter(self) -> Self::IntoIter {
        self.tokens.into_iter()
    }
}

#[derive(Debug, Clone)]
pub(crate) struct TokenSlice<'a> {
    stream: &'a TokenStream,
    /// Index of the first token of this slice within `stream`.
    start: usize,
    /// Index one past the last token of this slice within `stream` (exclusive).
    end: usize,
}

impl<'a> std::ops::Deref for TokenSlice<'a> {
    type Target = [Token];

    fn deref(&self) -> &Self::Target {
        &self.stream.tokens[self.start..self.end]
    }
}

impl<'a> TokenSlice<'a> {
    pub fn token(&self, i: usize) -> &Token {
        &self.stream.tokens[i + self.start]
    }

    pub fn iter(&self) -> impl Iterator<Item = &Token> {
        (**self).iter()
    }

    pub fn without_ends(&self) -> Self {
        Self {
            start: self.start + 1,
            end: self.end - 1,
            stream: self.stream,
        }
    }

    pub fn as_source_range(&self) -> SourceRange {
        let stream_len = self.stream.tokens.len();
        // If the slice starts or ends at the end of the stream, fall back to
        // the stream's final token so an empty tail still maps to a range.
        let first_token = if stream_len == self.start {
            &self.stream.tokens[self.start - 1]
        } else {
            self.token(0)
        };
        let last_token = if stream_len == self.end {
            &self.stream.tokens[stream_len - 1]
        } else {
            self.token(self.end - self.start)
        };
        SourceRange::new(first_token.start, last_token.end, last_token.module_id)
    }
}
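
// Sketch (an assumed example, not from the original file): `without_ends`
// shrinks the window by one token on each side, which is handy for dropping
// a pair of enclosing delimiters before parsing the interior. Assumes the
// tokeniser classifies `[` and `]` as `Brace` tokens.
#[cfg(test)]
mod token_slice_examples {
    use super::*;

    #[test]
    fn without_ends_drops_delimiters() {
        let stream = lex("[1]", ModuleId::default()).unwrap();
        let slice = stream.as_slice();
        assert_eq!(slice.len(), 3);
        let inner = slice.without_ends();
        assert_eq!(inner.len(), 1);
        assert_eq!(inner.token(0).value, "1");
    }
}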

impl<'a> IntoIterator for TokenSlice<'a> {
    type Item = &'a Token;

    type IntoIter = std::slice::Iter<'a, Token>;

    fn into_iter(self) -> Self::IntoIter {
        self.stream.tokens[self.start..self.end].iter()
    }
}

impl<'a> Stream for TokenSlice<'a> {
    type Token = Token;
    type Slice = Self;
    type IterOffsets = Enumerate<std::vec::IntoIter<Token>>;
    type Checkpoint = Checkpoint;

    fn iter_offsets(&self) -> Self::IterOffsets {
        #[allow(clippy::unnecessary_to_owned)]
        self.to_vec().into_iter().enumerate()
    }

    fn eof_offset(&self) -> usize {
        self.len()
    }

    fn next_token(&mut self) -> Option<Self::Token> {
        let token = self.first()?.clone();
        self.start += 1;
        Some(token)
    }

    fn offset_for<P>(&self, predicate: P) -> Option<usize>
    where
        P: Fn(Self::Token) -> bool,
    {
        self.iter().position(|b| predicate(b.clone()))
    }

    fn offset_at(&self, tokens: usize) -> Result<usize, winnow::error::Needed> {
        if let Some(needed) = tokens.checked_sub(self.len()).and_then(NonZeroUsize::new) {
            Err(winnow::error::Needed::Size(needed))
        } else {
            Ok(tokens)
        }
    }

    fn next_slice(&mut self, offset: usize) -> Self::Slice {
        assert!(self.start + offset <= self.end);

        let next = TokenSlice {
            stream: self.stream,
            start: self.start,
            end: self.start + offset,
        };
        self.start += offset;
        next
    }

    fn checkpoint(&self) -> Self::Checkpoint {
        Checkpoint(self.start, self.end)
    }

    fn reset(&mut self, checkpoint: &Self::Checkpoint) {
        self.start = checkpoint.0;
        self.end = checkpoint.1;
    }

    fn raw(&self) -> &dyn fmt::Debug {
        self
    }
}
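
// A hedged sketch of the `Stream` contract winnow depends on: consuming a
// token advances the window, and `reset` rewinds it to a saved checkpoint.
// Assumes `ModuleId::default()` exists; the input program is arbitrary.
#[cfg(test)]
mod stream_impl_examples {
    use super::*;

    #[test]
    fn checkpoint_and_reset_rewind_the_slice() {
        let stream = lex("1 + 2", ModuleId::default()).unwrap();
        let mut slice = stream.as_slice();
        let checkpoint = slice.checkpoint();
        let first = slice.next_token().unwrap();
        assert_eq!(first.token_type, TokenType::Number);
        slice.reset(&checkpoint);
        // After rewinding, the same token is yielded again.
        assert_eq!(slice.next_token(), Some(first));
    }
}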

impl<'a> winnow::stream::Offset for TokenSlice<'a> {
    fn offset_from(&self, start: &Self) -> usize {
        self.start - start.start
    }
}

impl<'a> winnow::stream::Offset<Checkpoint> for TokenSlice<'a> {
    fn offset_from(&self, start: &Checkpoint) -> usize {
        self.start - start.0
    }
}

impl winnow::stream::Offset for Checkpoint {
    fn offset_from(&self, start: &Self) -> usize {
        self.0 - start.0
    }
}

impl<'a> winnow::stream::StreamIsPartial for TokenSlice<'a> {
    type PartialState = ();

    fn complete(&mut self) -> Self::PartialState {}

    fn restore_partial(&mut self, _: Self::PartialState) {}

    fn is_partial_supported() -> bool {
        false
    }
}

impl<'a> winnow::stream::FindSlice<&str> for TokenSlice<'a> {
    fn find_slice(&self, substr: &str) -> Option<std::ops::Range<usize>> {
        self.iter()
            .enumerate()
            .find_map(|(i, b)| if b.value == substr { Some(i..self.end) } else { None })
    }
}

#[derive(Clone, Debug)]
pub struct Checkpoint(usize, usize);

#[derive(Debug, PartialEq, Eq, Copy, Clone, Display)]
#[display(style = "camelCase")]
pub enum TokenType {
    /// A number.
    Number,
    /// A word.
    Word,
    /// An operator.
    Operator,
    /// A string literal.
    String,
    /// A keyword.
    Keyword,
    /// A type.
    Type,
    /// A brace.
    Brace,
    /// A hash: `#`.
    Hash,
    /// A bang: `!`.
    Bang,
    /// A dollar sign: `$`.
    Dollar,
    /// Whitespace.
    Whitespace,
    /// A comma.
    Comma,
    /// A colon.
    Colon,
    /// A double colon: `::`.
    DoubleColon,
    /// A period.
    Period,
    /// A double period: `..`.
    DoublePeriod,
    /// A line comment.
    LineComment,
    /// A block comment.
    BlockComment,
    /// A function name.
    Function,
    /// A lexeme that could not be classified.
    Unknown,
    /// A question mark: `?`.
    QuestionMark,
    /// An at sign: `@`.
    At,
    /// A semicolon: `;`.
    SemiColon,
}

impl TryFrom<TokenType> for SemanticTokenType {
    type Error = anyhow::Error;

    fn try_from(token_type: TokenType) -> Result<Self> {
        Ok(match token_type {
            TokenType::Number => Self::NUMBER,
            TokenType::Word => Self::VARIABLE,
            TokenType::Keyword => Self::KEYWORD,
            TokenType::Type => Self::TYPE,
            TokenType::Operator => Self::OPERATOR,
            TokenType::QuestionMark => Self::OPERATOR,
            TokenType::String => Self::STRING,
            TokenType::Bang => Self::OPERATOR,
            TokenType::LineComment => Self::COMMENT,
            TokenType::BlockComment => Self::COMMENT,
            TokenType::Function => Self::FUNCTION,
            TokenType::Whitespace
            | TokenType::Brace
            | TokenType::Comma
            | TokenType::Colon
            | TokenType::DoubleColon
            | TokenType::Period
            | TokenType::DoublePeriod
            | TokenType::Hash
            | TokenType::Dollar
            | TokenType::At
            | TokenType::SemiColon
            | TokenType::Unknown => {
                anyhow::bail!("unsupported token type: {:?}", token_type)
            }
        })
    }
}

impl TokenType {
    pub fn is_whitespace(&self) -> bool {
        matches!(self, Self::Whitespace)
    }

    pub fn is_comment(&self) -> bool {
        matches!(self, Self::LineComment | Self::BlockComment)
    }
}

#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Token {
    pub token_type: TokenType,
    /// Offset in the source code where this token begins.
    pub start: usize,
    /// Offset in the source code where this token ends.
    pub end: usize,
    pub(super) module_id: ModuleId,
    pub(super) value: String,
}

impl ContainsToken<Token> for (TokenType, &str) {
    fn contains_token(&self, token: Token) -> bool {
        self.0 == token.token_type && self.1 == token.value
    }
}

impl ContainsToken<Token> for TokenType {
    fn contains_token(&self, token: Token) -> bool {
        *self == token.token_type
    }
}

impl Token {
    pub fn from_range(
        range: std::ops::Range<usize>,
        module_id: ModuleId,
        token_type: TokenType,
        value: String,
    ) -> Self {
        Self {
            start: range.start,
            end: range.end,
            module_id,
            value,
            token_type,
        }
    }

    pub fn is_code_token(&self) -> bool {
        !matches!(
            self.token_type,
            TokenType::Whitespace | TokenType::LineComment | TokenType::BlockComment
        )
    }

    pub fn as_source_range(&self) -> SourceRange {
        SourceRange::new(self.start, self.end, self.module_id)
    }

    pub fn as_source_ranges(&self) -> Vec<SourceRange> {
        vec![self.as_source_range()]
    }

    pub fn visibility_keyword(&self) -> Option<ItemVisibility> {
        if !matches!(self.token_type, TokenType::Keyword) {
            return None;
        }
        match self.value.as_str() {
            "export" => Some(ItemVisibility::Export),
            _ => None,
        }
    }

    pub fn numeric_value(&self) -> Option<f64> {
        if self.token_type != TokenType::Number {
            return None;
        }
        let value = &self.value;
        // Strip a trailing unit suffix (e.g. `mm`) or `_` before parsing
        // the magnitude.
        let value = value
            .split_once(|c: char| c == '_' || c.is_ascii_alphabetic())
            .map(|(s, _)| s)
            .unwrap_or(value);
        value.parse().ok()
    }

    pub fn uint_value(&self) -> Option<u32> {
        if self.token_type != TokenType::Number {
            return None;
        }
        let value = &self.value;
        let value = value
            .split_once(|c: char| c == '_' || c.is_ascii_alphabetic())
            .map(|(s, _)| s)
            .unwrap_or(value);
        value.parse().ok()
    }

    pub fn numeric_suffix(&self) -> NumericSuffix {
        if self.token_type != TokenType::Number {
            return NumericSuffix::None;
        }

        if self.value.ends_with('_') {
            return NumericSuffix::Count;
        }

        // Longer suffixes (`mm`, `inch`) come before their prefixes (`m`,
        // `in`) in NUM_SUFFIXES, so the first match is the most specific.
        for suffix in NUM_SUFFIXES {
            if self.value.ends_with(suffix) {
                return suffix.parse().unwrap();
            }
        }

        NumericSuffix::None
    }

    pub fn declaration_keyword(&self) -> Option<VariableKind> {
        if !matches!(self.token_type, TokenType::Keyword) {
            return None;
        }
        Some(match self.value.as_str() {
            "fn" => VariableKind::Fn,
            "var" | "let" | "const" => VariableKind::Const,
            _ => return None,
        })
    }
}
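
// Sketch of how a number token splits into magnitude and unit. Assumes the
// tokeniser emits a suffixed literal like `42mm` as a single `Number`
// token, which is the shape `numeric_suffix` above is written against.
#[cfg(test)]
mod token_examples {
    use super::*;

    #[test]
    fn numeric_value_and_suffix() {
        let stream = lex("42mm", ModuleId::default()).unwrap();
        let token = stream.iter().next().unwrap();
        assert_eq!(token.token_type, TokenType::Number);
        assert_eq!(token.numeric_value(), Some(42.0));
        assert_eq!(token.numeric_suffix(), NumericSuffix::Mm);
    }
}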

impl From<Token> for SourceRange {
    fn from(token: Token) -> Self {
        Self::new(token.start, token.end, token.module_id)
    }
}

impl From<&Token> for SourceRange {
    fn from(token: &Token) -> Self {
        Self::new(token.start, token.end, token.module_id)
    }
}

pub fn lex(s: &str, module_id: ModuleId) -> Result<TokenStream, KclError> {
    tokeniser::lex(s, module_id).map_err(From::from)
}

impl From<ParseError<Input<'_>, winnow::error::ContextError>> for KclError {
    fn from(err: ParseError<Input<'_>, winnow::error::ContextError>) -> Self {
        let (input, offset): (Vec<char>, usize) = (err.input().chars().collect(), err.offset());
        let module_id = err.input().state.module_id;

        if offset >= input.len() {
            // An offset at or past the end of input means the lexer ran out
            // of input mid-token.
            return KclError::Lexical(crate::errors::KclErrorDetails {
                source_ranges: vec![SourceRange::new(offset, offset, module_id)],
                message: "unexpected EOF while parsing".to_string(),
            });
        }

        // Otherwise, point at the single character that could not be lexed.
        let bad_token = &input[offset];
        KclError::Lexical(crate::errors::KclErrorDetails {
            source_ranges: vec![SourceRange::new(offset, offset + 1, module_id)],
            message: format!("found unknown token '{}'", bad_token),
        })
    }
}