#![allow(clippy::needless_lifetimes)]

use std::{fmt, iter::Enumerate, num::NonZeroUsize, str::FromStr};

use anyhow::Result;
use parse_display::Display;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use tokeniser::Input;
use tower_lsp::lsp_types::SemanticTokenType;
use winnow::{
    self,
    error::ParseError,
    stream::{ContainsToken, Stream},
};

use crate::{
    errors::KclError,
    parsing::ast::types::{ItemVisibility, VariableKind},
    source_range::SourceRange,
    CompilationError, ModuleId,
};

mod tokeniser;

pub(crate) use tokeniser::RESERVED_WORDS;

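/// Suffixes that may terminate a number literal. Longer suffixes are listed
/// before their own prefixes ("mm" before "m", "inch" before "in") so that an
/// `ends_with` scan over this array finds the longest match first.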
pub const NUM_SUFFIXES: [&str; 9] = ["mm", "cm", "m", "inch", "in", "ft", "yd", "deg", "rad"];

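/// The suffix of a number literal, denoting its units: a concrete unit such as
/// `Mm` or `Deg`, a unit class (`Length`, `Angle`), the unitless `Count` (`_`),
/// or `None` when the literal carries no suffix.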
#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize, ts_rs::TS, JsonSchema)]
#[repr(u32)]
pub enum NumericSuffix {
    None,
    Count,
    Length,
    Angle,
    Mm,
    Cm,
    M,
    Inch,
    Ft,
    Yd,
    Deg,
    Rad,
}

impl NumericSuffix {
    #[allow(dead_code)]
    pub fn is_none(self) -> bool {
        self == Self::None
    }

    pub fn is_some(self) -> bool {
        self != Self::None
    }

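    /// A stable byte identifier for this suffix, e.g. for feeding into an AST
    /// digest.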
    pub fn digestable_id(&self) -> &[u8] {
        match self {
            NumericSuffix::None => &[],
            NumericSuffix::Count => b"_",
            NumericSuffix::Length => b"Length",
            NumericSuffix::Angle => b"Angle",
            NumericSuffix::Mm => b"mm",
            NumericSuffix::Cm => b"cm",
            NumericSuffix::M => b"m",
            NumericSuffix::Inch => b"in",
            NumericSuffix::Ft => b"ft",
            NumericSuffix::Yd => b"yd",
            NumericSuffix::Deg => b"deg",
            NumericSuffix::Rad => b"rad",
        }
    }
}

impl FromStr for NumericSuffix {
    type Err = CompilationError;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
            "_" | "Count" => Ok(NumericSuffix::Count),
            "Length" => Ok(NumericSuffix::Length),
            "Angle" => Ok(NumericSuffix::Angle),
            "mm" | "millimeters" => Ok(NumericSuffix::Mm),
            "cm" | "centimeters" => Ok(NumericSuffix::Cm),
            "m" | "meters" => Ok(NumericSuffix::M),
            "inch" | "in" => Ok(NumericSuffix::Inch),
            "ft" | "feet" => Ok(NumericSuffix::Ft),
            "yd" | "yards" => Ok(NumericSuffix::Yd),
            "deg" | "degrees" => Ok(NumericSuffix::Deg),
            "rad" | "radians" => Ok(NumericSuffix::Rad),
            _ => Err(CompilationError::err(SourceRange::default(), "invalid unit of measure")),
        }
    }
}

impl fmt::Display for NumericSuffix {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            NumericSuffix::None => Ok(()),
            NumericSuffix::Count => write!(f, "_"),
            NumericSuffix::Length => write!(f, "Length"),
            NumericSuffix::Angle => write!(f, "Angle"),
            NumericSuffix::Mm => write!(f, "mm"),
            NumericSuffix::Cm => write!(f, "cm"),
            NumericSuffix::M => write!(f, "m"),
            NumericSuffix::Inch => write!(f, "in"),
            NumericSuffix::Ft => write!(f, "ft"),
            NumericSuffix::Yd => write!(f, "yd"),
            NumericSuffix::Deg => write!(f, "deg"),
            NumericSuffix::Rad => write!(f, "rad"),
        }
    }
}

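/// The output of the lexer: an owned, ordered buffer of tokens.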
#[derive(Clone, Debug, PartialEq)]
pub(crate) struct TokenStream {
    tokens: Vec<Token>,
}

impl TokenStream {
    fn new(tokens: Vec<Token>) -> Self {
        Self { tokens }
    }

    pub(super) fn remove_unknown(&mut self) -> Vec<Token> {
        let tokens = std::mem::take(&mut self.tokens);
        let (tokens, unknown_tokens): (Vec<Token>, Vec<Token>) = tokens
            .into_iter()
            .partition(|token| token.token_type != TokenType::Unknown);
        self.tokens = tokens;
        unknown_tokens
    }

    pub fn iter(&self) -> impl Iterator<Item = &Token> {
        self.tokens.iter()
    }

    pub fn is_empty(&self) -> bool {
        self.tokens.is_empty()
    }

    pub fn as_slice(&self) -> TokenSlice {
        TokenSlice::from(self)
    }
}

impl<'a> From<&'a TokenStream> for TokenSlice<'a> {
    fn from(stream: &'a TokenStream) -> Self {
        TokenSlice {
            start: 0,
            end: stream.tokens.len(),
            stream,
        }
    }
}

impl IntoIterator for TokenStream {
    type Item = Token;

    type IntoIter = std::vec::IntoIter<Token>;

    fn into_iter(self) -> Self::IntoIter {
        self.tokens.into_iter()
    }
}

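/// A borrowed, contiguous view into a `TokenStream`, delimited by `start`
/// (inclusive) and `end` (exclusive) indices. This is the input type the
/// winnow parsers consume.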
#[derive(Debug, Clone)]
pub(crate) struct TokenSlice<'a> {
    stream: &'a TokenStream,
    /// Index of the first token in the slice.
    start: usize,
    /// Index one past the last token in the slice.
    end: usize,
}

impl<'a> std::ops::Deref for TokenSlice<'a> {
    type Target = [Token];

    fn deref(&self) -> &Self::Target {
        &self.stream.tokens[self.start..self.end]
    }
}

impl<'a> TokenSlice<'a> {
    pub fn token(&self, i: usize) -> &Token {
        &self.stream.tokens[i + self.start]
    }

    pub fn iter(&self) -> impl Iterator<Item = &Token> {
        (**self).iter()
    }

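    /// Shrink the slice by one token at each end, e.g. to drop a pair of
    /// enclosing delimiters.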
    pub fn without_ends(&self) -> Self {
        Self {
            start: self.start + 1,
            end: self.end - 1,
            stream: self.stream,
        }
    }

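    /// The source range covered by this slice. When the slice sits at the very
    /// end of the stream, the nearest real token is used so the range still
    /// points at valid source.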
    pub fn as_source_range(&self) -> SourceRange {
        let stream_len = self.stream.tokens.len();
        let first_token = if stream_len == self.start {
            &self.stream.tokens[self.start - 1]
        } else {
            self.token(0)
        };
        let last_token = if stream_len == self.end {
            &self.stream.tokens[stream_len - 1]
        } else {
            self.token(self.end - self.start)
        };
        SourceRange::new(first_token.start, last_token.end, last_token.module_id)
    }
}

impl<'a> IntoIterator for TokenSlice<'a> {
    type Item = &'a Token;

    type IntoIter = std::slice::Iter<'a, Token>;

    fn into_iter(self) -> Self::IntoIter {
        self.stream.tokens[self.start..self.end].iter()
    }
}

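/// Implementing winnow's `Stream` trait lets the parser consume a `TokenSlice`
/// token by token, with `Checkpoint`s providing cheap backtracking: advancing
/// only moves the `start` index, and resetting restores the saved indices.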
impl<'a> Stream for TokenSlice<'a> {
    type Token = Token;
    type Slice = Self;
    type IterOffsets = Enumerate<std::vec::IntoIter<Token>>;
    type Checkpoint = Checkpoint;

    fn iter_offsets(&self) -> Self::IterOffsets {
        #[allow(clippy::unnecessary_to_owned)]
        self.to_vec().into_iter().enumerate()
    }

    fn eof_offset(&self) -> usize {
        self.len()
    }

    fn next_token(&mut self) -> Option<Self::Token> {
        let token = self.first()?.clone();
        self.start += 1;
        Some(token)
    }

    fn offset_for<P>(&self, predicate: P) -> Option<usize>
    where
        P: Fn(Self::Token) -> bool,
    {
        self.iter().position(|b| predicate(b.clone()))
    }

    fn offset_at(&self, tokens: usize) -> Result<usize, winnow::error::Needed> {
        if let Some(needed) = tokens.checked_sub(self.len()).and_then(NonZeroUsize::new) {
            Err(winnow::error::Needed::Size(needed))
        } else {
            Ok(tokens)
        }
    }

    fn next_slice(&mut self, offset: usize) -> Self::Slice {
        assert!(self.start + offset <= self.end);

        let next = TokenSlice {
            stream: self.stream,
            start: self.start,
            end: self.start + offset,
        };
        self.start += offset;
        next
    }

    fn checkpoint(&self) -> Self::Checkpoint {
        Checkpoint(self.start, self.end)
    }

    fn reset(&mut self, checkpoint: &Self::Checkpoint) {
        self.start = checkpoint.0;
        self.end = checkpoint.1;
    }

    fn raw(&self) -> &dyn fmt::Debug {
        self
    }
}

impl<'a> winnow::stream::Offset for TokenSlice<'a> {
    fn offset_from(&self, start: &Self) -> usize {
        self.start - start.start
    }
}

impl<'a> winnow::stream::Offset<Checkpoint> for TokenSlice<'a> {
    fn offset_from(&self, start: &Checkpoint) -> usize {
        self.start - start.0
    }
}

impl winnow::stream::Offset for Checkpoint {
    fn offset_from(&self, start: &Self) -> usize {
        self.0 - start.0
    }
}

impl<'a> winnow::stream::StreamIsPartial for TokenSlice<'a> {
    type PartialState = ();

    fn complete(&mut self) -> Self::PartialState {}

    fn restore_partial(&mut self, _: Self::PartialState) {}

    fn is_partial_supported() -> bool {
        false
    }
}

impl<'a> winnow::stream::FindSlice<&str> for TokenSlice<'a> {
    fn find_slice(&self, substr: &str) -> Option<std::ops::Range<usize>> {
        self.iter()
            .enumerate()
            .find_map(|(i, b)| if b.value == substr { Some(i..self.end) } else { None })
    }
}

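/// A saved position in a `TokenSlice`: the `(start, end)` index pair at the
/// time of the checkpoint, used by winnow to backtrack.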
#[derive(Clone, Debug)]
pub struct Checkpoint(usize, usize);

/// The types of tokens.
#[derive(Debug, PartialEq, Eq, Copy, Clone, Display)]
#[display(style = "camelCase")]
pub enum TokenType {
    /// A number.
    Number,
    /// A word.
    Word,
    /// An operator.
    Operator,
    /// A string literal.
    String,
    /// A keyword.
    Keyword,
    /// A type name.
    Type,
    /// A brace.
    Brace,
    /// `#`
    Hash,
    /// `!`
    Bang,
    /// `$`
    Dollar,
    /// Whitespace.
    Whitespace,
    /// `,`
    Comma,
    /// `:`
    Colon,
    /// `::`
    DoubleColon,
    /// `.`
    Period,
    /// `..`
    DoublePeriod,
    /// A comment beginning with `//`.
    LineComment,
    /// A `/* ... */` comment.
    BlockComment,
    /// A function name.
    Function,
    /// A lexeme the tokeniser could not classify.
    Unknown,
    /// `?`
    QuestionMark,
    /// `@`
    At,
    /// `;`
    SemiColon,
}

impl TryFrom<TokenType> for SemanticTokenType {
    type Error = anyhow::Error;
    fn try_from(token_type: TokenType) -> Result<Self> {
        Ok(match token_type {
            TokenType::Number => Self::NUMBER,
            TokenType::Word => Self::VARIABLE,
            TokenType::Keyword => Self::KEYWORD,
            TokenType::Type => Self::TYPE,
            TokenType::Operator => Self::OPERATOR,
            TokenType::QuestionMark => Self::OPERATOR,
            TokenType::String => Self::STRING,
            TokenType::Bang => Self::OPERATOR,
            TokenType::LineComment => Self::COMMENT,
            TokenType::BlockComment => Self::COMMENT,
            TokenType::Function => Self::FUNCTION,
            TokenType::Whitespace
            | TokenType::Brace
            | TokenType::Comma
            | TokenType::Colon
            | TokenType::DoubleColon
            | TokenType::Period
            | TokenType::DoublePeriod
            | TokenType::Hash
            | TokenType::Dollar
            | TokenType::At
            | TokenType::SemiColon
            | TokenType::Unknown => {
                anyhow::bail!("unsupported token type: {:?}", token_type)
            }
        })
    }
}

impl TokenType {
    pub fn is_whitespace(&self) -> bool {
        matches!(self, Self::Whitespace)
    }

    pub fn is_comment(&self) -> bool {
        matches!(self, Self::LineComment | Self::BlockComment)
    }
}

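/// A single lexed token: its type, its raw text, and where it lies in the
/// source file.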
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Token {
    pub token_type: TokenType,
    /// Offset in the source where this token starts.
    pub start: usize,
    /// Offset in the source just past where this token ends (exclusive).
    pub end: usize,
    pub(super) module_id: ModuleId,
    pub(super) value: String,
}

impl ContainsToken<Token> for (TokenType, &str) {
    fn contains_token(&self, token: Token) -> bool {
        self.0 == token.token_type && self.1 == token.value
    }
}

impl ContainsToken<Token> for TokenType {
    fn contains_token(&self, token: Token) -> bool {
        *self == token.token_type
    }
}

impl Token {
    pub fn from_range(
        range: std::ops::Range<usize>,
        module_id: ModuleId,
        token_type: TokenType,
        value: String,
    ) -> Self {
        Self {
            start: range.start,
            end: range.end,
            module_id,
            value,
            token_type,
        }
    }

    pub fn is_code_token(&self) -> bool {
        !matches!(
            self.token_type,
            TokenType::Whitespace | TokenType::LineComment | TokenType::BlockComment
        )
    }

    pub fn as_source_range(&self) -> SourceRange {
        SourceRange::new(self.start, self.end, self.module_id)
    }

    pub fn as_source_ranges(&self) -> Vec<SourceRange> {
        vec![self.as_source_range()]
    }

    pub fn visibility_keyword(&self) -> Option<ItemVisibility> {
        if !matches!(self.token_type, TokenType::Keyword) {
            return None;
        }
        match self.value.as_str() {
            "export" => Some(ItemVisibility::Export),
            _ => None,
        }
    }

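    /// The numeric value of a number token, with any suffix (`_` or a unit)
    /// stripped off before parsing, e.g. `"2.5mm"` parses as `2.5`. Returns
    /// `None` for non-number tokens.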
    pub fn numeric_value(&self) -> Option<f64> {
        if self.token_type != TokenType::Number {
            return None;
        }
        let value = &self.value;
        let value = value
            .split_once(|c: char| c == '_' || c.is_ascii_alphabetic())
            .map(|(s, _)| s)
            .unwrap_or(value);
        value.parse().ok()
    }

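    /// Like [`Self::numeric_value`], but parses the digits as a `u32`.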
    pub fn uint_value(&self) -> Option<u32> {
        if self.token_type != TokenType::Number {
            return None;
        }
        let value = &self.value;
        let value = value
            .split_once(|c: char| c == '_' || c.is_ascii_alphabetic())
            .map(|(s, _)| s)
            .unwrap_or(value);
        value.parse().ok()
    }

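    /// The units suffix of a number token, if any: `"4mm"` gives
    /// `NumericSuffix::Mm`, a trailing `_` gives `NumericSuffix::Count`, and
    /// anything else gives `NumericSuffix::None`.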
    pub fn numeric_suffix(&self) -> NumericSuffix {
        if self.token_type != TokenType::Number {
            return NumericSuffix::None;
        }

        if self.value.ends_with('_') {
            return NumericSuffix::Count;
        }

        for suffix in NUM_SUFFIXES {
            if self.value.ends_with(suffix) {
                return suffix.parse().unwrap();
            }
        }

        NumericSuffix::None
    }

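    /// If this token begins a variable or function declaration, return which
    /// kind of declaration it is; otherwise `None`.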
    pub fn declaration_keyword(&self) -> Option<VariableKind> {
        if !matches!(self.token_type, TokenType::Keyword) {
            return None;
        }
        Some(match self.value.as_str() {
            "fn" => VariableKind::Fn,
            "var" | "let" | "const" => VariableKind::Const,
            _ => return None,
        })
    }
}

impl From<Token> for SourceRange {
    fn from(token: Token) -> Self {
        Self::new(token.start, token.end, token.module_id)
    }
}

impl From<&Token> for SourceRange {
    fn from(token: &Token) -> Self {
        Self::new(token.start, token.end, token.module_id)
    }
}

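/// Lex the given source into a `TokenStream`, converting any lexer failure
/// into a `KclError`.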
pub fn lex(s: &str, module_id: ModuleId) -> Result<TokenStream, KclError> {
    tokeniser::lex(s, module_id).map_err(From::from)
}

impl From<ParseError<Input<'_>, winnow::error::ContextError>> for KclError {
    fn from(err: ParseError<Input<'_>, winnow::error::ContextError>) -> Self {
        let (input, offset): (Vec<char>, usize) = (err.input().chars().collect(), err.offset());
        let module_id = err.input().state.module_id;

        if offset >= input.len() {
            // The winnow error offset is an offset, not an index, so it may
            // point one past the end of the input on EOF errors.
            return KclError::Lexical(crate::errors::KclErrorDetails {
                source_ranges: vec![SourceRange::new(offset, offset, module_id)],
                message: "unexpected EOF while parsing".to_string(),
            });
        }

        let bad_token = &input[offset];
        KclError::Lexical(crate::errors::KclErrorDetails {
            source_ranges: vec![SourceRange::new(offset, offset + 1, module_id)],
            message: format!("found unknown token '{}'", bad_token),
        })
    }
}