1use super::functions::*;
6use crate::tokens::{Span, Token, TokenKind};
7
/// A lexer token enriched with its coarse category, the exact source text it
/// covers, and the whitespace context that preceded it.
#[derive(Clone, Debug, PartialEq)]
#[allow(missing_docs)]
pub struct TokenMeta {
    /// The underlying token (kind + span).
    pub token: Token,
    /// Coarse classification of `token.kind` (keyword, literal, operator, ...).
    pub category: TokenCategory,
    /// The exact source text covered by the token's span.
    #[allow(missing_docs)]
    pub text: String,
    /// Whether whitespace separated this token from the previous one.
    /// Defaults to `false` in `from_token`; set via `set_preceded_by_space`.
    pub preceded_by_space: bool,
    /// Whether a newline separated this token from the previous one.
    /// Defaults to `false` in `from_token`; set via `set_preceded_by_newline`.
    pub preceded_by_newline: bool,
}
24impl TokenMeta {
25 #[allow(missing_docs)]
27 pub fn new(
28 token: Token,
29 category: TokenCategory,
30 text: impl Into<String>,
31 preceded_by_space: bool,
32 preceded_by_newline: bool,
33 ) -> Self {
34 Self {
35 token,
36 category,
37 text: text.into(),
38 preceded_by_space,
39 preceded_by_newline,
40 }
41 }
42 #[allow(missing_docs)]
44 pub fn from_token(token: Token, source: &str) -> Self {
45 let span = &token.span;
46 let text = source.get(span.start..span.end).unwrap_or("").to_string();
47 let category = categorise(&token.kind);
48 Self {
49 token,
50 category,
51 text,
52 preceded_by_space: false,
53 preceded_by_newline: false,
54 }
55 }
56 #[allow(missing_docs)]
58 pub fn span(&self) -> &Span {
59 &self.token.span
60 }
61 #[allow(missing_docs)]
63 pub fn kind(&self) -> &TokenKind {
64 &self.token.kind
65 }
66 #[allow(missing_docs)]
68 pub fn is_ident(&self) -> bool {
69 self.token.is_ident()
70 }
71 #[allow(missing_docs)]
73 pub fn is_keyword(&self) -> bool {
74 self.category == TokenCategory::Keyword
75 }
76 #[allow(missing_docs)]
78 pub fn is_literal(&self) -> bool {
79 self.category == TokenCategory::Literal
80 }
81}
82impl TokenMeta {
83 #[allow(dead_code)]
85 #[allow(missing_docs)]
86 pub fn set_preceded_by_newline(&mut self, v: bool) {
87 self.preceded_by_newline = v;
88 }
89 #[allow(dead_code)]
91 #[allow(missing_docs)]
92 pub fn set_preceded_by_space(&mut self, v: bool) {
93 self.preceded_by_space = v;
94 }
95 #[allow(dead_code)]
97 #[allow(missing_docs)]
98 pub fn len(&self) -> usize {
99 self.token.span.end.saturating_sub(self.token.span.start)
100 }
101 #[allow(dead_code)]
103 #[allow(missing_docs)]
104 pub fn is_empty(&self) -> bool {
105 self.len() == 0
106 }
107 #[allow(dead_code)]
109 #[allow(missing_docs)]
110 pub fn is_numeric(&self) -> bool {
111 matches!(self.token.kind, TokenKind::Nat(_) | TokenKind::Float(_))
112 }
113 #[allow(dead_code)]
115 #[allow(missing_docs)]
116 pub fn is_string(&self) -> bool {
117 matches!(self.token.kind, TokenKind::String(_))
118 }
119 #[allow(dead_code)]
121 #[allow(missing_docs)]
122 pub fn is_operator(&self) -> bool {
123 self.category == TokenCategory::Operator
124 }
125}
/// Coarse classification of a `TokenKind`, used for naming, colouring,
/// and simple expression-start heuristics.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
#[allow(missing_docs)]
pub enum TokenCategory {
    /// Reserved word.
    Keyword,
    /// A name (variable, function, type, ...).
    Identifier,
    /// A literal value (number, string, ...).
    Literal,
    /// Structural punctuation (brackets, commas, ...).
    Punctuation,
    /// A prefix or infix operator.
    Operator,
    /// A source comment.
    Comment,
    /// End-of-input marker.
    Eof,
    /// Anything that fits no other category.
    Other,
}
148impl TokenCategory {
149 #[allow(missing_docs)]
151 pub fn name(&self) -> &'static str {
152 match self {
153 TokenCategory::Keyword => "keyword",
154 TokenCategory::Identifier => "identifier",
155 TokenCategory::Literal => "literal",
156 TokenCategory::Punctuation => "punctuation",
157 TokenCategory::Operator => "operator",
158 TokenCategory::Comment => "comment",
159 TokenCategory::Eof => "end-of-file",
160 TokenCategory::Other => "token",
161 }
162 }
163 #[allow(missing_docs)]
166 pub fn can_start_expr(&self) -> bool {
167 matches!(
168 self,
169 TokenCategory::Identifier | TokenCategory::Literal | TokenCategory::Punctuation
170 )
171 }
172}
173impl TokenCategory {
174 #[allow(dead_code)]
176 #[allow(missing_docs)]
177 pub fn is_meaningful(&self) -> bool {
178 !matches!(self, TokenCategory::Eof | TokenCategory::Other)
179 }
180 #[allow(dead_code)]
182 #[allow(missing_docs)]
183 pub fn ansi_color(&self) -> &'static str {
184 match self {
185 TokenCategory::Keyword => ansi::BOLD_BLUE,
186 TokenCategory::Identifier => ansi::RESET,
187 TokenCategory::Literal => ansi::BOLD_GREEN,
188 TokenCategory::Operator => ansi::CYAN,
189 TokenCategory::Punctuation => ansi::YELLOW,
190 TokenCategory::Comment => ansi::GREEN,
191 TokenCategory::Eof => ansi::RESET,
192 TokenCategory::Other => ansi::RESET,
193 }
194 }
195 #[allow(dead_code)]
197 #[allow(missing_docs)]
198 pub fn all() -> &'static [TokenCategory] {
199 &[
200 TokenCategory::Keyword,
201 TokenCategory::Identifier,
202 TokenCategory::Literal,
203 TokenCategory::Punctuation,
204 TokenCategory::Operator,
205 TokenCategory::Comment,
206 TokenCategory::Eof,
207 TokenCategory::Other,
208 ]
209 }
210}
/// Numeric binding strength of an operator; comparison uses the derived
/// `Ord` on the wrapped `u32`.
// NOTE(review): whether larger means tighter binding is decided by
// `operator_priority` in the sibling module — not visible here.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
#[allow(missing_docs)]
pub struct OperatorPriority(pub u32);
217impl OperatorPriority {
218 pub fn new(p: u32) -> Self {
220 Self(p)
221 }
222 #[allow(missing_docs)]
224 pub const MIN: Self = Self(0);
225 pub const MAX: Self = Self(u32::MAX);
227}
/// A token augmented with its category and precomputed operator
/// arity/priority, ready for expression parsing.
#[derive(Clone, Debug)]
#[allow(missing_docs)]
pub struct RichToken {
    /// The underlying token.
    pub token: Token,
    /// Coarse classification of `token.kind`.
    pub category: TokenCategory,
    /// Operator arity of the token (`None` for non-operators).
    #[allow(missing_docs)]
    pub arity: OperatorArity,
    /// Operator binding strength, from `operator_priority`.
    pub priority: OperatorPriority,
}
242impl RichToken {
243 #[allow(missing_docs)]
245 pub fn from_token(token: Token) -> Self {
246 let category = categorise(&token.kind);
247 let arity = operator_arity(&token.kind);
248 let priority = operator_priority(&token.kind);
249 Self {
250 token,
251 category,
252 arity,
253 priority,
254 }
255 }
256 #[allow(missing_docs)]
258 pub fn is_infix(&self) -> bool {
259 self.arity == OperatorArity::Binary
260 }
261 #[allow(missing_docs)]
263 pub fn is_prefix(&self) -> bool {
264 self.arity == OperatorArity::Unary
265 }
266}
/// Whitespace preferences for re-emitting a token stream.
// NOTE(review): no consumer is visible in this chunk — presumably read by a
// reformatter elsewhere in the module; verify field semantics against it.
#[allow(dead_code)]
#[allow(missing_docs)]
#[derive(Clone, Debug)]
pub struct ReformatOptions {
    /// Emit a space before each operator.
    pub space_before_op: bool,
    /// Emit a space after each operator.
    pub space_after_op: bool,
    /// Emit a space after each comma.
    #[allow(missing_docs)]
    pub space_after_comma: bool,
    /// Suppress any space before closing brackets.
    pub no_space_before_close: bool,
}
/// A cursor over a vector of tokens supporting peeking, arbitrary
/// lookahead, and save/rewind backtracking.
#[derive(Clone, Debug)]
#[allow(missing_docs)]
pub struct TokenStream {
    /// All tokens, including already-consumed ones.
    tokens: Vec<Token>,
    /// Index of the next token to hand out; never exceeds `tokens.len()`.
    pos: usize,
}
289impl TokenStream {
290 #[allow(missing_docs)]
292 pub fn new(tokens: Vec<Token>) -> Self {
293 Self { tokens, pos: 0 }
294 }
295 #[allow(missing_docs)]
297 pub fn peek(&self) -> Option<&Token> {
298 self.tokens.get(self.pos)
299 }
300 #[allow(missing_docs)]
302 pub fn peek_ahead(&self, n: usize) -> Option<&Token> {
303 self.tokens.get(self.pos + n)
304 }
305 #[allow(clippy::should_implement_trait)]
307 #[allow(missing_docs)]
308 pub fn next(&mut self) -> Option<Token> {
309 if self.pos < self.tokens.len() {
310 let tok = self.tokens[self.pos].clone();
311 self.pos += 1;
312 Some(tok)
313 } else {
314 None
315 }
316 }
317 #[allow(missing_docs)]
319 pub fn eat(&mut self, expected: &TokenKind) -> Option<Token> {
320 if self.peek().map(|t| &t.kind) == Some(expected) {
321 self.next()
322 } else {
323 None
324 }
325 }
326 #[allow(missing_docs)]
328 pub fn eat_while<F>(&mut self, mut pred: F) -> Vec<Token>
329 where
330 F: FnMut(&Token) -> bool,
331 {
332 let mut consumed = Vec::new();
333 while let Some(tok) = self.peek() {
334 if pred(tok) {
335 consumed.push(self.next().expect("peek confirmed token exists"));
336 } else {
337 break;
338 }
339 }
340 consumed
341 }
342 #[allow(missing_docs)]
344 pub fn position(&self) -> usize {
345 self.pos
346 }
347 #[allow(missing_docs)]
349 pub fn is_empty(&self) -> bool {
350 self.pos >= self.tokens.len()
351 }
352 #[allow(missing_docs)]
354 pub fn remaining(&self) -> usize {
355 self.tokens.len().saturating_sub(self.pos)
356 }
357 #[allow(missing_docs)]
359 pub fn rewind(&mut self, saved: usize) {
360 self.pos = saved.min(self.tokens.len());
361 }
362 #[allow(missing_docs)]
364 pub fn save(&self) -> usize {
365 self.pos
366 }
367 #[allow(missing_docs)]
369 pub fn expect(&mut self, expected: &TokenKind) -> Result<Token, String> {
370 match self.peek() {
371 Some(tok) if &tok.kind == expected => {
372 Ok(self.next().expect("peek confirmed token exists"))
373 }
374 Some(tok) => Err(format!(
375 "expected {:?}, got {:?} at {}:{}",
376 expected, tok.kind, tok.span.line, tok.span.column
377 )),
378 None => Err(format!("expected {:?}, got end-of-file", expected)),
379 }
380 }
381 #[allow(missing_docs)]
383 pub fn collect_remaining(mut self) -> Vec<Token> {
384 let mut result = Vec::new();
385 while let Some(t) = self.next() {
386 result.push(t);
387 }
388 result
389 }
390}
391impl TokenStream {
392 #[allow(dead_code)]
394 #[allow(missing_docs)]
395 pub fn look_ahead(&self, n: usize) -> Option<&Token> {
396 self.tokens.get(self.pos + n)
397 }
398 #[allow(dead_code)]
400 #[allow(missing_docs)]
401 pub fn skip_while<F: FnMut(&Token) -> bool>(&mut self, mut pred: F) {
402 while let Some(tok) = self.peek() {
403 if pred(tok) {
404 self.pos += 1;
405 } else {
406 break;
407 }
408 }
409 }
410 #[allow(dead_code)]
412 #[allow(missing_docs)]
413 pub fn skip_to_inclusive(&mut self, kind: &TokenKind) {
414 while let Some(tok) = self.peek() {
415 let found = &tok.kind == kind;
416 self.pos += 1;
417 if found {
418 break;
419 }
420 }
421 }
422 #[allow(dead_code)]
424 #[allow(missing_docs)]
425 pub fn skip_to(&mut self, kind: &TokenKind) {
426 while let Some(tok) = self.peek() {
427 if &tok.kind == kind {
428 break;
429 }
430 self.pos += 1;
431 }
432 }
433 #[allow(dead_code)]
435 #[allow(missing_docs)]
436 pub fn peek_slice(&self, n: usize) -> &[Token] {
437 let end = (self.pos + n).min(self.tokens.len());
438 &self.tokens[self.pos..end]
439 }
440 #[allow(dead_code)]
442 #[allow(missing_docs)]
443 pub fn matches_sequence(&self, kinds: &[&TokenKind]) -> bool {
444 for (i, k) in kinds.iter().enumerate() {
445 match self.tokens.get(self.pos + i) {
446 Some(tok) if &&tok.kind == k => {}
447 _ => return false,
448 }
449 }
450 true
451 }
452 #[allow(dead_code)]
454 #[allow(missing_docs)]
455 pub fn consume_n(&mut self, n: usize) -> Vec<Token> {
456 let mut result = Vec::with_capacity(n);
457 for _ in 0..n {
458 if let Some(tok) = self.next() {
459 result.push(tok);
460 }
461 }
462 result
463 }
464 #[allow(dead_code)]
466 #[allow(missing_docs)]
467 pub fn peek_all(&self) -> &[Token] {
468 &self.tokens[self.pos..]
469 }
470 #[allow(dead_code)]
472 #[allow(missing_docs)]
473 pub fn inject(&mut self, tokens: Vec<Token>) {
474 let mut new_tokens = self.tokens[..self.pos].to_vec();
475 new_tokens.extend(tokens);
476 new_tokens.extend_from_slice(&self.tokens[self.pos..]);
477 self.tokens = new_tokens;
478 }
479 #[allow(dead_code)]
481 #[allow(missing_docs)]
482 pub fn len(&self) -> usize {
483 self.tokens.len()
484 }
485}
/// Sliding-window (n-gram) view over a token slice.
// NOTE(review): no `Iterator` impl is visible in this chunk — presumably
// provided elsewhere. `window` is the n-gram width and `pos` the next
// window's start index; confirm against the impl.
#[allow(dead_code)]
#[allow(missing_docs)]
pub struct TokenNgramIter<'a> {
    /// The tokens being windowed over.
    pub(crate) tokens: &'a [Token],
    /// Width of each window.
    pub(crate) window: usize,
    /// Start index of the next window.
    pub(crate) pos: usize,
}
494impl<'a> TokenNgramIter<'a> {
495 #[allow(dead_code)]
497 #[allow(missing_docs)]
498 pub fn new(tokens: &'a [Token], window: usize) -> Self {
499 Self {
500 tokens,
501 window,
502 pos: 0,
503 }
504 }
505}
/// The three bracket families.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[allow(missing_docs)]
pub enum BracketKind {
    /// Round parentheses `( )`.
    Paren,
    /// Curly braces `{ }`.
    Brace,
    /// Square brackets `[ ]`.
    Bracket,
}
/// A small pattern language over token sequences, matched by
/// `matches_single` / `try_match` / `find_all` below.
#[allow(dead_code)]
#[allow(missing_docs)]
#[derive(Clone, Debug)]
pub enum TokenPattern {
    /// Matches exactly one token of this kind.
    Exact(TokenKind),
    /// Matches one token whose `categorise`d kind equals this category.
    Category(TokenCategory),
    /// Matches any single token.
    Any,
    /// Matches the inner pattern, or zero tokens.
    Optional(Box<TokenPattern>),
    /// Matches each sub-pattern in order, consuming their total.
    Sequence(Vec<TokenPattern>),
    /// Matches the first sub-pattern that succeeds.
    Alternatives(Vec<TokenPattern>),
}
535impl TokenPattern {
536 #[allow(dead_code)]
538 #[allow(missing_docs)]
539 pub fn matches_single(&self, tok: &Token) -> bool {
540 match self {
541 TokenPattern::Exact(k) => &tok.kind == k,
542 TokenPattern::Category(cat) => categorise(&tok.kind) == *cat,
543 TokenPattern::Any => true,
544 TokenPattern::Optional(inner) => inner.matches_single(tok),
545 TokenPattern::Sequence(_) => false,
546 TokenPattern::Alternatives(alts) => alts.iter().any(|a| a.matches_single(tok)),
547 }
548 }
549 #[allow(dead_code)]
552 #[allow(missing_docs)]
553 pub fn try_match(&self, tokens: &[Token]) -> Option<usize> {
554 match self {
555 TokenPattern::Exact(k) => {
556 if tokens.first().map(|t| &t.kind) == Some(k) {
557 Some(1)
558 } else {
559 None
560 }
561 }
562 TokenPattern::Category(cat) => {
563 if tokens.first().map(|t| categorise(&t.kind)) == Some(*cat) {
564 Some(1)
565 } else {
566 None
567 }
568 }
569 TokenPattern::Any => {
570 if tokens.is_empty() {
571 None
572 } else {
573 Some(1)
574 }
575 }
576 TokenPattern::Optional(inner) => Some(inner.try_match(tokens).unwrap_or(0)),
577 TokenPattern::Sequence(pats) => {
578 let mut consumed = 0;
579 for pat in pats {
580 match pat.try_match(&tokens[consumed..]) {
581 Some(n) => consumed += n,
582 None => return None,
583 }
584 }
585 Some(consumed)
586 }
587 TokenPattern::Alternatives(alts) => {
588 for alt in alts {
589 if let Some(n) = alt.try_match(tokens) {
590 return Some(n);
591 }
592 }
593 None
594 }
595 }
596 }
597 #[allow(dead_code)]
599 #[allow(missing_docs)]
600 pub fn find_all<'a>(&self, tokens: &'a [Token]) -> Vec<&'a [Token]> {
601 let mut results = Vec::new();
602 let mut pos = 0;
603 while pos < tokens.len() {
604 if let Some(n) = self.try_match(&tokens[pos..]) {
605 if n > 0 {
606 results.push(&tokens[pos..pos + n]);
607 pos += n;
608 } else {
609 pos += 1;
610 }
611 } else {
612 pos += 1;
613 }
614 }
615 results
616 }
617}
/// A token tagged with its category plus caller-supplied nesting depth
/// and position within its stream.
#[allow(dead_code)]
#[allow(missing_docs)]
#[derive(Clone, Debug)]
pub struct AnnotatedToken {
    /// The underlying token.
    pub token: Token,
    /// Coarse classification of `token.kind`.
    pub category: TokenCategory,
    /// Nesting depth supplied by the caller (may go negative on
    /// unbalanced input, hence `i32`).
    pub depth: i32,
    /// Position of the token in its stream, supplied by the caller.
    pub index: usize,
}
628impl AnnotatedToken {
629 #[allow(dead_code)]
631 #[allow(missing_docs)]
632 pub fn new(token: Token, depth: i32, index: usize) -> Self {
633 let category = categorise(&token.kind);
634 Self {
635 token,
636 category,
637 depth,
638 index,
639 }
640 }
641}
/// Number of operands an operator takes; `None` marks tokens that are not
/// operators at all.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(missing_docs)]
pub enum OperatorArity {
    /// One operand (prefix position — see `RichToken::is_prefix`).
    Unary,
    /// Two operands (infix position — see `RichToken::is_infix`).
    Binary,
    /// Not an operator.
    None,
}