1use crate::tokens::{Span, Token, TokenKind};
6
7use super::types::{
8 AnnotatedToken, BracketKind, OperatorArity, OperatorPriority, ReformatOptions, RichToken,
9 TokenCategory, TokenMeta, TokenNgramIter,
10};
11
12#[allow(missing_docs)]
14pub fn categorise(kind: &TokenKind) -> TokenCategory {
15 match kind {
16 TokenKind::Axiom
17 | TokenKind::Definition
18 | TokenKind::Theorem
19 | TokenKind::Lemma
20 | TokenKind::Opaque
21 | TokenKind::Inductive
22 | TokenKind::Structure
23 | TokenKind::Class
24 | TokenKind::Instance
25 | TokenKind::Namespace
26 | TokenKind::Section
27 | TokenKind::Variable
28 | TokenKind::Variables
29 | TokenKind::Parameter
30 | TokenKind::Parameters
31 | TokenKind::Constant
32 | TokenKind::Constants
33 | TokenKind::End
34 | TokenKind::Import
35 | TokenKind::Export
36 | TokenKind::Open
37 | TokenKind::Attribute
38 | TokenKind::Return
39 | TokenKind::Fun
40 | TokenKind::Do
41 | TokenKind::Let
42 | TokenKind::In
43 | TokenKind::If
44 | TokenKind::Then
45 | TokenKind::Else
46 | TokenKind::Match
47 | TokenKind::With
48 | TokenKind::Where
49 | TokenKind::Have
50 | TokenKind::Show
51 | TokenKind::From
52 | TokenKind::By
53 | TokenKind::Forall
54 | TokenKind::Exists
55 | TokenKind::Type => TokenCategory::Keyword,
56 TokenKind::Ident(_) => TokenCategory::Identifier,
57 TokenKind::Nat(_) | TokenKind::Float(_) | TokenKind::String(_) | TokenKind::Char(_) => {
58 TokenCategory::Literal
59 }
60 TokenKind::DocComment(_) => TokenCategory::Comment,
61 TokenKind::LParen
62 | TokenKind::RParen
63 | TokenKind::LBrace
64 | TokenKind::RBrace
65 | TokenKind::LBracket
66 | TokenKind::RBracket
67 | TokenKind::Comma
68 | TokenKind::Semicolon
69 | TokenKind::Colon
70 | TokenKind::Dot
71 | TokenKind::DotDot => TokenCategory::Punctuation,
72 TokenKind::Arrow
73 | TokenKind::FatArrow
74 | TokenKind::Eq
75 | TokenKind::Assign
76 | TokenKind::Plus
77 | TokenKind::Minus
78 | TokenKind::Star
79 | TokenKind::Slash
80 | TokenKind::Percent
81 | TokenKind::Caret
82 | TokenKind::Bar
83 | TokenKind::Bang
84 | TokenKind::BangEq
85 | TokenKind::Lt
86 | TokenKind::Gt
87 | TokenKind::Le
88 | TokenKind::Ge
89 | TokenKind::Ne
90 | TokenKind::AndAnd
91 | TokenKind::OrOr
92 | TokenKind::And
93 | TokenKind::Or
94 | TokenKind::Not
95 | TokenKind::Iff
96 | TokenKind::Underscore
97 | TokenKind::At => TokenCategory::Operator,
98 TokenKind::Eof => TokenCategory::Eof,
99 _ => TokenCategory::Other,
100 }
101}
102#[allow(missing_docs)]
104pub fn can_start_expr(kind: &TokenKind) -> bool {
105 matches!(
106 kind,
107 TokenKind::Ident(_)
108 | TokenKind::Nat(_)
109 | TokenKind::Float(_)
110 | TokenKind::String(_)
111 | TokenKind::Char(_)
112 | TokenKind::LParen
113 | TokenKind::LBrace
114 | TokenKind::LBracket
115 | TokenKind::Fun
116 | TokenKind::Let
117 | TokenKind::If
118 | TokenKind::Match
119 | TokenKind::Forall
120 | TokenKind::Exists
121 | TokenKind::Not
122 | TokenKind::Bang
123 | TokenKind::Minus
124 | TokenKind::Type
125 | TokenKind::Underscore
126 )
127}
128#[allow(missing_docs)]
130pub fn can_start_decl(kind: &TokenKind) -> bool {
131 matches!(
132 kind,
133 TokenKind::Axiom
134 | TokenKind::Definition
135 | TokenKind::Theorem
136 | TokenKind::Lemma
137 | TokenKind::Opaque
138 | TokenKind::Inductive
139 | TokenKind::Structure
140 | TokenKind::Class
141 | TokenKind::Instance
142 | TokenKind::Namespace
143 | TokenKind::Section
144 | TokenKind::Variable
145 | TokenKind::Variables
146 | TokenKind::Parameter
147 | TokenKind::Parameters
148 | TokenKind::Constant
149 | TokenKind::Constants
150 | TokenKind::Open
151 | TokenKind::Import
152 | TokenKind::End
153 )
154}
155#[allow(missing_docs)]
157pub fn is_infix_op(kind: &TokenKind) -> bool {
158 matches!(
159 kind,
160 TokenKind::Plus
161 | TokenKind::Minus
162 | TokenKind::Star
163 | TokenKind::Slash
164 | TokenKind::Percent
165 | TokenKind::Eq
166 | TokenKind::Ne
167 | TokenKind::BangEq
168 | TokenKind::Lt
169 | TokenKind::Le
170 | TokenKind::Gt
171 | TokenKind::Ge
172 | TokenKind::AndAnd
173 | TokenKind::OrOr
174 | TokenKind::And
175 | TokenKind::Or
176 | TokenKind::Iff
177 | TokenKind::Arrow
178 )
179}
180#[allow(missing_docs)]
182pub fn infix_precedence(kind: &TokenKind) -> Option<u32> {
183 match kind {
184 TokenKind::Iff => Some(10),
185 TokenKind::Arrow => Some(20),
186 TokenKind::Or | TokenKind::OrOr => Some(30),
187 TokenKind::And | TokenKind::AndAnd => Some(40),
188 TokenKind::Eq
189 | TokenKind::Ne
190 | TokenKind::BangEq
191 | TokenKind::Lt
192 | TokenKind::Le
193 | TokenKind::Gt
194 | TokenKind::Ge => Some(50),
195 TokenKind::Plus | TokenKind::Minus => Some(65),
196 TokenKind::Star | TokenKind::Slash | TokenKind::Percent => Some(70),
197 TokenKind::Caret => Some(75),
198 _ => None,
199 }
200}
201#[allow(missing_docs)]
203pub fn is_right_assoc(kind: &TokenKind) -> bool {
204 matches!(kind, TokenKind::Arrow | TokenKind::Caret)
205}
206#[allow(missing_docs)]
208pub fn token_kind_display(kind: &TokenKind) -> String {
209 match kind {
210 TokenKind::Ident(s) => format!("identifier `{}`", s),
211 TokenKind::Nat(n) => format!("number `{}`", n),
212 TokenKind::Float(f) => format!("float `{}`", f),
213 TokenKind::String(s) => format!("string \"{}\"", s),
214 TokenKind::Char(c) => format!("char '{}'", c),
215 TokenKind::Eof => "end of file".to_string(),
216 other => format!("`{:?}`", other),
217 }
218}
#[allow(missing_docs)]
/// Returns the identifier text when `token` is an identifier, else `None`.
/// Thin free-function wrapper over `Token::as_ident`, kept for API symmetry
/// with `nat_lit_of`.
pub fn ident_of(token: &Token) -> Option<&str> {
    token.as_ident()
}
224#[allow(missing_docs)]
226pub fn nat_lit_of(token: &Token) -> Option<u64> {
227 if let TokenKind::Nat(n) = &token.kind {
228 Some(*n)
229 } else {
230 None
231 }
232}
233#[allow(missing_docs)]
235pub fn opening_bracket(kind: &TokenKind) -> Option<BracketKind> {
236 match kind {
237 TokenKind::LParen => Some(BracketKind::Paren),
238 TokenKind::LBrace => Some(BracketKind::Brace),
239 TokenKind::LBracket => Some(BracketKind::Bracket),
240 _ => None,
241 }
242}
243#[allow(missing_docs)]
245pub fn closing_bracket(kind: &TokenKind) -> Option<BracketKind> {
246 match kind {
247 TokenKind::RParen => Some(BracketKind::Paren),
248 TokenKind::RBrace => Some(BracketKind::Brace),
249 TokenKind::RBracket => Some(BracketKind::Bracket),
250 _ => None,
251 }
252}
253#[allow(missing_docs)]
255pub fn closing_for(open: BracketKind) -> TokenKind {
256 match open {
257 BracketKind::Paren => TokenKind::RParen,
258 BracketKind::Brace => TokenKind::RBrace,
259 BracketKind::Bracket => TokenKind::RBracket,
260 }
261}
262#[allow(missing_docs)]
264pub fn opening_for(close: BracketKind) -> TokenKind {
265 match close {
266 BracketKind::Paren => TokenKind::LParen,
267 BracketKind::Brace => TokenKind::LBrace,
268 BracketKind::Bracket => TokenKind::LBracket,
269 }
270}
271#[allow(missing_docs)]
276pub fn check_bracket_balance(tokens: &[Token]) -> Result<(), (usize, String)> {
277 let mut stack: Vec<(BracketKind, usize)> = Vec::new();
278 for (i, tok) in tokens.iter().enumerate() {
279 if let Some(kind) = opening_bracket(&tok.kind) {
280 stack.push((kind, i));
281 } else if let Some(kind) = closing_bracket(&tok.kind) {
282 match stack.pop() {
283 Some((open, _)) if open == kind => {}
284 Some((open, pos)) => {
285 return Err((
286 i,
287 format!(
288 "bracket mismatch: opened {:?} at index {}, closed {:?} at index {}",
289 open, pos, kind, i
290 ),
291 ));
292 }
293 None => {
294 return Err((
295 i,
296 format!("unexpected closing bracket {:?} at index {}", kind, i),
297 ));
298 }
299 }
300 }
301 }
302 if let Some((kind, pos)) = stack.pop() {
303 return Err((pos, format!("unclosed bracket {:?} at index {}", kind, pos)));
304 }
305 Ok(())
306}
307#[allow(missing_docs)]
309pub fn strip_comments(tokens: Vec<Token>) -> Vec<Token> {
310 tokens
311 .into_iter()
312 .filter(|t| !matches!(t.kind, TokenKind::DocComment(_)))
313 .collect()
314}
315#[allow(missing_docs)]
317pub fn count_kind(tokens: &[Token], kind: &TokenKind) -> usize {
318 tokens.iter().filter(|t| &t.kind == kind).count()
319}
320#[allow(missing_docs)]
322pub fn covering_span(tokens: &[Token]) -> Span {
323 if tokens.is_empty() {
324 return Span::new(0, 0, 1, 1);
325 }
326 let first = &tokens[0].span;
327 let last = &tokens[tokens.len() - 1].span;
328 first.merge(last)
329}
330#[allow(missing_docs)]
332pub fn contains_ident(tokens: &[Token], name: &str) -> bool {
333 tokens
334 .iter()
335 .any(|t| matches!(& t.kind, TokenKind::Ident(s) if s == name))
336}
337#[allow(missing_docs)]
339pub fn collect_idents(tokens: &[Token]) -> Vec<String> {
340 tokens
341 .iter()
342 .filter_map(|t| {
343 if let TokenKind::Ident(s) = &t.kind {
344 Some(s.clone())
345 } else {
346 None
347 }
348 })
349 .collect()
350}
351#[allow(missing_docs)]
355pub fn split_at_kind(tokens: &[Token], sep: &TokenKind) -> Vec<Vec<Token>> {
356 let mut groups: Vec<Vec<Token>> = vec![Vec::new()];
357 for tok in tokens {
358 if &tok.kind == sep {
359 groups.push(Vec::new());
360 } else {
361 groups
362 .last_mut()
363 .expect("groups initialized with one element and only grows")
364 .push(tok.clone());
365 }
366 }
367 groups
368}
369#[allow(missing_docs)]
371pub fn is_assign(tok: &Token) -> bool {
372 matches!(tok.kind, TokenKind::Assign)
373}
374#[allow(missing_docs)]
376pub fn is_colon(tok: &Token) -> bool {
377 matches!(tok.kind, TokenKind::Colon)
378}
#[allow(missing_docs)]
/// True when the token is an identifier. Thin wrapper over
/// `Token::is_ident`, kept for use as a free-function predicate.
pub fn is_ident_token(tok: &Token) -> bool {
    tok.is_ident()
}
#[cfg(test)]
mod tests {
    //! Unit tests for the token helpers above, plus smoke tests for the
    //! `TokenStream` cursor type defined elsewhere in the crate.
    use super::*;
    // NOTE(review): the top of this file imports from `crate::tokens` while
    // this module imports `crate::token` — confirm both module paths exist.
    use crate::token::*;
    // Builds a token with a dummy one-character span at 1:1.
    fn make_token(kind: TokenKind) -> Token {
        Token::new(kind, Span::new(0, 1, 1, 1))
    }
    // --- categorise -------------------------------------------------------
    #[test]
    fn test_categorise_keyword() {
        assert_eq!(categorise(&TokenKind::Theorem), TokenCategory::Keyword);
        assert_eq!(categorise(&TokenKind::Fun), TokenCategory::Keyword);
        assert_eq!(categorise(&TokenKind::Let), TokenCategory::Keyword);
    }
    #[test]
    fn test_categorise_identifier() {
        assert_eq!(
            categorise(&TokenKind::Ident("foo".to_string())),
            TokenCategory::Identifier
        );
    }
    #[test]
    fn test_categorise_literal() {
        assert_eq!(categorise(&TokenKind::Nat(42)), TokenCategory::Literal);
        assert_eq!(
            categorise(&TokenKind::String("hello".to_string())),
            TokenCategory::Literal
        );
    }
    #[test]
    fn test_categorise_operator() {
        assert_eq!(categorise(&TokenKind::Arrow), TokenCategory::Operator);
        assert_eq!(categorise(&TokenKind::Plus), TokenCategory::Operator);
    }
    // --- TokenStream cursor behavior --------------------------------------
    #[test]
    fn test_token_stream_peek_and_next() {
        let tokens = vec![
            make_token(TokenKind::Ident("x".to_string())),
            make_token(TokenKind::Eq),
            make_token(TokenKind::Nat(1)),
        ];
        let mut stream = TokenStream::new(tokens);
        assert_eq!(
            stream.peek().map(|t| &t.kind),
            Some(&TokenKind::Ident("x".to_string()))
        );
        let _ = stream.next();
        assert_eq!(stream.peek().map(|t| &t.kind), Some(&TokenKind::Eq));
    }
    #[test]
    fn test_token_stream_eat() {
        let tokens = vec![make_token(TokenKind::Colon), make_token(TokenKind::Eq)];
        let mut stream = TokenStream::new(tokens);
        // eat() only consumes on a match: the first Eq attempt must fail.
        assert!(stream.eat(&TokenKind::Eq).is_none());
        assert!(stream.eat(&TokenKind::Colon).is_some());
        assert!(stream.eat(&TokenKind::Eq).is_some());
    }
    // --- precedence table -------------------------------------------------
    #[test]
    fn test_infix_precedence_ordering() {
        let plus = infix_precedence(&TokenKind::Plus).expect("test operation should succeed");
        let times = infix_precedence(&TokenKind::Star).expect("test operation should succeed");
        let eq = infix_precedence(&TokenKind::Eq).expect("test operation should succeed");
        // Multiplication binds tighter than addition, which binds tighter
        // than comparison.
        assert!(times > plus);
        assert!(plus > eq);
    }
    // --- bracket balance --------------------------------------------------
    #[test]
    fn test_check_bracket_balance_ok() {
        let tokens = vec![
            make_token(TokenKind::LParen),
            make_token(TokenKind::Ident("x".to_string())),
            make_token(TokenKind::RParen),
        ];
        assert!(check_bracket_balance(&tokens).is_ok());
    }
    #[test]
    fn test_check_bracket_balance_mismatch() {
        let tokens = vec![
            make_token(TokenKind::LParen),
            make_token(TokenKind::RBracket),
        ];
        assert!(check_bracket_balance(&tokens).is_err());
    }
    #[test]
    fn test_check_bracket_balance_unclosed() {
        let tokens = vec![make_token(TokenKind::LBrace)];
        assert!(check_bracket_balance(&tokens).is_err());
    }
    #[test]
    fn test_covering_span() {
        let t1 = Token::new(TokenKind::Ident("a".to_string()), Span::new(0, 1, 1, 1));
        let t2 = Token::new(TokenKind::Nat(5), Span::new(5, 6, 1, 6));
        let span = covering_span(&[t1, t2]);
        assert_eq!(span.start, 0);
        assert_eq!(span.end, 6);
    }
    #[test]
    fn test_can_start_expr() {
        assert!(can_start_expr(&TokenKind::Ident("x".to_string())));
        assert!(can_start_expr(&TokenKind::Nat(0)));
        assert!(can_start_expr(&TokenKind::LParen));
        assert!(!can_start_expr(&TokenKind::Comma));
    }
    #[test]
    fn test_can_start_decl() {
        assert!(can_start_decl(&TokenKind::Theorem));
        assert!(can_start_decl(&TokenKind::Definition));
        assert!(!can_start_decl(&TokenKind::Plus));
    }
    #[test]
    fn test_count_kind() {
        let tokens = vec![
            make_token(TokenKind::Comma),
            make_token(TokenKind::Ident("a".to_string())),
            make_token(TokenKind::Comma),
        ];
        assert_eq!(count_kind(&tokens, &TokenKind::Comma), 2);
    }
    #[test]
    fn test_is_right_assoc() {
        assert!(is_right_assoc(&TokenKind::Arrow));
        assert!(is_right_assoc(&TokenKind::Caret));
        assert!(!is_right_assoc(&TokenKind::Plus));
    }
    #[test]
    fn test_token_stream_save_rewind() {
        let tokens = vec![
            make_token(TokenKind::Nat(1)),
            make_token(TokenKind::Nat(2)),
            make_token(TokenKind::Nat(3)),
        ];
        let mut stream = TokenStream::new(tokens);
        let saved = stream.save();
        let _ = stream.next();
        let _ = stream.next();
        assert_eq!(stream.position(), 2);
        // rewind() restores the saved cursor position.
        stream.rewind(saved);
        assert_eq!(stream.position(), 0);
    }
    #[test]
    fn test_split_at_kind() {
        let tokens = vec![
            make_token(TokenKind::Nat(1)),
            make_token(TokenKind::Comma),
            make_token(TokenKind::Nat(2)),
            make_token(TokenKind::Comma),
            make_token(TokenKind::Nat(3)),
        ];
        let groups = split_at_kind(&tokens, &TokenKind::Comma);
        assert_eq!(groups.len(), 3);
        assert_eq!(groups[0].len(), 1);
        assert_eq!(groups[1].len(), 1);
        assert_eq!(groups[2].len(), 1);
    }
    #[test]
    fn test_collect_idents() {
        let tokens = vec![
            make_token(TokenKind::Ident("foo".to_string())),
            make_token(TokenKind::Plus),
            make_token(TokenKind::Ident("bar".to_string())),
        ];
        let idents = collect_idents(&tokens);
        assert_eq!(idents, vec!["foo", "bar"]);
    }
    #[test]
    fn test_contains_ident() {
        let tokens = vec![
            make_token(TokenKind::Ident("alpha".to_string())),
            make_token(TokenKind::Comma),
        ];
        assert!(contains_ident(&tokens, "alpha"));
        assert!(!contains_ident(&tokens, "beta"));
    }
}
556#[allow(missing_docs)]
558pub fn operator_priority(kind: &TokenKind) -> OperatorPriority {
559 match kind {
560 TokenKind::Plus | TokenKind::Minus => OperatorPriority(60),
561 TokenKind::Star | TokenKind::Slash => OperatorPriority(70),
562 TokenKind::Caret => OperatorPriority(80),
563 TokenKind::Eq
564 | TokenKind::Ne
565 | TokenKind::Lt
566 | TokenKind::Gt
567 | TokenKind::Le
568 | TokenKind::Ge => OperatorPriority(50),
569 TokenKind::AndAnd => OperatorPriority(40),
570 TokenKind::OrOr => OperatorPriority(30),
571 TokenKind::Arrow => OperatorPriority(20),
572 _ => OperatorPriority(0),
573 }
574}
575#[allow(missing_docs)]
577pub fn operator_arity(kind: &TokenKind) -> OperatorArity {
578 match kind {
579 TokenKind::Not | TokenKind::Minus => OperatorArity::Unary,
580 TokenKind::Plus
581 | TokenKind::Star
582 | TokenKind::Slash
583 | TokenKind::Eq
584 | TokenKind::Ne
585 | TokenKind::Lt
586 | TokenKind::Gt
587 | TokenKind::Le
588 | TokenKind::Ge
589 | TokenKind::Arrow
590 | TokenKind::AndAnd
591 | TokenKind::OrOr
592 | TokenKind::Caret
593 | TokenKind::Iff => OperatorArity::Binary,
594 _ => OperatorArity::None,
595 }
596}
597#[allow(missing_docs)]
599pub fn enrich_tokens(tokens: &[Token]) -> Vec<RichToken> {
600 tokens
601 .iter()
602 .map(|t| RichToken::from_token(t.clone()))
603 .collect()
604}
605#[allow(missing_docs)]
607pub fn find_by_category(tokens: &[Token], cat: TokenCategory) -> Option<&Token> {
608 tokens.iter().find(|t| categorise(&t.kind) == cat)
609}
610#[allow(missing_docs)]
612pub fn filter_tokens<F: Fn(&Token) -> bool>(tokens: &[Token], pred: F) -> Vec<&Token> {
613 tokens.iter().filter(|t| pred(t)).collect()
614}
615#[allow(missing_docs)]
617pub fn has_operator(tokens: &[Token]) -> bool {
618 tokens
619 .iter()
620 .any(|t| operator_arity(&t.kind) != OperatorArity::None)
621}
622#[allow(missing_docs)]
624pub fn strip_eof(tokens: &[Token]) -> &[Token] {
625 let start = tokens
626 .iter()
627 .position(|t| !matches!(t.kind, TokenKind::Eof))
628 .unwrap_or(0);
629 let end = tokens
630 .iter()
631 .rposition(|t| !matches!(t.kind, TokenKind::Eof))
632 .map(|i| i + 1)
633 .unwrap_or(start);
634 &tokens[start..end]
635}
636#[allow(missing_docs)]
638pub fn span_char_count(tokens: &[Token]) -> usize {
639 tokens
640 .iter()
641 .map(|t| t.span.end.saturating_sub(t.span.start))
642 .sum()
643}
644#[allow(missing_docs)]
646pub fn max_bracket_depth(tokens: &[Token]) -> u32 {
647 let mut depth = 0u32;
648 let mut max_depth = 0u32;
649 for tok in tokens {
650 match &tok.kind {
651 TokenKind::LParen | TokenKind::LBracket | TokenKind::LBrace => {
652 depth += 1;
653 if depth > max_depth {
654 max_depth = depth;
655 }
656 }
657 TokenKind::RParen | TokenKind::RBracket | TokenKind::RBrace => {
658 depth = depth.saturating_sub(1);
659 }
660 _ => {}
661 }
662 }
663 max_depth
664}
#[cfg(test)]
mod token_extended_tests {
    //! Tests for the operator-table helpers, `RichToken` enrichment, and
    //! the bracket-depth / EOF-stripping utilities.
    use super::*;
    // NOTE(review): imports `crate::token` while the file header imports
    // `crate::tokens` — confirm both module paths exist.
    use crate::token::*;
    // Builds a token with a dummy one-character span at 1:1.
    fn make_tok(kind: TokenKind) -> Token {
        Token::new(kind, Span::new(0, 1, 1, 1))
    }
    #[test]
    fn test_operator_priority_ordering() {
        let star = operator_priority(&TokenKind::Star);
        let plus = operator_priority(&TokenKind::Plus);
        // Multiplication outranks addition.
        assert!(star > plus);
    }
    #[test]
    fn test_operator_arity_plus() {
        assert_eq!(operator_arity(&TokenKind::Plus), OperatorArity::Binary);
    }
    #[test]
    fn test_operator_arity_not() {
        assert_eq!(operator_arity(&TokenKind::Not), OperatorArity::Unary);
    }
    #[test]
    fn test_operator_arity_comma() {
        assert_eq!(operator_arity(&TokenKind::Comma), OperatorArity::None);
    }
    #[test]
    fn test_rich_token_infix() {
        let t = make_tok(TokenKind::Plus);
        let rt = RichToken::from_token(t);
        assert!(rt.is_infix());
        assert!(!rt.is_prefix());
    }
    #[test]
    fn test_enrich_tokens() {
        let tokens = vec![make_tok(TokenKind::Plus), make_tok(TokenKind::Nat(1))];
        let rich = enrich_tokens(&tokens);
        assert_eq!(rich.len(), 2);
        assert!(rich[0].is_infix());
    }
    #[test]
    fn test_find_by_category() {
        let tokens = vec![make_tok(TokenKind::Plus), make_tok(TokenKind::Nat(5))];
        let found = find_by_category(&tokens, TokenCategory::Literal);
        assert!(found.is_some());
    }
    #[test]
    fn test_filter_tokens() {
        let tokens = vec![
            make_tok(TokenKind::Plus),
            make_tok(TokenKind::Minus),
            make_tok(TokenKind::Nat(1)),
        ];
        let ops = filter_tokens(&tokens, |t| operator_arity(&t.kind) != OperatorArity::None);
        assert_eq!(ops.len(), 2);
    }
    #[test]
    fn test_has_operator_true() {
        let tokens = vec![make_tok(TokenKind::Plus)];
        assert!(has_operator(&tokens));
    }
    #[test]
    fn test_has_operator_false() {
        let tokens = vec![make_tok(TokenKind::Comma)];
        assert!(!has_operator(&tokens));
    }
    #[test]
    fn test_strip_eof() {
        // Leading and trailing Eof are removed; the interior token survives.
        let tokens = vec![
            make_tok(TokenKind::Eof),
            make_tok(TokenKind::Nat(1)),
            make_tok(TokenKind::Eof),
        ];
        let stripped = strip_eof(&tokens);
        assert_eq!(stripped.len(), 1);
    }
    #[test]
    fn test_max_bracket_depth() {
        let tokens = vec![
            make_tok(TokenKind::LParen),
            make_tok(TokenKind::LParen),
            make_tok(TokenKind::Nat(1)),
            make_tok(TokenKind::RParen),
            make_tok(TokenKind::RParen),
        ];
        assert_eq!(max_bracket_depth(&tokens), 2);
    }
    #[test]
    fn test_max_bracket_depth_flat() {
        let tokens = vec![make_tok(TokenKind::Nat(1)), make_tok(TokenKind::Plus)];
        assert_eq!(max_bracket_depth(&tokens), 0);
    }
    #[test]
    fn test_operator_priority_min_max() {
        assert!(OperatorPriority::MIN < OperatorPriority::MAX);
    }
}
#[allow(dead_code)]
#[allow(missing_docs)]
/// Collects every length-`n` window of `tokens` produced by
/// `TokenNgramIter` into a vector of borrowed sub-slices.
/// Presumably yields nothing when `tokens.len() < n` — confirm against the
/// iterator's implementation.
pub fn token_ngrams(tokens: &[Token], n: usize) -> Vec<&[Token]> {
    TokenNgramIter::new(tokens, n).collect()
}
767#[allow(dead_code)]
770#[allow(missing_docs)]
771pub fn count_bigrams(tokens: &[Token]) -> std::collections::HashMap<(String, String), usize> {
772 let mut map = std::collections::HashMap::new();
773 for w in tokens.windows(2) {
774 let key = (format!("{:?}", w[0].kind), format!("{:?}", w[1].kind));
775 *map.entry(key).or_insert(0) += 1;
776 }
777 map
778}
779#[allow(dead_code)]
781#[allow(missing_docs)]
782pub fn longest_run(tokens: &[Token]) -> usize {
783 let mut max = 0usize;
784 let mut current = 0usize;
785 let mut last_kind: Option<&TokenKind> = None;
786 for tok in tokens {
787 if Some(&tok.kind) == last_kind {
788 current += 1;
789 } else {
790 current = 1;
791 last_kind = Some(&tok.kind);
792 }
793 if current > max {
794 max = current;
795 }
796 }
797 max
798}
799#[allow(dead_code)]
801#[allow(missing_docs)]
802pub fn vocabulary(tokens: &[Token]) -> std::collections::HashSet<String> {
803 tokens.iter().map(|t| format!("{:?}", t.kind)).collect()
804}
805#[allow(dead_code)]
807#[allow(missing_docs)]
808pub fn type_token_ratio(tokens: &[Token]) -> f64 {
809 if tokens.is_empty() {
810 return 0.0;
811 }
812 let distinct = vocabulary(tokens).len();
813 distinct as f64 / tokens.len() as f64
814}
815#[allow(dead_code)]
817#[allow(missing_docs)]
818pub fn token_frequencies(tokens: &[Token]) -> Vec<(String, usize)> {
819 let mut freq: std::collections::HashMap<String, usize> = std::collections::HashMap::new();
820 for tok in tokens {
821 *freq.entry(format!("{:?}", tok.kind)).or_insert(0) += 1;
822 }
823 let mut pairs: Vec<(String, usize)> = freq.into_iter().collect();
824 pairs.sort_by(|a, b| b.1.cmp(&a.1));
825 pairs
826}
#[allow(dead_code)]
#[allow(missing_docs)]
/// ANSI escape sequences used by `colorize_token` for terminal output.
pub mod ansi {
    #[allow(missing_docs)]
    /// Resets color and style back to the terminal default.
    pub const RESET: &str = "\x1b[0m";
    // Bold style.
    pub const BOLD: &str = "\x1b[1m";
    // Plain foreground colors.
    pub const BLUE: &str = "\x1b[34m";
    pub const CYAN: &str = "\x1b[36m";
    pub const GREEN: &str = "\x1b[32m";
    #[allow(missing_docs)]
    /// Plain yellow foreground.
    pub const YELLOW: &str = "\x1b[33m";
    pub const RED: &str = "\x1b[31m";
    pub const MAGENTA: &str = "\x1b[35m";
    // Bold + color combinations.
    pub const BOLD_BLUE: &str = "\x1b[1;34m";
    pub const BOLD_CYAN: &str = "\x1b[1;36m";
    #[allow(missing_docs)]
    /// Bold green (used for literals in `colorize_token`).
    pub const BOLD_GREEN: &str = "\x1b[1;32m";
}
846#[allow(dead_code)]
848#[allow(missing_docs)]
849pub fn colorize_token(kind: &TokenKind, text: &str) -> String {
850 let cat = categorise(kind);
851 match cat {
852 TokenCategory::Keyword => format!("{}{}{}", ansi::BOLD_BLUE, text, ansi::RESET),
853 TokenCategory::Identifier => text.to_string(),
854 TokenCategory::Literal => format!("{}{}{}", ansi::BOLD_GREEN, text, ansi::RESET),
855 TokenCategory::Operator => format!("{}{}{}", ansi::CYAN, text, ansi::RESET),
856 TokenCategory::Punctuation => format!("{}{}{}", ansi::YELLOW, text, ansi::RESET),
857 TokenCategory::Comment => format!("{}{}{}", ansi::GREEN, text, ansi::RESET),
858 TokenCategory::Eof => String::new(),
859 TokenCategory::Other => text.to_string(),
860 }
861}
862#[allow(dead_code)]
864#[allow(missing_docs)]
865pub fn render_colored(tokens: &[Token], source: &str) -> String {
866 let mut out = String::new();
867 for tok in tokens {
868 let span = &tok.span;
869 let text = source.get(span.start..span.end).unwrap_or("?");
870 out.push_str(&colorize_token(&tok.kind, text));
871 }
872 out
873}
#[allow(dead_code)]
#[allow(missing_docs)]
/// Re-renders `tokens` as a single line of text, deciding spacing from
/// `opts`. Spacing rules, applied before each token after the first:
/// no space before a closer when `no_space_before_close` is set; a space
/// before an infix operator when `space_before_op` is set; otherwise a
/// space, except before commas and directly after openers.
pub fn reformat(tokens: &[Token], source: &str, opts: &ReformatOptions) -> String {
    let mut out = String::new();
    for (i, tok) in tokens.iter().enumerate() {
        let span = &tok.span;
        // Fall back to "?" when the span does not slice `source` cleanly.
        let text = source.get(span.start..span.end).unwrap_or("?");
        let is_op = is_infix_op(&tok.kind);
        let is_comma = matches!(tok.kind, TokenKind::Comma);
        let is_close = matches!(
            tok.kind,
            TokenKind::RParen | TokenKind::RBrace | TokenKind::RBracket
        );
        if i > 0 {
            let prev = &tokens[i - 1];
            let prev_is_open = matches!(
                prev.kind,
                TokenKind::LParen | TokenKind::LBrace | TokenKind::LBracket
            );
            let need_space = if is_close && opts.no_space_before_close {
                false
            } else if is_op && opts.space_before_op {
                true
            } else {
                !is_comma && !prev_is_open
            };
            if need_space {
                out.push(' ');
            }
        }
        out.push_str(text);
        // NOTE(review): this branch is empty — `opts.space_after_comma` is
        // currently a no-op (the token after a comma already receives a
        // space via the `need_space` rule above). Confirm intent before
        // implementing; pushing a space here would double-space commas.
        if is_comma && opts.space_after_comma {}
    }
    out
}
910#[allow(dead_code)]
912#[allow(missing_docs)]
913pub fn starts_with_valid_expr_head(tokens: &[Token]) -> bool {
914 tokens
915 .first()
916 .map(|t| can_start_expr(&t.kind))
917 .unwrap_or(false)
918}
919#[allow(dead_code)]
921#[allow(missing_docs)]
922pub fn starts_with_valid_decl_head(tokens: &[Token]) -> bool {
923 tokens
924 .first()
925 .map(|t| can_start_decl(&t.kind))
926 .unwrap_or(false)
927}
928#[allow(dead_code)]
930#[allow(missing_docs)]
931pub fn compute_depths(tokens: &[Token]) -> Vec<i32> {
932 let mut depths = Vec::with_capacity(tokens.len());
933 let mut depth = 0i32;
934 for tok in tokens {
935 match &tok.kind {
936 TokenKind::LParen | TokenKind::LBracket | TokenKind::LBrace => {
937 depths.push(depth);
938 depth += 1;
939 }
940 TokenKind::RParen | TokenKind::RBracket | TokenKind::RBrace => {
941 depth -= 1;
942 depths.push(depth.max(0));
943 }
944 _ => depths.push(depth),
945 }
946 }
947 depths
948}
949#[allow(dead_code)]
951#[allow(missing_docs)]
952pub fn find_matching_close(tokens: &[Token], open_idx: usize) -> Option<usize> {
953 let open_kind = opening_bracket(&tokens.get(open_idx)?.kind)?;
954 let close_kind = closing_for(open_kind);
955 let mut depth = 0i32;
956 for (i, tok) in tokens[open_idx..].iter().enumerate() {
957 if opening_bracket(&tok.kind).is_some() {
958 depth += 1;
959 } else if tok.kind == close_kind {
960 depth -= 1;
961 if depth == 0 {
962 return Some(open_idx + i);
963 }
964 }
965 }
966 None
967}
968#[allow(dead_code)]
970#[allow(missing_docs)]
971pub fn extract_bracketed(tokens: &[Token], open_idx: usize) -> Option<&[Token]> {
972 let close_idx = find_matching_close(tokens, open_idx)?;
973 Some(&tokens[open_idx + 1..close_idx])
974}
975#[allow(dead_code)]
977#[allow(missing_docs)]
978pub fn structurally_equal(a: &[Token], b: &[Token]) -> bool {
979 if a.len() != b.len() {
980 return false;
981 }
982 a.iter()
983 .zip(b.iter())
984 .all(|(ta, tb)| std::mem::discriminant(&ta.kind) == std::mem::discriminant(&tb.kind))
985}
986#[allow(dead_code)]
988#[allow(missing_docs)]
989pub fn token_edit_distance(a: &[Token], b: &[Token]) -> usize {
990 let m = a.len();
991 let n = b.len();
992 let mut dp = vec![vec![0usize; n + 1]; m + 1];
993 for (i, row) in dp.iter_mut().enumerate().take(m + 1) {
994 row[0] = i;
995 }
996 for (j, cell) in dp[0].iter_mut().enumerate().take(n + 1) {
997 *cell = j;
998 }
999 for i in 1..=m {
1000 for j in 1..=n {
1001 if a[i - 1].kind == b[j - 1].kind {
1002 dp[i][j] = dp[i - 1][j - 1];
1003 } else {
1004 dp[i][j] = 1 + dp[i - 1][j].min(dp[i][j - 1]).min(dp[i - 1][j - 1]);
1005 }
1006 }
1007 }
1008 dp[m][n]
1009}
1010#[allow(dead_code)]
1012#[allow(missing_docs)]
1013pub fn token_lcs_length(a: &[Token], b: &[Token]) -> usize {
1014 let m = a.len();
1015 let n = b.len();
1016 let mut dp = vec![vec![0usize; n + 1]; m + 1];
1017 for i in 1..=m {
1018 for j in 1..=n {
1019 if a[i - 1].kind == b[j - 1].kind {
1020 dp[i][j] = dp[i - 1][j - 1] + 1;
1021 } else {
1022 dp[i][j] = dp[i - 1][j].max(dp[i][j - 1]);
1023 }
1024 }
1025 }
1026 dp[m][n]
1027}
1028#[allow(dead_code)]
1030#[allow(missing_docs)]
1031pub fn annotate_tokens(tokens: &[Token]) -> Vec<AnnotatedToken> {
1032 let depths = compute_depths(tokens);
1033 tokens
1034 .iter()
1035 .enumerate()
1036 .map(|(i, tok)| AnnotatedToken::new(tok.clone(), depths[i], i))
1037 .collect()
1038}
1039#[allow(dead_code)]
1041#[allow(missing_docs)]
1042pub fn token_hash(tokens: &[Token]) -> u64 {
1043 let mut hash: u64 = 14_695_981_039_346_656_037;
1044 for tok in tokens {
1045 let kind_str = format!("{:?}", tok.kind);
1046 for b in kind_str.bytes() {
1047 hash ^= b as u64;
1048 hash = hash.wrapping_mul(1_099_511_628_211);
1049 }
1050 }
1051 hash
1052}
1053#[allow(dead_code)]
1055#[allow(missing_docs)]
1056pub fn serialize_tokens(tokens: &[Token]) -> String {
1057 tokens
1058 .iter()
1059 .map(|t| format!("{:?}@{}:{}", t.kind, t.span.line, t.span.column))
1060 .collect::<Vec<_>>()
1061 .join(" ")
1062}
#[allow(dead_code)]
#[allow(missing_docs)]
/// One-line debug description of a token: its kind plus the line/column
/// recorded in its span.
pub fn describe_token(tok: &Token) -> String {
    format!(
        "Token({:?}, line={}, col={})",
        tok.kind, tok.span.line, tok.span.column
    )
}
1072#[allow(dead_code)]
1074#[allow(missing_docs)]
1075pub fn reconstruct_source(tokens: &[Token], source: &str) -> String {
1076 let mut out = String::new();
1077 let mut last_end = 0usize;
1078 for tok in tokens {
1079 let start = tok.span.start;
1080 let end = tok.span.end;
1081 if start >= last_end && end <= source.len() {
1082 out.push_str(&source[last_end..start]);
1083 out.push_str(&source[start..end]);
1084 last_end = end;
1085 }
1086 }
1087 out
1088}
#[allow(dead_code)]
#[allow(missing_docs)]
/// True when `b` starts exactly where `a` ends (no gap between spans).
pub fn are_adjacent(a: &Token, b: &Token) -> bool {
    a.span.end == b.span.start
}
1095#[allow(dead_code)]
1097#[allow(missing_docs)]
1098pub fn adjacent_pairs(tokens: &[Token]) -> Vec<(&Token, &Token)> {
1099 tokens
1100 .windows(2)
1101 .filter(|w| are_adjacent(&w[0], &w[1]))
1102 .map(|w| (&w[0], &w[1]))
1103 .collect()
1104}
1105#[allow(dead_code)]
1107#[allow(missing_docs)]
1108pub fn annotate_with_meta(tokens: &[Token], source: &str) -> Vec<TokenMeta> {
1109 let mut result = Vec::with_capacity(tokens.len());
1110 for (i, tok) in tokens.iter().enumerate() {
1111 let mut meta = TokenMeta::from_token(tok.clone(), source);
1112 if i > 0 {
1113 let prev = &tokens[i - 1];
1114 let prev_end = prev.span.end;
1115 let cur_start = tok.span.start;
1116 if prev_end < cur_start && cur_start <= source.len() {
1117 let gap = &source[prev_end..cur_start];
1118 meta.preceded_by_newline = gap.contains('\n');
1119 meta.preceded_by_space = gap.contains(' ') || gap.contains('\t');
1120 }
1121 }
1122 result.push(meta);
1123 }
1124 result
1125}
#[cfg(test)]
mod token_analysis_tests {
    //! Unit tests for the token-analysis helpers defined in this module:
    //! n-gram extraction, similarity metrics (edit distance, LCS, hashes),
    //! bracket depth/matching, `TokenPattern` matching, `TokenStream`
    //! navigation, span adjacency, and head/vocabulary predicates.
    use super::*;
    use crate::token::*;
    // Builds a token with a dummy one-byte span; span positions are
    // irrelevant for tests that only inspect `TokenKind`.
    fn make(kind: TokenKind) -> Token {
        Token::new(kind, Span::new(0, 1, 1, 1))
    }
    // Builds a token at an explicit byte range, for tests (adjacency) that
    // depend on real span positions.
    fn make_at(kind: TokenKind, start: usize, end: usize) -> Token {
        Token::new(kind, Span::new(start, end, 1, start + 1))
    }
    // --- n-grams and frequency statistics ---
    #[test]
    fn test_token_ngrams_size2() {
        let tokens = vec![
            make(TokenKind::Nat(1)),
            make(TokenKind::Plus),
            make(TokenKind::Nat(2)),
        ];
        let grams = token_ngrams(&tokens, 2);
        assert_eq!(grams.len(), 2);
    }
    #[test]
    fn test_token_ngrams_empty() {
        let tokens: Vec<Token> = vec![];
        let grams = token_ngrams(&tokens, 2);
        assert!(grams.is_empty());
    }
    #[test]
    fn test_longest_run_same_kind() {
        let tokens = vec![
            make(TokenKind::Plus),
            make(TokenKind::Plus),
            make(TokenKind::Nat(1)),
        ];
        assert_eq!(longest_run(&tokens), 2);
    }
    #[test]
    fn test_type_token_ratio_all_distinct() {
        let tokens = vec![
            make(TokenKind::Plus),
            make(TokenKind::Minus),
            make(TokenKind::Star),
        ];
        let r = type_token_ratio(&tokens);
        // All three kinds are distinct, so the ratio must be exactly 1.
        assert!((r - 1.0).abs() < 1e-10);
    }
    #[test]
    fn test_type_token_ratio_empty() {
        let r = type_token_ratio(&[]);
        assert_eq!(r, 0.0);
    }
    #[test]
    fn test_token_frequencies() {
        let tokens = vec![
            make(TokenKind::Plus),
            make(TokenKind::Plus),
            make(TokenKind::Nat(1)),
        ];
        let freq = token_frequencies(&tokens);
        assert!(!freq.is_empty());
        // Most frequent entry comes first; Plus occurs twice.
        assert_eq!(freq[0].1, 2);
    }
    // --- structural comparison: equality, edit distance, LCS ---
    #[test]
    fn test_structurally_equal_true() {
        // Literal payloads differ (1 vs 99) but the token *structure* is
        // the same, so the sequences compare equal.
        let a = vec![make(TokenKind::Plus), make(TokenKind::Nat(1))];
        let b = vec![make(TokenKind::Plus), make(TokenKind::Nat(99))];
        assert!(structurally_equal(&a, &b));
    }
    #[test]
    fn test_structurally_equal_false_length() {
        let a = vec![make(TokenKind::Plus)];
        let b = vec![make(TokenKind::Plus), make(TokenKind::Plus)];
        assert!(!structurally_equal(&a, &b));
    }
    #[test]
    fn test_token_edit_distance_equal() {
        let tokens = vec![make(TokenKind::Plus), make(TokenKind::Nat(1))];
        assert_eq!(token_edit_distance(&tokens, &tokens), 0);
    }
    #[test]
    fn test_token_edit_distance_insert() {
        let a: Vec<Token> = vec![];
        let b = vec![make(TokenKind::Plus)];
        assert_eq!(token_edit_distance(&a, &b), 1);
    }
    #[test]
    fn test_token_lcs_length() {
        let a = vec![
            make(TokenKind::Plus),
            make(TokenKind::Nat(1)),
            make(TokenKind::Minus),
        ];
        let b = vec![make(TokenKind::Plus), make(TokenKind::Minus)];
        assert_eq!(token_lcs_length(&a, &b), 2);
    }
    // --- bracket depth and matching ---
    #[test]
    fn test_compute_depths() {
        let tokens = vec![
            make(TokenKind::LParen),
            make(TokenKind::Nat(1)),
            make(TokenKind::RParen),
        ];
        let depths = compute_depths(&tokens);
        assert_eq!(depths.len(), 3);
        // The opener itself sits at depth 0; its contents at depth 1.
        assert_eq!(depths[0], 0);
        assert_eq!(depths[1], 1);
    }
    #[test]
    fn test_find_matching_close() {
        let tokens = vec![
            make(TokenKind::LParen),
            make(TokenKind::Nat(1)),
            make(TokenKind::RParen),
        ];
        assert_eq!(find_matching_close(&tokens, 0), Some(2));
    }
    #[test]
    fn test_find_matching_close_nested() {
        let tokens = vec![
            make(TokenKind::LParen),
            make(TokenKind::LParen),
            make(TokenKind::RParen),
            make(TokenKind::RParen),
        ];
        // Must skip the inner pair and match the outermost close.
        assert_eq!(find_matching_close(&tokens, 0), Some(3));
    }
    #[test]
    fn test_extract_bracketed() {
        let tokens = vec![
            make(TokenKind::LParen),
            make(TokenKind::Nat(42)),
            make(TokenKind::RParen),
        ];
        let inner = extract_bracketed(&tokens, 0);
        assert!(inner.is_some());
        // Only the single Nat between the brackets is returned.
        assert_eq!(inner.expect("test operation should succeed").len(), 1);
    }
    // --- hashing and annotation ---
    #[test]
    fn test_token_hash_deterministic() {
        let tokens = vec![make(TokenKind::Plus), make(TokenKind::Nat(1))];
        assert_eq!(token_hash(&tokens), token_hash(&tokens));
    }
    #[test]
    fn test_token_hash_different() {
        let a = vec![make(TokenKind::Plus)];
        let b = vec![make(TokenKind::Minus)];
        assert_ne!(token_hash(&a), token_hash(&b));
    }
    #[test]
    fn test_annotate_tokens() {
        let tokens = vec![
            make(TokenKind::LParen),
            make(TokenKind::Nat(1)),
            make(TokenKind::RParen),
        ];
        let ann = annotate_tokens(&tokens);
        assert_eq!(ann.len(), 3);
        assert_eq!(ann[0].depth, 0);
        assert_eq!(ann[1].depth, 1);
    }
    #[test]
    fn test_serialize_tokens() {
        let tokens = vec![make(TokenKind::Plus)];
        let s = serialize_tokens(&tokens);
        assert!(s.contains("Plus"));
    }
    #[test]
    fn test_describe_token() {
        let tok = make(TokenKind::Nat(7));
        let s = describe_token(&tok);
        assert!(s.contains("Nat"));
    }
    // --- TokenCategory and TokenMeta ---
    #[test]
    fn test_token_category_all() {
        let all = TokenCategory::all();
        assert_eq!(all.len(), 8);
    }
    #[test]
    fn test_token_category_ansi_color() {
        let col = TokenCategory::Keyword.ansi_color();
        assert!(!col.is_empty());
    }
    #[test]
    fn test_token_category_is_meaningful() {
        assert!(TokenCategory::Keyword.is_meaningful());
        assert!(!TokenCategory::Eof.is_meaningful());
    }
    #[test]
    fn test_token_meta_is_numeric() {
        let tok = make(TokenKind::Nat(5));
        let meta = TokenMeta::from_token(tok, "5");
        assert!(meta.is_numeric());
    }
    #[test]
    fn test_token_meta_is_string() {
        let tok = make(TokenKind::String("hi".to_string()));
        let meta = TokenMeta::from_token(tok, "\"hi\"");
        assert!(meta.is_string());
    }
    // --- TokenPattern matching ---
    #[test]
    fn test_token_pattern_exact() {
        let tok = make(TokenKind::Plus);
        let pat = TokenPattern::Exact(TokenKind::Plus);
        assert!(pat.matches_single(&tok));
    }
    #[test]
    fn test_token_pattern_category() {
        let tok = make(TokenKind::Plus);
        let pat = TokenPattern::Category(TokenCategory::Operator);
        assert!(pat.matches_single(&tok));
    }
    #[test]
    fn test_token_pattern_any() {
        let tok = make(TokenKind::Comma);
        let pat = TokenPattern::Any;
        assert!(pat.matches_single(&tok));
    }
    #[test]
    fn test_token_pattern_alternatives() {
        let tok = make(TokenKind::Minus);
        let pat = TokenPattern::Alternatives(vec![
            TokenPattern::Exact(TokenKind::Plus),
            TokenPattern::Exact(TokenKind::Minus),
        ]);
        assert!(pat.matches_single(&tok));
    }
    #[test]
    fn test_token_pattern_sequence_match() {
        let tokens = vec![make(TokenKind::Plus), make(TokenKind::Nat(1))];
        let pat = TokenPattern::Sequence(vec![
            TokenPattern::Exact(TokenKind::Plus),
            TokenPattern::Category(TokenCategory::Literal),
        ]);
        // A successful sequence match reports how many tokens it consumed.
        assert_eq!(pat.try_match(&tokens), Some(2));
    }
    #[test]
    fn test_token_pattern_sequence_no_match() {
        let tokens = vec![make(TokenKind::Minus), make(TokenKind::Nat(1))];
        let pat = TokenPattern::Sequence(vec![TokenPattern::Exact(TokenKind::Plus)]);
        assert!(pat.try_match(&tokens).is_none());
    }
    #[test]
    fn test_token_pattern_find_all() {
        let tokens = vec![
            make(TokenKind::Plus),
            make(TokenKind::Nat(1)),
            make(TokenKind::Plus),
            make(TokenKind::Nat(2)),
        ];
        let pat = TokenPattern::Exact(TokenKind::Plus);
        let matches = pat.find_all(&tokens);
        assert_eq!(matches.len(), 2);
    }
    // --- TokenStream navigation ---
    #[test]
    fn test_stream_inject() {
        let mut stream = TokenStream::new(vec![make(TokenKind::Nat(1)), make(TokenKind::Nat(2))]);
        stream.inject(vec![make(TokenKind::Plus)]);
        assert_eq!(stream.len(), 3);
        // Injected tokens are consumed before the original contents.
        let first = stream.next().expect("iterator should have next element");
        assert_eq!(first.kind, TokenKind::Plus);
    }
    #[test]
    fn test_stream_peek_all() {
        let stream = TokenStream::new(vec![make(TokenKind::Plus), make(TokenKind::Nat(1))]);
        assert_eq!(stream.peek_all().len(), 2);
    }
    #[test]
    fn test_stream_peek_slice() {
        let stream = TokenStream::new(vec![
            make(TokenKind::Nat(1)),
            make(TokenKind::Nat(2)),
            make(TokenKind::Nat(3)),
        ]);
        let sl = stream.peek_slice(2);
        assert_eq!(sl.len(), 2);
    }
    #[test]
    fn test_stream_matches_sequence() {
        let stream = TokenStream::new(vec![
            make(TokenKind::LParen),
            make(TokenKind::Nat(1)),
            make(TokenKind::RParen),
        ]);
        assert!(stream.matches_sequence(&[&TokenKind::LParen, &TokenKind::Nat(1)]));
        assert!(!stream.matches_sequence(&[&TokenKind::Nat(1)]));
    }
    #[test]
    fn test_stream_consume_n() {
        let mut stream = TokenStream::new(vec![
            make(TokenKind::Nat(1)),
            make(TokenKind::Nat(2)),
            make(TokenKind::Nat(3)),
        ]);
        let consumed = stream.consume_n(2);
        assert_eq!(consumed.len(), 2);
        assert_eq!(stream.remaining(), 1);
    }
    #[test]
    fn test_stream_skip_to() {
        let mut stream = TokenStream::new(vec![
            make(TokenKind::Nat(1)),
            make(TokenKind::Plus),
            make(TokenKind::Nat(2)),
        ]);
        stream.skip_to(&TokenKind::Plus);
        // skip_to stops *at* the target token, not past it.
        assert_eq!(stream.peek().map(|t| &t.kind), Some(&TokenKind::Plus));
    }
    // --- span adjacency ---
    #[test]
    fn test_are_adjacent() {
        let a = make_at(TokenKind::Nat(1), 0, 1);
        let b = make_at(TokenKind::Plus, 1, 2);
        assert!(are_adjacent(&a, &b));
    }
    #[test]
    fn test_are_not_adjacent() {
        let a = make_at(TokenKind::Nat(1), 0, 1);
        let b = make_at(TokenKind::Plus, 3, 4);
        assert!(!are_adjacent(&a, &b));
    }
    #[test]
    fn test_adjacent_pairs() {
        let tokens = vec![
            make_at(TokenKind::Nat(1), 0, 1),
            make_at(TokenKind::Plus, 1, 2),
            make_at(TokenKind::Nat(2), 4, 5),
        ];
        // Only the first pair touches (gap between Plus and the last Nat).
        let pairs = adjacent_pairs(&tokens);
        assert_eq!(pairs.len(), 1);
    }
    // --- head predicates and vocabulary ---
    #[test]
    fn test_starts_with_valid_expr_head() {
        let tokens = vec![make(TokenKind::Nat(1))];
        assert!(starts_with_valid_expr_head(&tokens));
    }
    #[test]
    fn test_starts_with_valid_decl_head() {
        let tokens = vec![make(TokenKind::Theorem)];
        assert!(starts_with_valid_decl_head(&tokens));
    }
    #[test]
    fn test_vocabulary() {
        let tokens = vec![
            make(TokenKind::Plus),
            make(TokenKind::Plus),
            make(TokenKind::Nat(1)),
        ];
        let v = vocabulary(&tokens);
        assert_eq!(v.len(), 2);
    }
}
#[allow(dead_code)]
/// Maps a `TokenKind` variant name (e.g. from `Debug` formatting) to a
/// short human-readable display string for diagnostics.
///
/// Accepts both the legacy short names ("LBrack", "RBrack", "Semi") and
/// the actual variant names declared by this crate ("LBracket",
/// "RBracket", "Semicolon") — the short forms previously fell through to
/// the generic fallback when the real variant names were passed in.
/// Unknown names return "token".
pub fn token_kind_display_name(kind_str: &str) -> &'static str {
    match kind_str {
        "Ident" => "identifier",
        "Nat" => "natural number literal",
        "String" => "string literal",
        "Eof" => "end of file",
        "LParen" => "(",
        "RParen" => ")",
        "LBrack" | "LBracket" => "[",
        "RBrack" | "RBracket" => "]",
        "LBrace" => "{",
        "RBrace" => "}",
        "Comma" => ",",
        "Colon" => ":",
        "ColonColon" => "::",
        "Semi" | "Semicolon" => ";",
        "Arrow" => "->",
        _ => "token",
    }
}
#[cfg(test)]
mod token_display_tests {
    //! Smoke tests for `token_kind_display_name`.
    use super::*;
    use crate::token::*;
    #[test]
    fn test_token_kind_display_name() {
        // Table-driven: (variant name in, expected display text out).
        let cases = [
            ("Ident", "identifier"),
            ("Arrow", "->"),
            ("unknown", "token"),
        ];
        for (input, expected) in cases {
            assert_eq!(token_kind_display_name(input), expected);
        }
    }
}
#[allow(dead_code)]
/// Returns `true` when `kind_str` names a keyword `TokenKind` variant.
///
/// "Def" is kept for backward compatibility with existing callers;
/// "Definition" is the variant name actually declared by `TokenKind` in
/// this crate, and was previously (incorrectly) rejected.
pub fn is_keyword_token(kind_str: &str) -> bool {
    matches!(
        kind_str,
        "Def"
            | "Definition"
            | "Theorem"
            | "Lemma"
            | "Fun"
            | "Let"
            | "Have"
            | "Show"
            | "Match"
            | "Do"
            | "If"
            | "Then"
            | "Else"
            | "Forall"
            | "Return"
            | "In"
            | "End"
    )
}
#[cfg(test)]
mod token_keyword_tests {
    //! Smoke tests for `is_keyword_token`.
    use super::*;
    use crate::token::*;
    #[test]
    fn test_is_keyword_token() {
        // A known keyword variant name is accepted; a non-keyword
        // variant name is rejected.
        let keyword = "Def";
        let non_keyword = "Ident";
        assert!(is_keyword_token(keyword));
        assert!(!is_keyword_token(non_keyword));
    }
}