1use std::fmt;
9
10use bitflags::bitflags;
11
12use crate::str::{Quote, TripleQuotes};
13use crate::str_prefix::{
14 AnyStringPrefix, ByteStringPrefix, FStringPrefix, StringLiteralPrefix, TStringPrefix,
15};
16use crate::{AnyStringFlags, BoolOp, Operator, StringFlags, UnaryOp};
17use ruff_text_size::{Ranged, TextRange};
18
19mod parentheses;
20mod tokens;
21
22pub use parentheses::{parentheses_iterator, parenthesized_range};
23pub use tokens::{TokenAt, TokenIterWithContext, Tokens};
24
25#[derive(Clone, Copy, PartialEq, Eq)]
26#[cfg_attr(feature = "get-size", derive(get_size2::GetSize))]
27pub struct Token {
28 kind: TokenKind,
30 range: TextRange,
32 flags: TokenFlags,
34}
35
36impl Token {
37 pub fn new(kind: TokenKind, range: TextRange, flags: TokenFlags) -> Token {
38 Self { kind, range, flags }
39 }
40
41 #[inline]
43 pub const fn kind(&self) -> TokenKind {
44 self.kind
45 }
46
47 #[inline]
49 pub const fn as_tuple(&self) -> (TokenKind, TextRange) {
50 (self.kind, self.range)
51 }
52
53 pub fn is_triple_quoted_string(self) -> bool {
59 self.unwrap_string_flags().is_triple_quoted()
60 }
61
62 pub fn string_quote_style(self) -> Quote {
68 self.unwrap_string_flags().quote_style()
69 }
70
71 pub fn unwrap_string_flags(self) -> AnyStringFlags {
77 self.string_flags()
78 .unwrap_or_else(|| panic!("token to be a string"))
79 }
80
81 pub fn string_flags(self) -> Option<AnyStringFlags> {
83 if self.is_any_string() {
84 Some(self.flags.as_any_string_flags())
85 } else {
86 None
87 }
88 }
89
90 const fn is_any_string(self) -> bool {
93 matches!(
94 self.kind,
95 TokenKind::String
96 | TokenKind::FStringStart
97 | TokenKind::FStringMiddle
98 | TokenKind::FStringEnd
99 | TokenKind::TStringStart
100 | TokenKind::TStringMiddle
101 | TokenKind::TStringEnd
102 )
103 }
104}
105
106impl Ranged for Token {
107 fn range(&self) -> TextRange {
108 self.range
109 }
110}
111
112impl fmt::Debug for Token {
113 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
114 write!(f, "{:?} {:?}", self.kind, self.range)?;
115 if !self.flags.is_empty() {
116 f.write_str(" (flags = ")?;
117 let mut first = true;
118 for (name, _) in self.flags.iter_names() {
119 if first {
120 first = false;
121 } else {
122 f.write_str(" | ")?;
123 }
124 f.write_str(name)?;
125 }
126 f.write_str(")")?;
127 }
128 Ok(())
129 }
130}
131
/// The kind of a token.
///
/// The declaration order is significant: the type derives `PartialOrd`/`Ord`, and
/// [`TokenKind::is_keyword`], [`TokenKind::is_soft_keyword`] and
/// [`TokenKind::is_non_soft_keyword`] test membership with range comparisons between
/// `And`, `Yield`, `Case` and `Type`. Keep the keyword variants contiguous and update
/// those methods if the boundary variants ever change.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "get-size", derive(get_size2::GetSize))]
pub enum TokenKind {
    // Names, literals, string parts, comments, and layout tokens.
    Name,
    Int,
    Float,
    Complex,
    String,
    FStringStart,
    FStringMiddle,
    FStringEnd,
    TStringStart,
    TStringMiddle,
    TStringEnd,
    IpyEscapeCommand,
    Comment,
    Newline,
    NonLogicalNewline,
    Indent,
    Dedent,
    EndOfFile,

    // Punctuation and operators.
    /// `?`
    Question,
    /// `!`
    Exclamation,
    /// `(`
    Lpar,
    /// `)`
    Rpar,
    /// `[`
    Lsqb,
    /// `]`
    Rsqb,
    /// `:`
    Colon,
    /// `,`
    Comma,
    /// `;`
    Semi,
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Star,
    /// `/`
    Slash,
    /// `|`
    Vbar,
    /// `&`
    Amper,
    /// `<`
    Less,
    /// `>`
    Greater,
    /// `=`
    Equal,
    /// `.`
    Dot,
    /// `%`
    Percent,
    /// `{`
    Lbrace,
    /// `}`
    Rbrace,
    /// `==`
    EqEqual,
    /// `!=`
    NotEqual,
    /// `<=`
    LessEqual,
    /// `>=`
    GreaterEqual,
    /// `~`
    Tilde,
    /// `^`
    CircumFlex,
    /// `<<`
    LeftShift,
    /// `>>`
    RightShift,
    /// `**`
    DoubleStar,
    /// `**=`
    DoubleStarEqual,
    /// `+=`
    PlusEqual,
    /// `-=`
    MinusEqual,
    /// `*=`
    StarEqual,
    /// `/=`
    SlashEqual,
    /// `%=`
    PercentEqual,
    /// `&=`
    AmperEqual,
    /// `|=`
    VbarEqual,
    /// `^=`
    CircumflexEqual,
    /// `<<=`
    LeftShiftEqual,
    /// `>>=`
    RightShiftEqual,
    /// `//`
    DoubleSlash,
    /// `//=`
    DoubleSlashEqual,
    /// `:=`
    ColonEqual,
    /// `@`
    At,
    /// `@=`
    AtEqual,
    /// `->`
    Rarrow,
    /// `...`
    Ellipsis,

    // Keywords: `And` through `Yield` is the range tested by `is_non_soft_keyword`.
    And,
    As,
    Assert,
    Async,
    Await,
    Break,
    Class,
    Continue,
    Def,
    Del,
    Elif,
    Else,
    Except,
    False,
    Finally,
    For,
    From,
    Global,
    If,
    Import,
    In,
    Is,
    Lambda,
    None,
    Nonlocal,
    Not,
    Or,
    Pass,
    Raise,
    Return,
    True,
    Try,
    While,
    With,
    Yield,

    // Soft keywords: `Case` through `Type` is the range tested by `is_soft_keyword`.
    Case,
    Match,
    Type,

    Unknown,
}
322
323impl TokenKind {
324 #[inline]
326 pub const fn is_eof(self) -> bool {
327 matches!(self, TokenKind::EndOfFile)
328 }
329
330 #[inline]
332 pub const fn is_any_newline(self) -> bool {
333 matches!(self, TokenKind::Newline | TokenKind::NonLogicalNewline)
334 }
335
336 #[inline]
343 pub fn is_keyword(self) -> bool {
344 TokenKind::And <= self && self <= TokenKind::Type
345 }
346
347 #[inline]
354 pub fn is_soft_keyword(self) -> bool {
355 TokenKind::Case <= self && self <= TokenKind::Type
356 }
357
358 #[inline]
365 pub fn is_non_soft_keyword(self) -> bool {
366 TokenKind::And <= self && self <= TokenKind::Yield
367 }
368
369 #[inline]
370 pub const fn is_operator(self) -> bool {
371 matches!(
372 self,
373 TokenKind::Lpar
374 | TokenKind::Rpar
375 | TokenKind::Lsqb
376 | TokenKind::Rsqb
377 | TokenKind::Comma
378 | TokenKind::Semi
379 | TokenKind::Plus
380 | TokenKind::Minus
381 | TokenKind::Star
382 | TokenKind::Slash
383 | TokenKind::Vbar
384 | TokenKind::Amper
385 | TokenKind::Less
386 | TokenKind::Greater
387 | TokenKind::Equal
388 | TokenKind::Dot
389 | TokenKind::Percent
390 | TokenKind::Lbrace
391 | TokenKind::Rbrace
392 | TokenKind::EqEqual
393 | TokenKind::NotEqual
394 | TokenKind::LessEqual
395 | TokenKind::GreaterEqual
396 | TokenKind::Tilde
397 | TokenKind::CircumFlex
398 | TokenKind::LeftShift
399 | TokenKind::RightShift
400 | TokenKind::DoubleStar
401 | TokenKind::PlusEqual
402 | TokenKind::MinusEqual
403 | TokenKind::StarEqual
404 | TokenKind::SlashEqual
405 | TokenKind::PercentEqual
406 | TokenKind::AmperEqual
407 | TokenKind::VbarEqual
408 | TokenKind::CircumflexEqual
409 | TokenKind::LeftShiftEqual
410 | TokenKind::RightShiftEqual
411 | TokenKind::DoubleStarEqual
412 | TokenKind::DoubleSlash
413 | TokenKind::DoubleSlashEqual
414 | TokenKind::At
415 | TokenKind::AtEqual
416 | TokenKind::Rarrow
417 | TokenKind::Ellipsis
418 | TokenKind::ColonEqual
419 | TokenKind::Colon
420 | TokenKind::And
421 | TokenKind::Or
422 | TokenKind::Not
423 | TokenKind::In
424 | TokenKind::Is
425 )
426 }
427
428 #[inline]
430 pub const fn is_singleton(self) -> bool {
431 matches!(self, TokenKind::False | TokenKind::True | TokenKind::None)
432 }
433
434 #[inline]
436 pub const fn is_trivia(&self) -> bool {
437 matches!(self, TokenKind::Comment | TokenKind::NonLogicalNewline)
438 }
439
440 #[inline]
442 pub const fn is_comment(&self) -> bool {
443 matches!(self, TokenKind::Comment)
444 }
445
446 #[inline]
447 pub const fn is_arithmetic(self) -> bool {
448 matches!(
449 self,
450 TokenKind::DoubleStar
451 | TokenKind::Star
452 | TokenKind::Plus
453 | TokenKind::Minus
454 | TokenKind::Slash
455 | TokenKind::DoubleSlash
456 | TokenKind::At
457 )
458 }
459
460 #[inline]
461 pub const fn is_bitwise_or_shift(self) -> bool {
462 matches!(
463 self,
464 TokenKind::LeftShift
465 | TokenKind::LeftShiftEqual
466 | TokenKind::RightShift
467 | TokenKind::RightShiftEqual
468 | TokenKind::Amper
469 | TokenKind::AmperEqual
470 | TokenKind::Vbar
471 | TokenKind::VbarEqual
472 | TokenKind::CircumFlex
473 | TokenKind::CircumflexEqual
474 | TokenKind::Tilde
475 )
476 }
477
478 #[inline]
480 pub const fn is_unary_arithmetic_operator(self) -> bool {
481 matches!(self, TokenKind::Plus | TokenKind::Minus)
482 }
483
484 #[inline]
485 pub const fn is_interpolated_string_end(self) -> bool {
486 matches!(self, TokenKind::FStringEnd | TokenKind::TStringEnd)
487 }
488
489 #[inline]
496 pub const fn as_unary_arithmetic_operator(self) -> Option<UnaryOp> {
497 Some(match self {
498 TokenKind::Plus => UnaryOp::UAdd,
499 TokenKind::Minus => UnaryOp::USub,
500 _ => return None,
501 })
502 }
503
504 #[inline]
511 pub const fn as_unary_operator(self) -> Option<UnaryOp> {
512 Some(match self {
513 TokenKind::Plus => UnaryOp::UAdd,
514 TokenKind::Minus => UnaryOp::USub,
515 TokenKind::Tilde => UnaryOp::Invert,
516 TokenKind::Not => UnaryOp::Not,
517 _ => return None,
518 })
519 }
520
521 #[inline]
524 pub const fn as_bool_operator(self) -> Option<BoolOp> {
525 Some(match self {
526 TokenKind::And => BoolOp::And,
527 TokenKind::Or => BoolOp::Or,
528 _ => return None,
529 })
530 }
531
532 pub const fn as_binary_operator(self) -> Option<Operator> {
539 Some(match self {
540 TokenKind::Plus => Operator::Add,
541 TokenKind::Minus => Operator::Sub,
542 TokenKind::Star => Operator::Mult,
543 TokenKind::At => Operator::MatMult,
544 TokenKind::DoubleStar => Operator::Pow,
545 TokenKind::Slash => Operator::Div,
546 TokenKind::DoubleSlash => Operator::FloorDiv,
547 TokenKind::Percent => Operator::Mod,
548 TokenKind::Amper => Operator::BitAnd,
549 TokenKind::Vbar => Operator::BitOr,
550 TokenKind::CircumFlex => Operator::BitXor,
551 TokenKind::LeftShift => Operator::LShift,
552 TokenKind::RightShift => Operator::RShift,
553 _ => return None,
554 })
555 }
556
557 #[inline]
560 pub const fn as_augmented_assign_operator(self) -> Option<Operator> {
561 Some(match self {
562 TokenKind::PlusEqual => Operator::Add,
563 TokenKind::MinusEqual => Operator::Sub,
564 TokenKind::StarEqual => Operator::Mult,
565 TokenKind::AtEqual => Operator::MatMult,
566 TokenKind::DoubleStarEqual => Operator::Pow,
567 TokenKind::SlashEqual => Operator::Div,
568 TokenKind::DoubleSlashEqual => Operator::FloorDiv,
569 TokenKind::PercentEqual => Operator::Mod,
570 TokenKind::AmperEqual => Operator::BitAnd,
571 TokenKind::VbarEqual => Operator::BitOr,
572 TokenKind::CircumflexEqual => Operator::BitXor,
573 TokenKind::LeftShiftEqual => Operator::LShift,
574 TokenKind::RightShiftEqual => Operator::RShift,
575 _ => return None,
576 })
577 }
578}
579
580impl From<BoolOp> for TokenKind {
581 #[inline]
582 fn from(op: BoolOp) -> Self {
583 match op {
584 BoolOp::And => TokenKind::And,
585 BoolOp::Or => TokenKind::Or,
586 }
587 }
588}
589
590impl From<UnaryOp> for TokenKind {
591 #[inline]
592 fn from(op: UnaryOp) -> Self {
593 match op {
594 UnaryOp::Invert => TokenKind::Tilde,
595 UnaryOp::Not => TokenKind::Not,
596 UnaryOp::UAdd => TokenKind::Plus,
597 UnaryOp::USub => TokenKind::Minus,
598 }
599 }
600}
601
602impl From<Operator> for TokenKind {
603 #[inline]
604 fn from(op: Operator) -> Self {
605 match op {
606 Operator::Add => TokenKind::Plus,
607 Operator::Sub => TokenKind::Minus,
608 Operator::Mult => TokenKind::Star,
609 Operator::MatMult => TokenKind::At,
610 Operator::Div => TokenKind::Slash,
611 Operator::Mod => TokenKind::Percent,
612 Operator::Pow => TokenKind::DoubleStar,
613 Operator::LShift => TokenKind::LeftShift,
614 Operator::RShift => TokenKind::RightShift,
615 Operator::BitOr => TokenKind::Vbar,
616 Operator::BitXor => TokenKind::CircumFlex,
617 Operator::BitAnd => TokenKind::Amper,
618 Operator::FloorDiv => TokenKind::DoubleSlash,
619 }
620 }
621}
622
623impl fmt::Display for TokenKind {
624 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
625 let value = match self {
626 TokenKind::Unknown => "Unknown",
627 TokenKind::Newline => "newline",
628 TokenKind::NonLogicalNewline => "NonLogicalNewline",
629 TokenKind::Indent => "indent",
630 TokenKind::Dedent => "dedent",
631 TokenKind::EndOfFile => "end of file",
632 TokenKind::Name => "name",
633 TokenKind::Int => "int",
634 TokenKind::Float => "float",
635 TokenKind::Complex => "complex",
636 TokenKind::String => "string",
637 TokenKind::FStringStart => "FStringStart",
638 TokenKind::FStringMiddle => "FStringMiddle",
639 TokenKind::FStringEnd => "FStringEnd",
640 TokenKind::TStringStart => "TStringStart",
641 TokenKind::TStringMiddle => "TStringMiddle",
642 TokenKind::TStringEnd => "TStringEnd",
643 TokenKind::IpyEscapeCommand => "IPython escape command",
644 TokenKind::Comment => "comment",
645 TokenKind::Question => "`?`",
646 TokenKind::Exclamation => "`!`",
647 TokenKind::Lpar => "`(`",
648 TokenKind::Rpar => "`)`",
649 TokenKind::Lsqb => "`[`",
650 TokenKind::Rsqb => "`]`",
651 TokenKind::Lbrace => "`{`",
652 TokenKind::Rbrace => "`}`",
653 TokenKind::Equal => "`=`",
654 TokenKind::ColonEqual => "`:=`",
655 TokenKind::Dot => "`.`",
656 TokenKind::Colon => "`:`",
657 TokenKind::Semi => "`;`",
658 TokenKind::Comma => "`,`",
659 TokenKind::Rarrow => "`->`",
660 TokenKind::Plus => "`+`",
661 TokenKind::Minus => "`-`",
662 TokenKind::Star => "`*`",
663 TokenKind::DoubleStar => "`**`",
664 TokenKind::Slash => "`/`",
665 TokenKind::DoubleSlash => "`//`",
666 TokenKind::Percent => "`%`",
667 TokenKind::Vbar => "`|`",
668 TokenKind::Amper => "`&`",
669 TokenKind::CircumFlex => "`^`",
670 TokenKind::LeftShift => "`<<`",
671 TokenKind::RightShift => "`>>`",
672 TokenKind::Tilde => "`~`",
673 TokenKind::At => "`@`",
674 TokenKind::Less => "`<`",
675 TokenKind::Greater => "`>`",
676 TokenKind::EqEqual => "`==`",
677 TokenKind::NotEqual => "`!=`",
678 TokenKind::LessEqual => "`<=`",
679 TokenKind::GreaterEqual => "`>=`",
680 TokenKind::PlusEqual => "`+=`",
681 TokenKind::MinusEqual => "`-=`",
682 TokenKind::StarEqual => "`*=`",
683 TokenKind::DoubleStarEqual => "`**=`",
684 TokenKind::SlashEqual => "`/=`",
685 TokenKind::DoubleSlashEqual => "`//=`",
686 TokenKind::PercentEqual => "`%=`",
687 TokenKind::VbarEqual => "`|=`",
688 TokenKind::AmperEqual => "`&=`",
689 TokenKind::CircumflexEqual => "`^=`",
690 TokenKind::LeftShiftEqual => "`<<=`",
691 TokenKind::RightShiftEqual => "`>>=`",
692 TokenKind::AtEqual => "`@=`",
693 TokenKind::Ellipsis => "`...`",
694 TokenKind::False => "`False`",
695 TokenKind::None => "`None`",
696 TokenKind::True => "`True`",
697 TokenKind::And => "`and`",
698 TokenKind::As => "`as`",
699 TokenKind::Assert => "`assert`",
700 TokenKind::Async => "`async`",
701 TokenKind::Await => "`await`",
702 TokenKind::Break => "`break`",
703 TokenKind::Class => "`class`",
704 TokenKind::Continue => "`continue`",
705 TokenKind::Def => "`def`",
706 TokenKind::Del => "`del`",
707 TokenKind::Elif => "`elif`",
708 TokenKind::Else => "`else`",
709 TokenKind::Except => "`except`",
710 TokenKind::Finally => "`finally`",
711 TokenKind::For => "`for`",
712 TokenKind::From => "`from`",
713 TokenKind::Global => "`global`",
714 TokenKind::If => "`if`",
715 TokenKind::Import => "`import`",
716 TokenKind::In => "`in`",
717 TokenKind::Is => "`is`",
718 TokenKind::Lambda => "`lambda`",
719 TokenKind::Nonlocal => "`nonlocal`",
720 TokenKind::Not => "`not`",
721 TokenKind::Or => "`or`",
722 TokenKind::Pass => "`pass`",
723 TokenKind::Raise => "`raise`",
724 TokenKind::Return => "`return`",
725 TokenKind::Try => "`try`",
726 TokenKind::While => "`while`",
727 TokenKind::Match => "`match`",
728 TokenKind::Type => "`type`",
729 TokenKind::Case => "`case`",
730 TokenKind::With => "`with`",
731 TokenKind::Yield => "`yield`",
732 };
733 f.write_str(value)
734 }
735}
736
bitflags! {
    /// Per-token flag bits, stored in a `u16`.
    ///
    /// Apart from `UNCLOSED_STRING`, the flags describe the quoting and prefix of a string
    /// token; they are interpreted by the `StringFlags` implementation for this type.
    #[derive(Clone, Copy, Debug, PartialEq, Eq)]
    pub struct TokenFlags: u16 {
        /// Set when the string uses double quotes; when unset, `quote_style` reports
        /// single quotes.
        const DOUBLE_QUOTES = 1 << 0;
        /// Set when the string is triple-quoted.
        const TRIPLE_QUOTED_STRING = 1 << 1;

        /// Set for a string with the unicode prefix.
        const UNICODE_STRING = 1 << 2;
        /// Set for a byte string.
        const BYTE_STRING = 1 << 3;
        /// Set for an f-string.
        const F_STRING = 1 << 4;
        /// Set for a t-string.
        const T_STRING = 1 << 5;
        /// Set for a raw string whose `r` prefix character is lowercase.
        const RAW_STRING_LOWERCASE = 1 << 6;
        /// Set for a raw string whose `R` prefix character is uppercase.
        const RAW_STRING_UPPERCASE = 1 << 7;
        /// Set when the string is reported as unclosed by `is_unclosed`.
        const UNCLOSED_STRING = 1 << 8;

        /// Union of both raw-string flags; intersecting with it answers "is this a raw
        /// string, regardless of the prefix case?".
        const RAW_STRING = Self::RAW_STRING_LOWERCASE.bits() | Self::RAW_STRING_UPPERCASE.bits();

    }
}
766
// Only compiled with the `get-size` feature. The impl body is empty, so every method
// comes from the `GetSize` trait's defaults.
#[cfg(feature = "get-size")]
impl get_size2::GetSize for TokenFlags {}
769
770impl StringFlags for TokenFlags {
771 fn quote_style(self) -> Quote {
772 if self.intersects(TokenFlags::DOUBLE_QUOTES) {
773 Quote::Double
774 } else {
775 Quote::Single
776 }
777 }
778
779 fn triple_quotes(self) -> TripleQuotes {
780 if self.intersects(TokenFlags::TRIPLE_QUOTED_STRING) {
781 TripleQuotes::Yes
782 } else {
783 TripleQuotes::No
784 }
785 }
786
787 fn prefix(self) -> AnyStringPrefix {
788 if self.intersects(TokenFlags::F_STRING) {
789 if self.intersects(TokenFlags::RAW_STRING_LOWERCASE) {
790 AnyStringPrefix::Format(FStringPrefix::Raw { uppercase_r: false })
791 } else if self.intersects(TokenFlags::RAW_STRING_UPPERCASE) {
792 AnyStringPrefix::Format(FStringPrefix::Raw { uppercase_r: true })
793 } else {
794 AnyStringPrefix::Format(FStringPrefix::Regular)
795 }
796 } else if self.intersects(TokenFlags::T_STRING) {
797 if self.intersects(TokenFlags::RAW_STRING_LOWERCASE) {
798 AnyStringPrefix::Template(TStringPrefix::Raw { uppercase_r: false })
799 } else if self.intersects(TokenFlags::RAW_STRING_UPPERCASE) {
800 AnyStringPrefix::Template(TStringPrefix::Raw { uppercase_r: true })
801 } else {
802 AnyStringPrefix::Template(TStringPrefix::Regular)
803 }
804 } else if self.intersects(TokenFlags::BYTE_STRING) {
805 if self.intersects(TokenFlags::RAW_STRING_LOWERCASE) {
806 AnyStringPrefix::Bytes(ByteStringPrefix::Raw { uppercase_r: false })
807 } else if self.intersects(TokenFlags::RAW_STRING_UPPERCASE) {
808 AnyStringPrefix::Bytes(ByteStringPrefix::Raw { uppercase_r: true })
809 } else {
810 AnyStringPrefix::Bytes(ByteStringPrefix::Regular)
811 }
812 } else if self.intersects(TokenFlags::RAW_STRING_LOWERCASE) {
813 AnyStringPrefix::Regular(StringLiteralPrefix::Raw { uppercase: false })
814 } else if self.intersects(TokenFlags::RAW_STRING_UPPERCASE) {
815 AnyStringPrefix::Regular(StringLiteralPrefix::Raw { uppercase: true })
816 } else if self.intersects(TokenFlags::UNICODE_STRING) {
817 AnyStringPrefix::Regular(StringLiteralPrefix::Unicode)
818 } else {
819 AnyStringPrefix::Regular(StringLiteralPrefix::Empty)
820 }
821 }
822
823 fn is_unclosed(self) -> bool {
824 self.intersects(TokenFlags::UNCLOSED_STRING)
825 }
826}
827
828impl TokenFlags {
829 pub const fn is_f_string(self) -> bool {
831 self.intersects(TokenFlags::F_STRING)
832 }
833
834 pub const fn is_t_string(self) -> bool {
836 self.intersects(TokenFlags::T_STRING)
837 }
838
839 pub const fn is_interpolated_string(self) -> bool {
841 self.intersects(TokenFlags::T_STRING.union(TokenFlags::F_STRING))
842 }
843
844 pub fn is_triple_quoted_interpolated_string(self) -> bool {
846 self.intersects(TokenFlags::TRIPLE_QUOTED_STRING) && self.is_interpolated_string()
847 }
848
849 pub const fn is_raw_string(self) -> bool {
851 self.intersects(TokenFlags::RAW_STRING)
852 }
853}