1use std::fmt;
9
10use bitflags::bitflags;
11
12use crate::str::{Quote, TripleQuotes};
13use crate::str_prefix::{
14 AnyStringPrefix, ByteStringPrefix, FStringPrefix, StringLiteralPrefix, TStringPrefix,
15};
16use crate::{AnyStringFlags, BoolOp, Operator, StringFlags, UnaryOp};
17use ruff_text_size::{Ranged, TextRange};
18
19mod parentheses;
20mod tokens;
21
22pub use parentheses::{parentheses_iterator, parenthesized_range};
23pub use tokens::{TokenAt, TokenIterWithContext, Tokens};
24
/// A single token: its kind, the source range it covers, and a set of flags.
///
/// The type is `Copy`, so the inherent methods that take `self` by value do
/// not consume the caller's token.
#[derive(Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "get-size", derive(get_size2::GetSize))]
pub struct Token {
    /// The kind of the token.
    kind: TokenKind,
    /// The range of the token (returned by the `Ranged` implementation).
    range: TextRange,
    /// The set of flags describing this token; read as string flags when the
    /// token is one of the string kinds.
    flags: TokenFlags,
}
35
36impl Token {
37 pub fn new(kind: TokenKind, range: TextRange, flags: TokenFlags) -> Token {
38 Self { kind, range, flags }
39 }
40
41 #[inline]
43 pub const fn kind(&self) -> TokenKind {
44 self.kind
45 }
46
47 #[inline]
49 pub const fn as_tuple(&self) -> (TokenKind, TextRange) {
50 (self.kind, self.range)
51 }
52
53 pub fn is_triple_quoted_string(self) -> bool {
59 self.unwrap_string_flags().is_triple_quoted()
60 }
61
62 pub fn string_quote_style(self) -> Quote {
68 self.unwrap_string_flags().quote_style()
69 }
70
71 pub fn unwrap_string_flags(self) -> AnyStringFlags {
77 self.string_flags()
78 .unwrap_or_else(|| panic!("token to be a string"))
79 }
80
81 pub fn string_flags(self) -> Option<AnyStringFlags> {
83 if self.is_any_string() {
84 Some(self.flags.as_any_string_flags())
85 } else {
86 None
87 }
88 }
89
90 const fn is_any_string(self) -> bool {
93 matches!(
94 self.kind,
95 TokenKind::String
96 | TokenKind::FStringStart
97 | TokenKind::FStringMiddle
98 | TokenKind::FStringEnd
99 | TokenKind::TStringStart
100 | TokenKind::TStringMiddle
101 | TokenKind::TStringEnd
102 )
103 }
104}
105
106impl Ranged for Token {
107 fn range(&self) -> TextRange {
108 self.range
109 }
110}
111
112impl fmt::Debug for Token {
113 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
114 write!(f, "{:?} {:?}", self.kind, self.range)?;
115 if !self.flags.is_empty() {
116 f.write_str(" (flags = ")?;
117 let mut first = true;
118 for (name, _) in self.flags.iter_names() {
119 if first {
120 first = false;
121 } else {
122 f.write_str(" | ")?;
123 }
124 f.write_str(name)?;
125 }
126 f.write_str(")")?;
127 }
128 Ok(())
129 }
130}
131
/// The kind of a [`Token`].
///
/// The declaration order is significant: `PartialOrd`/`Ord` are derived, and
/// [`TokenKind::is_keyword`], [`TokenKind::is_soft_keyword`] and
/// [`TokenKind::is_non_soft_keyword`] are implemented as range comparisons
/// over the variants. Keep the keyword groups contiguous when adding variants.
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug, PartialOrd, Ord)]
#[cfg_attr(feature = "get-size", derive(get_size2::GetSize))]
pub enum TokenKind {
    /// Token kind for a name.
    Name,
    /// Token kind for an integer.
    Int,
    /// Token kind for a floating point number.
    Float,
    /// Token kind for a complex number.
    Complex,
    /// Token kind for a string.
    String,
    /// Token kind for the start of an f-string.
    FStringStart,
    /// Token kind for the middle part of an f-string.
    FStringMiddle,
    /// Token kind for the end of an f-string.
    FStringEnd,
    /// Token kind for the start of a t-string.
    TStringStart,
    /// Token kind for the middle part of a t-string.
    TStringMiddle,
    /// Token kind for the end of a t-string.
    TStringEnd,
    /// Token kind for an IPython escape command.
    IpyEscapeCommand,
    /// Token kind for a comment (counted as trivia by `is_trivia`).
    Comment,
    /// Token kind for a newline.
    Newline,
    /// Token kind for a non-logical newline (counted as trivia by `is_trivia`).
    NonLogicalNewline,
    /// Token kind for an indent.
    Indent,
    /// Token kind for a dedent.
    Dedent,
    /// Token kind for the end of file.
    EndOfFile,
    /// `?`
    Question,
    /// `!`
    Exclamation,
    /// `(`
    Lpar,
    /// `)`
    Rpar,
    /// `[`
    Lsqb,
    /// `]`
    Rsqb,
    /// `:`
    Colon,
    /// `,`
    Comma,
    /// `;`
    Semi,
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Star,
    /// `/`
    Slash,
    /// `|`
    Vbar,
    /// `&`
    Amper,
    /// `<`
    Less,
    /// `>`
    Greater,
    /// `=`
    Equal,
    /// `.`
    Dot,
    /// `%`
    Percent,
    /// `{`
    Lbrace,
    /// `}`
    Rbrace,
    /// `==`
    EqEqual,
    /// `!=`
    NotEqual,
    /// `<=`
    LessEqual,
    /// `>=`
    GreaterEqual,
    /// `~`
    Tilde,
    /// `^`
    CircumFlex,
    /// `<<`
    LeftShift,
    /// `>>`
    RightShift,
    /// `**`
    DoubleStar,
    /// `**=`
    DoubleStarEqual,
    /// `+=`
    PlusEqual,
    /// `-=`
    MinusEqual,
    /// `*=`
    StarEqual,
    /// `/=`
    SlashEqual,
    /// `%=`
    PercentEqual,
    /// `&=`
    AmperEqual,
    /// `|=`
    VbarEqual,
    /// `^=`
    CircumflexEqual,
    /// `<<=`
    LeftShiftEqual,
    /// `>>=`
    RightShiftEqual,
    /// `//`
    DoubleSlash,
    /// `//=`
    DoubleSlashEqual,
    /// `:=`
    ColonEqual,
    /// `@`
    At,
    /// `@=`
    AtEqual,
    /// `->`
    Rarrow,
    /// `...`
    Ellipsis,

    // Keywords. `is_non_soft_keyword` is the range check `And..=Yield`, and
    // `is_keyword` is `And..=Type`, so every keyword must stay inside these
    // bounds; update those methods if the boundary variants change.
    And,
    As,
    Assert,
    Async,
    Await,
    Break,
    Class,
    Continue,
    Def,
    Del,
    Elif,
    Else,
    Except,
    False,
    Finally,
    For,
    From,
    Global,
    If,
    Import,
    In,
    Is,
    Lambda,
    None,
    Nonlocal,
    Not,
    Or,
    Pass,
    Raise,
    Return,
    True,
    Try,
    While,
    With,
    Yield,

    // Soft keywords. `is_soft_keyword` is the range check `Case..=Type`.
    Case,
    Lazy,
    Match,
    Type,

    /// Displayed as `Unknown`; declared after every keyword so it falls
    /// outside all keyword range checks.
    // NOTE(review): presumably emitted for input the lexer cannot classify — confirm.
    Unknown,
}
323
324impl TokenKind {
325 #[inline]
327 pub const fn is_eof(self) -> bool {
328 matches!(self, TokenKind::EndOfFile)
329 }
330
331 #[inline]
333 pub const fn is_any_newline(self) -> bool {
334 matches!(self, TokenKind::Newline | TokenKind::NonLogicalNewline)
335 }
336
337 #[inline]
344 pub fn is_keyword(self) -> bool {
345 TokenKind::And <= self && self <= TokenKind::Type
346 }
347
348 #[inline]
355 pub fn is_soft_keyword(self) -> bool {
356 TokenKind::Case <= self && self <= TokenKind::Type
357 }
358
359 #[inline]
366 pub fn is_non_soft_keyword(self) -> bool {
367 TokenKind::And <= self && self <= TokenKind::Yield
368 }
369
370 #[inline]
371 pub const fn is_operator(self) -> bool {
372 matches!(
373 self,
374 TokenKind::Lpar
375 | TokenKind::Rpar
376 | TokenKind::Lsqb
377 | TokenKind::Rsqb
378 | TokenKind::Comma
379 | TokenKind::Semi
380 | TokenKind::Plus
381 | TokenKind::Minus
382 | TokenKind::Star
383 | TokenKind::Slash
384 | TokenKind::Vbar
385 | TokenKind::Amper
386 | TokenKind::Less
387 | TokenKind::Greater
388 | TokenKind::Equal
389 | TokenKind::Dot
390 | TokenKind::Percent
391 | TokenKind::Lbrace
392 | TokenKind::Rbrace
393 | TokenKind::EqEqual
394 | TokenKind::NotEqual
395 | TokenKind::LessEqual
396 | TokenKind::GreaterEqual
397 | TokenKind::Tilde
398 | TokenKind::CircumFlex
399 | TokenKind::LeftShift
400 | TokenKind::RightShift
401 | TokenKind::DoubleStar
402 | TokenKind::PlusEqual
403 | TokenKind::MinusEqual
404 | TokenKind::StarEqual
405 | TokenKind::SlashEqual
406 | TokenKind::PercentEqual
407 | TokenKind::AmperEqual
408 | TokenKind::VbarEqual
409 | TokenKind::CircumflexEqual
410 | TokenKind::LeftShiftEqual
411 | TokenKind::RightShiftEqual
412 | TokenKind::DoubleStarEqual
413 | TokenKind::DoubleSlash
414 | TokenKind::DoubleSlashEqual
415 | TokenKind::At
416 | TokenKind::AtEqual
417 | TokenKind::Rarrow
418 | TokenKind::Ellipsis
419 | TokenKind::ColonEqual
420 | TokenKind::Colon
421 | TokenKind::And
422 | TokenKind::Or
423 | TokenKind::Not
424 | TokenKind::In
425 | TokenKind::Is
426 )
427 }
428
429 #[inline]
431 pub const fn is_singleton(self) -> bool {
432 matches!(self, TokenKind::False | TokenKind::True | TokenKind::None)
433 }
434
435 #[inline]
437 pub const fn is_trivia(&self) -> bool {
438 matches!(self, TokenKind::Comment | TokenKind::NonLogicalNewline)
439 }
440
441 #[inline]
443 pub const fn is_comment(&self) -> bool {
444 matches!(self, TokenKind::Comment)
445 }
446
447 #[inline]
448 pub const fn is_arithmetic(self) -> bool {
449 matches!(
450 self,
451 TokenKind::DoubleStar
452 | TokenKind::Star
453 | TokenKind::Plus
454 | TokenKind::Minus
455 | TokenKind::Slash
456 | TokenKind::DoubleSlash
457 | TokenKind::At
458 )
459 }
460
461 #[inline]
462 pub const fn is_bitwise_or_shift(self) -> bool {
463 matches!(
464 self,
465 TokenKind::LeftShift
466 | TokenKind::LeftShiftEqual
467 | TokenKind::RightShift
468 | TokenKind::RightShiftEqual
469 | TokenKind::Amper
470 | TokenKind::AmperEqual
471 | TokenKind::Vbar
472 | TokenKind::VbarEqual
473 | TokenKind::CircumFlex
474 | TokenKind::CircumflexEqual
475 | TokenKind::Tilde
476 )
477 }
478
479 #[inline]
481 pub const fn is_unary_arithmetic_operator(self) -> bool {
482 matches!(self, TokenKind::Plus | TokenKind::Minus)
483 }
484
485 #[inline]
486 pub const fn is_interpolated_string_end(self) -> bool {
487 matches!(self, TokenKind::FStringEnd | TokenKind::TStringEnd)
488 }
489
490 #[inline]
497 pub const fn as_unary_arithmetic_operator(self) -> Option<UnaryOp> {
498 Some(match self {
499 TokenKind::Plus => UnaryOp::UAdd,
500 TokenKind::Minus => UnaryOp::USub,
501 _ => return None,
502 })
503 }
504
505 #[inline]
512 pub const fn as_unary_operator(self) -> Option<UnaryOp> {
513 Some(match self {
514 TokenKind::Plus => UnaryOp::UAdd,
515 TokenKind::Minus => UnaryOp::USub,
516 TokenKind::Tilde => UnaryOp::Invert,
517 TokenKind::Not => UnaryOp::Not,
518 _ => return None,
519 })
520 }
521
522 #[inline]
525 pub const fn as_bool_operator(self) -> Option<BoolOp> {
526 Some(match self {
527 TokenKind::And => BoolOp::And,
528 TokenKind::Or => BoolOp::Or,
529 _ => return None,
530 })
531 }
532
533 pub const fn as_binary_operator(self) -> Option<Operator> {
540 Some(match self {
541 TokenKind::Plus => Operator::Add,
542 TokenKind::Minus => Operator::Sub,
543 TokenKind::Star => Operator::Mult,
544 TokenKind::At => Operator::MatMult,
545 TokenKind::DoubleStar => Operator::Pow,
546 TokenKind::Slash => Operator::Div,
547 TokenKind::DoubleSlash => Operator::FloorDiv,
548 TokenKind::Percent => Operator::Mod,
549 TokenKind::Amper => Operator::BitAnd,
550 TokenKind::Vbar => Operator::BitOr,
551 TokenKind::CircumFlex => Operator::BitXor,
552 TokenKind::LeftShift => Operator::LShift,
553 TokenKind::RightShift => Operator::RShift,
554 _ => return None,
555 })
556 }
557
558 #[inline]
561 pub const fn as_augmented_assign_operator(self) -> Option<Operator> {
562 Some(match self {
563 TokenKind::PlusEqual => Operator::Add,
564 TokenKind::MinusEqual => Operator::Sub,
565 TokenKind::StarEqual => Operator::Mult,
566 TokenKind::AtEqual => Operator::MatMult,
567 TokenKind::DoubleStarEqual => Operator::Pow,
568 TokenKind::SlashEqual => Operator::Div,
569 TokenKind::DoubleSlashEqual => Operator::FloorDiv,
570 TokenKind::PercentEqual => Operator::Mod,
571 TokenKind::AmperEqual => Operator::BitAnd,
572 TokenKind::VbarEqual => Operator::BitOr,
573 TokenKind::CircumflexEqual => Operator::BitXor,
574 TokenKind::LeftShiftEqual => Operator::LShift,
575 TokenKind::RightShiftEqual => Operator::RShift,
576 _ => return None,
577 })
578 }
579}
580
581impl From<BoolOp> for TokenKind {
582 #[inline]
583 fn from(op: BoolOp) -> Self {
584 match op {
585 BoolOp::And => TokenKind::And,
586 BoolOp::Or => TokenKind::Or,
587 }
588 }
589}
590
591impl From<UnaryOp> for TokenKind {
592 #[inline]
593 fn from(op: UnaryOp) -> Self {
594 match op {
595 UnaryOp::Invert => TokenKind::Tilde,
596 UnaryOp::Not => TokenKind::Not,
597 UnaryOp::UAdd => TokenKind::Plus,
598 UnaryOp::USub => TokenKind::Minus,
599 }
600 }
601}
602
603impl From<Operator> for TokenKind {
604 #[inline]
605 fn from(op: Operator) -> Self {
606 match op {
607 Operator::Add => TokenKind::Plus,
608 Operator::Sub => TokenKind::Minus,
609 Operator::Mult => TokenKind::Star,
610 Operator::MatMult => TokenKind::At,
611 Operator::Div => TokenKind::Slash,
612 Operator::Mod => TokenKind::Percent,
613 Operator::Pow => TokenKind::DoubleStar,
614 Operator::LShift => TokenKind::LeftShift,
615 Operator::RShift => TokenKind::RightShift,
616 Operator::BitOr => TokenKind::Vbar,
617 Operator::BitXor => TokenKind::CircumFlex,
618 Operator::BitAnd => TokenKind::Amper,
619 Operator::FloorDiv => TokenKind::DoubleSlash,
620 }
621 }
622}
623
624impl fmt::Display for TokenKind {
625 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
626 let value = match self {
627 TokenKind::Unknown => "Unknown",
628 TokenKind::Newline => "newline",
629 TokenKind::NonLogicalNewline => "NonLogicalNewline",
630 TokenKind::Indent => "indent",
631 TokenKind::Dedent => "dedent",
632 TokenKind::EndOfFile => "end of file",
633 TokenKind::Name => "name",
634 TokenKind::Int => "int",
635 TokenKind::Float => "float",
636 TokenKind::Complex => "complex",
637 TokenKind::String => "string",
638 TokenKind::FStringStart => "FStringStart",
639 TokenKind::FStringMiddle => "FStringMiddle",
640 TokenKind::FStringEnd => "FStringEnd",
641 TokenKind::TStringStart => "TStringStart",
642 TokenKind::TStringMiddle => "TStringMiddle",
643 TokenKind::TStringEnd => "TStringEnd",
644 TokenKind::IpyEscapeCommand => "IPython escape command",
645 TokenKind::Comment => "comment",
646 TokenKind::Question => "`?`",
647 TokenKind::Exclamation => "`!`",
648 TokenKind::Lpar => "`(`",
649 TokenKind::Rpar => "`)`",
650 TokenKind::Lsqb => "`[`",
651 TokenKind::Rsqb => "`]`",
652 TokenKind::Lbrace => "`{`",
653 TokenKind::Rbrace => "`}`",
654 TokenKind::Equal => "`=`",
655 TokenKind::ColonEqual => "`:=`",
656 TokenKind::Dot => "`.`",
657 TokenKind::Colon => "`:`",
658 TokenKind::Semi => "`;`",
659 TokenKind::Comma => "`,`",
660 TokenKind::Rarrow => "`->`",
661 TokenKind::Plus => "`+`",
662 TokenKind::Minus => "`-`",
663 TokenKind::Star => "`*`",
664 TokenKind::DoubleStar => "`**`",
665 TokenKind::Slash => "`/`",
666 TokenKind::DoubleSlash => "`//`",
667 TokenKind::Percent => "`%`",
668 TokenKind::Vbar => "`|`",
669 TokenKind::Amper => "`&`",
670 TokenKind::CircumFlex => "`^`",
671 TokenKind::LeftShift => "`<<`",
672 TokenKind::RightShift => "`>>`",
673 TokenKind::Tilde => "`~`",
674 TokenKind::At => "`@`",
675 TokenKind::Less => "`<`",
676 TokenKind::Greater => "`>`",
677 TokenKind::EqEqual => "`==`",
678 TokenKind::NotEqual => "`!=`",
679 TokenKind::LessEqual => "`<=`",
680 TokenKind::GreaterEqual => "`>=`",
681 TokenKind::PlusEqual => "`+=`",
682 TokenKind::MinusEqual => "`-=`",
683 TokenKind::StarEqual => "`*=`",
684 TokenKind::DoubleStarEqual => "`**=`",
685 TokenKind::SlashEqual => "`/=`",
686 TokenKind::DoubleSlashEqual => "`//=`",
687 TokenKind::PercentEqual => "`%=`",
688 TokenKind::VbarEqual => "`|=`",
689 TokenKind::AmperEqual => "`&=`",
690 TokenKind::CircumflexEqual => "`^=`",
691 TokenKind::LeftShiftEqual => "`<<=`",
692 TokenKind::RightShiftEqual => "`>>=`",
693 TokenKind::AtEqual => "`@=`",
694 TokenKind::Ellipsis => "`...`",
695 TokenKind::False => "`False`",
696 TokenKind::None => "`None`",
697 TokenKind::True => "`True`",
698 TokenKind::And => "`and`",
699 TokenKind::As => "`as`",
700 TokenKind::Assert => "`assert`",
701 TokenKind::Async => "`async`",
702 TokenKind::Await => "`await`",
703 TokenKind::Break => "`break`",
704 TokenKind::Class => "`class`",
705 TokenKind::Continue => "`continue`",
706 TokenKind::Def => "`def`",
707 TokenKind::Del => "`del`",
708 TokenKind::Elif => "`elif`",
709 TokenKind::Else => "`else`",
710 TokenKind::Except => "`except`",
711 TokenKind::Finally => "`finally`",
712 TokenKind::For => "`for`",
713 TokenKind::From => "`from`",
714 TokenKind::Global => "`global`",
715 TokenKind::If => "`if`",
716 TokenKind::Import => "`import`",
717 TokenKind::In => "`in`",
718 TokenKind::Is => "`is`",
719 TokenKind::Lambda => "`lambda`",
720 TokenKind::Nonlocal => "`nonlocal`",
721 TokenKind::Not => "`not`",
722 TokenKind::Or => "`or`",
723 TokenKind::Pass => "`pass`",
724 TokenKind::Raise => "`raise`",
725 TokenKind::Return => "`return`",
726 TokenKind::Try => "`try`",
727 TokenKind::While => "`while`",
728 TokenKind::Lazy => "`lazy`",
729 TokenKind::Match => "`match`",
730 TokenKind::Type => "`type`",
731 TokenKind::Case => "`case`",
732 TokenKind::With => "`with`",
733 TokenKind::Yield => "`yield`",
734 };
735 f.write_str(value)
736 }
737}
738
bitflags! {
    /// Bit set attached to every [`Token`]. The flags describe string tokens:
    /// quote style, prefix and whether the string is closed. The
    /// `StringFlags` implementation for this type interprets them.
    #[derive(Clone, Copy, Debug, PartialEq, Eq)]
    pub struct TokenFlags: u16 {
        /// The string uses double quotes; when unset, `quote_style` reports
        /// single quotes.
        const DOUBLE_QUOTES = 1 << 0;
        /// The string is triple-quoted.
        const TRIPLE_QUOTED_STRING = 1 << 1;

        /// The string has the unicode prefix (`StringLiteralPrefix::Unicode`).
        const UNICODE_STRING = 1 << 2;
        /// The string is a byte string (`AnyStringPrefix::Bytes`).
        const BYTE_STRING = 1 << 3;
        /// The string is an f-string (`AnyStringPrefix::Format`).
        const F_STRING = 1 << 4;
        /// The string is a t-string (`AnyStringPrefix::Template`).
        const T_STRING = 1 << 5;
        /// The string is raw and `prefix` reports the raw marker as lowercase.
        const RAW_STRING_LOWERCASE = 1 << 6;
        /// The string is raw and `prefix` reports the raw marker as uppercase.
        const RAW_STRING_UPPERCASE = 1 << 7;
        /// The string is unclosed (reported by `is_unclosed`).
        const UNCLOSED_STRING = 1 << 8;

        /// Union of both raw-string flags; intersecting with it tests for a
        /// raw string regardless of the marker's casing.
        const RAW_STRING = Self::RAW_STRING_LOWERCASE.bits() | Self::RAW_STRING_UPPERCASE.bits();

    }
}
768
// Only compiled with the `get-size` feature. The impl body is empty, so every
// method comes from the defaults of `get_size2::GetSize`.
// NOTE(review): presumably sufficient because the flags are a plain `u16` bit set — confirm.
#[cfg(feature = "get-size")]
impl get_size2::GetSize for TokenFlags {}
771
772impl StringFlags for TokenFlags {
773 fn quote_style(self) -> Quote {
774 if self.intersects(TokenFlags::DOUBLE_QUOTES) {
775 Quote::Double
776 } else {
777 Quote::Single
778 }
779 }
780
781 fn triple_quotes(self) -> TripleQuotes {
782 if self.intersects(TokenFlags::TRIPLE_QUOTED_STRING) {
783 TripleQuotes::Yes
784 } else {
785 TripleQuotes::No
786 }
787 }
788
789 fn prefix(self) -> AnyStringPrefix {
790 if self.intersects(TokenFlags::F_STRING) {
791 if self.intersects(TokenFlags::RAW_STRING_LOWERCASE) {
792 AnyStringPrefix::Format(FStringPrefix::Raw { uppercase_r: false })
793 } else if self.intersects(TokenFlags::RAW_STRING_UPPERCASE) {
794 AnyStringPrefix::Format(FStringPrefix::Raw { uppercase_r: true })
795 } else {
796 AnyStringPrefix::Format(FStringPrefix::Regular)
797 }
798 } else if self.intersects(TokenFlags::T_STRING) {
799 if self.intersects(TokenFlags::RAW_STRING_LOWERCASE) {
800 AnyStringPrefix::Template(TStringPrefix::Raw { uppercase_r: false })
801 } else if self.intersects(TokenFlags::RAW_STRING_UPPERCASE) {
802 AnyStringPrefix::Template(TStringPrefix::Raw { uppercase_r: true })
803 } else {
804 AnyStringPrefix::Template(TStringPrefix::Regular)
805 }
806 } else if self.intersects(TokenFlags::BYTE_STRING) {
807 if self.intersects(TokenFlags::RAW_STRING_LOWERCASE) {
808 AnyStringPrefix::Bytes(ByteStringPrefix::Raw { uppercase_r: false })
809 } else if self.intersects(TokenFlags::RAW_STRING_UPPERCASE) {
810 AnyStringPrefix::Bytes(ByteStringPrefix::Raw { uppercase_r: true })
811 } else {
812 AnyStringPrefix::Bytes(ByteStringPrefix::Regular)
813 }
814 } else if self.intersects(TokenFlags::RAW_STRING_LOWERCASE) {
815 AnyStringPrefix::Regular(StringLiteralPrefix::Raw { uppercase: false })
816 } else if self.intersects(TokenFlags::RAW_STRING_UPPERCASE) {
817 AnyStringPrefix::Regular(StringLiteralPrefix::Raw { uppercase: true })
818 } else if self.intersects(TokenFlags::UNICODE_STRING) {
819 AnyStringPrefix::Regular(StringLiteralPrefix::Unicode)
820 } else {
821 AnyStringPrefix::Regular(StringLiteralPrefix::Empty)
822 }
823 }
824
825 fn is_unclosed(self) -> bool {
826 self.intersects(TokenFlags::UNCLOSED_STRING)
827 }
828}
829
830impl TokenFlags {
831 pub const fn is_f_string(self) -> bool {
833 self.intersects(TokenFlags::F_STRING)
834 }
835
836 pub const fn is_t_string(self) -> bool {
838 self.intersects(TokenFlags::T_STRING)
839 }
840
841 pub const fn is_interpolated_string(self) -> bool {
843 self.intersects(TokenFlags::T_STRING.union(TokenFlags::F_STRING))
844 }
845
846 pub fn is_triple_quoted_interpolated_string(self) -> bool {
848 self.intersects(TokenFlags::TRIPLE_QUOTED_STRING) && self.is_interpolated_string()
849 }
850
851 pub const fn is_raw_string(self) -> bool {
853 self.intersects(TokenFlags::RAW_STRING)
854 }
855}