// rustpython_ruff_python_ast/token/tokens.rs
use std::{iter::FusedIterator, ops::Deref};
2
3use super::{Token, TokenKind};
4use ruff_python_trivia::CommentRanges;
5use ruff_text_size::{Ranged as _, TextRange, TextSize};
6
/// An ordered collection of [`Token`]s produced by the lexer.
///
/// The tokens are assumed to be sorted by source offset; the binary-search
/// based lookups (`partition_point`) in the `impl` rely on that ordering.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "get-size", derive(get_size2::GetSize))]
pub struct Tokens {
    raw: Vec<Token>,
}
13
14impl Tokens {
15 pub fn new(tokens: Vec<Token>) -> Tokens {
16 Tokens { raw: tokens }
17 }
18
19 pub fn iter_with_context(&self) -> TokenIterWithContext<'_> {
21 TokenIterWithContext::new(&self.raw)
22 }
23
24 pub fn binary_search_by_start(&self, offset: TextSize) -> Result<usize, usize> {
30 let partition_point = self.partition_point(|token| token.start() < offset);
31
32 let after = &self[partition_point..];
33
34 if after.first().is_some_and(|first| first.start() == offset) {
35 Ok(partition_point)
36 } else {
37 Err(partition_point)
38 }
39 }
40
41 pub fn in_range(&self, range: TextRange) -> &[Token] {
82 let tokens_after_start = self.after(range.start());
83
84 Self::before_impl(tokens_after_start, range.end())
85 }
86
87 pub fn at_offset(&self, offset: TextSize) -> TokenAt {
92 match self.binary_search_by_start(offset) {
93 Ok(index) => {
99 let token = self[index];
100 if let Some(previous) = index.checked_sub(1).map(|idx| self[idx]) {
103 if previous.end() == offset {
104 return TokenAt::Between(previous, token);
105 }
106 }
107
108 TokenAt::Single(token)
109 }
110
111 Err(index) => {
122 if let Some(previous) = index.checked_sub(1).map(|idx| self[idx]) {
123 if previous.range().contains_inclusive(offset) {
124 return TokenAt::Single(previous);
125 }
126 }
127
128 TokenAt::None
129 }
130 }
131 }
132
133 pub fn before(&self, offset: TextSize) -> &[Token] {
144 Self::before_impl(&self.raw, offset)
145 }
146
147 fn before_impl(tokens: &[Token], offset: TextSize) -> &[Token] {
148 let partition_point = tokens.partition_point(|token| token.start() < offset);
149 let before = &tokens[..partition_point];
150
151 if let Some(last) = before.last() {
152 assert!(
156 offset >= last.end(),
157 "Offset {offset:?} is inside token `{last:?}`",
158 );
159 }
160 before
161 }
162
163 pub fn after(&self, offset: TextSize) -> &[Token] {
174 let partition_point = self.partition_point(|token| token.end() <= offset);
175 let after = &self[partition_point..];
176
177 if let Some(first) = after.first() {
178 assert!(
181 offset <= first.start(),
182 "Offset {offset:?} is inside token `{first:?}`",
183 );
184 }
185
186 after
187 }
188
189 pub fn split_at(&self, offset: TextSize) -> (&[Token], &[Token]) {
204 let partition_point = self.partition_point(|token| token.start() < offset);
205 let (before, after) = &self.raw.split_at(partition_point);
206
207 if let Some(last) = before.last() {
208 assert!(
209 offset >= last.end(),
210 "Offset {offset:?} is inside token `{last:?}`"
211 );
212 }
213 (before, after)
214 }
215}
216
217impl<'a> IntoIterator for &'a Tokens {
218 type Item = &'a Token;
219 type IntoIter = std::slice::Iter<'a, Token>;
220
221 fn into_iter(self) -> Self::IntoIter {
222 self.iter()
223 }
224}
225
226impl Deref for Tokens {
227 type Target = [Token];
228
229 fn deref(&self) -> &Self::Target {
230 &self.raw
231 }
232}
233
/// The token(s) found at a given offset; see [`Tokens::at_offset`].
#[derive(Debug, Clone)]
pub enum TokenAt {
    /// The offset falls into a gap between tokens or outside the token range.
    None,

    /// The offset falls within this single token.
    Single(Token),

    /// The offset is exactly on the boundary between these two tokens.
    Between(Token, Token),
}
247
248impl Iterator for TokenAt {
249 type Item = Token;
250
251 fn next(&mut self) -> Option<Self::Item> {
252 match *self {
253 TokenAt::None => None,
254 TokenAt::Single(token) => {
255 *self = TokenAt::None;
256 Some(token)
257 }
258 TokenAt::Between(first, second) => {
259 *self = TokenAt::Single(second);
260 Some(first)
261 }
262 }
263 }
264}
265
266impl FusedIterator for TokenAt {}
267
268impl From<&Tokens> for CommentRanges {
269 fn from(tokens: &Tokens) -> Self {
270 let mut ranges = vec![];
271 for token in tokens {
272 if token.kind() == TokenKind::Comment {
273 ranges.push(token.range());
274 }
275 }
276 CommentRanges::new(ranges)
277 }
278}
279
/// An iterator over [`Token`]s that keeps track of how deeply the current
/// position is nested inside parentheses, brackets, and braces.
#[derive(Debug, Clone)]
pub struct TokenIterWithContext<'a> {
    inner: std::slice::Iter<'a, Token>,
    // Number of currently open `(` / `[` / `{` pairs.
    nesting: u32,
}
291
292impl<'a> TokenIterWithContext<'a> {
293 fn new(tokens: &'a [Token]) -> TokenIterWithContext<'a> {
294 TokenIterWithContext {
295 inner: tokens.iter(),
296 nesting: 0,
297 }
298 }
299
300 pub const fn nesting(&self) -> u32 {
302 self.nesting
303 }
304
305 pub const fn in_parenthesized_context(&self) -> bool {
307 self.nesting > 0
308 }
309
310 pub fn peek(&self) -> Option<&'a Token> {
312 self.clone().next()
313 }
314}
315
316impl<'a> Iterator for TokenIterWithContext<'a> {
317 type Item = &'a Token;
318
319 fn next(&mut self) -> Option<Self::Item> {
320 let token = self.inner.next()?;
321
322 match token.kind() {
323 TokenKind::Lpar | TokenKind::Lbrace | TokenKind::Lsqb => self.nesting += 1,
324 TokenKind::Rpar | TokenKind::Rbrace | TokenKind::Rsqb => {
325 self.nesting = self.nesting.saturating_sub(1);
326 }
327 TokenKind::Newline if self.nesting > 0 => {
331 self.nesting = 0;
332 }
333 _ => {}
334 }
335
336 Some(token)
337 }
338}
339
340impl FusedIterator for TokenIterWithContext<'_> {}
341
#[cfg(test)]
mod tests {
    use std::ops::Range;

    use ruff_text_size::TextSize;

    use crate::token::{Token, TokenFlags, TokenKind};

    use super::*;

    // A token stream with a gap: offsets 11..15 (whitespace before the
    // comment) are not covered by any token, which lets the tests probe
    // offsets that fall between tokens.
    const TEST_CASE_WITH_GAP: [(TokenKind, Range<u32>); 10] = [
        (TokenKind::Def, 0..3),
        (TokenKind::Name, 4..7),
        (TokenKind::Lpar, 7..8),
        (TokenKind::Rpar, 8..9),
        (TokenKind::Colon, 9..10),
        (TokenKind::Newline, 10..11),
        (TokenKind::Comment, 15..24),
        (TokenKind::NonLogicalNewline, 24..25),
        (TokenKind::Indent, 25..29),
        (TokenKind::Pass, 29..33),
    ];

    // Builds a `Tokens` collection from `(kind, range)` pairs, using empty
    // token flags throughout.
    fn new_tokens(tokens: impl Iterator<Item = (TokenKind, Range<u32>)>) -> Tokens {
        Tokens::new(
            tokens
                .map(|(kind, range)| {
                    Token::new(
                        kind,
                        TextRange::new(TextSize::new(range.start), TextSize::new(range.end)),
                        TokenFlags::empty(),
                    )
                })
                .collect(),
        )
    }

    #[test]
    fn tokens_after_offset_at_token_start() {
        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
        let after = tokens.after(TextSize::new(8));
        assert_eq!(after.len(), 7);
        assert_eq!(after.first().unwrap().kind(), TokenKind::Rpar);
    }

    #[test]
    fn tokens_after_offset_at_token_end() {
        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
        let after = tokens.after(TextSize::new(11));
        assert_eq!(after.len(), 4);
        assert_eq!(after.first().unwrap().kind(), TokenKind::Comment);
    }

    #[test]
    fn tokens_after_offset_between_tokens() {
        // Offset 13 is inside the 11..15 gap.
        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
        let after = tokens.after(TextSize::new(13));
        assert_eq!(after.len(), 4);
        assert_eq!(after.first().unwrap().kind(), TokenKind::Comment);
    }

    #[test]
    fn tokens_after_offset_at_last_token_end() {
        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
        let after = tokens.after(TextSize::new(33));
        assert_eq!(after.len(), 0);
    }

    #[test]
    #[should_panic(expected = "Offset 5 is inside token `Name 4..7`")]
    fn tokens_after_offset_inside_token() {
        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
        tokens.after(TextSize::new(5));
    }

    #[test]
    fn tokens_before_offset_at_first_token_start() {
        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
        let before = tokens.before(TextSize::new(0));
        assert_eq!(before.len(), 0);
    }

    #[test]
    fn tokens_before_offset_after_first_token_gap() {
        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
        let before = tokens.before(TextSize::new(3));
        assert_eq!(before.len(), 1);
        assert_eq!(before.last().unwrap().kind(), TokenKind::Def);
    }

    #[test]
    fn tokens_before_offset_at_second_token_start() {
        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
        let before = tokens.before(TextSize::new(4));
        assert_eq!(before.len(), 1);
        assert_eq!(before.last().unwrap().kind(), TokenKind::Def);
    }

    #[test]
    fn tokens_before_offset_at_token_start() {
        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
        let before = tokens.before(TextSize::new(8));
        assert_eq!(before.len(), 3);
        assert_eq!(before.last().unwrap().kind(), TokenKind::Lpar);
    }

    #[test]
    fn tokens_before_offset_at_token_end() {
        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
        let before = tokens.before(TextSize::new(11));
        assert_eq!(before.len(), 6);
        assert_eq!(before.last().unwrap().kind(), TokenKind::Newline);
    }

    #[test]
    fn tokens_before_offset_between_tokens() {
        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
        let before = tokens.before(TextSize::new(13));
        assert_eq!(before.len(), 6);
        assert_eq!(before.last().unwrap().kind(), TokenKind::Newline);
    }

    #[test]
    fn tokens_before_offset_at_last_token_end() {
        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
        let before = tokens.before(TextSize::new(33));
        assert_eq!(before.len(), 10);
        assert_eq!(before.last().unwrap().kind(), TokenKind::Pass);
    }

    #[test]
    #[should_panic(expected = "Offset 5 is inside token `Name 4..7`")]
    fn tokens_before_offset_inside_token() {
        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
        tokens.before(TextSize::new(5));
    }

    #[test]
    fn tokens_in_range_at_token_offset() {
        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
        let in_range = tokens.in_range(TextRange::new(4.into(), 10.into()));
        assert_eq!(in_range.len(), 4);
        assert_eq!(in_range.first().unwrap().kind(), TokenKind::Name);
        assert_eq!(in_range.last().unwrap().kind(), TokenKind::Colon);
    }

    #[test]
    fn tokens_in_range_start_offset_at_token_end() {
        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
        let in_range = tokens.in_range(TextRange::new(11.into(), 29.into()));
        assert_eq!(in_range.len(), 3);
        assert_eq!(in_range.first().unwrap().kind(), TokenKind::Comment);
        assert_eq!(in_range.last().unwrap().kind(), TokenKind::Indent);
    }

    #[test]
    fn tokens_in_range_end_offset_at_token_start() {
        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
        let in_range = tokens.in_range(TextRange::new(8.into(), 15.into()));
        assert_eq!(in_range.len(), 3);
        assert_eq!(in_range.first().unwrap().kind(), TokenKind::Rpar);
        assert_eq!(in_range.last().unwrap().kind(), TokenKind::Newline);
    }

    #[test]
    fn tokens_in_range_start_offset_between_tokens() {
        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
        let in_range = tokens.in_range(TextRange::new(13.into(), 29.into()));
        assert_eq!(in_range.len(), 3);
        assert_eq!(in_range.first().unwrap().kind(), TokenKind::Comment);
        assert_eq!(in_range.last().unwrap().kind(), TokenKind::Indent);
    }

    #[test]
    fn tokens_in_range_end_offset_between_tokens() {
        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
        let in_range = tokens.in_range(TextRange::new(9.into(), 13.into()));
        assert_eq!(in_range.len(), 2);
        assert_eq!(in_range.first().unwrap().kind(), TokenKind::Colon);
        assert_eq!(in_range.last().unwrap().kind(), TokenKind::Newline);
    }

    #[test]
    #[should_panic(expected = "Offset 5 is inside token `Name 4..7`")]
    fn tokens_in_range_start_offset_inside_token() {
        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
        tokens.in_range(TextRange::new(5.into(), 10.into()));
    }

    #[test]
    #[should_panic(expected = "Offset 6 is inside token `Name 4..7`")]
    fn tokens_in_range_end_offset_inside_token() {
        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
        tokens.in_range(TextRange::new(0.into(), 6.into()));
    }

    #[test]
    fn tokens_split_at_first_token_start() {
        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
        let (before, after) = tokens.split_at(TextSize::new(0));
        assert_eq!(before.len(), 0);
        assert_eq!(after.len(), 10);
    }

    #[test]
    fn tokens_split_at_last_token_end() {
        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
        let (before, after) = tokens.split_at(TextSize::new(33));
        assert_eq!(before.len(), 10);
        assert_eq!(after.len(), 0);
    }

    #[test]
    fn tokens_split_at_inside_gap() {
        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
        let (before, after) = tokens.split_at(TextSize::new(13));
        assert_eq!(before.len(), 6);
        assert_eq!(after.len(), 4);
    }

    #[test]
    #[should_panic(expected = "Offset 18 is inside token `Comment 15..24`")]
    fn tokens_split_at_inside_token() {
        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
        tokens.split_at(TextSize::new(18));
    }

    #[test]
    fn tokens_split_at_matches_before_and_after() {
        let offset = TextSize::new(15);
        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
        let (before, after) = tokens.split_at(offset);
        assert_eq!(before, tokens.before(offset));
        assert_eq!(after, tokens.after(offset));
    }

    // Documents (via `should_panic`) that `split_at` and `after` disagree
    // when a zero-length token (here, `Dedent 13..13`) sits exactly at the
    // split offset: `split_at` keeps it in the after slice, `after` excludes
    // it.
    #[test]
    #[should_panic(expected = "Contents of after slice different when offset at dedent")]
    fn tokens_split_at_matches_before_and_after_zero_length() {
        let offset = TextSize::new(13);
        let tokens = new_tokens(
            [
                (TokenKind::If, 0..2),
                (TokenKind::Name, 3..4),
                (TokenKind::Colon, 4..5),
                (TokenKind::Newline, 5..6),
                (TokenKind::Indent, 6..7),
                (TokenKind::Pass, 7..11),
                (TokenKind::Newline, 11..12),
                (TokenKind::NonLogicalNewline, 12..13),
                (TokenKind::Dedent, 13..13),
                (TokenKind::Name, 13..14),
                (TokenKind::Newline, 14..14),
            ]
            .into_iter(),
        );
        let (before, after) = tokens.split_at(offset);
        assert_eq!(before, tokens.before(offset));
        assert!(
            after == tokens.after(offset),
            "Contents of after slice different when offset at dedent"
        );
    }
}