use std::{iter::FusedIterator, ops::Deref};
use super::{Token, TokenKind};
use ruff_python_trivia::CommentRanges;
use ruff_text_size::{Ranged as _, TextRange, TextSize};
/// An owned sequence of lexed [`Token`]s.
///
/// The lookup methods on this type locate tokens with `partition_point` on the token
/// start/end offsets, so the tokens are assumed to be ordered by their position in the
/// source. [`Tokens::new`] stores the vector as given and does not check this.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "get-size", derive(get_size2::GetSize))]
pub struct Tokens {
// The tokens exactly as passed to `Tokens::new`; also exposed as a slice through `Deref`.
raw: Vec<Token>,
}
impl Tokens {
    /// Wraps `tokens` without reordering or validating them.
    pub fn new(tokens: Vec<Token>) -> Tokens {
        Self { raw: tokens }
    }

    /// Returns an iterator over all tokens that additionally tracks the bracket nesting
    /// level while it advances.
    pub fn iter_with_context(&self) -> TokenIterWithContext<'_> {
        TokenIterWithContext::new(self.raw.as_slice())
    }

    /// Searches for the **first** token that starts exactly at `offset`.
    ///
    /// Returns `Ok(index)` when such a token exists. Otherwise returns `Err(index)`, where
    /// `index` is the number of tokens that start before `offset` (i.e. the position at
    /// which a token starting at `offset` would have to be inserted).
    pub fn binary_search_by_start(&self, offset: TextSize) -> Result<usize, usize> {
        let index = self.raw.partition_point(|token| token.start() < offset);

        // `index` may equal the length of the list, in which case there is no candidate.
        match self.raw.get(index) {
            Some(candidate) if candidate.start() == offset => Ok(index),
            _ => Err(index),
        }
    }

    /// Returns the tokens located between the start and the end of `range`.
    ///
    /// # Panics
    ///
    /// If either end of `range` lies inside a token.
    pub fn in_range(&self, range: TextRange) -> &[Token] {
        Self::before_impl(self.after(range.start()), range.end())
    }

    /// Returns the token(s) touching `offset`.
    ///
    /// * [`TokenAt::Between`] if one token ends at `offset` and the next one starts there.
    /// * [`TokenAt::Single`] if a token starts at `offset`, or if the closest token starting
    ///   before `offset` contains it (its end offset included).
    /// * [`TokenAt::None`] otherwise.
    pub fn at_offset(&self, offset: TextSize) -> TokenAt {
        // In both search outcomes the only other candidate is the token right before `index`.
        let token_before = |index: usize| index.checked_sub(1).map(|idx| self.raw[idx]);

        match self.binary_search_by_start(offset) {
            // A token starts exactly at `offset`.
            Ok(index) => match token_before(index) {
                Some(previous) if previous.end() == offset => {
                    TokenAt::Between(previous, self.raw[index])
                }
                _ => TokenAt::Single(self.raw[index]),
            },
            // No token starts at `offset`, but the preceding one may still enclose it.
            Err(index) => match token_before(index) {
                Some(previous) if previous.range().contains_inclusive(offset) => {
                    TokenAt::Single(previous)
                }
                _ => TokenAt::None,
            },
        }
    }

    /// Returns the tokens that start before `offset`.
    ///
    /// # Panics
    ///
    /// If `offset` is inside a token.
    pub fn before(&self, offset: TextSize) -> &[Token] {
        Self::before_impl(self.raw.as_slice(), offset)
    }

    /// Shared implementation of [`Self::before`] that works on an arbitrary token slice.
    fn before_impl(tokens: &[Token], offset: TextSize) -> &[Token] {
        let count = tokens.partition_point(|token| token.start() < offset);
        let (before, _) = tokens.split_at(count);

        // The last selected token starts before `offset`; it must also end at or before it.
        if let Some(last) = before.last() {
            assert!(
                last.end() <= offset,
                "Offset {offset:?} is inside token `{last:?}`",
            );
        }

        before
    }

    /// Returns the tokens that end after `offset`.
    ///
    /// Tokens ending exactly at `offset` are skipped, which includes zero-length tokens
    /// located at `offset`.
    ///
    /// # Panics
    ///
    /// If `offset` is inside a token.
    pub fn after(&self, offset: TextSize) -> &[Token] {
        let skipped = self.raw.partition_point(|token| token.end() <= offset);
        let (_, after) = self.raw.split_at(skipped);

        // The first remaining token ends after `offset`; it must not start before it.
        if let Some(first) = after.first() {
            assert!(
                first.start() >= offset,
                "Offset {offset:?} is inside token `{first:?}`",
            );
        }

        after
    }

    /// Splits the tokens into those starting before `offset` and the remaining ones.
    ///
    /// The first half equals [`Self::before`]. The second half can differ from
    /// [`Self::after`]: a zero-length token located at `offset` is part of the second half
    /// here but is skipped by [`Self::after`].
    ///
    /// # Panics
    ///
    /// If `offset` is inside a token.
    pub fn split_at(&self, offset: TextSize) -> (&[Token], &[Token]) {
        let mid = self.raw.partition_point(|token| token.start() < offset);
        let halves = self.raw.split_at(mid);

        if let Some(last) = halves.0.last() {
            assert!(
                last.end() <= offset,
                "Offset {offset:?} is inside token `{last:?}`"
            );
        }

        halves
    }
}
/// Allows `for token in &tokens` by iterating the underlying token vector by reference.
impl<'a> IntoIterator for &'a Tokens {
    type Item = &'a Token;
    type IntoIter = std::slice::Iter<'a, Token>;

    fn into_iter(self) -> Self::IntoIter {
        self.raw.iter()
    }
}
/// Exposes the tokens as a plain slice so that all `[Token]` methods are available.
impl Deref for Tokens {
    type Target = [Token];

    fn deref(&self) -> &Self::Target {
        self.raw.as_slice()
    }
}
/// The result of looking up the token(s) at an offset with `Tokens::at_offset`.
///
/// The value is also an iterator that yields the contained tokens in order.
#[derive(Debug, Clone)]
pub enum TokenAt {
/// No token starts at the offset and the preceding token does not contain it.
None,
/// Exactly one token touches the offset: it either starts there or contains it.
Single(Token),
/// The offset is the boundary of two adjacent tokens: the first one ends at the offset
/// and the second one starts at it.
Between(Token, Token),
}
/// Yields the contained tokens front to back, shrinking the value on every step:
/// `Between(a, b)` -> `Single(b)` -> `None`.
impl Iterator for TokenAt {
    type Item = Token;

    fn next(&mut self) -> Option<Self::Item> {
        // Decide what to hand out and which state remains afterwards in a single step.
        let (yielded, remaining) = match *self {
            TokenAt::None => return None,
            TokenAt::Single(only) => (only, TokenAt::None),
            TokenAt::Between(left, right) => (left, TokenAt::Single(right)),
        };

        *self = remaining;
        Some(yielded)
    }
}
// Sound to mark as fused: once `next` has reached `TokenAt::None` it keeps returning `None`
// and never changes the state again.
impl FusedIterator for TokenAt {}
/// Builds the comment ranges by collecting the range of every `Comment` token, in token order.
impl From<&Tokens> for CommentRanges {
    fn from(tokens: &Tokens) -> Self {
        let comment_ranges = tokens
            .iter()
            .filter(|token| token.kind() == TokenKind::Comment)
            .map(|token| token.range())
            .collect::<Vec<_>>();

        CommentRanges::new(comment_ranges)
    }
}
/// An iterator over tokens that also tracks how deeply the iteration is nested inside
/// parentheses, brackets and braces.
///
/// Created by `Tokens::iter_with_context`.
#[derive(Debug, Clone)]
pub struct TokenIterWithContext<'a> {
// The remaining tokens to yield.
inner: std::slice::Iter<'a, Token>,
// Number of currently open `(`, `[` and `{` among the tokens yielded so far.
// Never drops below zero and is reset to zero when a `Newline` token is yielded.
nesting: u32,
}
impl<'a> TokenIterWithContext<'a> {
fn new(tokens: &'a [Token]) -> TokenIterWithContext<'a> {
TokenIterWithContext {
inner: tokens.iter(),
nesting: 0,
}
}
pub const fn nesting(&self) -> u32 {
self.nesting
}
pub const fn in_parenthesized_context(&self) -> bool {
self.nesting > 0
}
pub fn peek(&self) -> Option<&'a Token> {
self.clone().next()
}
}
impl<'a> Iterator for TokenIterWithContext<'a> {
    type Item = &'a Token;

    /// Yields the next token and updates the nesting level according to its kind.
    fn next(&mut self) -> Option<Self::Item> {
        let token = self.inner.next()?;

        self.nesting = match token.kind() {
            // An opening bracket enters one more level.
            TokenKind::Lpar | TokenKind::Lbrace | TokenKind::Lsqb => self.nesting + 1,
            // A closing bracket leaves one level; a stray closer at level zero is ignored.
            TokenKind::Rpar | TokenKind::Rbrace | TokenKind::Rsqb => {
                self.nesting.saturating_sub(1)
            }
            // A logical newline drops every level at once (a no-op when already at zero).
            // NOTE(review): presumably mirrors how the lexer recovers from unclosed
            // brackets - confirm against the lexer.
            TokenKind::Newline => 0,
            _ => self.nesting,
        };

        Some(token)
    }
}
// `next` returns `None` exactly when the wrapped slice iterator does, and a slice iterator
// keeps returning `None` once it is exhausted.
impl FusedIterator for TokenIterWithContext<'_> {}
#[cfg(test)]
mod tests {
    use std::ops::Range;

    use ruff_text_size::TextSize;

    use crate::token::{Token, TokenFlags, TokenKind};

    use super::*;

    /// Token layout with two stretches not covered by any token: `3..4` between `Def` and
    /// `Name`, and `11..15` between `Newline` and `Comment`.
    const TEST_CASE_WITH_GAP: [(TokenKind, Range<u32>); 10] = [
        (TokenKind::Def, 0..3),
        (TokenKind::Name, 4..7),
        (TokenKind::Lpar, 7..8),
        (TokenKind::Rpar, 8..9),
        (TokenKind::Colon, 9..10),
        (TokenKind::Newline, 10..11),
        // Nothing covers 11..15.
        (TokenKind::Comment, 15..24),
        (TokenKind::NonLogicalNewline, 24..25),
        (TokenKind::Indent, 25..29),
        (TokenKind::Pass, 29..33),
    ];

    /// Builds a [`Tokens`] value from `(kind, range)` pairs, using empty token flags.
    fn new_tokens(tokens: impl Iterator<Item = (TokenKind, Range<u32>)>) -> Tokens {
        let raw: Vec<Token> = tokens
            .map(|(kind, Range { start, end })| {
                let range = TextRange::new(TextSize::new(start), TextSize::new(end));
                Token::new(kind, range, TokenFlags::empty())
            })
            .collect();

        Tokens::new(raw)
    }

    /// The tokens described by [`TEST_CASE_WITH_GAP`].
    fn gap_tokens() -> Tokens {
        new_tokens(TEST_CASE_WITH_GAP.into_iter())
    }

    /// Shorthand for a text offset.
    fn size(raw: u32) -> TextSize {
        TextSize::new(raw)
    }

    /// Shorthand for a text range.
    fn span(start: u32, end: u32) -> TextRange {
        TextRange::new(size(start), size(end))
    }

    /// Kind of the first token in `tokens`, if any.
    fn first_kind(tokens: &[Token]) -> Option<TokenKind> {
        tokens.first().map(|token| token.kind())
    }

    /// Kind of the last token in `tokens`, if any.
    fn last_kind(tokens: &[Token]) -> Option<TokenKind> {
        tokens.last().map(|token| token.kind())
    }

    #[test]
    fn tokens_after_offset_at_token_start() {
        let tokens = gap_tokens();
        let after = tokens.after(size(8));
        assert_eq!(after.len(), 7);
        assert_eq!(first_kind(after), Some(TokenKind::Rpar));
    }

    #[test]
    fn tokens_after_offset_at_token_end() {
        let tokens = gap_tokens();
        let after = tokens.after(size(11));
        assert_eq!(after.len(), 4);
        assert_eq!(first_kind(after), Some(TokenKind::Comment));
    }

    #[test]
    fn tokens_after_offset_between_tokens() {
        let tokens = gap_tokens();
        let after = tokens.after(size(13));
        assert_eq!(after.len(), 4);
        assert_eq!(first_kind(after), Some(TokenKind::Comment));
    }

    #[test]
    fn tokens_after_offset_at_last_token_end() {
        let tokens = gap_tokens();
        let after = tokens.after(size(33));
        assert_eq!(after.len(), 0);
    }

    #[test]
    #[should_panic(expected = "Offset 5 is inside token `Name 4..7`")]
    fn tokens_after_offset_inside_token() {
        let tokens = gap_tokens();
        let _ = tokens.after(size(5));
    }

    #[test]
    fn tokens_before_offset_at_first_token_start() {
        let tokens = gap_tokens();
        let before = tokens.before(size(0));
        assert_eq!(before.len(), 0);
    }

    #[test]
    fn tokens_before_offset_after_first_token_gap() {
        let tokens = gap_tokens();
        let before = tokens.before(size(3));
        assert_eq!(before.len(), 1);
        assert_eq!(last_kind(before), Some(TokenKind::Def));
    }

    #[test]
    fn tokens_before_offset_at_second_token_start() {
        let tokens = gap_tokens();
        let before = tokens.before(size(4));
        assert_eq!(before.len(), 1);
        assert_eq!(last_kind(before), Some(TokenKind::Def));
    }

    #[test]
    fn tokens_before_offset_at_token_start() {
        let tokens = gap_tokens();
        let before = tokens.before(size(8));
        assert_eq!(before.len(), 3);
        assert_eq!(last_kind(before), Some(TokenKind::Lpar));
    }

    #[test]
    fn tokens_before_offset_at_token_end() {
        let tokens = gap_tokens();
        let before = tokens.before(size(11));
        assert_eq!(before.len(), 6);
        assert_eq!(last_kind(before), Some(TokenKind::Newline));
    }

    #[test]
    fn tokens_before_offset_between_tokens() {
        let tokens = gap_tokens();
        let before = tokens.before(size(13));
        assert_eq!(before.len(), 6);
        assert_eq!(last_kind(before), Some(TokenKind::Newline));
    }

    #[test]
    fn tokens_before_offset_at_last_token_end() {
        let tokens = gap_tokens();
        let before = tokens.before(size(33));
        assert_eq!(before.len(), 10);
        assert_eq!(last_kind(before), Some(TokenKind::Pass));
    }

    #[test]
    #[should_panic(expected = "Offset 5 is inside token `Name 4..7`")]
    fn tokens_before_offset_inside_token() {
        let tokens = gap_tokens();
        let _ = tokens.before(size(5));
    }

    #[test]
    fn tokens_in_range_at_token_offset() {
        let tokens = gap_tokens();
        let in_range = tokens.in_range(span(4, 10));
        assert_eq!(in_range.len(), 4);
        assert_eq!(first_kind(in_range), Some(TokenKind::Name));
        assert_eq!(last_kind(in_range), Some(TokenKind::Colon));
    }

    #[test]
    fn tokens_in_range_start_offset_at_token_end() {
        let tokens = gap_tokens();
        let in_range = tokens.in_range(span(11, 29));
        assert_eq!(in_range.len(), 3);
        assert_eq!(first_kind(in_range), Some(TokenKind::Comment));
        assert_eq!(last_kind(in_range), Some(TokenKind::Indent));
    }

    #[test]
    fn tokens_in_range_end_offset_at_token_start() {
        let tokens = gap_tokens();
        let in_range = tokens.in_range(span(8, 15));
        assert_eq!(in_range.len(), 3);
        assert_eq!(first_kind(in_range), Some(TokenKind::Rpar));
        assert_eq!(last_kind(in_range), Some(TokenKind::Newline));
    }

    #[test]
    fn tokens_in_range_start_offset_between_tokens() {
        let tokens = gap_tokens();
        let in_range = tokens.in_range(span(13, 29));
        assert_eq!(in_range.len(), 3);
        assert_eq!(first_kind(in_range), Some(TokenKind::Comment));
        assert_eq!(last_kind(in_range), Some(TokenKind::Indent));
    }

    #[test]
    fn tokens_in_range_end_offset_between_tokens() {
        let tokens = gap_tokens();
        let in_range = tokens.in_range(span(9, 13));
        assert_eq!(in_range.len(), 2);
        assert_eq!(first_kind(in_range), Some(TokenKind::Colon));
        assert_eq!(last_kind(in_range), Some(TokenKind::Newline));
    }

    #[test]
    #[should_panic(expected = "Offset 5 is inside token `Name 4..7`")]
    fn tokens_in_range_start_offset_inside_token() {
        let tokens = gap_tokens();
        let _ = tokens.in_range(span(5, 10));
    }

    #[test]
    #[should_panic(expected = "Offset 6 is inside token `Name 4..7`")]
    fn tokens_in_range_end_offset_inside_token() {
        let tokens = gap_tokens();
        let _ = tokens.in_range(span(0, 6));
    }

    #[test]
    fn tokens_split_at_first_token_start() {
        let tokens = gap_tokens();
        let (before, after) = tokens.split_at(size(0));
        assert_eq!(before.len(), 0);
        assert_eq!(after.len(), 10);
    }

    #[test]
    fn tokens_split_at_last_token_end() {
        let tokens = gap_tokens();
        let (before, after) = tokens.split_at(size(33));
        assert_eq!(before.len(), 10);
        assert_eq!(after.len(), 0);
    }

    #[test]
    fn tokens_split_at_inside_gap() {
        let tokens = gap_tokens();
        let (before, after) = tokens.split_at(size(13));
        assert_eq!(before.len(), 6);
        assert_eq!(after.len(), 4);
    }

    #[test]
    #[should_panic(expected = "Offset 18 is inside token `Comment 15..24`")]
    fn tokens_split_at_inside_token() {
        let tokens = gap_tokens();
        let _ = tokens.split_at(size(18));
    }

    #[test]
    fn tokens_split_at_matches_before_and_after() {
        let offset = size(15);
        let tokens = gap_tokens();
        let (before, after) = tokens.split_at(offset);
        assert_eq!(before, tokens.before(offset));
        assert_eq!(after, tokens.after(offset));
    }

    /// Pins the known difference between `split_at` and `after`: the zero-length `Dedent`
    /// at the offset belongs to the second half of `split_at` but is skipped by `after`.
    #[test]
    #[should_panic(expected = "Contents of after slice different when offset at dedent")]
    fn tokens_split_at_matches_before_and_after_zero_length() {
        let offset = size(13);
        let layout = [
            (TokenKind::If, 0..2),
            (TokenKind::Name, 3..4),
            (TokenKind::Colon, 4..5),
            (TokenKind::Newline, 5..6),
            (TokenKind::Indent, 6..7),
            (TokenKind::Pass, 7..11),
            (TokenKind::Newline, 11..12),
            (TokenKind::NonLogicalNewline, 12..13),
            (TokenKind::Dedent, 13..13),
            (TokenKind::Name, 13..14),
            (TokenKind::Newline, 14..14),
        ];
        let tokens = new_tokens(layout.into_iter());

        let (before, after) = tokens.split_at(offset);
        assert_eq!(before, tokens.before(offset));
        assert!(
            after == tokens.after(offset),
            "Contents of after slice different when offset at dedent"
        );
    }
}