use std::ops::Range;
/// Byte-oriented lexer cursor over a source buffer that is restricted to a
/// list of included byte ranges, tracking row/column as it advances.
// NOTE(review): only this struct definition is gated on the `external_scanners`
// feature; the `impl` blocks in this file are not. Confirm the enclosing module
// is itself feature-gated, otherwise a build without the feature cannot compile.
#[cfg(feature = "external_scanners")]
pub struct TSLexerAdapter<'a> {
/// Source bytes being scanned.
src: &'a [u8],
/// Current byte offset into `src`.
cursor: usize,
/// Byte offset recorded by the last `mark_end()` call (starts equal to `cursor`).
mark_end: usize,
/// Zero-based row of `cursor`.
row: u32,
/// Zero-based column of `cursor`, counted in bytes that are not UTF-8
/// continuation bytes.
col: u32,
/// Byte offsets at which lines start; binary-searched when a position has to
/// be converted to row/column, so it must be sorted ascending.
line_starts: &'a [usize],
/// Included byte ranges plus the index of the one currently being read.
ranges: Ranges,
}
/// The included byte ranges of the source together with the position of the
/// range the lexer is currently reading from.
struct Ranges {
/// Included byte ranges, stored as a fixed-size boxed slice.
// presumably sorted and non-overlapping; nothing in this file validates that.
spans: Box<[Range<usize>]>,
/// Index into `spans` of the current range.
next: usize,
}
impl<'a> TSLexerAdapter<'a> {
    /// Creates an adapter positioned at byte offset `cursor` of `src`.
    ///
    /// * `src` - the bytes to scan.
    /// * `cursor` - starting byte offset; `mark_end` is initialised to it.
    /// * `line_starts` - ascending byte offsets of line starts, used to derive
    ///   the initial row/column.
    /// * `ranges` - included byte ranges. They are assumed to be sorted and
    ///   non-overlapping; this is not validated here.
    ///
    /// The current range is the first one whose end lies beyond `cursor`. When
    /// no such range exists (the cursor is past every range, or `ranges` is
    /// empty) the adapter starts with no current range, so it reports EOF
    /// right away.
    pub fn new(
        src: &'a [u8],
        cursor: usize,
        line_starts: &'a [usize],
        ranges: Vec<Range<usize>>,
    ) -> Self {
        let (row, col) = position_to_line_col(src, cursor, line_starts);

        // Falling back to index 0 here would leave a cursor that is already
        // past every range stuck on the first range: `lookahead` yields `None`
        // and `advance` does nothing, yet `is_eof` stays false whenever more
        // than one range exists. Pointing one past the last range makes
        // `current_range` return `None`, which every caller treats as EOF.
        let next = ranges
            .iter()
            .position(|range| cursor < range.end)
            .unwrap_or(ranges.len());

        Self {
            src,
            cursor,
            mark_end: cursor,
            row,
            col,
            line_starts,
            ranges: Ranges {
                spans: ranges.into_boxed_slice(),
                next,
            },
        }
    }

    /// Returns the included range currently being read, or `None` when the
    /// range index points past the end of the range list.
    fn current_range(&self) -> Option<&Range<usize>> {
        self.ranges.spans.get(self.ranges.next)
    }

    /// Updates `row`/`col` for a single consumed `byte`.
    ///
    /// A line feed starts a new row at column 0, a carriage return leaves the
    /// position untouched, and any other byte bumps the column unless it is a
    /// UTF-8 continuation byte.
    fn update_position(&mut self, byte: u8) {
        match byte {
            b'\n' => {
                self.row += 1;
                self.col = 0;
            }
            // Carriage returns never move the column in this helper.
            b'\r' => {}
            // Bytes of the form 10xxxxxx continue a multi-byte UTF-8 sequence
            // and therefore do not begin a new column.
            _ if (byte & 0b1100_0000) != 0b1000_0000 => self.col += 1,
            _ => {}
        }
    }
}
impl<'a> crate::external_scanner::Lexer for TSLexerAdapter<'a> {
    /// Returns the byte under the cursor, or `None` when there is no current
    /// range, the cursor has reached the end of the current range, or the
    /// cursor lies beyond `src`.
    fn lookahead(&self) -> Option<u8> {
        match self.current_range() {
            Some(range) if self.cursor < range.end => self.src.get(self.cursor).copied(),
            _ => None,
        }
    }

    /// Performs up to `n` advance steps, stopping early once nothing more can
    /// be read.
    ///
    /// One step consumes a single byte, except that a `\r\n` pair lying
    /// entirely inside the current range is consumed as one step. Both `\r\n`
    /// and a lone `\r` start a new row. When a step reaches the end of the
    /// current range and another range follows, the cursor jumps to the start
    /// of that range and row/column are recomputed from `line_starts`.
    fn advance(&mut self, n: usize) {
        for _ in 0..n {
            // Nothing to consume without a current range or at its end.
            let range_end = match self.current_range() {
                Some(range) if self.cursor < range.end => range.end,
                _ => return,
            };
            let byte = match self.src.get(self.cursor) {
                Some(&byte) => byte,
                None => return,
            };

            if byte == b'\r' {
                // Only swallow the following `\n` when it belongs to the
                // current range; otherwise the cursor (and a later
                // `mark_end`) would land on a byte outside the included text.
                let crlf_in_range = self.cursor + 1 < range_end
                    && self.src.get(self.cursor + 1) == Some(&b'\n');
                self.cursor += if crlf_in_range { 2 } else { 1 };
                self.row += 1;
                self.col = 0;
            } else {
                self.cursor += 1;
                self.update_position(byte);
            }

            // Hop to the next included range once this one is exhausted.
            if self.cursor >= range_end && self.ranges.next + 1 < self.ranges.spans.len() {
                self.ranges.next += 1;
                if let Some(next_start) = self.current_range().map(|range| range.start) {
                    self.cursor = next_start;
                    // The jump may skip arbitrary text, so derive row/column
                    // afresh rather than updating them incrementally.
                    let (row, col) =
                        position_to_line_col(self.src, self.cursor, self.line_starts);
                    self.row = row;
                    self.col = col;
                }
            }
        }
    }

    /// Records the current cursor offset as the marked end.
    fn mark_end(&mut self) {
        self.mark_end = self.cursor;
    }

    /// Returns the current zero-based column.
    fn column(&self) -> usize {
        self.col as usize
    }

    /// Returns `true` when there is no current range, or when the cursor has
    /// reached the end of the last range.
    fn is_eof(&self) -> bool {
        match self.current_range() {
            Some(range) => {
                self.cursor >= range.end && self.ranges.next + 1 >= self.ranges.spans.len()
            }
            None => true,
        }
    }
}
impl<'a> TSLexerAdapter<'a> {
    /// Returns `true` when the cursor sits exactly on the first byte of the
    /// current included range, and `false` when it does not or when there is
    /// no current range.
    pub fn is_at_included_range_start(&self) -> bool {
        matches!(
            self.ranges.spans.get(self.ranges.next),
            Some(range) if range.start == self.cursor
        )
    }

    /// Returns `mark_end` reduced by the number of bytes the cursor has moved
    /// past `mark_end`, saturating at zero.
    ///
    /// When the cursor has not moved past the mark this is simply `mark_end`.
    // NOTE(review): despite the name this is not measured from any token start
    // offset (none is stored on the adapter); confirm with the callers that
    // `2 * mark_end - cursor` is really the value they expect.
    pub fn get_marked_length(&self) -> usize {
        let past_mark = self.cursor.saturating_sub(self.mark_end);
        self.mark_end.saturating_sub(past_mark)
    }
}
/// Converts the byte offset `pos` in `src` into a zero-based `(line, column)`.
///
/// `line_starts` holds the ascending byte offsets at which lines begin. The
/// line is the last entry not greater than `pos` (line 0 when there is none).
/// The column is the number of bytes between that line start and `pos`
/// (clamped to `src.len()`) that are not UTF-8 continuation bytes.
fn position_to_line_col(src: &[u8], pos: usize, line_starts: &[usize]) -> (u32, u32) {
    let line = match line_starts.binary_search(&pos) {
        // `pos` is itself the first byte of a line.
        Ok(exact) => exact,
        // Otherwise the insertion point is one past the line containing `pos`.
        Err(insert_at) => insert_at.saturating_sub(1),
    };
    let line_begin = line_starts.get(line).copied().unwrap_or(0);
    let stop = pos.min(src.len());

    // An inverted or out-of-bounds span simply contributes no columns.
    let col = src
        .get(line_begin..stop)
        .unwrap_or(&[])
        .iter()
        .filter(|&&byte| (byte & 0b1100_0000) != 0b1000_0000)
        .count();

    (line as u32, col as u32)
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::external_scanner::Lexer;

    /// Calls `advance(1)` on `adapter` `steps` times.
    fn step(adapter: &mut TSLexerAdapter<'_>, steps: usize) {
        (0..steps).for_each(|_| adapter.advance(1));
    }

    /// A `\r\n` pair is consumed in a single step and starts a new row.
    #[test]
    fn test_advance_crlf() {
        let input = b"hello\r\nworld";
        let line_starts = [0usize, 7];
        let mut adapter = TSLexerAdapter::new(input, 0, &line_starts, vec![0..input.len()]);

        step(&mut adapter, 5);
        assert_eq!(adapter.row, 0);
        assert_eq!(adapter.col, 5);

        step(&mut adapter, 1);
        assert_eq!(adapter.row, 1);
        assert_eq!(adapter.col, 0);
        assert_eq!(adapter.lookahead(), Some(b'w'));
    }

    /// Reaching the end of one range moves the cursor to the start of the next.
    #[test]
    fn test_range_boundaries() {
        let input = b"hello world";
        let line_starts = [0usize];
        let mut adapter = TSLexerAdapter::new(input, 0, &line_starts, vec![0..5, 6..11]);

        step(&mut adapter, 5);
        assert_eq!(adapter.cursor, 6);
        assert_eq!(adapter.lookahead(), Some(b'w'));

        step(&mut adapter, 1);
        assert_eq!(adapter.cursor, 7);
        assert_eq!(adapter.lookahead(), Some(b'o'));
    }

    /// A cursor placed on the first byte of a range is reported as a range start.
    #[test]
    fn test_is_at_included_range_start() {
        let input = b"hello world";
        let line_starts = [0usize];

        let at_first = TSLexerAdapter::new(input, 0, &line_starts, vec![0..5, 6..11]);
        assert!(at_first.is_at_included_range_start());

        let at_second = TSLexerAdapter::new(input, 6, &line_starts, vec![0..5, 6..11]);
        assert!(at_second.is_at_included_range_start());
    }
}