use memchr::memchr_iter;
use std::str;
use crate::common::SourceLocation;
/// Returns how many UTF-16 code units are needed to encode `slice`.
///
/// Pure-ASCII text takes a shortcut: every byte is exactly one code unit, so
/// the byte length is the answer. Anything else is handed to
/// `utf16_len_non_ascii`, which walks the UTF-8 sequences.
#[inline]
pub fn utf16_len(slice: &str) -> usize {
    if slice.is_ascii() {
        // One byte per character, one code unit per character.
        slice.len()
    } else {
        utf16_len_non_ascii(slice.as_bytes())
    }
}
/// Counts UTF-16 code units by walking UTF-8 lead bytes.
///
/// Each lead byte determines both how many bytes to skip and how many code
/// units the sequence contributes: one unit for 1-, 2- and 3-byte sequences,
/// two units (a surrogate pair) for 4-byte sequences. A continuation byte
/// encountered where a lead byte is expected is skipped without being counted.
/// The bytes following a lead byte are skipped unchecked, so a truncated
/// trailing sequence simply ends the walk.
#[inline]
fn utf16_len_non_ascii(bytes: &[u8]) -> usize {
    let mut units = 0;
    let mut pos = 0;
    while let Some(&lead) = bytes.get(pos) {
        // (bytes to advance, UTF-16 units contributed)
        let (width, emitted) = match lead {
            0x00..=0x7F => (1, 1), // ASCII
            0x80..=0xBF => (1, 0), // stray continuation byte
            0xC0..=0xDF => (2, 1), // 2-byte sequence
            0xE0..=0xEF => (3, 1), // 3-byte sequence
            0xF0..=0xFF => (4, 2), // 4-byte sequence -> surrogate pair
        };
        pos += width;
        units += emitted;
    }
    units
}
/// Collects the byte offset of every `\n` in `input`.
///
/// The result is pre-sized from a rough guess of one newline per 40 bytes of
/// input; the vector still grows as needed when lines are shorter.
pub fn find_lines_memchr_bump_vec(input: &[u8]) -> Vec<usize> {
    // Sizing heuristic only; it does not affect the returned offsets.
    const ASSUMED_BYTES_PER_LINE: usize = 40;

    let mut newline_positions = Vec::with_capacity(input.len() / ASSUMED_BYTES_PER_LINE);
    newline_positions.extend(memchr_iter(b'\n', input));
    newline_positions
}
/// Computes the UTF-16 offset at which each line of `input` starts.
///
/// `line_offsets` is expected to hold the byte positions of the newlines in
/// `input` in ascending order (the output of `find_lines_memchr_bump_vec`).
/// The returned vector has `line_offsets.len() + 1` entries: entry 0 is `0`,
/// and entry `i` is the number of UTF-16 code units up to and including the
/// `i`-th newline.
///
/// For non-ASCII input each line is sliced out of `input`, so offsets that do
/// not describe `input` can cause a slicing panic.
pub fn find_utf16_offset_from_vec(input: &str, line_offsets: &[usize]) -> Vec<usize> {
    let mut line_starts_utf16 = Vec::with_capacity(line_offsets.len() + 1);
    line_starts_utf16.push(0);

    if input.is_ascii() {
        // Byte offsets and UTF-16 offsets coincide: a line starts right after
        // the preceding newline.
        line_starts_utf16.extend(line_offsets.iter().map(|&newline| newline + 1));
        return line_starts_utf16;
    }

    // General case: accumulate the UTF-16 length of each line, newline included.
    let mut line_start_byte = 0usize;
    let mut units_so_far = 0usize;
    for &newline in line_offsets {
        units_so_far += utf16_len(&input[line_start_byte..=newline]);
        line_starts_utf16.push(units_so_far);
        line_start_byte = newline + 1;
    }
    line_starts_utf16
}
/// Translates byte offsets in a source string into line/column positions and
/// UTF-16 offsets.
pub struct PositionResolver<'a> {
/// The source text that all offsets refer to.
input: &'a str,
/// Byte offset of each `\n` in `input`, as returned by
/// `find_lines_memchr_bump_vec`.
line_offsets: Vec<usize>,
/// UTF-16 offset of the start of each line, as returned by
/// `find_utf16_offset_from_vec`; left empty by `new_for_sourcemap`.
utf16_cache: Vec<usize>,
}
impl<'a> PositionResolver<'a> {
    /// Indexes `input`: records the byte offset of every newline and
    /// precomputes the UTF-16 offset of every line start.
    pub fn new(input: &'a str) -> Self {
        let line_offsets = find_lines_memchr_bump_vec(input.as_bytes());
        let utf16_cache = find_utf16_offset_from_vec(input, &line_offsets);
        Self {
            input,
            line_offsets,
            utf16_cache,
        }
    }

    /// Like [`PositionResolver::new`], but leaves the UTF-16 line-start cache
    /// empty.
    ///
    /// Line and column lookups are unaffected. The `offset_utf16` value
    /// reported by `offset_to_line_col` (and everything built on it) falls
    /// back to a line start of `0`, i.e. it is relative to the start of the
    /// line rather than the start of the input.
    // NOTE(review): presumably meant for source-map generation, which only
    // needs line/column — confirm against callers.
    pub fn new_for_sourcemap(input: &'a str) -> Self {
        Self {
            input,
            line_offsets: find_lines_memchr_bump_vec(input.as_bytes()),
            utf16_cache: Vec::new(),
        }
    }

    /// Shared lookup: returns the zero-based line index containing `offset`
    /// and the zero-based column of `offset` in UTF-16 code units.
    ///
    /// A newline byte is attributed to the line it terminates.
    #[inline]
    fn line_index_and_col(&self, offset: usize) -> (usize, usize) {
        debug_assert!(offset <= self.input.len(), "offset out of bounds");
        debug_assert!(
            self.input.is_char_boundary(offset),
            "offset must land on a char boundary"
        );

        // Found or not, the index is the number of newlines strictly before
        // `offset`, which is the zero-based line index.
        let line_index = match self.line_offsets.binary_search(&offset) {
            Ok(index) | Err(index) => index,
        };
        // A line starts one byte past the previous newline; the first line
        // starts at byte 0.
        let line_start = match line_index.checked_sub(1) {
            Some(previous) => self.line_offsets[previous] + 1,
            None => 0,
        };
        (line_index, utf16_len(&self.input[line_start..offset]))
    }

    /// Resolves a byte `offset` to `(line, column)`.
    ///
    /// Both values are 1-based; the column is measured in UTF-16 code units.
    ///
    /// # Panics
    ///
    /// Debug builds assert that `offset` is within `input` and on a char
    /// boundary; in release builds violating either makes the internal string
    /// slice panic.
    #[inline]
    pub fn offset_to_line_and_col(&self, offset: usize) -> (usize, usize) {
        let (line_index, col_units) = self.line_index_and_col(offset);
        (line_index + 1, col_units + 1)
    }

    /// Resolves a byte `offset` to `(line, column, offset_utf16)`.
    ///
    /// `line` and `column` are 1-based (column in UTF-16 code units).
    /// `offset_utf16` is the cached UTF-16 offset of the line start plus the
    /// zero-based column; when the cache has no entry for the line, the line
    /// start is taken to be `0`.
    ///
    /// # Panics
    ///
    /// Same conditions as [`PositionResolver::offset_to_line_and_col`].
    #[inline]
    pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize, usize) {
        let (line_index, col_units) = self.line_index_and_col(offset);
        let line_start_units = self.utf16_cache.get(line_index).copied().unwrap_or(0);
        (line_index + 1, col_units + 1, line_start_units + col_units)
    }

    /// Builds a `SourceLocation` for the byte range `start..end`, resolving
    /// both endpoints and borrowing the covered text from the input.
    ///
    /// # Panics
    ///
    /// Panics if the range is not a valid char-boundary-aligned slice of the
    /// input.
    pub fn to_source_location(&self, start: u32, end: u32) -> SourceLocation<'a> {
        SourceLocation {
            start: self.to_position(start),
            end: self.to_position(end),
            source: &self.input[start as usize..end as usize],
        }
    }

    /// Resolves a byte `offset` into a `Position` carrying the byte offset,
    /// UTF-16 offset, 1-based line and 1-based column.
    pub fn to_position(&self, offset: u32) -> crate::common::Position {
        let (line, column, offset_utf16) = self.offset_to_line_col(offset as usize);
        crate::common::Position {
            offset,
            offset_utf16: offset_utf16 as u32,
            line: line as u32,
            column: column as u32,
        }
    }

    /// Returns the text in the byte range `start..end` of the input.
    ///
    /// Panics if the range is out of bounds or not on char boundaries.
    pub fn slice(&self, start: usize, end: usize) -> &'a str {
        &self.input[start..end]
    }

    /// Returns the raw bytes in the range `start..end` of the input.
    ///
    /// Panics if the range is out of bounds.
    pub fn slice_bytes(&self, start: usize, end: usize) -> &'a [u8] {
        &self.input.as_bytes()[start..end]
    }
}
/// Cursor over a `PositionResolver` that remembers the line reached by the
/// previous lookup, so successive lookups scan forward from there.
pub struct PositionSweep<'a> {
/// Resolver whose input, newline table and UTF-16 cache are consulted.
resolver: &'a PositionResolver<'a>,
/// Zero-based index of the line reached by the most recent lookup; starts at 0.
current_line: usize,
}
impl<'a> PositionSweep<'a> {
    /// Creates a sweep positioned on the first line of `resolver`'s input.
    pub fn new(resolver: &'a PositionResolver<'a>) -> Self {
        Self {
            resolver,
            current_line: 0,
        }
    }

    /// Resolves a byte `offset` to `(line, column, offset_utf16)`.
    ///
    /// `line` and `column` are 1-based (column in UTF-16 code units).
    /// `offset_utf16` is the resolver's cached UTF-16 line start plus the
    /// zero-based column, with a line start of `0` when the cache has no
    /// entry for the line.
    ///
    /// Lookups are cheapest when offsets are non-decreasing: the cursor just
    /// steps forward over newlines. An offset that lies before the start of
    /// the remembered line is still handled: the cursor is repositioned with
    /// a binary search over the newlines already passed.
    ///
    /// # Panics
    ///
    /// Debug builds assert that `offset` is within the input and on a char
    /// boundary; in release builds violating either makes the internal string
    /// slice panic.
    #[inline]
    pub fn offset_to_line_col(&mut self, offset: usize) -> (usize, usize, usize) {
        debug_assert!(offset <= self.resolver.input.len(), "offset out of bounds");
        debug_assert!(
            self.resolver.input.is_char_boundary(offset),
            "offset must land on a char boundary"
        );
        let line_offsets = &self.resolver.line_offsets;

        // Rewind when the caller moved backwards past the start of the
        // remembered line. Without this, `line_start` would exceed `offset`
        // and the slice below would panic. `current_line` never exceeds
        // `line_offsets.len()`, so the index is in range.
        if self.current_line > 0 && offset <= line_offsets[self.current_line - 1] {
            // The target line lies among the newlines already passed; the
            // search index equals the number of newlines strictly before
            // `offset`.
            self.current_line = match line_offsets[..self.current_line].binary_search(&offset) {
                Ok(index) | Err(index) => index,
            };
        }

        // Fast path: step forward over every newline that precedes `offset`.
        while self.current_line < line_offsets.len() && offset > line_offsets[self.current_line] {
            self.current_line += 1;
        }

        let line = self.current_line;
        // A line starts one byte past the previous newline; the first line
        // starts at byte 0.
        let line_start = if line == 0 {
            0
        } else {
            line_offsets[line - 1] + 1
        };
        let col_utf16 = utf16_len(&self.resolver.input[line_start..offset]);
        let offset_utf16 = self.resolver.utf16_cache.get(line).copied().unwrap_or(0) + col_utf16;
        (line + 1, col_utf16 + 1, offset_utf16)
    }
}
impl<'a> PositionResolver<'a> {
/// Creates a `PositionSweep` cursor over this resolver, starting at the
/// first line.
pub fn sweep(&'a self) -> PositionSweep<'a> {
PositionSweep::new(self)
}
}
#[cfg(test)]
#[path = "position_tests.rs"]
mod position_tests;