use super::SourceLocation;
use crate::text_size::{TextLen, TextRange, TextSize};
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use std::fmt;
use std::fmt::{Debug, Formatter};
use std::num::NonZeroU32;
use std::ops::Deref;
use std::sync::Arc;
/// Index of the byte offsets at which each line of a source text starts.
///
/// The data lives behind an [`Arc`], so cloning an index only copies the
/// pointer and bumps a reference count.
#[derive(Clone)]
pub struct LineIndex {
    /// Shared, immutable payload of the index.
    inner: Arc<LineIndexInner>,
}
/// Payload shared between all clones of a `LineIndex`.
struct LineIndexInner {
    /// Byte offset of the first byte of every line, in ascending order.
    line_starts: Vec<TextSize>,
    /// Whether the indexed text contained any non-ASCII byte.
    kind: IndexKind,
}
impl LineIndex {
pub fn from_source_text(text: &str) -> Self {
let mut line_starts: Vec<TextSize> = Vec::with_capacity(text.len() / 88);
line_starts.push(TextSize::default());
let bytes = text.as_bytes();
let mut utf8 = false;
assert!(u32::try_from(bytes.len()).is_ok());
for (i, byte) in bytes.iter().enumerate() {
utf8 |= !byte.is_ascii();
match byte {
b'\r' if bytes.get(i + 1) == Some(&b'\n') => continue,
b'\n' | b'\r' => {
#[allow(clippy::cast_possible_truncation)]
line_starts.push(TextSize::from(i as u32) + TextSize::from(1));
}
_ => {}
}
}
let kind = if utf8 {
IndexKind::Utf8
} else {
IndexKind::Ascii
};
Self {
inner: Arc::new(LineIndexInner { line_starts, kind }),
}
}
fn kind(&self) -> IndexKind {
self.inner.kind
}
pub fn source_location(&self, offset: TextSize, content: &str) -> SourceLocation {
match self.binary_search_line(&offset) {
Ok(row) => SourceLocation {
row: OneIndexed::from_zero_indexed(row),
column: OneIndexed::from_zero_indexed(0),
},
Err(next_row) => {
let row = next_row - 1;
let mut line_start = self.line_starts()[row as usize];
let column = if self.kind().is_ascii() {
u32::from(offset - line_start)
} else {
if line_start == TextSize::from(0) && content.starts_with('\u{feff}') {
line_start = '\u{feff}'.text_len();
}
let range = TextRange::new(line_start, offset);
content[range].chars().count().try_into().unwrap()
};
SourceLocation {
row: OneIndexed::from_zero_indexed(row),
column: OneIndexed::from_zero_indexed(column),
}
}
}
}
pub(crate) fn line_count(&self) -> usize {
self.line_starts().len()
}
pub fn line_index(&self, offset: TextSize) -> OneIndexed {
match self.binary_search_line(&offset) {
Ok(row) => OneIndexed::from_zero_indexed(row),
Err(row) => {
OneIndexed::from_zero_indexed(row - 1)
}
}
}
pub(crate) fn line_start(&self, line: OneIndexed, contents: &str) -> TextSize {
let row_index = line.to_zero_indexed_usize();
let starts = self.line_starts();
if row_index == starts.len() {
contents.text_len()
} else {
starts[row_index]
}
}
pub(crate) fn line_end(&self, line: OneIndexed, contents: &str) -> TextSize {
let row_index = line.to_zero_indexed_usize();
let starts = self.line_starts();
if row_index.saturating_add(1) >= starts.len() {
contents.text_len()
} else {
starts[row_index + 1]
}
}
pub(crate) fn line_range(&self, line: OneIndexed, contents: &str) -> TextRange {
let starts = self.line_starts();
if starts.len() == line.to_zero_indexed_usize() {
TextRange::empty(contents.text_len())
} else {
TextRange::new(
self.line_start(line, contents),
self.line_start(line.saturating_add(1), contents),
)
}
}
pub fn line_starts(&self) -> &[TextSize] {
&self.inner.line_starts
}
#[allow(clippy::trivially_copy_pass_by_ref)] fn binary_search_line(&self, offset: &TextSize) -> Result<u32, u32> {
match self.line_starts().binary_search(offset) {
Ok(index) => Ok(index.try_into().unwrap()),
Err(index) => Err(index.try_into().unwrap()),
}
}
}
/// Lets a `LineIndex` be used directly as the slice of its line start offsets.
impl Deref for LineIndex {
    type Target = [TextSize];

    fn deref(&self) -> &Self::Target {
        &self.inner.line_starts
    }
}
/// Formats the index as the list of its line start offsets.
impl Debug for LineIndex {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let mut list = f.debug_list();
        list.entries(self.line_starts().iter());
        list.finish()
    }
}
/// Records whether an indexed text consisted solely of ASCII bytes.
#[derive(Clone, Copy, Debug)]
enum IndexKind {
    /// Every byte of the text was ASCII.
    Ascii,

    /// The text contained at least one non-ASCII byte.
    Utf8,
}
impl IndexKind {
    /// Returns `true` for [`IndexKind::Ascii`] and `false` for [`IndexKind::Utf8`].
    const fn is_ascii(self) -> bool {
        match self {
            IndexKind::Ascii => true,
            IndexKind::Utf8 => false,
        }
    }
}
/// A one-based index (such as a row or column number), stored as a non-zero `u32`.
///
/// Because the wrapped value can never be zero, a "zeroth" line or column
/// cannot be represented.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct OneIndexed(
    /// The one-based value.
    NonZeroU32,
);
/// Converts `value` to a `u32`, handing the original value back as the error
/// when it is larger than `u32::MAX`.
///
/// Written by hand so that it can be called from `const fn`s.
#[allow(clippy::cast_possible_truncation)]
const fn try_to_u32(value: usize) -> Result<u32, usize> {
    if value > u32::MAX as usize {
        return Err(value);
    }

    // In range, so the cast is lossless.
    Ok(value as u32)
}
impl OneIndexed {
    /// The smallest value: one.
    pub const MIN: Self = unwrap(Self::new(1));

    /// The largest value: `u32::MAX`.
    pub const MAX: Self = unwrap(Self::new(u32::MAX));

    /// The non-zero constant one, used to shift zero-based values to one-based.
    const ONE: NonZeroU32 = unwrap(NonZeroU32::new(1));

    /// Creates a one-based index from `value`, or returns `None` if `value` is zero.
    pub const fn new(value: u32) -> Option<Self> {
        if let Some(non_zero) = NonZeroU32::new(value) {
            Some(Self(non_zero))
        } else {
            None
        }
    }

    /// Creates a one-based index from a zero-based `value`.
    ///
    /// The addition saturates, so `u32::MAX` maps to [`OneIndexed::MAX`].
    pub const fn from_zero_indexed(value: u32) -> Self {
        let one_based = Self::ONE.saturating_add(value);
        Self(one_based)
    }

    /// Creates a one-based index from a zero-based `usize`.
    ///
    /// Returns `value` unchanged as the error when it does not fit into a `u32`.
    pub const fn try_from_zero_indexed(value: usize) -> Result<Self, usize> {
        match try_to_u32(value) {
            Err(too_large) => Err(too_large),
            Ok(zero_based) => Ok(Self::from_zero_indexed(zero_based)),
        }
    }

    /// Returns the one-based value as a `u32`.
    pub const fn get(self) -> u32 {
        self.0.get()
    }

    /// Returns the one-based value as a `usize`.
    pub const fn to_usize(self) -> usize {
        self.get() as usize
    }

    /// Returns the zero-based value as a `u32`.
    pub const fn to_zero_indexed(self) -> u32 {
        // Cannot underflow: the stored value is never zero.
        self.get() - 1
    }

    /// Returns the zero-based value as a `usize`.
    pub const fn to_zero_indexed_usize(self) -> usize {
        self.to_zero_indexed() as usize
    }

    /// Adds `rhs`, saturating at [`OneIndexed::MAX`].
    #[must_use]
    pub const fn saturating_add(self, rhs: u32) -> Self {
        // A saturating sum of a non-zero value can never be zero.
        Self(self.0.saturating_add(rhs))
    }

    /// Subtracts `rhs`, saturating at [`OneIndexed::MIN`].
    #[must_use]
    pub const fn saturating_sub(self, rhs: u32) -> Self {
        let difference = self.get().saturating_sub(rhs);
        match NonZeroU32::new(difference) {
            // The subtraction bottomed out at zero: clamp to the minimum.
            None => Self::MIN,
            Some(non_zero) => Self(non_zero),
        }
    }
}
/// Displays the one-based value as a plain number.
impl std::fmt::Display for OneIndexed {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let value: u32 = self.0.get();
        std::fmt::Debug::fmt(&value, f)
    }
}
/// Returns the value inside `option`; usable from `const` contexts.
///
/// # Panics
///
/// Panics with `"unwrapping None"` if `option` is `None`.
const fn unwrap<T: Copy>(option: Option<T>) -> T {
    if let Some(value) = option {
        value
    } else {
        panic!("unwrapping None")
    }
}
#[cfg(test)]
mod tests {
    use crate::source_location::line_index::LineIndex;
    use crate::source_location::{OneIndexed, SourceLocation};
    use crate::text_size::TextSize;

    /// Builds the expected location from a zero-based row and column.
    fn location(row: u32, column: u32) -> SourceLocation {
        SourceLocation {
            row: OneIndexed::from_zero_indexed(row),
            column: OneIndexed::from_zero_indexed(column),
        }
    }

    /// Builds the expected list of line start offsets.
    fn starts(offsets: &[u32]) -> Vec<TextSize> {
        offsets.iter().map(|&offset| TextSize::from(offset)).collect()
    }

    #[test]
    fn ascii_index() {
        // Empty text still has one (empty) line.
        let index = LineIndex::from_source_text("");
        assert_eq!(index.line_starts(), starts(&[0]).as_slice());

        let index = LineIndex::from_source_text("x = 1");
        assert_eq!(index.line_starts(), starts(&[0]).as_slice());

        // A trailing newline opens a new, empty line.
        let index = LineIndex::from_source_text("x = 1\n");
        assert_eq!(index.line_starts(), starts(&[0, 6]).as_slice());

        let index = LineIndex::from_source_text("x = 1\ny = 2\nz = x + y\n");
        assert_eq!(index.line_starts(), starts(&[0, 6, 12, 22]).as_slice());
    }

    #[test]
    fn ascii_source_location() {
        let contents = "x = 1\ny = 2";
        let index = LineIndex::from_source_text(contents);

        // First row.
        let loc = index.source_location(TextSize::from(2), contents);
        assert_eq!(loc, location(0, 2));

        // Second row.
        let loc = index.source_location(TextSize::from(6), contents);
        assert_eq!(loc, location(1, 0));

        // One past the last character.
        let loc = index.source_location(TextSize::from(11), contents);
        assert_eq!(loc, location(1, 5));
    }

    #[test]
    fn ascii_carriage_return() {
        let contents = "x = 4\ry = 3";
        let index = LineIndex::from_source_text(contents);
        assert_eq!(index.line_starts(), starts(&[0, 6]).as_slice());

        assert_eq!(
            index.source_location(TextSize::from(4), contents),
            location(0, 4)
        );
        assert_eq!(
            index.source_location(TextSize::from(6), contents),
            location(1, 0)
        );
        assert_eq!(
            index.source_location(TextSize::from(7), contents),
            location(1, 1)
        );
    }

    #[test]
    fn ascii_carriage_return_newline() {
        let contents = "x = 4\r\ny = 3";
        let index = LineIndex::from_source_text(contents);
        assert_eq!(index.line_starts(), starts(&[0, 7]).as_slice());

        assert_eq!(
            index.source_location(TextSize::from(4), contents),
            location(0, 4)
        );
        assert_eq!(
            index.source_location(TextSize::from(7), contents),
            location(1, 0)
        );
        assert_eq!(
            index.source_location(TextSize::from(8), contents),
            location(1, 1)
        );
    }

    #[test]
    fn utf8_index() {
        let index = LineIndex::from_source_text("x = '🫣'");
        assert_eq!(index.line_count(), 1);
        assert_eq!(index.line_starts(), starts(&[0]).as_slice());

        let index = LineIndex::from_source_text("x = '🫣'\n");
        assert_eq!(index.line_count(), 2);
        assert_eq!(index.line_starts(), starts(&[0, 11]).as_slice());

        let index = LineIndex::from_source_text("x = '🫣'\ny = 2\nz = x + y\n");
        assert_eq!(index.line_count(), 4);
        assert_eq!(index.line_starts(), starts(&[0, 11, 17, 27]).as_slice());

        let index = LineIndex::from_source_text("# 🫣\nclass Foo:\n \"\"\".\"\"\"");
        assert_eq!(index.line_count(), 3);
        assert_eq!(index.line_starts(), starts(&[0, 7, 18]).as_slice());
    }

    #[test]
    fn utf8_carriage_return() {
        let contents = "x = '🫣'\ry = 3";
        let index = LineIndex::from_source_text(contents);
        assert_eq!(index.line_count(), 2);
        assert_eq!(index.line_starts(), starts(&[0, 11]).as_slice());

        // Second-to-last character of the first line.
        assert_eq!(
            index.source_location(TextSize::from(9), contents),
            location(0, 6)
        );
        assert_eq!(
            index.source_location(TextSize::from(11), contents),
            location(1, 0)
        );
        assert_eq!(
            index.source_location(TextSize::from(12), contents),
            location(1, 1)
        );
    }

    #[test]
    fn utf8_carriage_return_newline() {
        let contents = "x = '🫣'\r\ny = 3";
        let index = LineIndex::from_source_text(contents);
        assert_eq!(index.line_count(), 2);
        assert_eq!(index.line_starts(), starts(&[0, 12]).as_slice());

        // Second-to-last character of the first line.
        assert_eq!(
            index.source_location(TextSize::from(9), contents),
            location(0, 6)
        );
        assert_eq!(
            index.source_location(TextSize::from(12), contents),
            location(1, 0)
        );
        assert_eq!(
            index.source_location(TextSize::from(13), contents),
            location(1, 1)
        );
    }

    #[test]
    fn utf8_byte_offset() {
        let contents = "x = '☃'\ny = 2";
        let index = LineIndex::from_source_text(contents);
        assert_eq!(index.line_starts(), starts(&[0, 10]).as_slice());

        // First row.
        let loc = index.source_location(TextSize::from(0), contents);
        assert_eq!(loc, location(0, 0));

        let loc = index.source_location(TextSize::from(5), contents);
        assert_eq!(loc, location(0, 5));

        // Past the multi-byte character: columns count characters, not bytes.
        let loc = index.source_location(TextSize::from(8), contents);
        assert_eq!(loc, location(0, 6));

        // Second row.
        let loc = index.source_location(TextSize::from(10), contents);
        assert_eq!(loc, location(1, 0));

        // One past the last character.
        let loc = index.source_location(TextSize::from(15), contents);
        assert_eq!(loc, location(1, 5));
    }
}