#[cfg(feature = "serialization")]
use serde::{Deserialize, Serialize};
use std::{error, fmt};
use crate::{ByteIndex, ColumnIndex, LineIndex, LineOffset, Location, RawIndex, Span};
/// Error reported when a line index fails a bounds check against a file's lines.
#[derive(Debug, PartialEq)]
pub struct LineIndexOutOfBoundsError {
/// The line index that failed the bounds check.
pub given: LineIndex,
/// The upper bound the index was checked against.
pub max: LineIndex,
}
// Empty impl: the default `Error` methods are used; the message comes from `Display`.
impl error::Error for LineIndexOutOfBoundsError {}
impl fmt::Display for LineIndexOutOfBoundsError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"Line index out of bounds - given: {}, max: {}",
self.given, self.max
)
}
}
/// Error reported when a byte index cannot be converted into a line/column location.
#[derive(Debug, PartialEq)]
pub enum LocationError {
/// The byte index lies at or beyond the end of the source; `span` is the
/// span of the whole source it was checked against.
OutOfBounds { given: ByteIndex, span: Span },
/// The byte index is inside the source but the text up to it could not be
/// sliced as a `str`, i.e. it does not fall on a character boundary.
InvalidCharBoundary { given: ByteIndex },
}
// Empty impl: the default `Error` methods are used; the message comes from `Display`.
impl error::Error for LocationError {}
impl fmt::Display for LocationError {
    /// Writes a one-line message describing which byte index was rejected and why.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match *self {
            LocationError::OutOfBounds {
                given: ref byte_index,
                span: ref source_span,
            } => f.write_fmt(format_args!(
                "Byte index out of bounds - given: {}, span: {}",
                byte_index, source_span
            )),
            LocationError::InvalidCharBoundary {
                given: ref byte_index,
            } => f.write_fmt(format_args!(
                "Byte index within character boundary - given: {}",
                byte_index
            )),
        }
    }
}
/// Error reported when a span cannot be used to slice a file's source.
#[derive(Debug, PartialEq)]
pub struct SpanOutOfBoundsError {
/// The span reported as being out of bounds.
pub given: Span,
/// The span reported as the bounds that were exceeded.
pub span: Span,
}
// Empty impl: the default `Error` methods are used; the message comes from `Display`.
impl error::Error for SpanOutOfBoundsError {}
impl fmt::Display for SpanOutOfBoundsError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"Span out of bounds - given: {}, span: {}",
self.given, self.span
)
}
}
/// A handle that identifies a file stored in a `Files` database.
///
/// The wrapped value is the file's position in the `Files::files` vector.
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serialization", derive(Deserialize, Serialize))]
#[cfg_attr(feature = "memory_usage", derive(heapsize_derive::HeapSizeOf))]
pub struct FileId(u32);
/// A collection of source files, each addressed by the `FileId` returned from `Files::add`.
#[derive(Debug, Clone)]
pub struct Files {
// Indexed by the raw value inside `FileId`.
files: Vec<File>,
}
impl Files {
pub fn new() -> Files {
Files { files: Vec::new() }
}
pub fn add(&mut self, name: impl Into<String>, source: impl Into<String>) -> FileId {
let file_id = FileId(self.files.len() as u32);
self.files.push(File::new(name.into(), source.into()));
file_id
}
pub fn update(&mut self, file_id: FileId, source: impl Into<String>) {
self.get_mut(file_id).update(source.into())
}
fn get(&self, file_id: FileId) -> &File {
&self.files[file_id.0 as usize]
}
fn get_mut(&mut self, file_id: FileId) -> &mut File {
&mut self.files[file_id.0 as usize]
}
pub fn name(&self, file_id: FileId) -> &str {
self.get(file_id).name()
}
pub fn line_span(
&self,
file_id: FileId,
line_index: impl Into<LineIndex>,
) -> Result<Span, LineIndexOutOfBoundsError> {
self.get(file_id).line_span(line_index.into())
}
pub fn location(
&self,
file_id: FileId,
byte_index: impl Into<ByteIndex>,
) -> Result<Location, LocationError> {
self.get(file_id).location(byte_index.into())
}
pub fn source(&self, file_id: FileId) -> &str {
self.get(file_id).source()
}
pub fn source_span(&self, file_id: FileId) -> Span {
self.get(file_id).source_span()
}
pub fn source_slice(
&self,
file_id: FileId,
span: impl Into<Span>,
) -> Result<&str, SpanOutOfBoundsError> {
self.get(file_id).source_slice(span.into())
}
}
/// A single file stored in the database: its name, its text, and a line index over the text.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serialization", derive(Deserialize, Serialize))]
#[cfg_attr(feature = "memory_usage", derive(heapsize_derive::HeapSizeOf))]
struct File {
// The name given when the file was added.
name: String,
// The complete source text.
source: String,
// Byte index at which each line begins, as produced by `compute_line_starts`;
// rebuilt whenever `source` is replaced through `File::update`.
line_starts: Vec<ByteIndex>,
}
/// Builds the list of byte indices at which each line of `source` begins.
///
/// Byte 0 is always recorded as the start of the first line; every `'\n'`
/// contributes one more entry pointing at the byte directly after it.
fn compute_line_starts(source: &str) -> Vec<ByteIndex> {
    let mut line_starts = vec![ByteIndex::from(0)];
    line_starts.extend(
        source
            .match_indices('\n')
            .map(|(newline_at, _)| ByteIndex::from(newline_at as u32 + 1)),
    );
    line_starts
}
impl File {
    /// Creates a file from its name and text, indexing the line starts up front.
    fn new(name: String, source: String) -> File {
        let line_starts = compute_line_starts(&source);
        File {
            name,
            source,
            line_starts,
        }
    }

    /// Replaces the text and rebuilds the line index; the name is kept.
    fn update(&mut self, source: String) {
        let line_starts = compute_line_starts(&source);
        self.source = source;
        self.line_starts = line_starts;
    }

    /// Returns the file's name.
    fn name(&self) -> &str {
        &self.name
    }

    /// Returns the byte index at which `line_index` begins.
    ///
    /// The index equal to the number of lines is accepted as an end sentinel
    /// and maps to the end of the source, so that the span of the final line
    /// can be formed from two consecutive line starts.
    ///
    /// # Errors
    ///
    /// Returns `LineIndexOutOfBoundsError` when `line_index` is greater than
    /// the sentinel; `max` is then the sentinel index itself.
    fn line_start(&self, line_index: LineIndex) -> Result<ByteIndex, LineIndexOutOfBoundsError> {
        use std::cmp::Ordering;

        match line_index.cmp(&self.last_line_index()) {
            Ordering::Less => Ok(self.line_starts[line_index.to_usize()]),
            Ordering::Equal => Ok(self.source_span().end()),
            Ordering::Greater => Err(LineIndexOutOfBoundsError {
                given: line_index,
                max: self.last_line_index(),
            }),
        }
    }

    /// Returns the end-sentinel line index.
    ///
    /// Despite the name, this is the number of recorded line starts, i.e. one
    /// past the index of the final line.
    fn last_line_index(&self) -> LineIndex {
        LineIndex::from(self.line_starts.len() as RawIndex)
    }

    /// Returns the span from the start of `line_index` to the start of the
    /// following line (or the end of the source for the final line).
    ///
    /// # Errors
    ///
    /// Returns `LineIndexOutOfBoundsError` when `line_index` is not a line of
    /// this file. `given` is the index the caller passed and `max` is the
    /// index of the final line.
    fn line_span(&self, line_index: LineIndex) -> Result<Span, LineIndexOutOfBoundsError> {
        // Real lines sit strictly below the end sentinel. Checking here, rather
        // than relying on the `line_index + 1` probe below, keeps the error
        // about the caller's own index instead of the probe's.
        if line_index >= self.last_line_index() {
            let line_count = self.line_starts.len() as RawIndex;
            return Err(LineIndexOutOfBoundsError {
                given: line_index,
                // `saturating_sub` guards an empty `line_starts`, which
                // `compute_line_starts` never produces but a deserialized
                // `File` could carry.
                max: LineIndex::from(line_count.saturating_sub(1)),
            });
        }

        let line_start = self.line_start(line_index)?;
        let next_line_start = self.line_start(line_index + LineOffset::from(1))?;
        Ok(Span::new(line_start, next_line_start))
    }

    /// Converts a byte index into a line/column location.
    ///
    /// The column is the number of grapheme clusters between the start of the
    /// line and `byte_index`.
    ///
    /// # Errors
    ///
    /// * `LocationError::OutOfBounds` when `byte_index` is past the end of the
    ///   source.
    /// * `LocationError::InvalidCharBoundary` when `byte_index` is inside the
    ///   source but not on a character boundary.
    fn location(&self, byte_index: ByteIndex) -> Result<Location, LocationError> {
        use unicode_segmentation::UnicodeSegmentation;

        match self.line_starts.binary_search(&byte_index) {
            // Exact hit: the index is the first byte of `line`, so column 0.
            Ok(line) => Ok(Location::new(line as u32, 0)),
            // Miss: `next_line` is the insertion point, so the index belongs to
            // the line before it. `compute_line_starts` always records byte 0
            // first, so the insertion point is at least 1.
            Err(next_line) => {
                let line_index = LineIndex::from(next_line as u32 - 1);
                let line_start_index =
                    self.line_start(line_index)
                        .map_err(|_| LocationError::OutOfBounds {
                            given: byte_index,
                            span: self.source_span(),
                        })?;
                // `str::get` yields `None` both for an index past the end and
                // for one that splits a character; tell the two apart below.
                let line_src = self
                    .source()
                    .get(line_start_index.to_usize()..byte_index.to_usize())
                    .ok_or_else(|| {
                        let given = byte_index;
                        if given >= self.source_span().end() {
                            let span = self.source_span();
                            LocationError::OutOfBounds { given, span }
                        } else {
                            LocationError::InvalidCharBoundary { given }
                        }
                    })?;

                Ok(Location {
                    line: line_index,
                    column: ColumnIndex::from(line_src.graphemes(true).count() as u32),
                })
            },
        }
    }

    /// Returns the complete source text.
    fn source(&self) -> &str {
        &self.source
    }

    /// Returns the span covering the complete source text.
    fn source_span(&self) -> Span {
        Span::from_str(self.source())
    }

    /// Returns the part of the source covered by `span`.
    ///
    /// # Errors
    ///
    /// Returns `SpanOutOfBoundsError` when the source cannot be sliced by
    /// `span` (`str::get` returned `None`). `given` is the span that was
    /// requested and `span` is the span of the whole source.
    fn source_slice(&self, span: Span) -> Result<&str, SpanOutOfBoundsError> {
        let start = span.start().to_usize();
        let end = span.end().to_usize();

        // The requested span must not be shadowed here, otherwise the error
        // would report the file's own span as the offending one.
        self.source
            .get(start..end)
            .ok_or_else(|| SpanOutOfBoundsError {
                given: span,
                span: self.source_span(),
            })
    }
}
#[cfg(test)]
mod test {
    use pretty_assertions::assert_eq;

    use super::*;

    // Fixture mixing `\n` and `\r\n` endings, an empty line, and a final line
    // without a terminator.
    const TEST_SOURCE: &str = "foo\nbar\r\n\nbaz";

    /// Line starts are recorded at byte 0 and directly after every `\n`.
    #[test]
    fn line_starts() {
        let mut files = Files::new();
        let file_id = files.add("test", TEST_SOURCE);

        let expected = [
            ByteIndex::from(0),
            ByteIndex::from(4),
            ByteIndex::from(9),
            ByteIndex::from(10),
        ];

        assert_eq!(files.get(file_id).line_starts, expected);
    }

    /// Slicing the source by each line's span yields the line including its terminator.
    #[test]
    fn line_span_sources() {
        let mut files = Files::new();
        let file_id = files.add("test", TEST_SOURCE);

        let mut line_sources = Vec::new();
        for line in 0..4 {
            let line_span = files.line_span(file_id, line).unwrap();
            line_sources.push(files.source_slice(file_id, line_span));
        }

        assert_eq!(
            line_sources,
            [Ok("foo\n"), Ok("bar\r\n"), Ok("\n"), Ok("baz")],
        );
    }
}