use std::borrow::Cow;
use std::cmp::Ordering;
use std::ops::Range;
use std::path::PathBuf;
use std::sync::Arc;
use codespan_reporting::files::line_starts;
use codespan_reporting::files::Error;
use codespan_reporting::files::Files;
use dashmap::DashMap;
use serde::Deserialize;
use serde::Serialize;
use mago_interner::StringIdentifier;
use mago_interner::ThreadedInterner;
use crate::error::SourceError;
pub mod error;
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Serialize, Deserialize, PartialOrd, Ord)]
pub struct SourceIdentifier(pub StringIdentifier, pub bool);
#[derive(Debug, Clone, Eq, PartialEq, Hash, Serialize, Deserialize, PartialOrd, Ord)]
pub struct Source {
pub identifier: SourceIdentifier,
pub path: Option<PathBuf>,
pub content: StringIdentifier,
pub size: usize,
pub lines: Vec<usize>,
}
pub trait HasSource {
fn source(&self) -> SourceIdentifier;
}
#[derive(Debug)]
struct SourceEntry {
name: String,
path: Option<PathBuf>,
content: Option<(StringIdentifier, usize, Vec<usize>)>,
}
#[derive(Clone, Debug)]
pub struct SourceManager {
interner: ThreadedInterner,
sources: Arc<DashMap<SourceIdentifier, SourceEntry>>,
}
impl SourceIdentifier {
pub fn dummy() -> Self {
Self(StringIdentifier::empty(), true)
}
#[inline(always)]
pub const fn value(&self) -> StringIdentifier {
self.0
}
#[inline(always)]
pub const fn is_external(&self) -> bool {
!self.1
}
#[inline(always)]
pub const fn is_user_defined(&self) -> bool {
self.1
}
}
impl Source {
pub fn line_number(&self, offset: usize) -> usize {
self.lines.binary_search(&offset).unwrap_or_else(|next_line| next_line - 1)
}
}
impl SourceManager {
pub fn new(interner: ThreadedInterner) -> Self {
Self { interner, sources: Arc::new(DashMap::new()) }
}
pub fn insert_path(&self, name: String, path: PathBuf, user_defined: bool) -> SourceIdentifier {
let string_id = self.interner.intern(&name);
let source_id = SourceIdentifier(string_id, user_defined);
if self.sources.contains_key(&source_id) {
return source_id;
}
self.sources.insert(source_id, SourceEntry { name, path: Some(path), content: None });
source_id
}
pub fn insert_content(&mut self, name: String, content: String, user_defined: bool) -> SourceIdentifier {
if let Some(entry) = self.sources.iter().find(|entry| entry.name == name) {
return *entry.key();
}
let source_id = SourceIdentifier(self.interner.intern(&name), user_defined);
let lines = line_starts(&content).collect();
let size = content.len();
let content = self.interner.intern(content);
self.sources.insert(source_id, SourceEntry { name, path: None, content: Some((content, size, lines)) });
source_id
}
pub fn contains(&self, source_id: &SourceIdentifier) -> bool {
self.sources.contains_key(source_id)
}
pub fn source_ids(&self) -> impl Iterator<Item = SourceIdentifier> + '_ {
self.sources.iter().map(|entry| *entry.key())
}
pub fn user_defined_source_ids(&self) -> impl Iterator<Item = SourceIdentifier> + '_ {
self.sources.iter().filter(|entry| entry.key().is_user_defined()).map(|entry| *entry.key())
}
pub fn external_source_ids(&self) -> impl Iterator<Item = SourceIdentifier> + '_ {
self.sources.iter().filter(|entry| entry.key().is_external()).map(|entry| *entry.key())
}
pub fn load(&self, source_id: SourceIdentifier) -> Result<Source, SourceError> {
let Some(mut entry) = self.sources.get_mut(&source_id) else {
return Err(SourceError::UnavailableSource(source_id));
};
match &entry.content {
Some((content, size, lines)) => Ok(Source {
identifier: source_id,
path: entry.path.clone(),
content: *content,
size: *size,
lines: lines.clone(),
}),
None => {
let path = entry.path.clone().expect("source entry must contain either content or path");
let content = std::fs::read(&path)
.map(|bytes| match String::from_utf8_lossy(&bytes) {
Cow::Borrowed(str) => str.to_string(),
Cow::Owned(string) => {
tracing::warn!(
"encountered invalid utf-8 sequence in file {:?}. behavior with non-utf-8 files is undefined and may lead to unexpected results.",
path,
);
string
}
})?;
let (_, v) = entry.pair_mut();
let lines: Vec<_> = line_starts(&content).collect();
let size = content.len();
let content = self.interner.intern(content);
let source = Source { identifier: source_id, path: Some(path), content, size, lines: lines.clone() };
v.content = Some((content, size, lines));
Ok(source)
}
}
}
pub fn write(&self, source_id: SourceIdentifier, content: String) -> Result<(), SourceError> {
let mut entry = self.sources.get_mut(&source_id).ok_or(SourceError::UnavailableSource(source_id))?;
let lines = line_starts(&content).collect();
let size = content.len();
let content = self.interner.intern(content);
let (_, v) = entry.pair_mut();
if let Some((old_content, _, _)) = v.content.as_mut() {
if *old_content == content {
return Ok(());
}
}
v.content = Some((content, size, lines));
if let Some(path) = entry.value().path.as_ref() {
std::fs::write(path, self.interner.lookup(&content)).map_err(SourceError::IOError)?;
}
Ok(())
}
pub fn len(&self) -> usize {
self.sources.len()
}
pub fn is_empty(&self) -> bool {
self.sources.is_empty()
}
fn get(&self, source_id: SourceIdentifier) -> Result<Source, Error> {
self.sources
.get(&source_id)
.map(|entry| {
let entry = entry.value();
let (content, size, lines) = entry
.content
.as_ref()
.expect("content must be initialized when source entry is present in the map");
Source {
identifier: source_id,
path: entry.path.clone(),
content: *content,
size: *size,
lines: lines.clone(),
}
})
.ok_or(Error::FileMissing)
}
}
unsafe impl Send for SourceManager {}
unsafe impl Sync for SourceManager {}
impl<'a> Files<'a> for SourceManager {
type FileId = SourceIdentifier;
type Name = &'a str;
type Source = &'a str;
fn name(&'a self, file_id: SourceIdentifier) -> Result<&'a str, Error> {
self.get(file_id).map(|source| self.interner.lookup(&source.identifier.value()))
}
fn source(&'a self, file_id: SourceIdentifier) -> Result<&'a str, Error> {
self.get(file_id).map(|source| self.interner.lookup(&source.content))
}
fn line_index(&self, file_id: SourceIdentifier, byte_index: usize) -> Result<usize, Error> {
let source = self.get(file_id)?;
Ok(source.line_number(byte_index))
}
fn line_range(&self, file_id: SourceIdentifier, line_index: usize) -> Result<Range<usize>, Error> {
let source = self.get(file_id)?;
codespan_line_range(&source.lines, source.size, line_index)
}
}
fn codespan_line_start(lines: &[usize], size: usize, line_index: usize) -> Result<usize, Error> {
match line_index.cmp(&lines.len()) {
Ordering::Less => Ok(lines.get(line_index).cloned().expect("failed despite previous check")),
Ordering::Equal => Ok(size),
Ordering::Greater => Err(Error::LineTooLarge { given: line_index, max: lines.len() - 1 }),
}
}
fn codespan_line_range(lines: &[usize], size: usize, line_index: usize) -> Result<Range<usize>, Error> {
let line_start = codespan_line_start(lines, size, line_index)?;
let next_line_start = codespan_line_start(lines, size, line_index + 1)?;
Ok(line_start..next_line_start)
}
impl<T: HasSource> HasSource for Box<T> {
fn source(&self) -> SourceIdentifier {
self.as_ref().source()
}
}