use std::borrow::Cow;
use std::path::PathBuf;
use std::sync::Arc;
use dashmap::DashMap;
use serde::Deserialize;
use serde::Serialize;
use mago_interner::StringIdentifier;
use mago_interner::ThreadedInterner;
use crate::error::SourceError;
pub mod error;
/// Category attached to every source tracked by this module.
///
/// The derived ordering follows declaration order: `BuiltIn` < `External` < `UserDefined`.
/// `UserDefined` is the `Default` value.
#[derive(Default, Debug, Clone, Copy, Eq, PartialEq, Hash, Serialize, Deserialize, PartialOrd, Ord)]
pub enum SourceCategory {
/// A built-in source (presumably one that ships with the tool itself — confirm).
BuiltIn,
/// An external source (presumably third-party / vendored code — confirm).
External,
/// A user-defined source; this is the default category.
#[default]
UserDefined,
}
/// Key that identifies a source: its interned name (field `0`) paired with its
/// [`SourceCategory`] (field `1`).
///
/// Two identifiers with the same name but different categories compare as different.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Serialize, Deserialize, PartialOrd, Ord)]
pub struct SourceIdentifier(pub StringIdentifier, pub SourceCategory);
/// A loaded source: its identity, optional on-disk location, interned content and line table.
#[derive(Debug, Clone, Eq, PartialEq, Hash, Serialize, Deserialize, PartialOrd, Ord)]
pub struct Source {
/// Identifier (interned name + category) of this source.
pub identifier: SourceIdentifier,
/// File the content came from, or `None` for content that only exists in memory.
pub path: Option<PathBuf>,
/// Interned handle to the source text.
pub content: StringIdentifier,
/// Length of the source text in bytes.
pub size: usize,
/// Byte offset at which each line starts; the constructors in this file always
/// produce `0` as the first element, followed by the offset after every `\n`.
pub lines: Vec<usize>,
}
/// Implemented by values that can report which source they are associated with.
pub trait HasSource {
/// Returns the identifier of the source associated with `self`.
fn source(&self) -> SourceIdentifier;
}
/// Internal record the manager keeps for each registered source.
///
/// The code in this file expects at least one of `path` / `content` to be set.
#[derive(Debug)]
struct SourceEntry {
// On-disk location; `None` for sources registered directly from in-memory content.
path: Option<PathBuf>,
// Cached `(interned content, byte length, line start offsets)`.
// `None` for a path-registered source whose file has not been read yet.
content: Option<(StringIdentifier, usize, Vec<usize>)>,
}
/// Registry of sources, keyed by [`SourceIdentifier`], with lazy loading of file contents.
///
/// Both maps live behind `Arc`, so clones of a manager share the same registered sources.
#[derive(Clone, Debug)]
pub struct SourceManager {
// Interner used for source names and source contents.
interner: ThreadedInterner,
// Every registered source, keyed by (name, category).
sources: Arc<DashMap<SourceIdentifier, SourceEntry>>,
// Interned name -> identifier most recently registered under that name.
sources_by_name: Arc<DashMap<StringIdentifier, SourceIdentifier>>,
}
impl SourceCategory {
    /// Returns `true` only for [`SourceCategory::BuiltIn`].
    #[inline(always)]
    pub const fn is_built_in(&self) -> bool {
        match self {
            Self::BuiltIn => true,
            Self::External | Self::UserDefined => false,
        }
    }

    /// Returns `true` only for [`SourceCategory::External`].
    #[inline(always)]
    pub const fn is_external(&self) -> bool {
        match self {
            Self::External => true,
            Self::BuiltIn | Self::UserDefined => false,
        }
    }

    /// Returns `true` only for [`SourceCategory::UserDefined`].
    #[inline(always)]
    pub const fn is_user_defined(&self) -> bool {
        match self {
            Self::UserDefined => true,
            Self::BuiltIn | Self::External => false,
        }
    }
}
impl SourceIdentifier {
#[inline(always)]
pub fn dummy() -> Self {
Self(StringIdentifier::empty(), SourceCategory::UserDefined)
}
#[inline(always)]
pub const fn value(&self) -> StringIdentifier {
self.0
}
#[inline(always)]
pub const fn category(&self) -> SourceCategory {
self.1
}
}
impl Source {
    /// Builds an in-memory source that is not backed by a file.
    ///
    /// `name` and `content` are interned through `interner`. The resulting source is always
    /// categorised as [`SourceCategory::UserDefined`] and has no `path`.
    #[inline(always)]
    pub fn standalone(interner: &ThreadedInterner, name: &str, content: &str) -> Self {
        let lines: Vec<_> = line_starts(content).collect();
        let size = content.len();
        let content_id = interner.intern(content);

        Self {
            identifier: SourceIdentifier(interner.intern(name), SourceCategory::UserDefined),
            path: None,
            content: content_id,
            size,
            lines,
        }
    }

    /// Returns the zero-based index of the line that contains the byte `offset`.
    ///
    /// An offset that is exactly a line start belongs to that line. If the line table is
    /// empty (possible for a hand-built or deserialized `Source`), `0` is returned instead
    /// of panicking.
    #[inline(always)]
    pub fn line_number(&self, offset: usize) -> usize {
        match self.lines.binary_search(&offset) {
            // `offset` is itself the start of line `line`.
            Ok(line) => line,
            // `next_line` is the first line starting after `offset`; the offset therefore
            // lies on the line before it. `saturating_sub` guards the empty-table case.
            Err(next_line) => next_line.saturating_sub(1),
        }
    }

    /// Returns the byte offset at which the zero-based `line` starts, or `None` when the
    /// source has no such line.
    pub fn get_line_start_offset(&self, line: usize) -> Option<usize> {
        self.lines.get(line).copied()
    }

    /// Returns the zero-based column of the byte `offset`, measured in bytes from the start
    /// of the line that contains it.
    ///
    /// An offset that is exactly a line start yields column `0`.
    #[inline(always)]
    pub fn column_number(&self, offset: usize) -> usize {
        // Resolve the line index first and then look up its start *offset*; using the
        // binary-search hit directly would subtract a line index from a byte offset.
        let line_start = self.get_line_start_offset(self.line_number(offset)).unwrap_or(0);

        offset.saturating_sub(line_start)
    }
}
impl SourceManager {
    /// Creates an empty manager that interns names and contents through `interner`.
    #[inline(always)]
    pub fn new(interner: ThreadedInterner) -> Self {
        Self { interner, sources: Arc::new(DashMap::new()), sources_by_name: Arc::new(DashMap::new()) }
    }

    /// Registers a file-backed source without reading the file.
    ///
    /// The source is keyed by `(name, category)`. If that exact key is already registered,
    /// the existing entry is left untouched, `path` is discarded, and the existing
    /// identifier is returned. Otherwise a new entry with no cached content is stored and
    /// the name lookup table is pointed at it (replacing any previous mapping for `name`).
    ///
    /// The file is read lazily by [`SourceManager::load`].
    #[inline(always)]
    pub fn insert_path(&self, name: impl AsRef<str>, path: PathBuf, category: SourceCategory) -> SourceIdentifier {
        let name_id = self.interner.intern(&name);
        let source_id = SourceIdentifier(name_id, category);
        if self.sources.contains_key(&source_id) {
            return source_id;
        }

        self.sources.insert(source_id, SourceEntry { path: Some(path), content: None });
        self.sources_by_name.insert(name_id, source_id);

        source_id
    }

    /// Registers an in-memory source with the given `content`.
    ///
    /// Unlike [`SourceManager::insert_path`], de-duplication here is by *name only*: if any
    /// source was already registered under `name` (in whatever category), its identifier is
    /// returned and both `content` and `category` are ignored.
    #[inline(always)]
    pub fn insert_content(
        &self,
        name: impl AsRef<str>,
        content: impl AsRef<str>,
        category: SourceCategory,
    ) -> SourceIdentifier {
        let name_id = self.interner.intern(&name);
        if let Some(source_id) = self.sources_by_name.get(&name_id).map(|v| *v) {
            return source_id;
        }

        let source_id = SourceIdentifier(name_id, category);
        let lines: Vec<_> = line_starts(content.as_ref()).collect();
        let size = content.as_ref().len();
        let content_id = self.interner.intern(content);

        self.sources.insert(source_id, SourceEntry { path: None, content: Some((content_id, size, lines)) });
        self.sources_by_name.insert(name_id, source_id);

        source_id
    }

    /// Returns `true` if a source with this exact identifier is registered.
    #[inline(always)]
    pub fn contains(&self, source_id: &SourceIdentifier) -> bool {
        self.sources.contains_key(source_id)
    }

    /// Iterates over the identifiers of all registered sources, in the order the underlying
    /// map yields them.
    #[inline(always)]
    pub fn source_ids(&self) -> impl Iterator<Item = SourceIdentifier> + '_ {
        self.sources.iter().map(|entry| *entry.key())
    }

    /// Iterates over the identifiers of all registered sources whose category equals
    /// `category`.
    #[inline(always)]
    pub fn source_ids_for_category(&self, category: SourceCategory) -> impl Iterator<Item = SourceIdentifier> + '_ {
        self.sources.iter().filter(move |entry| entry.key().category() == category).map(|entry| *entry.key())
    }

    /// Iterates over the identifiers of all registered sources whose category differs from
    /// `category`.
    #[inline(always)]
    pub fn source_ids_except_category(&self, category: SourceCategory) -> impl Iterator<Item = SourceIdentifier> + '_ {
        self.sources.iter().filter(move |entry| entry.key().category() != category).map(|entry| *entry.key())
    }

    /// Returns the [`Source`] for `source_id`, reading it from disk on first use.
    ///
    /// When the entry already has cached content, that content is returned. Otherwise the
    /// entry's file is read, decoded as UTF-8 (invalid sequences are replaced lossily and a
    /// warning is logged), interned, cached on the entry, and returned.
    ///
    /// The mutable map guard obtained at the top is held for the whole call, including the
    /// file read.
    ///
    /// # Errors
    ///
    /// * [`SourceError::UnavailableSource`] if `source_id` is not registered.
    /// * [`SourceError::IOError`] if the file cannot be read.
    ///
    /// # Panics
    ///
    /// Panics if the entry has neither cached content nor a path.
    #[inline(always)]
    pub fn load(&self, source_id: &SourceIdentifier) -> Result<Source, SourceError> {
        let mut entry = self.sources.get_mut(source_id).ok_or(SourceError::UnavailableSource(*source_id))?;

        // Fast path: content was inserted directly or cached by an earlier load/write.
        if let Some((content, size, ref lines)) = entry.content {
            return Ok(Source {
                identifier: *source_id,
                path: entry.path.clone(),
                content,
                size,
                lines: lines.clone(),
            });
        }

        let path = entry.path.clone().expect("Entry must have either content or path");
        let bytes = std::fs::read(&path).map_err(SourceError::IOError)?;
        let content = match String::from_utf8_lossy(&bytes) {
            Cow::Borrowed(s) => s.to_owned(),
            // `Owned` means at least one invalid sequence had to be replaced.
            Cow::Owned(s) => {
                tracing::warn!("Source '{}' contains invalid UTF-8 sequence, behavior is undefined.", path.display());
                s
            }
        };

        let lines: Vec<_> = line_starts(&content).collect();
        let size = content.len();
        let content_id = self.interner.intern(content);

        // One copy of the line table stays in the cache, the other goes to the caller.
        entry.content = Some((content_id, size, lines.clone()));

        Ok(Source { identifier: *source_id, path: Some(path), content: content_id, size, lines })
    }

    /// Replaces the content of `source_id` with `new_content`.
    ///
    /// If the cached content is already identical, nothing happens. Otherwise, when the
    /// source has a path the new content is written to that file, and then the cached
    /// content, size and line table are updated.
    ///
    /// The file is written *before* the cache is touched: if the write fails, the cache
    /// still reflects what is on disk, so retrying the same `write` is not mistaken for a
    /// no-op by the "unchanged" check above.
    ///
    /// # Errors
    ///
    /// * [`SourceError::UnavailableSource`] if `source_id` is not registered.
    /// * [`SourceError::IOError`] if writing the file fails; the cached content is left
    ///   unchanged in that case.
    #[inline(always)]
    pub fn write(&self, source_id: SourceIdentifier, new_content: impl AsRef<str>) -> Result<(), SourceError> {
        let mut entry = self.sources.get_mut(&source_id).ok_or(SourceError::UnavailableSource(source_id))?;
        let new_content = new_content.as_ref();
        let new_content_id = self.interner.intern(new_content);

        // Interned ids are compared instead of the strings themselves.
        if let Some((old_content, _, _)) = entry.content.as_ref() {
            if *old_content == new_content_id {
                return Ok(());
            }
        }

        // Persist first so a failed write never leaves the cache ahead of the file.
        if let Some(path) = entry.path.as_ref() {
            std::fs::write(path, new_content).map_err(SourceError::IOError)?;
        }

        let new_lines: Vec<_> = line_starts(new_content).collect();
        entry.content = Some((new_content_id, new_content.len(), new_lines));

        Ok(())
    }

    /// Returns the number of registered sources.
    #[inline(always)]
    pub fn len(&self) -> usize {
        self.sources.len()
    }

    /// Returns `true` when no source is registered.
    #[inline(always)]
    pub fn is_empty(&self) -> bool {
        self.sources.is_empty()
    }
}
// NOTE(review): these impls override the compiler's automatic `Send`/`Sync` analysis for
// `SourceManager`. Nothing visible in this file shows why that is required or what makes it
// sound — presumably it hinges on `ThreadedInterner` being safe to share across threads;
// confirm, and record the invariant in a `// SAFETY:` comment (or drop the impls if the
// auto traits already apply).
unsafe impl Send for SourceManager {}
unsafe impl Sync for SourceManager {}
/// A boxed value reports the same source as the value it owns.
impl<T: HasSource> HasSource for Box<T> {
    #[inline(always)]
    fn source(&self) -> SourceIdentifier {
        // `self` is `&Box<T>`; dereference twice to reach the boxed `T`.
        let inner: &T = &**self;

        inner.source()
    }
}
/// Yields the byte offset at which each line of `source` begins.
///
/// The first item is always `0`; every `\n` byte then contributes the offset that
/// immediately follows it (so a trailing newline yields a final, empty line).
#[inline(always)]
fn line_starts(source: &str) -> impl Iterator<Item = usize> + '_ {
    // `\n` is a single byte in UTF-8 and never occurs inside a multi-byte sequence, so a
    // byte scan finds exactly the newline characters.
    let after_each_newline = source
        .bytes()
        .enumerate()
        .filter_map(|(index, byte)| if byte == b'\n' { Some(index + 1) } else { None });

    std::iter::once(0).chain(after_each_newline)
}