1use std::path::PathBuf;
2use std::sync::Arc;
3
4use ahash::HashMap;
5use parking_lot::RwLock;
6use serde::Deserialize;
7use serde::Serialize;
8
9use mago_interner::StringIdentifier;
10use mago_interner::ThreadedInterner;
11
12use crate::error::SourceError;
13
14pub mod error;
15
16#[derive(Default, Debug, Clone, Copy, Eq, PartialEq, Hash, Serialize, Deserialize, PartialOrd, Ord)]
28pub enum SourceCategory {
29 BuiltIn,
31
32 External,
34
35 #[default]
37 UserDefined,
38}
39
40#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Serialize, Deserialize, PartialOrd, Ord)]
42#[repr(C)]
43pub struct SourceIdentifier(pub StringIdentifier, pub SourceCategory);
44
45#[derive(Debug, Clone, Eq, PartialEq, Hash, Serialize, Deserialize, PartialOrd, Ord)]
47pub struct Source {
48 pub identifier: SourceIdentifier,
49 pub path: Option<PathBuf>,
50 pub content: StringIdentifier,
51 pub size: usize,
52 pub lines: Vec<usize>,
53}
54
55pub trait HasSource {
57 fn source(&self) -> SourceIdentifier;
58}
59
60#[derive(Debug)]
62struct SourceEntry {
63 path: Option<PathBuf>,
65 content: Option<(StringIdentifier, usize, Vec<usize>)>,
67}
68
69#[derive(Debug)]
73struct SourceManagerInner {
74 sources: HashMap<SourceIdentifier, SourceEntry>,
75 sources_by_name: HashMap<StringIdentifier, SourceIdentifier>,
76}
77
78#[derive(Clone, Debug)]
83pub struct SourceManager {
84 interner: ThreadedInterner,
86 inner: Arc<RwLock<SourceManagerInner>>,
88}
89
90impl SourceCategory {
92 #[inline]
93 pub const fn is_built_in(&self) -> bool {
94 matches!(self, Self::BuiltIn)
95 }
96
97 #[inline]
98 pub const fn is_external(&self) -> bool {
99 matches!(self, Self::External)
100 }
101
102 #[inline]
103 pub const fn is_user_defined(&self) -> bool {
104 matches!(self, Self::UserDefined)
105 }
106
107 #[inline]
108 pub const fn as_str(&self) -> &'static str {
109 match self {
110 Self::BuiltIn => "built-in",
111 Self::External => "external",
112 Self::UserDefined => "user defined",
113 }
114 }
115}
116
117impl SourceIdentifier {
119 #[inline]
120 pub fn dummy() -> Self {
121 Self(StringIdentifier::empty(), SourceCategory::UserDefined)
122 }
123
124 #[inline]
126 pub const fn value(&self) -> StringIdentifier {
127 self.0
128 }
129
130 #[inline]
132 pub const fn category(&self) -> SourceCategory {
133 self.1
134 }
135}
136impl Source {
138 #[inline]
152 pub fn standalone(interner: &ThreadedInterner, name: &str, content: &str) -> Self {
153 let lines: Vec<_> = line_starts(content).collect();
154 let size = content.len();
155 let content_id = interner.intern(content);
156
157 Self {
158 identifier: SourceIdentifier(interner.intern(name), SourceCategory::UserDefined),
159 path: None,
160 content: content_id,
161 size,
162 lines,
163 }
164 }
165
166 #[inline]
176 pub fn line_number(&self, offset: usize) -> usize {
177 self.lines.binary_search(&offset).unwrap_or_else(|next_line| next_line - 1)
178 }
179
180 pub fn get_line_start_offset(&self, line: usize) -> Option<usize> {
190 self.lines.get(line).copied()
191 }
192
193 pub fn get_line_end_offset(&self, line: usize) -> Option<usize> {
203 match self.lines.get(line + 1) {
204 Some(&end) => Some(end - 1),
205 None if line == self.lines.len() - 1 => Some(self.size),
206 _ => None,
207 }
208 }
209
210 #[inline]
220 pub fn column_number(&self, offset: usize) -> usize {
221 let line_start = self.lines.binary_search(&offset).unwrap_or_else(|next_line| self.lines[next_line - 1]);
222
223 offset - line_start
224 }
225}
226
227impl SourceManager {
228 #[inline]
230 pub fn new(interner: ThreadedInterner) -> Self {
231 Self {
232 interner,
233 inner: Arc::new(RwLock::new(SourceManagerInner {
234 sources: HashMap::default(),
235 sources_by_name: HashMap::default(),
236 })),
237 }
238 }
239
240 #[inline]
242 pub fn insert_path(&self, name: impl AsRef<str>, path: PathBuf, category: SourceCategory) -> SourceIdentifier {
243 let name_str = name.as_ref();
244 let name_id = self.interner.intern(name_str);
245 let source_id = SourceIdentifier(name_id, category);
246
247 {
248 let inner = self.inner.read();
249 if inner.sources.contains_key(&source_id) {
250 return source_id;
251 }
252 }
253
254 let mut inner = self.inner.write();
255 if inner.sources.contains_key(&source_id) {
257 return source_id;
258 }
259 inner.sources.insert(source_id, SourceEntry { path: Some(path), content: None });
260 inner.sources_by_name.insert(name_id, source_id);
261 source_id
262 }
263
264 #[inline]
266 pub fn insert_content(
267 &self,
268 name: impl AsRef<str>,
269 content: impl AsRef<str>,
270 category: SourceCategory,
271 ) -> SourceIdentifier {
272 let name_str = name.as_ref();
273 let content_str = content.as_ref();
274 let name_id = self.interner.intern(name_str);
275
276 {
277 let inner = self.inner.read();
278 if let Some(&source_id) = inner.sources_by_name.get(&name_id) {
279 return source_id;
280 }
281 }
282
283 let lines: Vec<_> = line_starts(content_str).collect();
284 let size = content_str.len();
285 let content_id = self.interner.intern(content_str);
286 let source_id = SourceIdentifier(name_id, category);
287
288 let mut inner = self.inner.write();
289 if let Some(&existing) = inner.sources_by_name.get(&name_id) {
290 return existing;
291 }
292 inner.sources.insert(source_id, SourceEntry { path: None, content: Some((content_id, size, lines)) });
293 inner.sources_by_name.insert(name_id, source_id);
294 source_id
295 }
296
297 #[inline]
299 pub fn contains(&self, source_id: &SourceIdentifier) -> bool {
300 let inner = self.inner.read();
301 inner.sources.contains_key(source_id)
302 }
303
304 #[inline]
306 pub fn source_ids(&self) -> Vec<SourceIdentifier> {
307 let inner = self.inner.read();
308 inner.sources.keys().cloned().collect()
309 }
310
311 #[inline]
313 pub fn source_ids_for_category(&self, category: SourceCategory) -> Vec<SourceIdentifier> {
314 let inner = self.inner.read();
315 inner.sources.keys().filter(|id| id.category() == category).cloned().collect()
316 }
317
318 #[inline]
320 pub fn source_ids_except_category(&self, category: SourceCategory) -> Vec<SourceIdentifier> {
321 let inner = self.inner.read();
322 inner.sources.keys().filter(|id| id.category() != category).cloned().collect()
323 }
324
325 #[inline]
330 pub fn load(&self, source_id: &SourceIdentifier) -> Result<Source, SourceError> {
331 let path = {
332 let inner = self.inner.read();
333 let entry = inner.sources.get(source_id).ok_or(SourceError::UnavailableSource(*source_id))?;
334
335 if let Some((content, size, ref lines)) = entry.content {
337 return Ok(Source {
338 identifier: *source_id,
339 path: entry.path.clone(),
340 content,
341 size,
342 lines: lines.clone(),
343 });
344 }
345
346 entry.path.clone().ok_or(SourceError::UnavailableSource(*source_id))?
348 };
349
350 let bytes = std::fs::read(&path).map_err(SourceError::IOError)?;
352 let content_str = match String::from_utf8(bytes) {
353 Ok(s) => s,
354 Err(err) => {
355 let s = err.into_bytes();
356 let s = String::from_utf8_lossy(&s).into_owned();
357 if source_id.category().is_user_defined() {
358 tracing::debug!(
359 "Source '{}' contains invalid UTF-8 sequence; behavior is undefined.",
360 path.display()
361 );
362 } else {
363 tracing::info!(
364 "Source '{}' contains invalid UTF-8 sequence; behavior is undefined.",
365 path.display()
366 );
367 }
368
369 s
370 }
371 };
372 let lines: Vec<_> = line_starts(&content_str).collect();
373 let size = content_str.len();
374 let content_id = self.interner.intern(&content_str);
375
376 {
378 let mut inner = self.inner.write();
379 if let Some(entry) = inner.sources.get_mut(source_id) {
380 if entry.content.is_none() {
382 entry.content = Some((content_id, size, lines.clone()));
383 }
384 Ok(Source { identifier: *source_id, path: entry.path.clone(), content: content_id, size, lines })
385 } else {
386 Err(SourceError::UnavailableSource(*source_id))
387 }
388 }
389 }
390
391 #[inline]
393 pub fn write(&self, source_id: SourceIdentifier, new_content: impl AsRef<str>) -> Result<(), SourceError> {
394 let new_content_str = new_content.as_ref();
395 let new_content_id = self.interner.intern(new_content_str);
396 let new_lines: Vec<_> = line_starts(new_content_str).collect();
397 let new_size = new_content_str.len();
398
399 let path_opt = {
400 let mut inner = self.inner.write();
401 let entry = inner.sources.get_mut(&source_id).ok_or(SourceError::UnavailableSource(source_id))?;
402 if let Some((old_content, _, _)) = entry.content
403 && old_content == new_content_id
404 {
405 return Ok(());
406 }
407 entry.content = Some((new_content_id, new_size, new_lines));
408 entry.path.clone()
409 };
410
411 if let Some(ref path) = path_opt {
413 std::fs::write(path, self.interner.lookup(&new_content_id)).map_err(SourceError::IOError)?;
414 }
415
416 Ok(())
417 }
418
419 #[inline]
421 pub fn len(&self) -> usize {
422 let inner = self.inner.read();
423 inner.sources.len()
424 }
425
426 #[inline]
428 pub fn is_empty(&self) -> bool {
429 let inner = self.inner.read();
430 inner.sources.is_empty()
431 }
432}
433
434impl<T: HasSource> HasSource for Box<T> {
435 #[inline]
436 fn source(&self) -> SourceIdentifier {
437 self.as_ref().source()
438 }
439}
440
441#[inline]
443fn line_starts(source: &str) -> impl Iterator<Item = usize> + '_ {
444 let bytes = source.as_bytes();
445
446 std::iter::once(0)
447 .chain(memchr::memchr_iter(b'\n', bytes).map(|i| if i > 0 && bytes[i - 1] == b'\r' { i } else { i + 1 }))
448}