1use std::path::PathBuf;
2use std::sync::Arc;
3
4use ahash::HashMap;
5use parking_lot::RwLock;
6use serde::Deserialize;
7use serde::Serialize;
8
9use mago_interner::StringIdentifier;
10use mago_interner::ThreadedInterner;
11
12use crate::error::SourceError;
13
14pub mod error;
15
16#[derive(Default, Debug, Clone, Copy, Eq, PartialEq, Hash, Serialize, Deserialize, PartialOrd, Ord)]
28pub enum SourceCategory {
29 BuiltIn,
31
32 External,
34
35 #[default]
37 UserDefined,
38}
39
40#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Serialize, Deserialize, PartialOrd, Ord)]
42#[repr(C)]
43pub struct SourceIdentifier(pub StringIdentifier, pub SourceCategory);
44
45#[derive(Debug, Clone, Eq, PartialEq, Hash, Serialize, Deserialize, PartialOrd, Ord)]
47pub struct Source {
48 pub identifier: SourceIdentifier,
49 pub path: Option<PathBuf>,
50 pub content: StringIdentifier,
51 pub size: usize,
52 pub lines: Vec<usize>,
53}
54
55pub trait HasSource {
57 fn source(&self) -> SourceIdentifier;
58}
59
60#[derive(Debug)]
62struct SourceEntry {
63 path: Option<PathBuf>,
65 content: Option<(StringIdentifier, usize, Vec<usize>)>,
67}
68
69#[derive(Debug)]
73struct SourceManagerInner {
74 sources: HashMap<SourceIdentifier, SourceEntry>,
75 sources_by_name: HashMap<StringIdentifier, SourceIdentifier>,
76}
77
78#[derive(Clone, Debug)]
83pub struct SourceManager {
84 interner: ThreadedInterner,
86 inner: Arc<RwLock<SourceManagerInner>>,
88}
89
90impl SourceCategory {
92 #[inline(always)]
93 pub const fn is_built_in(&self) -> bool {
94 matches!(self, Self::BuiltIn)
95 }
96
97 #[inline(always)]
98 pub const fn is_external(&self) -> bool {
99 matches!(self, Self::External)
100 }
101
102 #[inline(always)]
103 pub const fn is_user_defined(&self) -> bool {
104 matches!(self, Self::UserDefined)
105 }
106}
107
108impl SourceIdentifier {
110 #[inline(always)]
111 pub fn dummy() -> Self {
112 Self(StringIdentifier::empty(), SourceCategory::UserDefined)
113 }
114
115 #[inline(always)]
117 pub const fn value(&self) -> StringIdentifier {
118 self.0
119 }
120
121 #[inline(always)]
123 pub const fn category(&self) -> SourceCategory {
124 self.1
125 }
126}
127impl Source {
129 #[inline(always)]
143 pub fn standalone(interner: &ThreadedInterner, name: &str, content: &str) -> Self {
144 let lines: Vec<_> = line_starts(content).collect();
145 let size = content.len();
146 let content_id = interner.intern(content);
147
148 Self {
149 identifier: SourceIdentifier(interner.intern(name), SourceCategory::UserDefined),
150 path: None,
151 content: content_id,
152 size,
153 lines,
154 }
155 }
156
157 #[inline(always)]
167 pub fn line_number(&self, offset: usize) -> usize {
168 self.lines.binary_search(&offset).unwrap_or_else(|next_line| next_line - 1)
169 }
170
171 pub fn get_line_start_offset(&self, line: usize) -> Option<usize> {
181 self.lines.get(line).copied()
182 }
183
184 pub fn get_line_end_offset(&self, line: usize) -> Option<usize> {
194 match self.lines.get(line + 1) {
195 Some(&end) => Some(end - 1),
196 None if line == self.lines.len() - 1 => Some(self.size),
197 _ => None,
198 }
199 }
200
201 #[inline(always)]
211 pub fn column_number(&self, offset: usize) -> usize {
212 let line_start = self.lines.binary_search(&offset).unwrap_or_else(|next_line| self.lines[next_line - 1]);
213
214 offset - line_start
215 }
216}
217
218impl SourceManager {
219 #[inline(always)]
221 pub fn new(interner: ThreadedInterner) -> Self {
222 Self {
223 interner,
224 inner: Arc::new(RwLock::new(SourceManagerInner {
225 sources: HashMap::default(),
226 sources_by_name: HashMap::default(),
227 })),
228 }
229 }
230
231 #[inline(always)]
233 pub fn insert_path(&self, name: impl AsRef<str>, path: PathBuf, category: SourceCategory) -> SourceIdentifier {
234 let name_str = name.as_ref();
235 let name_id = self.interner.intern(name_str);
236 let source_id = SourceIdentifier(name_id, category);
237
238 {
239 let inner = self.inner.read();
240 if inner.sources.contains_key(&source_id) {
241 return source_id;
242 }
243 }
244
245 let mut inner = self.inner.write();
246 if inner.sources.contains_key(&source_id) {
248 return source_id;
249 }
250 inner.sources.insert(source_id, SourceEntry { path: Some(path), content: None });
251 inner.sources_by_name.insert(name_id, source_id);
252 source_id
253 }
254
255 #[inline(always)]
257 pub fn insert_content(
258 &self,
259 name: impl AsRef<str>,
260 content: impl AsRef<str>,
261 category: SourceCategory,
262 ) -> SourceIdentifier {
263 let name_str = name.as_ref();
264 let content_str = content.as_ref();
265 let name_id = self.interner.intern(name_str);
266
267 {
268 let inner = self.inner.read();
269 if let Some(&source_id) = inner.sources_by_name.get(&name_id) {
270 return source_id;
271 }
272 }
273
274 let lines: Vec<_> = line_starts(content_str).collect();
275 let size = content_str.len();
276 let content_id = self.interner.intern(content_str);
277 let source_id = SourceIdentifier(name_id, category);
278
279 let mut inner = self.inner.write();
280 if let Some(&existing) = inner.sources_by_name.get(&name_id) {
281 return existing;
282 }
283 inner.sources.insert(source_id, SourceEntry { path: None, content: Some((content_id, size, lines)) });
284 inner.sources_by_name.insert(name_id, source_id);
285 source_id
286 }
287
288 #[inline(always)]
290 pub fn contains(&self, source_id: &SourceIdentifier) -> bool {
291 let inner = self.inner.read();
292 inner.sources.contains_key(source_id)
293 }
294
295 #[inline(always)]
297 pub fn source_ids(&self) -> Vec<SourceIdentifier> {
298 let inner = self.inner.read();
299 inner.sources.keys().cloned().collect()
300 }
301
302 #[inline(always)]
304 pub fn source_ids_for_category(&self, category: SourceCategory) -> Vec<SourceIdentifier> {
305 let inner = self.inner.read();
306 inner.sources.keys().filter(|id| id.category() == category).cloned().collect()
307 }
308
309 #[inline(always)]
311 pub fn source_ids_except_category(&self, category: SourceCategory) -> Vec<SourceIdentifier> {
312 let inner = self.inner.read();
313 inner.sources.keys().filter(|id| id.category() != category).cloned().collect()
314 }
315
316 #[inline(always)]
321 pub fn load(&self, source_id: &SourceIdentifier) -> Result<Source, SourceError> {
322 let path = {
323 let inner = self.inner.read();
324 let entry = inner.sources.get(source_id).ok_or(SourceError::UnavailableSource(*source_id))?;
325
326 if let Some((content, size, ref lines)) = entry.content {
328 return Ok(Source {
329 identifier: *source_id,
330 path: entry.path.clone(),
331 content,
332 size,
333 lines: lines.clone(),
334 });
335 }
336
337 entry.path.clone().ok_or(SourceError::UnavailableSource(*source_id))?
339 };
340
341 let bytes = std::fs::read(&path).map_err(SourceError::IOError)?;
343 let content_str = match String::from_utf8(bytes) {
344 Ok(s) => s,
345 Err(err) => {
346 let s = err.into_bytes();
347 let s = String::from_utf8_lossy(&s).into_owned();
348 if source_id.category().is_user_defined() {
349 tracing::debug!(
350 "Source '{}' contains invalid UTF-8 sequence; behavior is undefined.",
351 path.display()
352 );
353 } else {
354 tracing::info!(
355 "Source '{}' contains invalid UTF-8 sequence; behavior is undefined.",
356 path.display()
357 );
358 }
359
360 s
361 }
362 };
363 let lines: Vec<_> = line_starts(&content_str).collect();
364 let size = content_str.len();
365 let content_id = self.interner.intern(&content_str);
366
367 {
369 let mut inner = self.inner.write();
370 if let Some(entry) = inner.sources.get_mut(source_id) {
371 if entry.content.is_none() {
373 entry.content = Some((content_id, size, lines.clone()));
374 }
375 Ok(Source { identifier: *source_id, path: entry.path.clone(), content: content_id, size, lines })
376 } else {
377 Err(SourceError::UnavailableSource(*source_id))
378 }
379 }
380 }
381
382 #[inline(always)]
384 pub fn write(&self, source_id: SourceIdentifier, new_content: impl AsRef<str>) -> Result<(), SourceError> {
385 let new_content_str = new_content.as_ref();
386 let new_content_id = self.interner.intern(new_content_str);
387 let new_lines: Vec<_> = line_starts(new_content_str).collect();
388 let new_size = new_content_str.len();
389
390 let path_opt = {
391 let mut inner = self.inner.write();
392 let entry = inner.sources.get_mut(&source_id).ok_or(SourceError::UnavailableSource(source_id))?;
393 if let Some((old_content, _, _)) = entry.content
394 && old_content == new_content_id
395 {
396 return Ok(());
397 }
398 entry.content = Some((new_content_id, new_size, new_lines));
399 entry.path.clone()
400 };
401
402 if let Some(ref path) = path_opt {
404 std::fs::write(path, self.interner.lookup(&new_content_id)).map_err(SourceError::IOError)?;
405 }
406
407 Ok(())
408 }
409
410 #[inline(always)]
412 pub fn len(&self) -> usize {
413 let inner = self.inner.read();
414 inner.sources.len()
415 }
416
417 #[inline(always)]
419 pub fn is_empty(&self) -> bool {
420 let inner = self.inner.read();
421 inner.sources.is_empty()
422 }
423}
424
425impl<T: HasSource> HasSource for Box<T> {
426 #[inline(always)]
427 fn source(&self) -> SourceIdentifier {
428 self.as_ref().source()
429 }
430}
431
432#[inline(always)]
434fn line_starts(source: &str) -> impl Iterator<Item = usize> + '_ {
435 let bytes = source.as_bytes();
436
437 std::iter::once(0)
438 .chain(memchr::memchr_iter(b'\n', bytes).map(|i| if i > 0 && bytes[i - 1] == b'\r' { i } else { i + 1 }))
439}