1use std::path::PathBuf;
2use std::sync::Arc;
3
4use ahash::HashMap;
5use parking_lot::RwLock;
6use serde::Deserialize;
7use serde::Serialize;
8
9use mago_interner::StringIdentifier;
10use mago_interner::ThreadedInterner;
11
12use crate::error::SourceError;
13
14pub mod error;
15
/// Category attached to every [`SourceIdentifier`].
///
/// The derived `Ord`/`PartialOrd` follow declaration order
/// (`BuiltIn < External < UserDefined`), so reordering variants is a
/// behavior change.
#[derive(Default, Debug, Clone, Copy, Eq, PartialEq, Hash, Serialize, Deserialize, PartialOrd, Ord)]
pub enum SourceCategory {
    /// NOTE(review): presumably sources shipped with the tool itself — confirm against callers.
    BuiltIn,

    /// NOTE(review): presumably third-party sources that are not part of the user's own project — confirm.
    External,

    /// The default category; also used by `Source::standalone` and `SourceIdentifier::dummy`.
    #[default]
    UserDefined,
}
39
/// Identifies a source by its interned name (field `0`) together with its
/// [`SourceCategory`] (field `1`).
///
/// Two identifiers with the same name but different categories compare unequal
/// and hash differently, because both fields take part in the derived traits.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Serialize, Deserialize, PartialOrd, Ord)]
#[repr(C)]
pub struct SourceIdentifier(pub StringIdentifier, pub SourceCategory);
44
/// A source whose content is available, together with the line index needed to
/// translate byte offsets into line/column positions.
#[derive(Debug, Clone, Eq, PartialEq, Hash, Serialize, Deserialize, PartialOrd, Ord)]
pub struct Source {
    /// Name and category of this source.
    pub identifier: SourceIdentifier,
    /// File system path backing the source; `None` for sources created from in-memory content.
    pub path: Option<PathBuf>,
    /// Interned identifier of the full source text.
    pub content: StringIdentifier,
    /// Length of the source text in bytes.
    pub size: usize,
    /// Byte offset at which each line starts, in ascending order (as produced by `line_starts`).
    pub lines: Vec<usize>,
}
54
/// Implemented by values that can report which source they are associated with.
pub trait HasSource {
    /// Returns the identifier of the associated source.
    fn source(&self) -> SourceIdentifier;
}
59
/// Per-source record kept by the manager.
#[derive(Debug)]
struct SourceEntry {
    /// File backing the source; `Some` for entries created by `insert_path`, `None` for `insert_content`.
    path: Option<PathBuf>,
    /// Cached `(interned content, size in bytes, line start offsets)`.
    /// `None` until the file has been read by `load` or replaced by `write`.
    content: Option<(StringIdentifier, usize, Vec<usize>)>,
}
68
/// Mutable state of a `SourceManager`; always accessed through the manager's lock.
#[derive(Debug)]
struct SourceManagerInner {
    /// Every registered source, keyed by its full identifier (name + category).
    sources: HashMap<SourceIdentifier, SourceEntry>,
    /// Lookup from interned name to the identifier most recently registered under that name.
    sources_by_name: HashMap<StringIdentifier, SourceIdentifier>,
}
77
/// Registry of sources, addressed by [`SourceIdentifier`].
///
/// The registry state lives behind an `Arc<RwLock<..>>`, so clones of a manager
/// share (and observe changes to) the same set of registered sources.
#[derive(Clone, Debug)]
pub struct SourceManager {
    /// Interner used for source names and source contents.
    interner: ThreadedInterner,
    /// Shared, lock-protected registry state.
    inner: Arc<RwLock<SourceManagerInner>>,
}
89
90impl SourceCategory {
92    #[inline(always)]
93    pub const fn is_built_in(&self) -> bool {
94        matches!(self, Self::BuiltIn)
95    }
96
97    #[inline(always)]
98    pub const fn is_external(&self) -> bool {
99        matches!(self, Self::External)
100    }
101
102    #[inline(always)]
103    pub const fn is_user_defined(&self) -> bool {
104        matches!(self, Self::UserDefined)
105    }
106}
107
108impl SourceIdentifier {
110    #[inline(always)]
111    pub fn dummy() -> Self {
112        Self(StringIdentifier::empty(), SourceCategory::UserDefined)
113    }
114
115    #[inline(always)]
117    pub const fn value(&self) -> StringIdentifier {
118        self.0
119    }
120
121    #[inline(always)]
123    pub const fn category(&self) -> SourceCategory {
124        self.1
125    }
126}
127impl Source {
129    #[inline(always)]
143    pub fn standalone(interner: &ThreadedInterner, name: &str, content: &str) -> Self {
144        let lines: Vec<_> = line_starts(content).collect();
145        let size = content.len();
146        let content_id = interner.intern(content);
147
148        Self {
149            identifier: SourceIdentifier(interner.intern(name), SourceCategory::UserDefined),
150            path: None,
151            content: content_id,
152            size,
153            lines,
154        }
155    }
156
157    #[inline(always)]
167    pub fn line_number(&self, offset: usize) -> usize {
168        self.lines.binary_search(&offset).unwrap_or_else(|next_line| next_line - 1)
169    }
170
171    pub fn get_line_start_offset(&self, line: usize) -> Option<usize> {
181        self.lines.get(line).copied()
182    }
183
184    pub fn get_line_end_offset(&self, line: usize) -> Option<usize> {
194        match self.lines.get(line + 1) {
195            Some(&end) => Some(end - 1),
196            None if line == self.lines.len() - 1 => Some(self.size),
197            _ => None,
198        }
199    }
200
201    #[inline(always)]
211    pub fn column_number(&self, offset: usize) -> usize {
212        let line_start = self.lines.binary_search(&offset).unwrap_or_else(|next_line| self.lines[next_line - 1]);
213
214        offset - line_start
215    }
216}
217
218impl SourceManager {
219    #[inline(always)]
221    pub fn new(interner: ThreadedInterner) -> Self {
222        Self {
223            interner,
224            inner: Arc::new(RwLock::new(SourceManagerInner {
225                sources: HashMap::default(),
226                sources_by_name: HashMap::default(),
227            })),
228        }
229    }
230
231    #[inline(always)]
233    pub fn insert_path(&self, name: impl AsRef<str>, path: PathBuf, category: SourceCategory) -> SourceIdentifier {
234        let name_str = name.as_ref();
235        let name_id = self.interner.intern(name_str);
236        let source_id = SourceIdentifier(name_id, category);
237
238        {
239            let inner = self.inner.read();
240            if inner.sources.contains_key(&source_id) {
241                return source_id;
242            }
243        }
244
245        let mut inner = self.inner.write();
246        if inner.sources.contains_key(&source_id) {
248            return source_id;
249        }
250        inner.sources.insert(source_id, SourceEntry { path: Some(path), content: None });
251        inner.sources_by_name.insert(name_id, source_id);
252        source_id
253    }
254
255    #[inline(always)]
257    pub fn insert_content(
258        &self,
259        name: impl AsRef<str>,
260        content: impl AsRef<str>,
261        category: SourceCategory,
262    ) -> SourceIdentifier {
263        let name_str = name.as_ref();
264        let content_str = content.as_ref();
265        let name_id = self.interner.intern(name_str);
266
267        {
268            let inner = self.inner.read();
269            if let Some(&source_id) = inner.sources_by_name.get(&name_id) {
270                return source_id;
271            }
272        }
273
274        let lines: Vec<_> = line_starts(content_str).collect();
275        let size = content_str.len();
276        let content_id = self.interner.intern(content_str);
277        let source_id = SourceIdentifier(name_id, category);
278
279        let mut inner = self.inner.write();
280        if let Some(&existing) = inner.sources_by_name.get(&name_id) {
281            return existing;
282        }
283        inner.sources.insert(source_id, SourceEntry { path: None, content: Some((content_id, size, lines)) });
284        inner.sources_by_name.insert(name_id, source_id);
285        source_id
286    }
287
288    #[inline(always)]
290    pub fn contains(&self, source_id: &SourceIdentifier) -> bool {
291        let inner = self.inner.read();
292        inner.sources.contains_key(source_id)
293    }
294
295    #[inline(always)]
297    pub fn source_ids(&self) -> Vec<SourceIdentifier> {
298        let inner = self.inner.read();
299        inner.sources.keys().cloned().collect()
300    }
301
302    #[inline(always)]
304    pub fn source_ids_for_category(&self, category: SourceCategory) -> Vec<SourceIdentifier> {
305        let inner = self.inner.read();
306        inner.sources.keys().filter(|id| id.category() == category).cloned().collect()
307    }
308
309    #[inline(always)]
311    pub fn source_ids_except_category(&self, category: SourceCategory) -> Vec<SourceIdentifier> {
312        let inner = self.inner.read();
313        inner.sources.keys().filter(|id| id.category() != category).cloned().collect()
314    }
315
316    #[inline(always)]
321    pub fn load(&self, source_id: &SourceIdentifier) -> Result<Source, SourceError> {
322        let path = {
323            let inner = self.inner.read();
324            let entry = inner.sources.get(source_id).ok_or(SourceError::UnavailableSource(*source_id))?;
325
326            if let Some((content, size, ref lines)) = entry.content {
328                return Ok(Source {
329                    identifier: *source_id,
330                    path: entry.path.clone(),
331                    content,
332                    size,
333                    lines: lines.clone(),
334                });
335            }
336
337            entry.path.clone().ok_or(SourceError::UnavailableSource(*source_id))?
339        };
340
341        let bytes = std::fs::read(&path).map_err(SourceError::IOError)?;
343        let content_str = match String::from_utf8(bytes) {
344            Ok(s) => s,
345            Err(err) => {
346                let s = err.into_bytes();
347                let s = String::from_utf8_lossy(&s).into_owned();
348                if source_id.category().is_user_defined() {
349                    tracing::debug!(
350                        "Source '{}' contains invalid UTF-8 sequence; behavior is undefined.",
351                        path.display()
352                    );
353                } else {
354                    tracing::info!(
355                        "Source '{}' contains invalid UTF-8 sequence; behavior is undefined.",
356                        path.display()
357                    );
358                }
359
360                s
361            }
362        };
363        let lines: Vec<_> = line_starts(&content_str).collect();
364        let size = content_str.len();
365        let content_id = self.interner.intern(&content_str);
366
367        {
369            let mut inner = self.inner.write();
370            if let Some(entry) = inner.sources.get_mut(source_id) {
371                if entry.content.is_none() {
373                    entry.content = Some((content_id, size, lines.clone()));
374                }
375                Ok(Source { identifier: *source_id, path: entry.path.clone(), content: content_id, size, lines })
376            } else {
377                Err(SourceError::UnavailableSource(*source_id))
378            }
379        }
380    }
381
382    #[inline(always)]
384    pub fn write(&self, source_id: SourceIdentifier, new_content: impl AsRef<str>) -> Result<(), SourceError> {
385        let new_content_str = new_content.as_ref();
386        let new_content_id = self.interner.intern(new_content_str);
387        let new_lines: Vec<_> = line_starts(new_content_str).collect();
388        let new_size = new_content_str.len();
389
390        let path_opt = {
391            let mut inner = self.inner.write();
392            let entry = inner.sources.get_mut(&source_id).ok_or(SourceError::UnavailableSource(source_id))?;
393            if let Some((old_content, _, _)) = entry.content
394                && old_content == new_content_id
395            {
396                return Ok(());
397            }
398            entry.content = Some((new_content_id, new_size, new_lines));
399            entry.path.clone()
400        };
401
402        if let Some(ref path) = path_opt {
404            std::fs::write(path, self.interner.lookup(&new_content_id)).map_err(SourceError::IOError)?;
405        }
406
407        Ok(())
408    }
409
410    #[inline(always)]
412    pub fn len(&self) -> usize {
413        let inner = self.inner.read();
414        inner.sources.len()
415    }
416
417    #[inline(always)]
419    pub fn is_empty(&self) -> bool {
420        let inner = self.inner.read();
421        inner.sources.is_empty()
422    }
423}
424
425impl<T: HasSource> HasSource for Box<T> {
426    #[inline(always)]
427    fn source(&self) -> SourceIdentifier {
428        self.as_ref().source()
429    }
430}
431
432#[inline(always)]
434fn line_starts(source: &str) -> impl Iterator<Item = usize> + '_ {
435    let bytes = source.as_bytes();
436
437    std::iter::once(0)
438        .chain(memchr::memchr_iter(b'\n', bytes).map(|i| if i > 0 && bytes[i - 1] == b'\r' { i } else { i + 1 }))
439}