1use std::path::PathBuf;
2use std::sync::Arc;
3
4use ahash::HashMap;
5use parking_lot::RwLock;
6use serde::Deserialize;
7use serde::Serialize;
8
9use mago_interner::StringIdentifier;
10use mago_interner::ThreadedInterner;
11
12use crate::error::SourceError;
13
14pub mod error;
15
16#[derive(Default, Debug, Clone, Copy, Eq, PartialEq, Hash, Serialize, Deserialize, PartialOrd, Ord)]
28pub enum SourceCategory {
29 BuiltIn,
31
32 External,
34
35 #[default]
37 UserDefined,
38}
39
40#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Serialize, Deserialize, PartialOrd, Ord)]
42#[repr(C)]
43pub struct SourceIdentifier(pub StringIdentifier, pub SourceCategory);
44
45#[derive(Debug, Clone, Eq, PartialEq, Hash, Serialize, Deserialize, PartialOrd, Ord)]
47pub struct Source {
48 pub identifier: SourceIdentifier,
49 pub path: Option<PathBuf>,
50 pub content: StringIdentifier,
51 pub size: usize,
52 pub lines: Vec<usize>,
53}
54
55pub trait HasSource {
57 fn source(&self) -> SourceIdentifier;
58}
59
60#[derive(Debug)]
62struct SourceEntry {
63 path: Option<PathBuf>,
65 content: Option<(StringIdentifier, usize, Vec<usize>)>,
67}
68
69#[derive(Debug)]
73struct SourceManagerInner {
74 sources: HashMap<SourceIdentifier, SourceEntry>,
75 sources_by_name: HashMap<StringIdentifier, SourceIdentifier>,
76}
77
78#[derive(Clone, Debug)]
83pub struct SourceManager {
84 interner: ThreadedInterner,
86 inner: Arc<RwLock<SourceManagerInner>>,
88}
89
90impl SourceCategory {
92 #[inline(always)]
93 pub const fn is_built_in(&self) -> bool {
94 matches!(self, Self::BuiltIn)
95 }
96
97 #[inline(always)]
98 pub const fn is_external(&self) -> bool {
99 matches!(self, Self::External)
100 }
101
102 #[inline(always)]
103 pub const fn is_user_defined(&self) -> bool {
104 matches!(self, Self::UserDefined)
105 }
106}
107
108impl SourceIdentifier {
110 #[inline(always)]
111 pub fn dummy() -> Self {
112 Self(StringIdentifier::empty(), SourceCategory::UserDefined)
113 }
114
115 #[inline(always)]
117 pub const fn value(&self) -> StringIdentifier {
118 self.0
119 }
120
121 #[inline(always)]
123 pub const fn category(&self) -> SourceCategory {
124 self.1
125 }
126}
127impl Source {
129 #[inline(always)]
143 pub fn standalone(interner: &ThreadedInterner, name: &str, content: &str) -> Self {
144 let lines: Vec<_> = line_starts(content).collect();
145 let size = content.len();
146 let content_id = interner.intern(content);
147
148 Self {
149 identifier: SourceIdentifier(interner.intern(name), SourceCategory::UserDefined),
150 path: None,
151 content: content_id,
152 size,
153 lines,
154 }
155 }
156
157 #[inline(always)]
167 pub fn line_number(&self, offset: usize) -> usize {
168 self.lines.binary_search(&offset).unwrap_or_else(|next_line| next_line - 1)
169 }
170
171 pub fn get_line_start_offset(&self, line: usize) -> Option<usize> {
181 self.lines.get(line).copied()
182 }
183
184 #[inline(always)]
194 pub fn column_number(&self, offset: usize) -> usize {
195 let line_start = self.lines.binary_search(&offset).unwrap_or_else(|next_line| self.lines[next_line - 1]);
196
197 offset - line_start
198 }
199}
200
201impl SourceManager {
202 #[inline(always)]
204 pub fn new(interner: ThreadedInterner) -> Self {
205 Self {
206 interner,
207 inner: Arc::new(RwLock::new(SourceManagerInner {
208 sources: HashMap::default(),
209 sources_by_name: HashMap::default(),
210 })),
211 }
212 }
213
214 #[inline(always)]
216 pub fn insert_path(&self, name: impl AsRef<str>, path: PathBuf, category: SourceCategory) -> SourceIdentifier {
217 let name_str = name.as_ref();
218 let name_id = self.interner.intern(name_str);
219 let source_id = SourceIdentifier(name_id, category);
220
221 {
222 let inner = self.inner.read();
223 if inner.sources.contains_key(&source_id) {
224 return source_id;
225 }
226 }
227
228 let mut inner = self.inner.write();
229 if inner.sources.contains_key(&source_id) {
231 return source_id;
232 }
233 inner.sources.insert(source_id, SourceEntry { path: Some(path), content: None });
234 inner.sources_by_name.insert(name_id, source_id);
235 source_id
236 }
237
238 #[inline(always)]
240 pub fn insert_content(
241 &self,
242 name: impl AsRef<str>,
243 content: impl AsRef<str>,
244 category: SourceCategory,
245 ) -> SourceIdentifier {
246 let name_str = name.as_ref();
247 let content_str = content.as_ref();
248 let name_id = self.interner.intern(name_str);
249
250 {
251 let inner = self.inner.read();
252 if let Some(&source_id) = inner.sources_by_name.get(&name_id) {
253 return source_id;
254 }
255 }
256
257 let lines: Vec<_> = line_starts(content_str).collect();
258 let size = content_str.len();
259 let content_id = self.interner.intern(content_str);
260 let source_id = SourceIdentifier(name_id, category);
261
262 let mut inner = self.inner.write();
263 if let Some(&existing) = inner.sources_by_name.get(&name_id) {
264 return existing;
265 }
266 inner.sources.insert(source_id, SourceEntry { path: None, content: Some((content_id, size, lines)) });
267 inner.sources_by_name.insert(name_id, source_id);
268 source_id
269 }
270
271 #[inline(always)]
273 pub fn contains(&self, source_id: &SourceIdentifier) -> bool {
274 let inner = self.inner.read();
275 inner.sources.contains_key(source_id)
276 }
277
278 #[inline(always)]
280 pub fn source_ids(&self) -> Vec<SourceIdentifier> {
281 let inner = self.inner.read();
282 inner.sources.keys().cloned().collect()
283 }
284
285 #[inline(always)]
287 pub fn source_ids_for_category(&self, category: SourceCategory) -> Vec<SourceIdentifier> {
288 let inner = self.inner.read();
289 inner.sources.keys().filter(|id| id.category() == category).cloned().collect()
290 }
291
292 #[inline(always)]
294 pub fn source_ids_except_category(&self, category: SourceCategory) -> Vec<SourceIdentifier> {
295 let inner = self.inner.read();
296 inner.sources.keys().filter(|id| id.category() != category).cloned().collect()
297 }
298
299 #[inline(always)]
304 pub fn load(&self, source_id: &SourceIdentifier) -> Result<Source, SourceError> {
305 {
307 let inner = self.inner.read();
308 if let Some(entry) = inner.sources.get(source_id) {
309 if let Some((content, size, ref lines)) = entry.content {
310 return Ok(Source {
311 identifier: *source_id,
312 path: entry.path.clone(),
313 content,
314 size,
315 lines: lines.clone(),
316 });
317 }
318 }
319 }
320
321 let path = {
323 let inner = self.inner.read();
324 let entry = inner.sources.get(source_id).ok_or(SourceError::UnavailableSource(*source_id))?;
325
326 entry.path.clone().ok_or(SourceError::UnavailableSource(*source_id))?
327 };
328
329 let bytes = std::fs::read(&path).map_err(SourceError::IOError)?;
331 let content_str = match String::from_utf8(bytes) {
332 Ok(s) => s,
333 Err(err) => {
334 let s = err.into_bytes();
335 let s = String::from_utf8_lossy(&s).into_owned();
336 tracing::warn!("Source '{}' contains invalid UTF-8 sequence; behavior is undefined.", path.display());
337 s
338 }
339 };
340 let lines: Vec<_> = line_starts(&content_str).collect();
341 let size = content_str.len();
342 let content_id = self.interner.intern(&content_str);
343
344 {
346 let mut inner = self.inner.write();
347 if let Some(entry) = inner.sources.get_mut(source_id) {
348 if entry.content.is_none() {
350 entry.content = Some((content_id, size, lines.clone()));
351 }
352 Ok(Source { identifier: *source_id, path: entry.path.clone(), content: content_id, size, lines })
353 } else {
354 Err(SourceError::UnavailableSource(*source_id))
355 }
356 }
357 }
358
359 #[inline(always)]
361 pub fn write(&self, source_id: SourceIdentifier, new_content: impl AsRef<str>) -> Result<(), SourceError> {
362 let new_content_str = new_content.as_ref();
363 let new_content_id = self.interner.intern(new_content_str);
364 let new_lines: Vec<_> = line_starts(new_content_str).collect();
365 let new_size = new_content_str.len();
366
367 let path_opt = {
368 let mut inner = self.inner.write();
369 let entry = inner.sources.get_mut(&source_id).ok_or(SourceError::UnavailableSource(source_id))?;
370 if let Some((old_content, _, _)) = entry.content {
371 if old_content == new_content_id {
372 return Ok(());
373 }
374 }
375 entry.content = Some((new_content_id, new_size, new_lines));
376 entry.path.clone()
377 };
378
379 if let Some(ref path) = path_opt {
381 std::fs::write(path, self.interner.lookup(&new_content_id)).map_err(SourceError::IOError)?;
382 }
383
384 Ok(())
385 }
386
387 #[inline(always)]
389 pub fn len(&self) -> usize {
390 let inner = self.inner.read();
391 inner.sources.len()
392 }
393
394 #[inline(always)]
396 pub fn is_empty(&self) -> bool {
397 let inner = self.inner.read();
398 inner.sources.is_empty()
399 }
400}
401
402impl<T: HasSource> HasSource for Box<T> {
403 #[inline(always)]
404 fn source(&self) -> SourceIdentifier {
405 self.as_ref().source()
406 }
407}
408
409#[inline(always)]
411fn line_starts(source: &str) -> impl Iterator<Item = usize> + '_ {
412 std::iter::once(0).chain(memchr::memchr_iter(b'\n', source.as_bytes()).map(|i| i + 1))
413}