1use std::path::PathBuf;
2use std::sync::Arc;
3
4use ahash::HashMap;
5use parking_lot::RwLock;
6use serde::Deserialize;
7use serde::Serialize;
8
9use mago_interner::StringIdentifier;
10use mago_interner::ThreadedInterner;
11
12use crate::error::SourceError;
13
14pub mod error;
15
16#[derive(Default, Debug, Clone, Copy, Eq, PartialEq, Hash, Serialize, Deserialize, PartialOrd, Ord)]
28pub enum SourceCategory {
29 BuiltIn,
31
32 External,
34
35 #[default]
37 UserDefined,
38}
39
40#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Serialize, Deserialize, PartialOrd, Ord)]
42#[repr(C)]
43pub struct SourceIdentifier(pub StringIdentifier, pub SourceCategory);
44
45#[derive(Debug, Clone, Eq, PartialEq, Hash, Serialize, Deserialize, PartialOrd, Ord)]
47pub struct Source {
48 pub identifier: SourceIdentifier,
49 pub path: Option<PathBuf>,
50 pub content: StringIdentifier,
51 pub size: usize,
52 pub lines: Vec<usize>,
53}
54
55pub trait HasSource {
57 fn source(&self) -> SourceIdentifier;
58}
59
60#[derive(Debug)]
62struct SourceEntry {
63 path: Option<PathBuf>,
65 content: Option<(StringIdentifier, usize, Vec<usize>)>,
67}
68
69#[derive(Debug)]
73struct SourceManagerInner {
74 sources: HashMap<SourceIdentifier, SourceEntry>,
75 sources_by_name: HashMap<StringIdentifier, SourceIdentifier>,
76}
77
78#[derive(Clone, Debug)]
83pub struct SourceManager {
84 interner: ThreadedInterner,
86 inner: Arc<RwLock<SourceManagerInner>>,
88}
89
90impl SourceCategory {
92 #[inline(always)]
93 pub const fn is_built_in(&self) -> bool {
94 matches!(self, Self::BuiltIn)
95 }
96
97 #[inline(always)]
98 pub const fn is_external(&self) -> bool {
99 matches!(self, Self::External)
100 }
101
102 #[inline(always)]
103 pub const fn is_user_defined(&self) -> bool {
104 matches!(self, Self::UserDefined)
105 }
106}
107
108impl SourceIdentifier {
110 #[inline(always)]
111 pub fn dummy() -> Self {
112 Self(StringIdentifier::empty(), SourceCategory::UserDefined)
113 }
114
115 #[inline(always)]
117 pub const fn value(&self) -> StringIdentifier {
118 self.0
119 }
120
121 #[inline(always)]
123 pub const fn category(&self) -> SourceCategory {
124 self.1
125 }
126}
127impl Source {
129 #[inline(always)]
143 pub fn standalone(interner: &ThreadedInterner, name: &str, content: &str) -> Self {
144 let lines: Vec<_> = line_starts(content).collect();
145 let size = content.len();
146 let content_id = interner.intern(content);
147
148 Self {
149 identifier: SourceIdentifier(interner.intern(name), SourceCategory::UserDefined),
150 path: None,
151 content: content_id,
152 size,
153 lines,
154 }
155 }
156
157 #[inline(always)]
167 pub fn line_number(&self, offset: usize) -> usize {
168 self.lines.binary_search(&offset).unwrap_or_else(|next_line| next_line - 1)
169 }
170
171 pub fn get_line_start_offset(&self, line: usize) -> Option<usize> {
181 self.lines.get(line).copied()
182 }
183
184 pub fn get_line_end_offset(&self, line: usize) -> Option<usize> {
194 match self.lines.get(line + 1) {
195 Some(&end) => Some(end - 1),
196 None if line == self.lines.len() - 1 => Some(self.size),
197 _ => None,
198 }
199 }
200
201 #[inline(always)]
211 pub fn column_number(&self, offset: usize) -> usize {
212 let line_start = self.lines.binary_search(&offset).unwrap_or_else(|next_line| self.lines[next_line - 1]);
213
214 offset - line_start
215 }
216}
217
218impl SourceManager {
219 #[inline(always)]
221 pub fn new(interner: ThreadedInterner) -> Self {
222 Self {
223 interner,
224 inner: Arc::new(RwLock::new(SourceManagerInner {
225 sources: HashMap::default(),
226 sources_by_name: HashMap::default(),
227 })),
228 }
229 }
230
231 #[inline(always)]
233 pub fn insert_path(&self, name: impl AsRef<str>, path: PathBuf, category: SourceCategory) -> SourceIdentifier {
234 let name_str = name.as_ref();
235 let name_id = self.interner.intern(name_str);
236 let source_id = SourceIdentifier(name_id, category);
237
238 {
239 let inner = self.inner.read();
240 if inner.sources.contains_key(&source_id) {
241 return source_id;
242 }
243 }
244
245 let mut inner = self.inner.write();
246 if inner.sources.contains_key(&source_id) {
248 return source_id;
249 }
250 inner.sources.insert(source_id, SourceEntry { path: Some(path), content: None });
251 inner.sources_by_name.insert(name_id, source_id);
252 source_id
253 }
254
255 #[inline(always)]
257 pub fn insert_content(
258 &self,
259 name: impl AsRef<str>,
260 content: impl AsRef<str>,
261 category: SourceCategory,
262 ) -> SourceIdentifier {
263 let name_str = name.as_ref();
264 let content_str = content.as_ref();
265 let name_id = self.interner.intern(name_str);
266
267 {
268 let inner = self.inner.read();
269 if let Some(&source_id) = inner.sources_by_name.get(&name_id) {
270 return source_id;
271 }
272 }
273
274 let lines: Vec<_> = line_starts(content_str).collect();
275 let size = content_str.len();
276 let content_id = self.interner.intern(content_str);
277 let source_id = SourceIdentifier(name_id, category);
278
279 let mut inner = self.inner.write();
280 if let Some(&existing) = inner.sources_by_name.get(&name_id) {
281 return existing;
282 }
283 inner.sources.insert(source_id, SourceEntry { path: None, content: Some((content_id, size, lines)) });
284 inner.sources_by_name.insert(name_id, source_id);
285 source_id
286 }
287
288 #[inline(always)]
290 pub fn contains(&self, source_id: &SourceIdentifier) -> bool {
291 let inner = self.inner.read();
292 inner.sources.contains_key(source_id)
293 }
294
295 #[inline(always)]
297 pub fn source_ids(&self) -> Vec<SourceIdentifier> {
298 let inner = self.inner.read();
299 inner.sources.keys().cloned().collect()
300 }
301
302 #[inline(always)]
304 pub fn source_ids_for_category(&self, category: SourceCategory) -> Vec<SourceIdentifier> {
305 let inner = self.inner.read();
306 inner.sources.keys().filter(|id| id.category() == category).cloned().collect()
307 }
308
309 #[inline(always)]
311 pub fn source_ids_except_category(&self, category: SourceCategory) -> Vec<SourceIdentifier> {
312 let inner = self.inner.read();
313 inner.sources.keys().filter(|id| id.category() != category).cloned().collect()
314 }
315
316 #[inline(always)]
321 pub fn load(&self, source_id: &SourceIdentifier) -> Result<Source, SourceError> {
322 {
324 let inner = self.inner.read();
325 if let Some(entry) = inner.sources.get(source_id) {
326 if let Some((content, size, ref lines)) = entry.content {
327 return Ok(Source {
328 identifier: *source_id,
329 path: entry.path.clone(),
330 content,
331 size,
332 lines: lines.clone(),
333 });
334 }
335 }
336 }
337
338 let path = {
340 let inner = self.inner.read();
341 let entry = inner.sources.get(source_id).ok_or(SourceError::UnavailableSource(*source_id))?;
342
343 entry.path.clone().ok_or(SourceError::UnavailableSource(*source_id))?
344 };
345
346 let bytes = std::fs::read(&path).map_err(SourceError::IOError)?;
348 let content_str = match String::from_utf8(bytes) {
349 Ok(s) => s,
350 Err(err) => {
351 let s = err.into_bytes();
352 let s = String::from_utf8_lossy(&s).into_owned();
353 tracing::warn!("Source '{}' contains invalid UTF-8 sequence; behavior is undefined.", path.display());
354 s
355 }
356 };
357 let lines: Vec<_> = line_starts(&content_str).collect();
358 let size = content_str.len();
359 let content_id = self.interner.intern(&content_str);
360
361 {
363 let mut inner = self.inner.write();
364 if let Some(entry) = inner.sources.get_mut(source_id) {
365 if entry.content.is_none() {
367 entry.content = Some((content_id, size, lines.clone()));
368 }
369 Ok(Source { identifier: *source_id, path: entry.path.clone(), content: content_id, size, lines })
370 } else {
371 Err(SourceError::UnavailableSource(*source_id))
372 }
373 }
374 }
375
376 #[inline(always)]
378 pub fn write(&self, source_id: SourceIdentifier, new_content: impl AsRef<str>) -> Result<(), SourceError> {
379 let new_content_str = new_content.as_ref();
380 let new_content_id = self.interner.intern(new_content_str);
381 let new_lines: Vec<_> = line_starts(new_content_str).collect();
382 let new_size = new_content_str.len();
383
384 let path_opt = {
385 let mut inner = self.inner.write();
386 let entry = inner.sources.get_mut(&source_id).ok_or(SourceError::UnavailableSource(source_id))?;
387 if let Some((old_content, _, _)) = entry.content {
388 if old_content == new_content_id {
389 return Ok(());
390 }
391 }
392 entry.content = Some((new_content_id, new_size, new_lines));
393 entry.path.clone()
394 };
395
396 if let Some(ref path) = path_opt {
398 std::fs::write(path, self.interner.lookup(&new_content_id)).map_err(SourceError::IOError)?;
399 }
400
401 Ok(())
402 }
403
404 #[inline(always)]
406 pub fn len(&self) -> usize {
407 let inner = self.inner.read();
408 inner.sources.len()
409 }
410
411 #[inline(always)]
413 pub fn is_empty(&self) -> bool {
414 let inner = self.inner.read();
415 inner.sources.is_empty()
416 }
417}
418
419impl<T: HasSource> HasSource for Box<T> {
420 #[inline(always)]
421 fn source(&self) -> SourceIdentifier {
422 self.as_ref().source()
423 }
424}
425
426#[inline(always)]
428fn line_starts(source: &str) -> impl Iterator<Item = usize> + '_ {
429 let bytes = source.as_bytes();
430
431 std::iter::once(0)
432 .chain(memchr::memchr_iter(b'\n', bytes).map(|i| if i > 0 && bytes[i - 1] == b'\r' { i } else { i + 1 }))
433}