moore_common/
source.rs

1// Copyright (c) 2016-2021 Fabian Schuiki
2
//! A global source file table that assigns an opaque ID to each processed
//! source file. This helps keep source locations lean and allows for
//! simple querying of information.
6
7use crate::name::RcStr;
8use memmap::Mmap;
9use once_cell::sync::OnceCell;
10use std;
11use std::borrow::Borrow;
12use std::cell::RefCell;
13use std::cmp::{max, min};
14use std::collections::HashMap;
15use std::fmt;
16use std::fs::File;
17use std::hash::{Hash, Hasher};
18use std::path::Path;
19use std::rc::Rc;
20
21pub const INVALID_SOURCE: Source = Source(0);
22pub const INVALID_LOCATION: Location = Location {
23    source: INVALID_SOURCE,
24    offset: 0,
25};
26pub const INVALID_SPAN: Span = Span {
27    source: INVALID_SOURCE,
28    begin: 0,
29    end: 0,
30};
31
32#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
33pub struct Source(pub u32);
34
35impl Source {
36    /// Return the path of this source file.
37    pub fn get_path(self) -> RcStr {
38        get_source_manager().with(self, |x| x.get_path())
39    }
40
41    /// Access the contents of this source file.
42    pub fn get_content(self) -> Rc<dyn SourceContent> {
43        get_source_manager().with(self, |x| x.get_content())
44    }
45
46    /// Copy a range of the source content into a String instance owned by the
47    /// caller, possibly converting the encoding such that the result is in
48    /// UTF-8.
49    pub fn extract(self, begin: usize, end: usize) -> String {
50        get_source_manager().with(self, |x| x.extract(begin, end))
51    }
52}
53
54impl fmt::Debug for Source {
55    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
56        if self.0 > 0 {
57            write!(f, "Source({}; \"{}\")", self.0, self.get_path())
58        } else {
59            write!(f, "Source(INVALID)")
60        }
61    }
62}
63
64impl fmt::Display for Source {
65    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
66        fmt::Display::fmt(&self.get_path(), f)
67    }
68}
69
70// impl Encodable for Source {
71//     fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
72//         s.emit_bool(self.0 == 0)?;
73//         if self.0 > 0 {
74//             s.emit_str(self.get_path().borrow())?
75//         }
76//         Ok(())
77//     }
78// }
79
80// impl Decodable for Source {
81//     fn decode<S: Decoder>(s: &mut S) -> Result<Source, S::Error> {
82//         let invalid = s.read_bool()?;
83//         if !invalid {
84//             let path = s.read_str()?;
85//             match get_source_manager().open(&path) {
86//                 Some(x) => Ok(x),
87//                 None => panic!("trying to decode invalid source `{}`", path),
88//             }
89//         } else {
90//             Ok(INVALID_SOURCE)
91//         }
92//     }
93// }
94
95pub trait SourceFile {
96    fn get_id(&self) -> Source;
97    fn get_path(&self) -> RcStr;
98    // TODO: getter for character iterator
99    // TODO: getter for source file extracts
100
101    /// Obtain the content of this source file. The returned object may be used
102    /// to iterate over the characters in the file or extract portions of it.
103    fn get_content(&self) -> Rc<dyn SourceContent>;
104
105    /// Copy a range of the source content into a String instance owned by the
106    /// caller, possibly converting the encoding such that the result is in
107    /// UTF-8.
108    fn extract(&self, begin: usize, end: usize) -> String {
109        self.get_content().extract(begin, end)
110    }
111}
112
113pub trait SourceContent {
114    /// Obtain an iterator over the characters within the source file, together
115    /// with their respective byte positions.
116    fn iter(&self) -> Box<CharIter>;
117
118    /// Obtain an iterator over the characters within the source file, starting
119    /// at the provided location `offset`, together with their respective byte
120    /// positions.
121    fn iter_from(&self, offset: usize) -> Box<CharIter>;
122
123    /// Copy a range of the source content into a String instance owned by the
124    /// caller, possibly converting the encoding such that the result is in
125    /// UTF-8.
126    fn extract(&self, begin: usize, end: usize) -> String;
127
128    /// Obtain an iterator over an extract of the source content. This might be
129    /// more efficient than copying the extract into a String.
130    fn extract_iter(&self, begin: usize, end: usize) -> Box<CharIter>;
131
132    /// Obtain a slice voer all bytes within the source file. This is the
133    /// fastest way of getting at the file's contents, since no parsing or
134    /// character encoding is performed or assumed.
135    fn bytes(&self) -> &[u8];
136
137    /// Return a list of byte offsets indicating the start of lines.
138    fn lines(&self) -> &[usize];
139}
140
141/// A manager for source files and their assigned IDs.
142pub struct SourceManager {
143    map: RefCell<HashMap<RcStr, Source>>,
144    vect: RefCell<Vec<Box<dyn SourceFile>>>,
145}
146
147impl SourceManager {
148    fn new() -> SourceManager {
149        SourceManager {
150            map: RefCell::new(HashMap::new()),
151            vect: RefCell::new(Vec::new()),
152        }
153    }
154
155    /// Obtain the source file for a given source ID.
156    pub fn with<F, R>(&self, id: Source, f: F) -> R
157    where
158        F: FnOnce(&dyn SourceFile) -> R,
159    {
160        let ref vect = *self.vect.borrow();
161        assert!(id.0 > 0, "invalid source");
162        assert!(
163            (id.0 as usize - 1) < vect.len(),
164            "unknown source file: Source({}) >= {}",
165            id.0,
166            vect.len()
167        );
168        f(&*vect[id.0 as usize - 1])
169    }
170
171    pub fn find<Q: ?Sized>(&self, filename: &Q) -> Option<Source>
172    where
173        RcStr: Borrow<Q>,
174        Q: Eq + Hash,
175    {
176        (*self.map.borrow()).get(filename).map(|v| *v)
177    }
178
179    pub fn open(&self, filename: &str) -> Option<Source> {
180        // Check if the file has already been opened and return its pointer.
181        let mut map = self.map.borrow_mut();
182        if let Some(&id) = map.get(filename) {
183            return Some(id);
184        }
185
186        // Check whether the file exists and allocate a new index for it.
187        if Path::new(filename).exists() {
188            let mut vect = self.vect.borrow_mut();
189            let new_id = Source(vect.len() as u32 + 1);
190            let v = RcStr::new(filename);
191            map.insert(v.clone(), new_id);
192            vect.push(Box::new(DiskSourceFile {
193                id: new_id,
194                filename: v,
195                content: RefCell::new(None),
196            }));
197            Some(new_id)
198        } else {
199            None
200        }
201    }
202
203    /// Create a virtual file from the contents of a string and add it to the
204    /// source manager. Future calls to `open()` with the given filename will
205    /// yield the provided contents.
206    pub fn add(&self, filename: &str, content: &str) -> Source {
207        let mut map = self.map.borrow_mut();
208        assert!(
209            !map.contains_key(filename),
210            "add failed: source \"{}\" already exists",
211            filename
212        );
213        let mut vect = self.vect.borrow_mut();
214        let new_id = Source(vect.len() as u32 + 1);
215        let v = RcStr::new(filename);
216        map.insert(v.clone(), new_id);
217        vect.push(Box::new(VirtualSourceFile {
218            id: new_id,
219            filename: v,
220            content: Rc::new(VirtualSourceContent(content.to_string(), OnceCell::new())),
221        }));
222        new_id
223    }
224
225    /// Create a virtual file from the contents of a string and add it to the
226    /// source manager. The file can only be used with the returned `Source`,
227    /// since there is no name associated with it by which it could be referred
228    /// to.
229    pub fn add_anonymous<S>(&self, content: S) -> Source
230    where
231        S: Into<String>,
232    {
233        let mut vect = self.vect.borrow_mut();
234        let new_id = Source(vect.len() as u32 + 1);
235        vect.push(Box::new(VirtualSourceFile {
236            id: new_id,
237            filename: RcStr::new("<anonymous>"),
238            content: Rc::new(VirtualSourceContent(content.into(), OnceCell::new())),
239        }));
240        new_id
241    }
242}
243
244/// Get the global source manager.
245pub fn get_source_manager() -> Rc<SourceManager> {
246    thread_local!(static MNGR: Rc<SourceManager> = {
247        Rc::new(SourceManager::new())
248    });
249    MNGR.with(|x| x.clone())
250}
251
/// Compute the byte offsets at which lines start.
///
/// `iter` yields `(byte position, character)` pairs. The result always begins
/// with offset 0, followed by the position just past every `'\n'`.
fn line_starts(iter: impl Iterator<Item = (usize, char)>) -> impl Iterator<Item = usize> {
    let after_newlines = iter.filter_map(|(pos, ch)| if ch == '\n' { Some(pos + 1) } else { None });
    std::iter::once(0).chain(after_newlines)
}
257
258/// A virtual source file that has no correspondence in the file system. Useful
259/// for unit tests.
260struct VirtualSourceFile {
261    id: Source,
262    filename: RcStr,
263    content: Rc<VirtualSourceContent>,
264}
265
266struct VirtualSourceContent(pub String, OnceCell<Vec<usize>>);
267
268impl SourceFile for VirtualSourceFile {
269    fn get_id(&self) -> Source {
270        self.id
271    }
272
273    fn get_path(&self) -> RcStr {
274        self.filename.clone()
275    }
276
277    fn get_content(&self) -> Rc<dyn SourceContent> {
278        self.content.clone()
279    }
280}
281
282impl SourceContent for VirtualSourceContent {
283    fn iter(&self) -> Box<CharIter> {
284        Box::new(self.0.char_indices())
285    }
286
287    fn iter_from(&self, offset: usize) -> Box<CharIter> {
288        Box::new(self.0[offset..].char_indices())
289    }
290
291    fn extract(&self, begin: usize, end: usize) -> String {
292        self.0[begin..end].to_string()
293    }
294
295    fn extract_iter(&self, begin: usize, end: usize) -> Box<CharIter> {
296        Box::new(self.0[begin..end].char_indices())
297    }
298
299    fn bytes(&self) -> &[u8] {
300        self.0.as_bytes()
301    }
302
303    fn lines(&self) -> &[usize] {
304        self.1.get_or_init(|| line_starts(self.iter()).collect())
305    }
306}
307
308/// A source file on disk.
309struct DiskSourceFile {
310    id: Source,
311    filename: RcStr,
312    content: RefCell<Option<Rc<DiskSourceContent>>>,
313}
314
315#[derive(Debug)]
316struct DiskSourceContent(pub Mmap, OnceCell<Vec<usize>>);
317
318impl SourceFile for DiskSourceFile {
319    fn get_id(&self) -> Source {
320        self.id
321    }
322
323    fn get_path(&self) -> RcStr {
324        self.filename.clone()
325    }
326
327    fn get_content(&self) -> Rc<dyn SourceContent> {
328        let is_none = self.content.borrow().is_none();
329        if is_none {
330            let c = Rc::new(DiskSourceContent(
331                unsafe { Mmap::map(&File::open(&*self.filename).unwrap()).unwrap() },
332                OnceCell::new(),
333            ));
334            *self.content.borrow_mut() = Some(c.clone());
335            c
336        } else {
337            self.content.borrow().clone().unwrap()
338        }
339    }
340}
341
342impl SourceContent for DiskSourceContent {
343    fn iter(&self) -> Box<CharIter> {
344        use std::str;
345        Box::new(str::from_utf8(&self.0[..]).unwrap().char_indices())
346    }
347
348    fn iter_from(&self, offset: usize) -> Box<CharIter> {
349        use std::str;
350        Box::new(str::from_utf8(&self.0[offset..]).unwrap().char_indices())
351    }
352
353    fn extract(&self, begin: usize, end: usize) -> String {
354        use std::str;
355        str::from_utf8(&self.0[begin..end]).unwrap().to_string()
356    }
357
358    fn extract_iter(&self, begin: usize, end: usize) -> Box<CharIter> {
359        use std::str;
360        Box::new(str::from_utf8(&self.0[begin..end]).unwrap().char_indices())
361    }
362
363    fn bytes(&self) -> &[u8] {
364        &self.0[..]
365    }
366
367    fn lines(&self) -> &[usize] {
368        self.1.get_or_init(|| line_starts(self.iter()).collect())
369    }
370}
371
/// A double-ended iterator over `(byte position, character)` pairs of an
/// input file. Used as a trait object, typically behind a `Box`.
pub type CharIter<'a> = dyn DoubleEndedIterator<Item = (usize, char)> + 'a;
375
376/// A single location within a source file, expressed as a byte offset.
377#[derive(Copy, Clone, PartialOrd, Ord, PartialEq, Eq, Hash)]
378pub struct Location {
379    pub source: Source,
380    pub offset: usize,
381}
382
383impl Location {
384    /// Create a new location.
385    pub fn new(source: Source, offset: usize) -> Location {
386        Location {
387            source: source,
388            offset: offset,
389        }
390    }
391
392    /// Create a new location given a human-readable line and column.
393    pub fn with_line_and_column(source: Source, line: usize, column: usize) -> Location {
394        let c = source.get_content();
395        let lines = c.lines();
396        if line > 0 && line <= lines.len() {
397            Location::new(source, lines[line - 1] + column - 1)
398        } else {
399            Location::new(source, 0)
400        }
401    }
402
403    /// Obtain an iterator into the source file at this location.
404    pub fn iter<'a>(self, content: &'a Rc<dyn SourceContent>) -> Box<CharIter<'a>> {
405        content.iter_from(self.offset)
406    }
407
408    /// Determine the line and column information at this location.
409    ///
410    /// Returns a tuple `(line, column, line_offset)`.
411    pub fn human(self) -> (usize, usize, usize) {
412        let c = self.source.get_content();
413        let lines = c.lines();
414        let index = lines.partition_point(|&x| x <= self.offset) - 1;
415        let line = index + 1;
416        let line_offset = lines[index];
417        assert!(line_offset <= self.offset);
418        let col = self.offset - line_offset + 1;
419        (line, col, line_offset)
420    }
421
422    /// Determine the line at this location.
423    pub fn human_line(self) -> usize {
424        self.human().0
425    }
426
427    /// Determine the column at this location.
428    pub fn human_column(self) -> usize {
429        self.human().1
430    }
431
432    /// Determine the line offset at this location.
433    pub fn human_line_offset(self) -> usize {
434        self.human().2
435    }
436}
437
438impl fmt::Debug for Location {
439    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
440        write!(f, "{:?}:{}", self.source, self.offset)
441    }
442}
443
444impl From<Location> for Span {
445    fn from(l: Location) -> Span {
446        Span::new(l.source, l.offset, l.offset)
447    }
448}
449
450/// A span of locations within a source file, expressed as a half-open interval
451/// of bytes `[begin,end)`.
452#[derive(Copy, Clone, PartialOrd, Ord, PartialEq, Eq, Hash)]
453pub struct Span {
454    pub source: Source,
455    pub begin: usize,
456    pub end: usize,
457}
458
459impl Span {
460    /// Create a new span from two byte offsets.
461    pub fn new(source: Source, begin: usize, end: usize) -> Span {
462        Span {
463            source: source,
464            begin: begin,
465            end: end,
466        }
467    }
468
469    /// Create a new span that covers two spans, i.e. represents the smallest
470    /// possible span that fully contains both input spans `a` and `b`.
471    pub fn union<S: Into<Span>>(a: S, b: S) -> Span {
472        let sa = a.into();
473        let sb = b.into();
474        // assert_eq!(sa.source, sb.source);
475        if sa.source != sb.source {
476            return sa;
477        }
478        Span {
479            source: sa.source,
480            begin: min(sa.begin, sb.begin),
481            end: max(sa.end, sb.end),
482        }
483    }
484
485    /// Modify this range to also cover the entirety of the `other` range. The
486    /// `other` range must lie in the same source as `self`.
487    pub fn expand<S: Into<Span>>(&mut self, other: S) -> &mut Self {
488        let o = other.into();
489        // assert_eq!(self.source, o.source);
490        if self.source == o.source {
491            self.begin = min(self.begin, o.begin);
492            self.end = max(self.end, o.end);
493        }
494        self
495    }
496
497    /// Return the location just before the first character in this span.
498    pub fn begin(&self) -> Location {
499        Location::new(self.source, self.begin)
500    }
501
502    /// Return the location just after the last character in this span.
503    pub fn end(&self) -> Location {
504        Location::new(self.source, self.end)
505    }
506
507    /// Copy the portion of the source file in this span into an owned string.
508    pub fn extract(&self) -> String {
509        self.source.get_content().extract(self.begin, self.end)
510    }
511
512    /// Obtain an iterator over the extract of the source file describe by this
513    /// span.
514    pub fn iter<'a>(self, content: &'a Rc<dyn SourceContent>) -> Box<CharIter<'a>> {
515        content.extract_iter(self.begin, self.end)
516    }
517}
518
519impl fmt::Debug for Span {
520    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
521        write!(f, "{:?}:{}-{}", self.source, self.begin, self.end)
522    }
523}
524
525/// A wrapper that associates a span with a value.
526#[derive(PartialOrd, Ord, PartialEq, Eq)]
527pub struct Spanned<T> {
528    pub value: T,
529    pub span: Span,
530}
531
532impl<T> Spanned<T> {
533    /// Wrap a given value together with the span it covers.
534    pub fn new(value: T, span: Span) -> Spanned<T> {
535        Spanned {
536            value: value,
537            span: span,
538        }
539    }
540
541    /// Map the spanned value, preserving the span.
542    pub fn map<U, F: FnOnce(T) -> U>(self, f: F) -> Spanned<U> {
543        Spanned::new(f(self.value), self.span)
544    }
545
546    pub fn map_into<U>(self) -> Spanned<U>
547    where
548        T: Into<U>,
549    {
550        Spanned::new(self.value.into(), self.span)
551    }
552
553    pub fn as_ref(&self) -> Spanned<&T> {
554        Spanned::new(&self.value, self.span)
555    }
556}
557
558impl<T> std::fmt::Debug for Spanned<T>
559where
560    T: std::fmt::Debug,
561{
562    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
563        self.value.fmt(f)
564    }
565}
566
567impl<T> std::fmt::Display for Spanned<T>
568where
569    T: std::fmt::Display,
570{
571    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
572        self.value.fmt(f)
573    }
574}
575
576impl<T> Copy for Spanned<T> where T: Copy {}
577
578impl<T> Clone for Spanned<T>
579where
580    T: Clone,
581{
582    fn clone(&self) -> Self {
583        Spanned {
584            value: self.value.clone(),
585            span: self.span,
586        }
587    }
588}
589
590impl<T> Hash for Spanned<T>
591where
592    T: Hash,
593{
594    fn hash<H>(&self, state: &mut H)
595    where
596        H: Hasher,
597    {
598        self.value.hash(state)
599    }
600}
601
#[cfg(test)]
mod tests {
    use super::*;

    /// ID 0 is reserved for the invalid source and must be rejected.
    #[test]
    #[should_panic(expected = "invalid source")]
    fn invalid_source_id() {
        get_source_manager().with(Source(0), |_| ());
    }

    /// An ID that was never handed out must be rejected. Relies on this
    /// thread's source manager not having registered any file yet.
    #[test]
    #[should_panic(expected = "unknown source file")]
    fn unknown_source_id() {
        get_source_manager().with(Source(1), |_| ());
    }

    /// A virtual file added by name can be found again through `open`.
    #[test]
    fn inject_file() {
        let sm = get_source_manager();
        let id = sm.add("flabberghasted.txt", "Hello\nWorld\n");
        let source = sm.open("flabberghasted.txt").expect("file should exist");
        assert_eq!(source, id);
    }

    /// Opening a path that does not exist yields `None`.
    #[test]
    fn inexistent_file() {
        let sm = get_source_manager();
        assert_eq!(sm.open("/this/path/points/nowhere"), None);
    }

    /// Character iteration over virtual content reports byte positions.
    #[test]
    fn chars() {
        let sm = get_source_manager();
        let source = sm.add("test.txt", "老虎.");
        let content = source.get_content();
        let elements: Vec<(usize, char)> = content.iter().collect();
        assert_eq!(elements, vec![(0, '老'), (3, '虎'), (6, '.')]);
    }

    /// Character iteration over a file on disk matches the data written.
    #[test]
    fn file() {
        use std::fs::File;
        use std::io::Write;

        // Use the platform's temporary directory rather than a hard-coded
        // `/tmp`, and a per-process name so concurrent runs do not collide.
        let path = std::env::temp_dir().join(format!("moore-test-{}", std::process::id()));
        let data = "Löwe 老虎 Léopard\n";
        File::create(&path)
            .unwrap()
            .write_all(data.as_bytes())
            .unwrap();

        let sm = get_source_manager();
        let source = sm
            .open(path.to_str().expect("temporary path should be valid UTF-8"))
            .expect("file should exist");
        let content = source.get_content();
        let expected: Vec<_> = data.char_indices().collect();
        let actual: Vec<_> = content.iter().collect();

        assert_eq!(expected, actual);

        // Best-effort cleanup; failure to remove the file is not an error.
        let _ = std::fs::remove_file(&path);
    }
}
662}