xswag_base/code/
filemap.rs

1use super::{SrcOffset, BytePos, LineIdx, ColIdx, Loc};
2use std::cell::RefCell;
3use std::fmt;
4
/// Stores the content of a file and keeps track of some position meta data,
/// such as linebreaks.
///
/// Positions inside the file are expressed as `BytePos` values, i.e. byte
/// offsets into `src`. The line table is filled either incrementally through
/// `add_line` or in one go through `find_lines`.
pub struct FileMap {
    /// Original filename or dummy filename of the form "<...>"
    filename: String,
    /// The whole content of the file
    src: String,
    /// List of line beginnings, stored as byte offsets into `src`. `new`
    /// seeds it with `BytePos(0)`, so it is never empty. The entries have to
    /// stay in ascending order, because `get_line_idx` performs a binary
    /// search on this list.
    ///
    /// Wrapped in a `RefCell`, because `FileMap`s are shared a lot via `Rc`
    /// and this is the only thing requiring mutability. Borrowing it will
    /// never panic, because:
    /// - it's only borrowed within methods of `FileMap`
    /// - the borrow always ends with those methods
    /// - no method is called/active while another one is active (as long as
    ///   no borrowing method is calling another borrowing method while its
    ///   own borrow is still alive)
    lines: RefCell<Vec<BytePos>>,
}
21
22impl FileMap {
23    /// Creates a new Filemap from existing buffers for the filename and
24    /// content of the file.
25    pub fn new<U, V>(filename: U, src: V) -> FileMap
26        where U: Into<String>,
27              V: Into<String>
28    {
29        FileMap {
30            filename: filename.into(),
31            src: src.into(),
32            lines: RefCell::new(vec![BytePos(0)]),
33        }
34    }
35
36    pub fn filename(&self) -> &str {
37        &self.filename
38    }
39
40    pub fn src(&self) -> &str {
41        &self.src
42    }
43
44    /// Adds a new line beginning with the given BytePos to the list. Line
45    /// beginnings need to be added in order!
46    pub fn add_line(&self, offset: BytePos) {
47        self.lines.borrow_mut().push(offset);
48    }
49
50    /// Returns the number of lines.
51    pub fn num_lines(&self) -> usize {
52        self.lines.borrow().len()
53    }
54
55    /// Returns the (0-based) index of the line in which the byte with the
56    /// given `offset` lives.
57    pub fn get_line_idx(&self, offset: BytePos) -> LineIdx {
58        // If `offset` is not a line beginning, `binary_search` returns the
59        // index of the next line. Hence `-1`.
60        LineIdx(self.lines.borrow()
61            .binary_search(&offset)
62            .unwrap_or_else(|e| e - 1) as u32)
63    }
64
65    /// Returns the location of the given bytes as line and col numbers within
66    /// this file.
67    pub fn get_loc(&self, offset: BytePos) -> Loc {
68        let line = self.get_line_idx(offset);
69        let col = offset - self.lines.borrow()[line.0 as usize];
70
71        Loc { line: line, col: ColIdx(col.0) }
72    }
73
74    /// Returns the line with the given index or `None` if it is invalid.
75    pub fn get_line(&self, line: LineIdx) -> Option<&str> {
76        self.lines.borrow().get(line.0 as usize).map(|&BytePos(start)| {
77            let end = self.src[start as usize..]
78                          .find("\n")
79                          .unwrap_or(self.src.len() - start as usize);
80            &self.src[start as usize .. (end + start as usize)]
81        })
82    }
83
84    /// Returns the byte offset of the first symbol in `line`
85    pub fn get_line_start(&self, line: LineIdx) -> Option<BytePos> {
86        self.lines.borrow().get(line.0 as usize).map(|&pos| pos)
87    }
88
89    /// Searches for line endings and collects all line beginnings in the
90    /// source string. It starts searching at the latest line beginning in the
91    /// list so far (or at the beginning of none was added yet).
92    ///
93    /// Normally this is done while lexing to avoid iterating
94    /// over the whole string multiple times. Mostly handy for tests.
95    pub fn find_lines(&self) {
96        // We can unwrap here, because the vector contains at least one element
97        let last_line_so_far = self.lines.borrow().last().unwrap().0 as usize;
98        for (pos, c) in self.src[last_line_so_far..].char_indices() {
99            // it doesn't matter if there was a '\n' or '\r\n'
100            if c == '\n' {
101                let line_start = (pos + c.len_utf8()) as SrcOffset;
102                self.lines.borrow_mut().push(BytePos(line_start));
103            }
104        }
105    }
106}
107
108impl fmt::Debug for FileMap {
109    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
110        struct Dummy<'a>(&'a [BytePos]);
111        impl<'a> fmt::Debug for Dummy<'a> {
112            fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
113                let items = self.0.iter().map(|v| v.0).enumerate();
114                f.debug_map().entries(items).finish()
115            }
116        }
117
118        f.debug_struct("FileMap")
119            .field("filename", &self.filename)
120            .field("src", &format!("<long string> (len {})", self.src.len()))
121            .field("lines", &Dummy(&self.lines.borrow()))
122            .finish()
123    }
124}
125
126// --- tests ---
#[test]
fn location_lookup() {
    let map = FileMap::new("<dummy>", "foo\r\nbär\nbaz");
    map.find_lines();

    // "foo\r\n" occupies 5 bytes and so does "bär\n" (the 'ä' takes two
    // bytes in UTF-8), hence the lines begin at offsets 0, 5 and 10.
    let expected_starts = vec![BytePos(0), BytePos(5), BytePos(10)];
    assert_eq!(*map.lines.borrow(), expected_starts);

    // (byte offset, expected line index, expected column index)
    let cases = [
        (0, 0, 0),
        (1, 0, 1),
        (4, 0, 4),
        (5, 1, 0),
        (9, 1, 4),
        (10, 2, 0),
        (12, 2, 2),
    ];

    for &(offset, line, col) in cases.iter() {
        let expected = Loc { line: LineIdx(line), col: ColIdx(col) };
        assert_eq!(map.get_loc(BytePos(offset)), expected);
    }
}