markdown_that/common/
sourcemap.rs

//! Tools for working with source positions: mapping byte offsets to `line:column` pairs.
2
/// Holds source code, allows to calculate `line:column` from byte offset.
///
/// While the source is scanned once in [`SourceWithLineStarts::new`], a sorted
/// list of marks (byte offset -> line/column) is recorded: one at the start of
/// the source, one right after every linebreak, and one every
/// `MARK_INTERVAL` characters inside a line. A lookup then only needs a binary
/// search over the marks followed by a short scan from the chosen mark.
///
/// Lines start at 1. Columns count characters (not bytes) and are 1-based for
/// ordinary characters; a linebreak character itself is reported as column 0
/// of the line that follows it.
#[derive(Debug)]
pub struct SourceWithLineStarts {
    /// Owned copy of the source text the marks refer to.
    src: String,
    /// Marks sorted by strictly increasing `offset`; never empty, and the
    /// first mark always has `offset == 0`.
    marks: Vec<CharMappingMark>,
}

impl SourceWithLineStarts {
    /// Number of characters between two consecutive marks inside one line.
    const MARK_INTERVAL: u32 = 16;

    /// Scans `src` once and records the marks used by later position lookups.
    ///
    /// `\n`, a lone `\r`, and the pair `\r\n` each count as a single linebreak.
    /// In a `\r\n` pair the `\r` still occupies a column on the line it ends.
    pub fn new(src: &str) -> Self {
        let mut line: u32 = 1;
        let mut column: u32 = 0;
        let mut marks = vec![CharMappingMark {
            offset: 0,
            line,
            column,
        }];

        let mut chars = src.char_indices().peekable();
        while let Some((offset, ch)) = chars.next() {
            match ch {
                '\r' if matches!(chars.peek(), Some((_, '\n'))) => {
                    // `\r` directly followed by `\n`: the `\n` will open the
                    // next line, so the `\r` only takes up a column here.
                    column += 1;
                }
                '\r' | '\n' => {
                    // Both are one byte long, so the next line starts at
                    // `offset + 1`.
                    line += 1;
                    column = 0;
                    marks.push(CharMappingMark {
                        offset: offset + 1,
                        line,
                        column,
                    });
                }
                _ => {
                    // Drop an intermediate mark so lookups in long lines do
                    // not have to rescan the line from its beginning.
                    if column > 0 && column % Self::MARK_INTERVAL == 0 {
                        marks.push(CharMappingMark {
                            offset,
                            line,
                            column,
                        });
                    }
                    column += 1;
                }
            }
        }

        Self {
            src: src.to_owned(),
            marks,
        }
    }

    /// Returns the `(line, column)` of the character located at `byte_offset`.
    ///
    /// An offset pointing into the middle of a multi-byte character resolves
    /// to that character. An offset past the end of the source resolves to the
    /// last position in the source. No input panics, including `usize::MAX`.
    fn get_position(&self, byte_offset: usize) -> (u32, u32) {
        // Search for the offset just past the current character so that the
        // character itself is counted. Saturate so `usize::MAX` behaves like
        // any other "after the end" offset instead of overflowing.
        let target = byte_offset.saturating_add(1);

        // Pick the last mark whose offset is <= target. The first mark sits at
        // offset 0 and `target >= 1`, so a miss can never yield index 0 and
        // the subtraction cannot underflow.
        let found = self
            .marks
            .binary_search_by_key(&target, |mark| mark.offset)
            .unwrap_or_else(|insert_at| insert_at - 1);
        let mark = &self.marks[found];

        // Count the characters that start between the mark and the target.
        let column = self.src[mark.offset..]
            .char_indices()
            .take_while(|&(relative, _)| mark.offset + relative < target)
            .fold(mark.column, |column, _| column + 1);

        (mark.line, column)
    }
}

/// A known correspondence between a byte offset and a `line:column` position.
#[derive(Debug)]
struct CharMappingMark {
    /// Byte offset into the source; always on a character boundary.
    offset: usize,
    /// Line number, starting at 1.
    line: u32,
    /// Number of characters that precede `offset` on this line.
    column: u32,
}
83
84#[derive(Default, Clone, Copy)]
85/// Positions of the start and the end of an AST node.
86pub struct SourcePos {
87    byte_offset: (usize, usize),
88}
89
90impl SourcePos {
91    /// Create positions from byte offsets:
92    ///  - start - offset of the first char from the node
93    ///  - end - offset of the first char after the node
94    pub fn new(start: usize, end: usize) -> Self {
95        SourcePos {
96            byte_offset: (start, end),
97        }
98    }
99
100    pub fn get_byte_offsets(&self) -> (usize, usize) {
101        self.byte_offset
102    }
103
104    /// Returns (line_start, column_start, line_end, column_end) from given positions
105    pub fn get_positions(&self, map: &SourceWithLineStarts) -> ((u32, u32), (u32, u32)) {
106        let start = map.get_position(self.byte_offset.0);
107        let end_off = if self.byte_offset.1 > 0 {
108            self.byte_offset.1 - 1
109        } else {
110            self.byte_offset.1
111        };
112        let end = map.get_position(end_off);
113        (start, end)
114    }
115}
116
117impl std::fmt::Debug for SourcePos {
118    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
119        self.byte_offset.fmt(f)
120    }
121}
122
#[cfg(test)]
mod tests {
    use super::SourcePos;
    use super::SourceWithLineStarts;

    /// Resolves the `(line, column)` reported for a node starting at `offset`.
    fn start_at(map: &SourceWithLineStarts, offset: usize) -> (u32, u32) {
        let (start, _end) = SourcePos::new(offset, 0).get_positions(map);
        start
    }

    // Single line of ASCII: every byte offset maps to a 1-based column.
    #[test]
    fn no_linebreaks() {
        let map = SourceWithLineStarts::new("qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM");
        for offset in 0..20 {
            assert_eq!(start_at(&map, offset), (1, offset as u32 + 1));
        }
    }

    // Two-byte characters after a one-byte `!`: both bytes of a character
    // resolve to the same column.
    #[test]
    fn unicode() {
        let map = SourceWithLineStarts::new("!ΑαΒβΓγΔδΕεΖζΗηΘθΙιΚκΛλΜμΝνΞξΟοΠπΡρΣσςΤτΥυΦφΧχΨψΩω");
        assert_eq!(start_at(&map, 0), (1, 1));
        for offset in 1..20 {
            let expected_column = ((offset - 1) / 2) as u32 + 2;
            assert_eq!(start_at(&map, offset), (1, expected_column));
        }
    }

    // A linebreak character is reported as column 0 of the following line.
    #[test]
    fn many_linebreaks() {
        let map = SourceWithLineStarts::new("\n\n\n\n\n\n123");
        for offset in 0..6 {
            assert_eq!(start_at(&map, offset), (offset as u32 + 2, 0));
        }
        assert_eq!(start_at(&map, 7), (7, 2));
        assert_eq!(start_at(&map, 8), (7, 3));
    }

    // Offsets beyond the source resolve to the last position in the source.
    #[test]
    fn after_end() {
        let cases = [
            ("123", (1, 3)),
            ("123\n", (2, 0)),
            ("123\n456", (2, 3)),
        ];
        for (source, expected) in cases {
            let map = SourceWithLineStarts::new(source);
            assert_eq!(start_at(&map, 100), expected);
        }
    }
}