markdown_it/common/
sourcemap.rs

1//! Tools to work with source positions and mapping.
2
/// Holds source code, allows to calculate `line:column` from byte offset.
///
/// Alongside the text it stores a list of marks sorted by byte offset: one at
/// offset 0, one right after every line break, and one every
/// `MARK_INTERVAL` characters inside a line. A lookup binary-searches the
/// marks and then only has to walk the few characters that follow the mark.
///
/// Conventions (as exercised by the tests in this file):
///  - lines are numbered from 1;
///  - the first character of a line has column 1, columns count characters
///    (not bytes);
///  - a line-break character (`\n`, or `\r` not followed by `\n`) is reported
///    as column 0 of the line that follows it.
#[derive(Debug)]
pub struct SourceWithLineStarts {
    src: String,
    marks: Vec<CharMappingMark>,
}

impl SourceWithLineStarts {
    /// Number of characters between two consecutive marks on the same line.
    const MARK_INTERVAL: u32 = 16;

    /// Copies `src` and scans it once to record the marks used by lookups.
    pub fn new(src: &str) -> Self {
        let mut chars = src.char_indices().peekable();
        let mut line = 1;
        let mut column = 0;
        let mut marks = vec![CharMappingMark { offset: 0, line, column }];

        // `while let` instead of `for`: the `\r\n` check needs `peek()`.
        while let Some((offset, ch)) = chars.next() {
            match ch {
                '\r' if matches!(chars.peek(), Some((_, '\n'))) => {
                    // `\r` of a `\r\n` pair is not a line break on its own;
                    // it is counted as one more character of the current line.
                    column += 1;
                }
                '\r' | '\n' => {
                    // Line break (both are one byte long): the next line
                    // starts at the byte right after it.
                    line += 1;
                    column = 0;
                    marks.push(CharMappingMark { offset: offset + 1, line, column });
                }
                _ => {
                    // Ordinary character: drop a mark at regular intervals so
                    // that lookups in long lines stay short. Column 0 is
                    // skipped because a line start already has its own mark.
                    if column > 0 && column % Self::MARK_INTERVAL == 0 {
                        marks.push(CharMappingMark { offset, line, column });
                    }
                    column += 1;
                }
            }
        }

        Self { src: src.to_owned(), marks }
    }

    /// Returns `(line, column)` of the character located at `byte_offset`.
    ///
    /// An offset pointing inside a multi-byte character resolves to that
    /// character. Offsets past the end of the text (up to and including
    /// `usize::MAX`) are clamped to the end of the text.
    fn get_position(&self, byte_offset: usize) -> (u32, u32) {
        // Search one byte further so that the character at `byte_offset` is
        // itself included in the count. Saturating keeps `usize::MAX` on the
        // "past the end" path instead of overflowing.
        let byte_offset = byte_offset.saturating_add(1);

        let found = match self.marks.binary_search_by_key(&byte_offset, |mark| mark.offset) {
            Ok(exact) => exact,
            // The first mark sits at offset 0 and `byte_offset >= 1` here, so
            // the insertion point is never 0 and the subtraction cannot wrap.
            Err(insert_at) => insert_at - 1,
        };

        let mark = &self.marks[found];
        let mut column = mark.column;
        // Count the characters that start between the mark and the target.
        for (offset, _) in self.src[mark.offset..].char_indices() {
            if mark.offset + offset >= byte_offset {
                break;
            }
            column += 1;
        }
        (mark.line, column)
    }
}

/// A known position inside the source, used as a starting point for lookups.
#[derive(Debug)]
struct CharMappingMark {
    /// Byte offset into the source text.
    offset: usize,
    /// Line number (starting at 1) of the character at `offset`.
    line: u32,
    /// Number of characters of that line located before `offset`.
    column: u32,
}
66
67#[derive(Default, Clone, Copy)]
68/// Positions of the start and the end of an AST node.
69pub struct SourcePos {
70    byte_offset: (usize, usize),
71}
72
73impl SourcePos {
74    /// Create positions from byte offsets:
75    ///  - start - offset of the first char of the node
76    ///  - end - offset of the first char after the node
77    pub fn new(start: usize, end: usize) -> Self {
78        SourcePos {
79            byte_offset: (start, end),
80        }
81    }
82
83    pub fn get_byte_offsets(&self) -> (usize, usize) {
84        self.byte_offset
85    }
86
87    /// Returns (line_start, column_start, line_end, column_end) from given positions
88    pub fn get_positions(&self, map: &SourceWithLineStarts) -> ((u32, u32), (u32, u32)) {
89        let start = map.get_position(self.byte_offset.0);
90        let end_off = if self.byte_offset.1 > 0 { self.byte_offset.1 - 1 } else { self.byte_offset.1 };
91        let end = map.get_position(end_off);
92        (start, end)
93    }
94}
95
96impl std::fmt::Debug for SourcePos {
97    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
98        self.byte_offset.fmt(f)
99    }
100}
101
#[cfg(test)]
mod tests {
    use super::{SourcePos, SourceWithLineStarts};

    /// Resolves the start position of a node that begins at `offset`.
    /// The end offset is irrelevant for these checks and is left at 0.
    fn start_of(map: &SourceWithLineStarts, offset: usize) -> (u32, u32) {
        SourcePos::new(offset, 0).get_positions(map).0
    }

    #[test]
    fn no_linebreaks() {
        // Single-byte characters on one line: column is offset + 1.
        let map = SourceWithLineStarts::new("qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM");
        for i in 0..20 {
            assert_eq!(start_of(&map, i), (1, i as u32 + 1));
        }
    }

    #[test]
    fn unicode() {
        // One single-byte character followed by two-byte characters: both
        // bytes of a character must resolve to the same column.
        let map = SourceWithLineStarts::new("!ΑαΒβΓγΔδΕεΖζΗηΘθΙιΚκΛλΜμΝνΞξΟοΠπΡρΣσςΤτΥυΦφΧχΨψΩω");
        assert_eq!(start_of(&map, 0), (1, 1));
        for i in 1..20 {
            assert_eq!(start_of(&map, i), (1, ((i - 1) / 2) as u32 + 2));
        }
    }

    #[test]
    fn many_linebreaks() {
        // Each `\n` is reported as column 0 of the line that follows it.
        let map = SourceWithLineStarts::new("\n\n\n\n\n\n123");
        for i in 0..6 {
            assert_eq!(start_of(&map, i), (i as u32 + 2, 0));
        }
        assert_eq!(start_of(&map, 7), (7, 2));
        assert_eq!(start_of(&map, 8), (7, 3));
    }

    #[test]
    fn after_end() {
        // Offsets beyond the text resolve to the end of the text.
        let map = SourceWithLineStarts::new("123");
        assert_eq!(start_of(&map, 100), (1, 3));

        let map = SourceWithLineStarts::new("123\n");
        assert_eq!(start_of(&map, 100), (2, 0));

        let map = SourceWithLineStarts::new("123\n456");
        assert_eq!(start_of(&map, 100), (2, 3));
    }
}