js_source_scopes/
source.rs

/// A structure allowing fast line/column <-> byte offset remapping.
///
/// The primary use-case is to allow efficient conversion between
/// [`SourcePosition`]s (line/column) and byte offsets. The [`SourcePosition`]s
/// are 0-based. All offsets are treated as `u32`, and creating a
/// [`SourceContext`] for a source that exceeds the range of a `u32` will result
/// in an `Err`.
///
/// # Examples
///
/// ```
/// use js_source_scopes::{SourceContext, SourcePosition};
///
/// let src = r#"const arrowFnExpr = (a) => a;
/// function namedFnDecl() {}"#;
///
/// let ctx = SourceContext::new(src).unwrap();
///
/// let offset = ctx.position_to_offset(SourcePosition::new(0, 6)).unwrap() as usize;
/// assert_eq!(&src[offset..offset+11], "arrowFnExpr");
/// let offset = ctx.position_to_offset(SourcePosition::new(1, 9)).unwrap() as usize;
/// assert_eq!(&src[offset..offset+11], "namedFnDecl");
/// ```
pub struct SourceContext<T> {
    /// The source buffer this context was built from.
    src: T,
    /// Checkpoints recorded while scanning `src`; lookups start scanning
    /// from one of these instead of from the beginning of the source.
    index: Vec<Mapping>,
}
28
/// When creating the [`SourceContext`], one [`Mapping`] is recorded every
/// [`CHUNKS`] characters.
///
/// For example, an 80 KiB file made of single-byte characters yields 640 of
/// these mappings (three `u32` fields each), weighing about 7.5 KiB in memory.
const CHUNKS: usize = 128;
34
/// A mapping in the [`SourceContext`] index.
///
/// Each mapping is a snapshot taken at the start of a character, tying that
/// character's byte offset to its line/column.
#[derive(Clone, Copy)]
struct Mapping {
    /// The byte offset of the character.
    offset: u32,
    /// The 0-indexed line of the character.
    line: u32,
    /// The 0-indexed column of the character, counted in UTF-16 code units.
    column: u32,
}
45
46impl<T: AsRef<str>> SourceContext<T> {
47    /// Unwrap this Source Context into the inner source buffer.
48    pub fn into_inner(self) -> T {
49        self.src
50    }
51
52    /// Construct a new Source Context from the given `src` buffer.
53    #[tracing::instrument(level = "trace", name = "SourceContext::new", skip_all)]
54    pub fn new(src: T) -> Result<Self, SourceContextError> {
55        let buf = src.as_ref();
56        // we can do the bounds check once in the beginning, that guarantees that
57        // all the other offsets are within `u32` bounds.
58        let _len: u32 = buf.len().try_into().map_err(|_| SourceContextError(()))?;
59
60        let mut index = vec![];
61
62        let mut offset = 0;
63        let mut line = 0;
64        let mut column = 0;
65        for (i, c) in buf.chars().enumerate() {
66            if i % CHUNKS == 0 {
67                index.push(Mapping {
68                    offset: offset as u32,
69                    line,
70                    column: column as u32,
71                });
72            }
73            offset += c.len_utf8();
74            if c == '\n' {
75                line += 1;
76                column = 0;
77            } else {
78                column += c.len_utf16();
79            }
80        }
81
82        Ok(Self { src, index })
83    }
84
85    /// Converts a byte offset into the source to the corresponding line/column.
86    ///
87    /// The column is given in UTF-16 code points.
88    pub fn offset_to_position(&self, offset: u32) -> Option<SourcePosition> {
89        let mapping = match self
90            .index
91            .binary_search_by_key(&offset, |mapping| mapping.offset)
92        {
93            Ok(idx) => self.index[idx],
94            Err(0) => Mapping {
95                offset: 0,
96                line: 0,
97                column: 0,
98            },
99            Err(idx) => self.index[idx - 1],
100        };
101
102        let mut byte_offset = mapping.offset as usize;
103        let mut line = mapping.line;
104        let mut column = mapping.column as usize;
105
106        for c in self.src.as_ref().get(byte_offset..)?.chars() {
107            if byte_offset >= offset as usize {
108                return Some(SourcePosition::new(line, column as u32));
109            }
110
111            byte_offset += c.len_utf8();
112            if c == '\n' {
113                line += 1;
114                column = 0;
115            } else {
116                column += c.len_utf16();
117            }
118        }
119
120        None
121    }
122
123    /// Converts the given line/column to the corresponding byte offset inside the source.
124    pub fn position_to_offset(&self, position: SourcePosition) -> Option<u32> {
125        let SourcePosition { line, column } = position;
126        let mapping = match self
127            .index
128            .binary_search_by_key(&(line, column), |mapping| (mapping.line, mapping.column))
129        {
130            Ok(idx) => self.index[idx],
131            Err(0) => Mapping {
132                offset: 0,
133                line: 0,
134                column: 0,
135            },
136            Err(idx) => self.index[idx - 1],
137        };
138
139        let mut byte_offset = mapping.offset as usize;
140        let mut mapping_line = mapping.line;
141        let mut mapping_column = mapping.column as usize;
142
143        for c in self.src.as_ref().get(byte_offset..)?.chars() {
144            if mapping_line == line && mapping_column >= column as usize {
145                return Some(byte_offset as u32);
146            }
147
148            byte_offset += c.len_utf8();
149            if c == '\n' {
150                mapping_line += 1;
151                mapping_column = 0;
152                // the column we were looking for is out of bounds
153                if mapping_line > line {
154                    return None;
155                }
156            } else {
157                mapping_column += c.len_utf16();
158            }
159        }
160
161        None
162    }
163}
164
/// A line/column source position.
///
/// Positions compare by line first and by column second.
#[derive(Copy, Clone, Debug, PartialEq, PartialOrd, Eq, Ord)]
pub struct SourcePosition {
    /// Line in the source file, 0-based.
    pub line: u32,
    /// Column in the source file, 0-based.
    ///
    /// The column is given in UTF-16 code units.
    pub column: u32,
}

impl SourcePosition {
    /// Create a new SourcePosition with the given line/column.
    ///
    /// Both values are 0-based. This is a `const fn`, so positions can also
    /// be created in `const` and `static` items.
    pub const fn new(line: u32, column: u32) -> Self {
        Self { line, column }
    }
}
182
/// The error returned when a [`SourceContext`] cannot be built from a source.
///
/// It carries no details; the private unit field only prevents construction
/// from outside this module.
#[derive(Debug)]
pub struct SourceContextError(());

impl std::fmt::Display for SourceContextError {
    /// Writes a fixed, human-readable description of the failure.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "source could not be converted to source context")
    }
}

impl std::error::Error for SourceContextError {}
194
#[cfg(test)]
mod tests {
    use super::*;

    /// Exercises both lookup directions on an empty source and on a source
    /// mixing ASCII, multi-byte characters, astral characters, combining
    /// marks and `\r\n` line endings.
    #[test]
    fn source_context() {
        // An empty source has no addressable character at all.
        let empty = SourceContext::new("").unwrap();
        assert_eq!(empty.offset_to_position(0), None);
        assert_eq!(empty.position_to_offset(SourcePosition::new(0, 0)), None);

        let src = "\n \r\naö¿¡\nőá…–🤮🚀¿ 한글 테스트\nz̴̢̈͜ä̴̺̟́ͅl̸̛̦͎̺͂̃̚͝g̷̦̲͊͋̄̌͝o̸͇̞̪͙̞͌̇̀̓̏͜\r\noh hai";
        let ctx = SourceContext::new(src).unwrap();

        // out of bounds
        assert_eq!(ctx.offset_to_position(150), None);
        for (line, column) in [(0, 1), (1, 3), (6, 1)] {
            assert_eq!(
                ctx.position_to_offset(SourcePosition::new(line, column)),
                None
            );
        }

        // correct positions
        for (offset, line, column) in [(1, 1, 0), (3, 1, 2)] {
            assert_eq!(
                ctx.offset_to_position(offset),
                Some(SourcePosition::new(line, column))
            );
        }

        let offset = ctx.position_to_offset(SourcePosition::new(2, 0)).unwrap();
        assert_eq!(offset, 4);
        let start = offset as usize;
        assert_eq!(&src[start..start + 1], "a");

        // full roundtrips: every character start that resolves to a position
        // must map back to the very same offset
        for (start, _) in src.char_indices() {
            let offset = start as u32;
            if let Some(position) = ctx.offset_to_position(offset) {
                assert_eq!(ctx.position_to_offset(position), Some(offset));
            }
        }
    }
}
230}