hydroperfox_sourcetext/
lib.rs

1use std::cell::{Cell, RefCell};
2use std::str::CharIndices;
3
/// Value `line_skips_counter` must reach before `push_line_skip` records
/// another `LineSkip`. The counter starts at zero and is reset after each
/// recorded entry.
const LINE_SKIP_THRESOLD: usize = 10;
/// Same role as `LINE_SKIP_THRESOLD`, for `higher_line_skips_counter` and the
/// `higher_line_skips` table.
const HIGHER_LINE_SKIP_THRESOLD: usize = 100;
/// Same role as `LINE_SKIP_THRESOLD`, for `extra_higher_line_skips_counter`
/// and the `extra_higher_line_skips` table.
const EXTRA_HIGHER_LINE_SKIP_THRESOLD: usize = 1_000;
7
8/// Contains source text and line locations.
9pub struct SourceText {
10    pub contents: String,
11    processed_lines: Cell<bool>,
12
13    /// Collection of ascending line number *skips* used
14    /// for optimizing retrieval of line numbers or line offsets.
15    pub(crate) line_skips: RefCell<Vec<LineSkip>>,
16    pub(crate) line_skips_counter: Cell<usize>,
17
18    /// Collection used before `line_skips` in line lookups
19    /// to skip lines in a higher threshold.
20    pub(crate) higher_line_skips: RefCell<Vec<HigherLineSkip>>,
21    pub(crate) higher_line_skips_counter: Cell<usize>,
22
23    /// Collection used before `higher_line_skips` in line lookups
24    /// to skip lines in an extra higher threshold.
25    pub(crate) extra_higher_line_skips: RefCell<Vec<HigherLineSkip>>,
26    pub(crate) extra_higher_line_skips_counter: Cell<usize>
27}
28
29impl SourceText {
30    pub fn new(contents: String) -> Self {
31        Self {
32            contents,
33            processed_lines: Cell::new(false),
34            line_skips: RefCell::new(vec![LineSkip { offset: 0, line_number: 1 }]),
35            line_skips_counter: Cell::new(0),
36            higher_line_skips: RefCell::new(vec![HigherLineSkip { skip_index: 0, offset: 0, line_number: 1 }]),
37            higher_line_skips_counter: Cell::new(0),
38            extra_higher_line_skips: RefCell::new(vec![HigherLineSkip { skip_index: 0, offset: 0, line_number: 1 }]),
39            extra_higher_line_skips_counter: Cell::new(0),
40        }
41    }
42
43    fn process_lines(&self) {
44        if self.processed_lines.get() {
45            return;
46        }
47        self.processed_lines.set(true);
48        let mut s = CharacterReader::from(&self.contents);
49        let mut line: usize = 1;
50        while s.has_remaining() {
51            let ch = s.next_or_zero();
52            if CharacterValidator::is_line_terminator(ch) {
53                if ch == '\r' && s.peek_or_zero() == '\n' {
54                    s.next();
55                }
56                line += 1;
57                self.push_line_skip(line, s.index());
58            }
59        }
60    }
61
62    fn push_line_skip(&self, line_number: usize, offset: usize) {
63        let counter = self.line_skips_counter.get();
64        if counter == LINE_SKIP_THRESOLD {
65            self.line_skips.borrow_mut().push(LineSkip { line_number, offset });
66            self.line_skips_counter.set(0);
67        } else {
68            self.line_skips_counter.set(counter + 1);
69        }
70
71        let counter = self.higher_line_skips_counter.get();
72        if counter == HIGHER_LINE_SKIP_THRESOLD {
73            self.higher_line_skips.borrow_mut().push(HigherLineSkip { skip_index: self.line_skips.borrow().len() - 1, line_number, offset });
74            self.higher_line_skips_counter.set(0);
75        } else {
76            self.higher_line_skips_counter.set(counter + 1);
77        }
78
79        let counter = self.extra_higher_line_skips_counter.get();
80        if counter == EXTRA_HIGHER_LINE_SKIP_THRESOLD {
81            self.extra_higher_line_skips.borrow_mut().push(HigherLineSkip { skip_index: self.higher_line_skips.borrow().len() - 1, line_number, offset });
82            self.extra_higher_line_skips_counter.set(0);
83        } else {
84            self.extra_higher_line_skips_counter.set(counter + 1);
85        }
86    }
87
88    /// Retrieves line number from an offset. The resulting line number
89    /// is counted from one.
90    pub fn get_line_number(&self, offset: usize) -> usize {
91        self.process_lines();
92
93        // Extra higher line skips
94        let mut last_skip = HigherLineSkip { skip_index: 0, offset: 0, line_number: 1 };
95        let skips = self.extra_higher_line_skips.borrow();
96        let mut skips = skips.iter();
97        while let Some(skip_1) = skips.next() {
98            if offset < skip_1.offset {
99                break;
100            }
101            last_skip = *skip_1;
102        }
103
104        // Higher line skips
105        let skips = self.higher_line_skips.borrow();
106        let mut skips = skips[last_skip.skip_index..].iter();
107        let mut last_skip = skips.next().unwrap();
108        while let Some(skip_1) = skips.next() {
109            if offset < skip_1.offset {
110                break;
111            }
112            last_skip = skip_1;
113        }
114
115        // Line skips
116        let skips = self.line_skips.borrow();
117        let mut skips = skips[last_skip.skip_index..].iter();
118        let mut last_skip = skips.next().unwrap();
119        while let Some(skip_1) = skips.next() {
120            if offset < skip_1.offset {
121                break;
122            }
123            last_skip = skip_1;
124        }
125
126        let mut current_line = last_skip.line_number;
127        let mut characters = CharacterReader::from(&self.contents[last_skip.offset..]);
128        while last_skip.offset + characters.index() < offset {
129            let ch_1 = characters.next();
130            if let Some(ch_1) = ch_1 {
131                if CharacterValidator::is_line_terminator(ch_1) {
132                    if ch_1 == '\r' && characters.peek_or_zero() == '\n' {
133                        characters.next();
134                    }
135                    current_line += 1;
136                }
137            } else {
138                break;
139            }
140        }
141        current_line
142    }
143
144    /// Retrieves offset from line number (counted from one).
145    pub fn get_line_offset(&self, line: usize) -> Option<usize> {
146        self.process_lines();
147
148        // Extra higher line skips
149        let mut last_skip = HigherLineSkip { skip_index: 0, offset: 0, line_number: 1 };
150        let skips = self.extra_higher_line_skips.borrow();
151        let mut skips = skips.iter();
152        while let Some(skip_1) = skips.next() {
153            if line < skip_1.line_number {
154                break;
155            }
156            last_skip = *skip_1;
157        }
158
159        // Higher line skips
160        let skips = self.higher_line_skips.borrow();
161        let mut skips = skips[last_skip.skip_index..].iter();
162        let mut last_skip = skips.next().unwrap();
163        while let Some(skip_1) = skips.next() {
164            if line < skip_1.line_number {
165                break;
166            }
167            last_skip = skip_1;
168        }
169
170        // Line skips
171        let skips = self.line_skips.borrow();
172        let mut skips = skips[last_skip.skip_index..].iter();
173        let mut last_skip = skips.next().unwrap();
174        while let Some(skip_1) = skips.next() {
175            if line < skip_1.line_number {
176                break;
177            }
178            last_skip = skip_1;
179        }
180
181        let mut current_line = last_skip.line_number;
182        let mut characters = CharacterReader::from(&self.contents[last_skip.offset..]);
183        while current_line != line {
184            let ch_1 = characters.next();
185            if let Some(ch_1) = ch_1 {
186                if CharacterValidator::is_line_terminator(ch_1) {
187                    if ch_1 == '\r' && characters.peek_or_zero() == '\n' {
188                        characters.next();
189                    }
190                    current_line += 1;
191                }
192            } else {
193                return None;
194            }
195        }
196        Some(last_skip.offset + characters.index())
197    }
198
199    /// Retrieves the offset from the corresponding line of an offset.
200    pub fn get_line_offset_from_offset(&self, offset: usize) -> usize {
201        self.process_lines();
202
203        // Extra higher line skips
204        let mut last_skip = HigherLineSkip { skip_index: 0, offset: 0, line_number: 1 };
205        let skips = self.extra_higher_line_skips.borrow();
206        let mut skips = skips.iter();
207        while let Some(skip_1) = skips.next() {
208            if offset < skip_1.offset {
209                break;
210            }
211            last_skip = *skip_1;
212        }
213
214        // Higher line skips
215        let skips = self.higher_line_skips.borrow();
216        let mut skips = skips[last_skip.skip_index..].iter();
217        let mut last_skip = skips.next().unwrap();
218        while let Some(skip_1) = skips.next() {
219            if offset < skip_1.offset {
220                break;
221            }
222            last_skip = skip_1;
223        }
224
225        // Line skips
226        let skips = self.line_skips.borrow();
227        let mut skips = skips[last_skip.skip_index..].iter();
228        let mut last_skip = skips.next().unwrap();
229        while let Some(skip_1) = skips.next() {
230            if offset < skip_1.offset {
231                break;
232            }
233            last_skip = skip_1;
234        }
235
236        let mut current_line_offset = last_skip.offset;
237        let mut characters = CharacterReader::from(&self.contents[last_skip.offset..]);
238        while last_skip.offset + characters.index() < offset {
239            let ch_1 = characters.next();
240            if let Some(ch_1) = ch_1 {
241                if CharacterValidator::is_line_terminator(ch_1) {
242                    if ch_1 == '\r' && characters.peek_or_zero() == '\n' {
243                        characters.next();
244                    }
245                    current_line_offset = last_skip.offset + characters.index();
246                }
247            } else {
248                break;
249            }
250        }
251        current_line_offset
252    }
253
254    /// Returns the zero based column of an offset.
255    pub fn get_column(&self, offset: usize) -> usize {
256        self.process_lines();
257
258        let line_offset = self.get_line_offset_from_offset(offset);
259        let target_offset = offset;
260        if line_offset > target_offset {
261            return 0;
262        }
263        let mut i = 0;
264        for _ in self.contents[line_offset..target_offset].chars() {
265            i += 1;
266        }
267        i
268    }
269}
270
/// A sampled line start: where a given line begins in the text.
#[derive(Clone, Copy)]
struct LineSkip {
    /// Offset at which the line starts.
    pub offset: usize,
    /// Number of the line, counting from one.
    pub line_number: usize,
}
278
/// A sampled line start in one of the coarser skip tables, linked to the
/// next finer table.
#[derive(Clone, Copy)]
struct HigherLineSkip {
    /// Index of a `LineSkip`, or of another `HigherLineSkip` when this entry
    /// belongs to the extra higher line skips.
    pub skip_index: usize,
    /// Offset at which the line starts.
    pub offset: usize,
    /// Number of the line, counting from one.
    pub line_number: usize,
}
289
/// Cursor over the code points of a string slice that also reports its byte
/// position within that slice.
#[derive(Clone)]
struct CharacterReader<'a> {
    /// Byte length of the whole slice the reader was created from.
    length: usize,
    /// Iterator over the code points not yet consumed.
    char_indices: CharIndices<'a>,
}

impl<'a> CharacterReader<'a> {
    /// Whether at least one more code point can be read.
    pub fn has_remaining(&self) -> bool {
        !self.char_indices.as_str().is_empty()
    }

    /// Whether every code point has been read.
    pub fn _reached_end(&self) -> bool {
        self.char_indices.as_str().is_empty()
    }

    /// Byte index of the next code point, or the slice length at the end.
    pub fn index(&self) -> usize {
        // Bytes consumed so far = total length minus what is still unread.
        self.length - self.char_indices.as_str().len()
    }

    /// Consumes and returns the next code point, or U+00 when there is none.
    pub fn next_or_zero(&mut self) -> char {
        match self.char_indices.next() {
            Some((_, code_point)) => code_point,
            None => '\x00',
        }
    }

    /// Returns the next code point without consuming it, or U+00 when there
    /// is none.
    pub fn peek_or_zero(&self) -> char {
        self.char_indices.as_str().chars().next().unwrap_or('\x00')
    }
}

impl<'a> From<&'a str> for CharacterReader<'a> {
    /// Builds a reader positioned at the start of `value`.
    fn from(value: &'a str) -> Self {
        Self {
            length: value.len(),
            char_indices: value.char_indices(),
        }
    }
}

impl<'a> From<&'a String> for CharacterReader<'a> {
    /// Builds a reader positioned at the start of `value`.
    fn from(value: &'a String) -> Self {
        Self::from(value.as_str())
    }
}

impl<'a> Iterator for CharacterReader<'a> {
    type Item = char;

    /// Consumes and returns the next code point, if any.
    fn next(&mut self) -> Option<Self::Item> {
        let (_, code_point) = self.char_indices.next()?;
        Some(code_point)
    }
}
346
/// Namespace for code point classification helpers.
struct CharacterValidator;

impl CharacterValidator {
    /// Whether `ch` is LF, CR, U+2028 or U+2029.
    pub fn is_line_terminator(ch: char) -> bool {
        matches!(ch, '\n' | '\r' | '\u{2028}' | '\u{2029}')
    }
}
354
#[cfg(test)]
mod tests {
    use super::SourceText;

    /// Exercises line/column lookups on CRLF text, on a text long enough to
    /// populate every skip table, and on a text that ends with a newline.
    #[test]
    fn test() {
        // Three CRLF-separated lines.
        let crlf = SourceText::new(String::from("foo\r\nbar\r\nqux"));
        assert_eq!(0, crlf.get_column(0));
        assert_eq!(0, crlf.get_column(5));
        assert_eq!(2, crlf.get_line_number(5));
        assert_eq!(Some(5), crlf.get_line_offset(2));
        assert_eq!(5, crlf.get_line_offset_from_offset(7));

        // 1,024 empty lines followed by a final empty line.
        let many_lines = SourceText::new("\n".repeat(1_024));
        for &(offset, line) in &[(0usize, 1usize), (1, 2), (1_024, 1_025)] {
            assert_eq!(line, many_lines.get_line_number(offset));
        }

        // Leading and trailing newline around a single statement.
        let statement = SourceText::new(String::from("\ndefault xml namespace =\n"));
        assert_eq!(3, statement.get_line_number(25));
        assert_eq!(0, statement.get_column(25));
        assert_eq!(2, statement.get_line_number(24));
        assert_eq!(23, statement.get_column(24));
    }
}