hydroperfox_sourcetext/
lib.rs1use std::cell::{Cell, RefCell};
2use std::str::CharIndices;
3
// NOTE(review): "THRESOLD" is a misspelling of "threshold"; the names are kept as-is
// because renaming them requires updating every use in the same change.

/// Number of line breaks left unrecorded between two consecutive entries of
/// `SourceText::line_skips`.
const LINE_SKIP_THRESOLD: usize = 10;
/// Number of line breaks left unrecorded between two consecutive entries of
/// `SourceText::higher_line_skips`.
const HIGHER_LINE_SKIP_THRESOLD: usize = 100;
/// Number of line breaks left unrecorded between two consecutive entries of
/// `SourceText::extra_higher_line_skips`.
const EXTRA_HIGHER_LINE_SKIP_THRESOLD: usize = 1_000;
7
/// A piece of source text together with lazily built tables that map between
/// byte offsets and line numbers.
///
/// The tables are kept behind `Cell`/`RefCell` so that they can be filled in
/// through a shared reference the first time a position is queried.
pub struct SourceText {
    /// The text itself.
    pub contents: String,
    /// Whether `contents` has already been scanned to fill the skip tables.
    processed_lines: Cell<bool>,

    /// Finest table of line starts; begins with an entry for line 1 at offset 0.
    pub(crate) line_skips: RefCell<Vec<LineSkip>>,
    /// Line breaks seen since the last entry was added to `line_skips`.
    pub(crate) line_skips_counter: Cell<usize>,

    /// Coarser table of line starts; each entry's `skip_index` points into `line_skips`.
    pub(crate) higher_line_skips: RefCell<Vec<HigherLineSkip>>,
    /// Line breaks seen since the last entry was added to `higher_line_skips`.
    pub(crate) higher_line_skips_counter: Cell<usize>,

    /// Coarsest table of line starts; each entry's `skip_index` points into
    /// `higher_line_skips`.
    pub(crate) extra_higher_line_skips: RefCell<Vec<HigherLineSkip>>,
    /// Line breaks seen since the last entry was added to `extra_higher_line_skips`.
    pub(crate) extra_higher_line_skips_counter: Cell<usize>
}
28
29impl SourceText {
30 pub fn new(contents: String) -> Self {
31 Self {
32 contents,
33 processed_lines: Cell::new(false),
34 line_skips: RefCell::new(vec![LineSkip { offset: 0, line_number: 1 }]),
35 line_skips_counter: Cell::new(0),
36 higher_line_skips: RefCell::new(vec![HigherLineSkip { skip_index: 0, offset: 0, line_number: 1 }]),
37 higher_line_skips_counter: Cell::new(0),
38 extra_higher_line_skips: RefCell::new(vec![HigherLineSkip { skip_index: 0, offset: 0, line_number: 1 }]),
39 extra_higher_line_skips_counter: Cell::new(0),
40 }
41 }
42
43 fn process_lines(&self) {
44 if self.processed_lines.get() {
45 return;
46 }
47 self.processed_lines.set(true);
48 let mut s = CharacterReader::from(&self.contents);
49 let mut line: usize = 1;
50 while s.has_remaining() {
51 let ch = s.next_or_zero();
52 if CharacterValidator::is_line_terminator(ch) {
53 if ch == '\r' && s.peek_or_zero() == '\n' {
54 s.next();
55 }
56 line += 1;
57 self.push_line_skip(line, s.index());
58 }
59 }
60 }
61
62 fn push_line_skip(&self, line_number: usize, offset: usize) {
63 let counter = self.line_skips_counter.get();
64 if counter == LINE_SKIP_THRESOLD {
65 self.line_skips.borrow_mut().push(LineSkip { line_number, offset });
66 self.line_skips_counter.set(0);
67 } else {
68 self.line_skips_counter.set(counter + 1);
69 }
70
71 let counter = self.higher_line_skips_counter.get();
72 if counter == HIGHER_LINE_SKIP_THRESOLD {
73 self.higher_line_skips.borrow_mut().push(HigherLineSkip { skip_index: self.line_skips.borrow().len() - 1, line_number, offset });
74 self.higher_line_skips_counter.set(0);
75 } else {
76 self.higher_line_skips_counter.set(counter + 1);
77 }
78
79 let counter = self.extra_higher_line_skips_counter.get();
80 if counter == EXTRA_HIGHER_LINE_SKIP_THRESOLD {
81 self.extra_higher_line_skips.borrow_mut().push(HigherLineSkip { skip_index: self.higher_line_skips.borrow().len() - 1, line_number, offset });
82 self.extra_higher_line_skips_counter.set(0);
83 } else {
84 self.extra_higher_line_skips_counter.set(counter + 1);
85 }
86 }
87
88 pub fn get_line_number(&self, offset: usize) -> usize {
91 self.process_lines();
92
93 let mut last_skip = HigherLineSkip { skip_index: 0, offset: 0, line_number: 1 };
95 let skips = self.extra_higher_line_skips.borrow();
96 let mut skips = skips.iter();
97 while let Some(skip_1) = skips.next() {
98 if offset < skip_1.offset {
99 break;
100 }
101 last_skip = *skip_1;
102 }
103
104 let skips = self.higher_line_skips.borrow();
106 let mut skips = skips[last_skip.skip_index..].iter();
107 let mut last_skip = skips.next().unwrap();
108 while let Some(skip_1) = skips.next() {
109 if offset < skip_1.offset {
110 break;
111 }
112 last_skip = skip_1;
113 }
114
115 let skips = self.line_skips.borrow();
117 let mut skips = skips[last_skip.skip_index..].iter();
118 let mut last_skip = skips.next().unwrap();
119 while let Some(skip_1) = skips.next() {
120 if offset < skip_1.offset {
121 break;
122 }
123 last_skip = skip_1;
124 }
125
126 let mut current_line = last_skip.line_number;
127 let mut characters = CharacterReader::from(&self.contents[last_skip.offset..]);
128 while last_skip.offset + characters.index() < offset {
129 let ch_1 = characters.next();
130 if let Some(ch_1) = ch_1 {
131 if CharacterValidator::is_line_terminator(ch_1) {
132 if ch_1 == '\r' && characters.peek_or_zero() == '\n' {
133 characters.next();
134 }
135 current_line += 1;
136 }
137 } else {
138 break;
139 }
140 }
141 current_line
142 }
143
144 pub fn get_line_offset(&self, line: usize) -> Option<usize> {
146 self.process_lines();
147
148 let mut last_skip = HigherLineSkip { skip_index: 0, offset: 0, line_number: 1 };
150 let skips = self.extra_higher_line_skips.borrow();
151 let mut skips = skips.iter();
152 while let Some(skip_1) = skips.next() {
153 if line < skip_1.line_number {
154 break;
155 }
156 last_skip = *skip_1;
157 }
158
159 let skips = self.higher_line_skips.borrow();
161 let mut skips = skips[last_skip.skip_index..].iter();
162 let mut last_skip = skips.next().unwrap();
163 while let Some(skip_1) = skips.next() {
164 if line < skip_1.line_number {
165 break;
166 }
167 last_skip = skip_1;
168 }
169
170 let skips = self.line_skips.borrow();
172 let mut skips = skips[last_skip.skip_index..].iter();
173 let mut last_skip = skips.next().unwrap();
174 while let Some(skip_1) = skips.next() {
175 if line < skip_1.line_number {
176 break;
177 }
178 last_skip = skip_1;
179 }
180
181 let mut current_line = last_skip.line_number;
182 let mut characters = CharacterReader::from(&self.contents[last_skip.offset..]);
183 while current_line != line {
184 let ch_1 = characters.next();
185 if let Some(ch_1) = ch_1 {
186 if CharacterValidator::is_line_terminator(ch_1) {
187 if ch_1 == '\r' && characters.peek_or_zero() == '\n' {
188 characters.next();
189 }
190 current_line += 1;
191 }
192 } else {
193 return None;
194 }
195 }
196 Some(last_skip.offset + characters.index())
197 }
198
199 pub fn get_line_offset_from_offset(&self, offset: usize) -> usize {
201 self.process_lines();
202
203 let mut last_skip = HigherLineSkip { skip_index: 0, offset: 0, line_number: 1 };
205 let skips = self.extra_higher_line_skips.borrow();
206 let mut skips = skips.iter();
207 while let Some(skip_1) = skips.next() {
208 if offset < skip_1.offset {
209 break;
210 }
211 last_skip = *skip_1;
212 }
213
214 let skips = self.higher_line_skips.borrow();
216 let mut skips = skips[last_skip.skip_index..].iter();
217 let mut last_skip = skips.next().unwrap();
218 while let Some(skip_1) = skips.next() {
219 if offset < skip_1.offset {
220 break;
221 }
222 last_skip = skip_1;
223 }
224
225 let skips = self.line_skips.borrow();
227 let mut skips = skips[last_skip.skip_index..].iter();
228 let mut last_skip = skips.next().unwrap();
229 while let Some(skip_1) = skips.next() {
230 if offset < skip_1.offset {
231 break;
232 }
233 last_skip = skip_1;
234 }
235
236 let mut current_line_offset = last_skip.offset;
237 let mut characters = CharacterReader::from(&self.contents[last_skip.offset..]);
238 while last_skip.offset + characters.index() < offset {
239 let ch_1 = characters.next();
240 if let Some(ch_1) = ch_1 {
241 if CharacterValidator::is_line_terminator(ch_1) {
242 if ch_1 == '\r' && characters.peek_or_zero() == '\n' {
243 characters.next();
244 }
245 current_line_offset = last_skip.offset + characters.index();
246 }
247 } else {
248 break;
249 }
250 }
251 current_line_offset
252 }
253
254 pub fn get_column(&self, offset: usize) -> usize {
256 self.process_lines();
257
258 let line_offset = self.get_line_offset_from_offset(offset);
259 let target_offset = offset;
260 if line_offset > target_offset {
261 return 0;
262 }
263 let mut i = 0;
264 for _ in self.contents[line_offset..target_offset].chars() {
265 i += 1;
266 }
267 i
268 }
269}
270
/// Checkpoint that pairs the start of a line with that line's number.
#[derive(Copy, Clone)]
struct LineSkip {
    /// Byte offset at which the line begins.
    pub offset: usize,
    /// One-based number of the line.
    pub line_number: usize,
}
278
/// Checkpoint of a coarser skip table; besides a line start it records where
/// to resume searching in the next finer table.
#[derive(Copy, Clone)]
struct HigherLineSkip {
    /// Index of an entry in the next finer table: the last entry that table
    /// held when this checkpoint was recorded.
    pub skip_index: usize,
    /// Byte offset at which the line begins.
    pub offset: usize,
    /// One-based number of the line.
    pub line_number: usize,
}
289
/// Forward cursor over the characters of a string slice that can also report
/// the byte position it has reached.
#[derive(Clone)]
struct CharacterReader<'a> {
    /// Byte length of the underlying slice; reported as the position once the
    /// cursor is exhausted.
    length: usize,
    char_indices: CharIndices<'a>,
}

impl<'a> CharacterReader<'a> {
    /// Tells whether at least one more character can be read.
    pub fn has_remaining(&self) -> bool {
        self.char_indices.clone().next().is_some()
    }

    /// Tells whether every character has been read.
    pub fn _reached_end(&self) -> bool {
        !self.has_remaining()
    }

    /// Byte position of the next character, or the slice length at the end.
    pub fn index(&self) -> usize {
        match self.char_indices.clone().next() {
            Some((position, _)) => position,
            None => self.length,
        }
    }

    /// Reads the next character, yielding U+0000 when none is left.
    pub fn next_or_zero(&mut self) -> char {
        self.next().unwrap_or('\x00')
    }

    /// Looks at the next character without consuming it, yielding U+0000 when
    /// none is left.
    pub fn peek_or_zero(&self) -> char {
        self.char_indices
            .clone()
            .next()
            .map_or('\x00', |(_, ch)| ch)
    }
}

impl<'a> From<&'a str> for CharacterReader<'a> {
    /// Positions a reader at the start of `value`.
    fn from(value: &'a str) -> Self {
        let length = value.len();
        let char_indices = value.char_indices();
        CharacterReader { length, char_indices }
    }
}

impl<'a> From<&'a String> for CharacterReader<'a> {
    /// Positions a reader at the start of `value`.
    fn from(value: &'a String) -> Self {
        Self::from(value.as_str())
    }
}

impl<'a> Iterator for CharacterReader<'a> {
    type Item = char;

    /// Reads the next character, or `None` at the end of the slice.
    fn next(&mut self) -> Option<Self::Item> {
        let (_, ch) = self.char_indices.next()?;
        Some(ch)
    }
}
346
/// Namespace for character classification helpers.
struct CharacterValidator;

impl CharacterValidator {
    /// Tells whether `ch` is one of the four recognised line terminators:
    /// line feed, carriage return, U+2028 or U+2029.
    pub fn is_line_terminator(ch: char) -> bool {
        matches!(ch, '\x0A' | '\x0D' | '\u{2028}' | '\u{2029}')
    }
}
354
#[cfg(test)]
mod tests {
    use super::SourceText;

    /// Exercises the offset, line and column lookups on three small inputs.
    #[test]
    fn test() {
        // CR LF pairs count as a single line break: "bar" starts at byte 5 on line 2.
        let text = SourceText::new("foo\r\nbar\r\nqux".into());
        assert_eq!(0, text.get_column(0));
        assert_eq!(0, text.get_column(5));
        assert_eq!(2, text.get_line_number(5));
        assert_eq!(5, text.get_line_offset(2).unwrap());
        assert_eq!(5, text.get_line_offset_from_offset(7));

        // 1,024 line breaks exceed the largest skip threshold (1,000), so all
        // three skip tables receive entries.
        let text = SourceText::new("\n".repeat(1_024));
        assert_eq!(1, text.get_line_number(0));
        assert_eq!(2, text.get_line_number(1));
        assert_eq!(1_025, text.get_line_number(1_024));

        // Offset 25 is the end of the text, just past the final line break;
        // offset 24 is that line break itself, still on line 2.
        let text = SourceText::new("\ndefault xml namespace =\n".into());
        assert_eq!(3, text.get_line_number(25));
        assert_eq!(0, text.get_column(25));
        assert_eq!(2, text.get_line_number(24));
        assert_eq!(23, text.get_column(24));
    }
}