1use crate::errors::*;
2use crate::{LineColumn, Range, Span};
3use core::ops::{Bound, RangeBounds};
4use fs_err as fs;
5use std::io::Read;
6use std::path::Path;
7
/// Occurrence statistics for one kind of line separator.
#[derive(Debug, Eq, PartialEq)]
struct LineSepStat {
    /// Position at which the separator was found for the first time.
    first_appearance: usize,
    /// Number of occurrences attributed to this separator.
    count: usize,
    /// The separator sequence these statistics belong to.
    newline: &'static str,
}
14
15#[inline(always)]
16fn extract_delimiter_inner<'a>(
17 mut iter: impl Iterator<Item = usize>,
18 newline: &'static str,
19) -> Option<LineSepStat> {
20 if let Some(first) = iter.next() {
21 let n = iter.count() + 1;
22 Some(LineSepStat {
23 first_appearance: first,
24 count: n,
25 newline,
26 })
27 } else {
28 None
29 }
30}
31
32pub fn extract_delimiter(s: &str) -> Option<&'static str> {
34 let lf = memchr::memchr_iter(b'\n', s.as_bytes());
36 let cr = memchr::memchr_iter(b'\r', s.as_bytes());
37 let crlf = memchr::memmem::find_iter(s.as_bytes(), "\r\n");
38 let lfcr = memchr::memmem::find_iter(s.as_bytes(), "\n\r");
39 let lfcr = extract_delimiter_inner(lfcr, "\n\r");
41 let crlf = extract_delimiter_inner(crlf, "\r\n");
42
43 let lf = extract_delimiter_inner(lf, "\n").map(|mut stat| {
45 stat.count = stat.count.saturating_sub(std::cmp::max(
46 crlf.as_ref().map(|stat| stat.count).unwrap_or_default(),
47 lfcr.as_ref().map(|stat| stat.count).unwrap_or_default(),
48 ));
49 stat
50 });
51 let cr = extract_delimiter_inner(cr, "\r").map(|mut stat| {
52 stat.count = stat.count.saturating_sub(std::cmp::max(
53 crlf.as_ref().map(|stat| stat.count).unwrap_or_default(),
54 lfcr.as_ref().map(|stat| stat.count).unwrap_or_default(),
55 ));
56 stat
57 });
58
59 vec![cr, lf, crlf, lfcr]
61 .into_iter()
62 .flatten()
63 .max_by(|b, a| {
64 if a.count == b.count {
65 a.first_appearance.cmp(&b.first_appearance)
66 } else {
67 b.count.cmp(&a.count)
68 }
69 })
70 .map(|x| x.newline)
71}
72
73pub fn iter_with_line_column_from(
77 s: &str,
78 start_point: LineColumn,
79) -> impl Iterator<Item = (char, usize, usize, LineColumn)> + '_ {
80 #[derive(Clone)]
81 struct State {
82 cursor: LineColumn,
83 previous_char_was_newline: bool,
84 }
85
86 let initial = State {
87 cursor: start_point,
88 previous_char_was_newline: false,
89 };
90
91 s.char_indices()
92 .enumerate()
93 .map(|(idx, (byte_offset, c))| (idx, byte_offset, c))
94 .scan(initial, |state, (idx, byte_offset, c)| -> Option<_> {
95 let cursor = state.cursor;
96 state.previous_char_was_newline = c == '\n';
97 if state.previous_char_was_newline {
98 state.cursor.line += 1;
99 state.cursor.column = 0;
100 } else {
101 state.cursor.column += 1;
102 }
103 Some((c, byte_offset, idx, cursor))
104 })
105}
106
107pub fn iter_with_line_column(
110 s: &str,
111) -> impl Iterator<Item = (char, usize, usize, LineColumn)> + '_ {
112 iter_with_line_column_from(s, LineColumn { line: 1, column: 0 })
113}
114
115pub fn load_span_from<R>(mut source: R, span: Span) -> Result<String>
120where
121 R: Read,
122{
123 log::trace!("Loading {span:?} from source");
124 if span.start.line < 1 {
125 return Err(Error::Span(
126 "Lines are 1-indexed, can't be less than 1".to_string(),
127 ));
128 }
129 if span.end.line < span.start.line {
130 return Err(Error::Span(
131 "Line range would be negative, bail".to_string(),
132 ));
133 }
134 if span.end.line == span.start.line && span.end.column < span.start.column {
135 return Err(Error::Span(
136 "Column range would be negative, bail".to_string(),
137 ));
138 }
139 let mut s = String::with_capacity(256);
140 source
141 .read_to_string(&mut s)
142 .expect("Must read successfully");
143
144 let extraction = iter_with_line_column(s.as_str())
145 .skip_while(|(_c, _byte_offset, _idx, cursor)| {
146 cursor.line < span.start.line
147 || (cursor.line == span.start.line && cursor.column < span.start.column)
148 })
149 .take_while(|(_c, _byte_offset, _idx, cursor)| {
150 cursor.line < span.end.line
151 || (cursor.line == span.end.line && cursor.column <= span.end.column)
152 })
153 .fuse()
154 .map(|(c, _byte_offset, _idx, _cursor)| c)
155 .collect::<String>();
156 Ok(extraction)
158}
159
160#[allow(unused)]
164pub(crate) fn load_span_from_file(path: impl AsRef<Path>, span: Span) -> Result<String> {
165 let path = path.as_ref();
166 let path = fs::canonicalize(path)?;
167
168 let ro = fs::OpenOptions::new().read(true).open(&path)?;
169
170 let mut reader = std::io::BufReader::new(ro);
171
172 load_span_from(reader, span)
173}
174
175pub fn sub_chars(s: &str, range: Range) -> String {
177 s.chars()
178 .skip(range.start)
179 .take(range.len())
180 .collect::<String>()
181}
182
183pub fn byte_range_to_char_range<R>(s: &str, byte_range: R) -> Option<Range>
189where
190 R: RangeBounds<usize>,
191{
192 let mut peekable = s.char_indices().enumerate().peekable();
193 let mut range = Range { start: 0, end: 0 };
194 let mut started = false;
195 while let Some((idx, (byte_offset, _c))) = peekable.next() {
196 match byte_range.start_bound() {
197 Bound::Included(&start) if byte_offset == start => {
198 started = true;
199 range.start = idx;
200 }
201 Bound::Included(&start) if byte_offset > start && !started => {
202 started = true;
203 range.start = idx.saturating_sub(1);
204 }
205 Bound::Excluded(_start) => {
206 unreachable!("Exclusive start bounds do not exist. qed");
207 }
208 _ => {}
209 }
210
211 match byte_range.end_bound() {
212 Bound::Included(&end) if byte_offset > end => {
213 range.end = idx;
214 return Some(range);
215 }
216 Bound::Excluded(&end) if byte_offset >= end => {
217 range.end = idx;
218 return Some(range);
219 }
220 _ => {}
221 }
222 if peekable.peek().is_none() && started {
223 range.end = idx + 1;
224 return Some(range);
225 }
226 }
227 None
228}
229
230pub fn byte_range_to_char_range_many<R>(s: &str, byte_ranges: &[R]) -> Vec<Range>
234where
235 R: std::ops::RangeBounds<usize> + std::fmt::Debug,
236{
237 let mut peekable = s.char_indices().enumerate().peekable();
238 let mut cursor = 0usize;
239 let mut acc = Vec::with_capacity(byte_ranges.len());
240 for byte_range in byte_ranges {
241 let mut range = Range { start: 0, end: 0 };
242 let mut started = false;
243 'inner: while let Some((idx, (byte_offset, _c))) = peekable.peek() {
244 cursor = *idx;
245 let byte_offset = *byte_offset;
246 match byte_range.start_bound() {
247 Bound::Included(&start) if byte_offset == start => {
248 started = true;
249 range.start = cursor;
250 }
251 Bound::Included(&start) if byte_offset > start && !started => {
252 started = true;
253 range.start = cursor.saturating_sub(1);
254 }
255 Bound::Excluded(_start) => {
256 unreachable!("Exclusive start bounds do not exist. qed");
257 }
258 _ => {}
259 }
260
261 match byte_range.end_bound() {
262 Bound::Included(&end) if byte_offset > end => {
263 range.end = cursor;
264 acc.push(range.clone());
265 started = false;
266 break 'inner;
267 }
268 Bound::Excluded(&end) if byte_offset >= end => {
269 range.end = cursor;
270 acc.push(range.clone());
271 started = false;
272 break 'inner;
273 }
274 _ => {}
275 }
276
277 let _ = peekable.next();
278 }
279 if started {
280 range.end = cursor + 1;
281 acc.push(range);
282 }
283 }
284 acc
285}
286
/// Returns the sub-slice of `s` selected by a range of character indices.
///
/// An unbounded start selects from the first character on. An end bound
/// beyond the last character selects up to the end of `s`. If the start is
/// not reached before the end bound is passed or `s` runs out (start beyond
/// the string, inverted range), the result is empty.
///
/// # Panics
///
/// Panics if `range` has an exclusive start bound and `s` is not empty.
pub fn sub_char_range<R>(s: &str, range: R) -> &str
where
    R: RangeBounds<usize>,
{
    // Byte offset of the first selected character, once it is known. Without
    // a lower bound the selection begins at the very first byte.
    let mut byte_start = match range.start_bound() {
        Bound::Unbounded => Some(0),
        _ => None,
    };
    // Unless the end bound is passed, the selection runs to the end of `s`.
    let mut byte_end = s.len();

    for (idx, (byte_offset, _c)) in s.char_indices().enumerate() {
        match range.start_bound() {
            Bound::Included(&start) if idx == start => {
                byte_start = Some(byte_offset);
            }
            Bound::Excluded(_start) => {
                unreachable!("Exclusive start bounds do not exist. qed");
            }
            _ => {}
        }

        let past_end = match range.end_bound() {
            Bound::Included(&end) => idx > end,
            Bound::Excluded(&end) => idx >= end,
            Bound::Unbounded => false,
        };
        if past_end {
            byte_end = byte_offset;
            break;
        }
    }

    match byte_start {
        Some(byte_start) => &s[byte_start..byte_end],
        // The start was never reached, nothing is selected.
        None => "",
    }
}
324
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an expected `(LineColumn, char)` pair from literals.
    macro_rules! lcc {
        ($line:literal, $column:literal, $c:literal) => {
            (
                LineColumn {
                    line: $line,
                    column: $column,
                },
                $c,
            )
        };
    }

    /// Loads `span` from `source` and compares the outcome to `expected`.
    fn assert_span_loads(source: &str, span: Span, expected: &str) {
        assert_eq!(
            load_span_from(source.as_bytes(), span).expect("Must succeed"),
            expected.to_owned()
        );
    }

    #[test]
    fn iter_chars() {
        const S: &str = "\nabc\nd\n";
        const S2: &str = "c\nd";
        const EXPECT: &[(LineColumn, char)] = &[
            lcc!(1, 0, '\n'),
            lcc!(2, 0, 'a'),
            lcc!(2, 1, 'b'),
            lcc!(2, 2, 'c'),
            lcc!(2, 3, '\n'),
            lcc!(3, 0, 'd'),
            lcc!(3, 1, '\n'),
        ];

        for ((c, _byte_offset, _idx, lc), &(expected_lc, expected_c)) in
            iter_with_line_column(S).zip(EXPECT)
        {
            assert_eq!(lc, expected_lc);
            assert_eq!(c, expected_c);
        }

        const SPAN: Span = Span {
            start: LineColumn { line: 2, column: 2 },
            end: LineColumn { line: 3, column: 0 },
        };

        assert_span_loads(S, SPAN, S2);
    }

    #[test]
    fn iter_span_doc_0_trivial() {
        const SOURCE: &str = "#[doc=r#\"Zebra\nSchlupfwespe,\nGrünfink\"#]";
        const S2: &str = "Zebra\nSchlupfwespe,\nGrünfink";

        // Column 9 is the first character after the leading `#[doc=r#"`.
        const SPAN: Span = Span {
            start: LineColumn { line: 1, column: 9 },
            end: LineColumn { line: 3, column: 7 },
        };

        assert_span_loads(SOURCE, SPAN, S2);
    }

    #[test]
    fn iter_span_doc_1_trailing_newline() {
        const SOURCE: &str = "#[doc=r#\"Zebra\nSchlupfwespe,\n\"#]";
        const S2: &str = "Zebra\nSchlupfwespe,\n";

        // The end position is the newline that terminates the second line.
        const SPAN: Span = Span {
            start: LineColumn { line: 1, column: 9 },
            end: LineColumn {
                line: 2,
                column: 13,
            },
        };

        assert_span_loads(SOURCE, SPAN, S2);
    }

    #[test]
    fn sub_a() {
        const INPUT: &str = "a🐲o🌡i🡴f🕧aodnferntkng";
        const WANT: &str = "a🐲o";

        assert_eq!(sub_char_range(INPUT, 0..3), WANT);
        assert_eq!(sub_char_range(INPUT, ..3), WANT);
        assert_eq!(sub_chars(INPUT, 0..3), WANT.to_owned());
    }

    #[test]
    fn sub_b() {
        const INPUT: &str = "fff🦦🡴🕧";
        const WANT: &str = "🦦🡴🕧";

        assert_eq!(sub_char_range(INPUT, 3..=5), WANT);
        assert_eq!(sub_char_range(INPUT, 3..), WANT);
    }

    #[test]
    fn sub_c() {
        const INPUT: &str = "fff🦦🡴🕧";
        const WANT: &str = "";

        assert_eq!(sub_char_range(INPUT, 10..), WANT);
        assert_eq!(sub_char_range(INPUT, 15..16), WANT);
    }

    #[test]
    fn range_bytes_to_chars() {
        const SHORT: &str = "🕱™🐡";
        const LONG: &str = "🕱12™🐡";

        assert_eq!(byte_range_to_char_range(SHORT, 4..7), Some(1..2));
        assert_eq!(byte_range_to_char_range(LONG, 6..13), Some(3..5));
        assert_eq!(byte_range_to_char_range(LONG, 0..0), Some(0..0));
        assert_eq!(byte_range_to_char_range(LONG, 25..26), None);
    }

    #[test]
    fn range_bytes_to_chars_many() {
        const INPUT: &str = "🕱™🐡";

        assert_eq!(
            byte_range_to_char_range_many(INPUT, &[4..7, 7..11]),
            vec![1..2, 2..3]
        );
        assert_eq!(
            byte_range_to_char_range_many(INPUT, &[0..0, 4..11]),
            vec![0..0, 1..3]
        );
    }
}