Skip to main content

eure_tree/tree/
span.rs

1/// A helper struct that stores the new line indexes of the input text.
2/// This is used to get the line number of a given span.
3pub struct LineNumbers<'a> {
4    phantom: std::marker::PhantomData<&'a str>,
5    indexes: Vec<u32>,
6}
7
8/// Information about a character position in text
9#[derive(Debug, PartialEq, Eq, Clone, Copy)]
10pub struct CharInfo {
11    /// The line number (0-indexed)
12    pub line_number: u32,
13    /// The column number (0-indexed)
14    pub column_number: u32,
15    /// The last newline position before this character (None if this is on the first line)
16    pub last_newline: Option<u32>,
17}
18
19impl LineNumbers<'_> {
20    pub fn new(input: &str) -> Self {
21        let mut indexes = vec![];
22        for (i, c) in input.chars().enumerate() {
23            if c == '\n' {
24                indexes.push(i as u32);
25            }
26        }
27        Self {
28            phantom: std::marker::PhantomData,
29            indexes,
30        }
31    }
32
33    /// Get detailed character position information for a given character index
34    pub fn get_char_info(&self, index: u32) -> CharInfo {
35        // Find the number of newline characters that come before this index
36        let newlines_before = self.indexes.iter().take_while(|&&i| i < index).count();
37        let line_number = newlines_before as u32;
38
39        // Find the last newline before this position
40        let last_newline = if line_number == 0 {
41            None
42        } else {
43            Some(self.indexes[newlines_before - 1])
44        };
45
46        // Calculate column number based on last newline
47        let column_number = match last_newline {
48            Some(pos) => index - (pos + 1),
49            None => index,
50        };
51
52        CharInfo {
53            line_number,
54            column_number,
55            last_newline,
56        }
57    }
58}
59
60#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
61/// A span that is only valid within the context of the input text.
62pub struct InputSpan {
63    /// The start of the span.
64    pub start: u32,
65    /// The end of the span.
66    pub end: u32,
67}
68
69impl InputSpan {
70    pub const EMPTY: Self = Self {
71        start: u32::MAX,
72        end: 0,
73    };
74
75    pub fn new(start: u32, end: u32) -> Self {
76        Self { start, end }
77    }
78
79    pub fn merge(self, other: Self) -> Self {
80        Self {
81            start: self.start.min(other.start),
82            end: self.end.max(other.end),
83        }
84    }
85
86    pub fn merge_many(self, others: impl IntoIterator<Item = Self>) -> Self {
87        others.into_iter().fold(self, |acc, other| acc.merge(other))
88    }
89
90    pub fn as_str<'a>(&self, input: &'a str) -> &'a str {
91        &input[self.start as usize..self.end as usize]
92    }
93}
94
95#[cfg(test)]
96mod tests {
97    use super::*;
98
99    #[test]
100    fn test_empty_string() {
101        let line_numbers = LineNumbers::new("");
102        assert!(line_numbers.indexes.is_empty());
103        assert_eq!(
104            line_numbers.get_char_info(0),
105            CharInfo {
106                line_number: 0,
107                column_number: 0,
108                last_newline: None
109            }
110        );
111        assert_eq!(
112            line_numbers.get_char_info(1),
113            CharInfo {
114                line_number: 0,
115                column_number: 1,
116                last_newline: None
117            }
118        );
119    }
120
121    #[test]
122    fn test_single_line() {
123        let line_numbers = LineNumbers::new("hello world");
124        assert!(line_numbers.indexes.is_empty());
125        assert_eq!(
126            line_numbers.get_char_info(0),
127            CharInfo {
128                line_number: 0,
129                column_number: 0,
130                last_newline: None
131            }
132        );
133        assert_eq!(
134            line_numbers.get_char_info(5),
135            CharInfo {
136                line_number: 0,
137                column_number: 5,
138                last_newline: None
139            }
140        );
141        assert_eq!(
142            line_numbers.get_char_info(100),
143            CharInfo {
144                line_number: 0,
145                column_number: 100,
146                last_newline: None
147            }
148        );
149    }
150
151    #[test]
152    fn test_multiple_lines() {
153        let input = "line1\nline2\nline3";
154        let line_numbers = LineNumbers::new(input);
155
156        // Verify the newline indexes (after each newline character)
157        assert_eq!(line_numbers.indexes, vec![5, 11]);
158
159        // Test positions at the beginning of lines
160        assert_eq!(
161            line_numbers.get_char_info(0),
162            CharInfo {
163                line_number: 0,
164                column_number: 0,
165                last_newline: None
166            }
167        ); // start of line 1
168        assert_eq!(
169            line_numbers.get_char_info(6),
170            CharInfo {
171                line_number: 1,
172                column_number: 0,
173                last_newline: Some(5)
174            }
175        ); // after \n, on line 2
176        assert_eq!(
177            line_numbers.get_char_info(12),
178            CharInfo {
179                line_number: 2,
180                column_number: 0,
181                last_newline: Some(11)
182            }
183        ); // after \n, on line 3
184
185        // Test positions within lines
186        assert_eq!(
187            line_numbers.get_char_info(3),
188            CharInfo {
189                line_number: 0,
190                column_number: 3,
191                last_newline: None
192            }
193        ); // middle of line 1
194        assert_eq!(
195            line_numbers.get_char_info(8),
196            CharInfo {
197                line_number: 1,
198                column_number: 2,
199                last_newline: Some(5)
200            }
201        ); // middle of line 2
202        assert_eq!(
203            line_numbers.get_char_info(15),
204            CharInfo {
205                line_number: 2,
206                column_number: 3,
207                last_newline: Some(11)
208            }
209        ); // middle of line 3
210
211        // Test positions at newlines
212        assert_eq!(
213            line_numbers.get_char_info(5),
214            CharInfo {
215                line_number: 0,
216                column_number: 5,
217                last_newline: None
218            }
219        ); // at the \n of line 1
220        assert_eq!(
221            line_numbers.get_char_info(11),
222            CharInfo {
223                line_number: 1,
224                column_number: 5,
225                last_newline: Some(5)
226            }
227        ); // at the \n of line 2
228    }
229
230    #[test]
231    fn test_with_empty_lines() {
232        let input = "line1\n\nline3";
233        let line_numbers = LineNumbers::new(input);
234
235        assert_eq!(line_numbers.indexes, vec![5, 6]);
236        assert_eq!(
237            line_numbers.get_char_info(4),
238            CharInfo {
239                line_number: 0,
240                column_number: 4,
241                last_newline: None
242            }
243        ); // end of line 1
244        assert_eq!(
245            line_numbers.get_char_info(5),
246            CharInfo {
247                line_number: 0,
248                column_number: 5,
249                last_newline: None
250            }
251        ); // at \n of line 1
252        assert_eq!(
253            line_numbers.get_char_info(6),
254            CharInfo {
255                line_number: 1,
256                column_number: 0,
257                last_newline: Some(5)
258            }
259        ); // at \n of empty line
260        assert_eq!(
261            line_numbers.get_char_info(7),
262            CharInfo {
263                line_number: 2,
264                column_number: 0,
265                last_newline: Some(6)
266            }
267        ); // on line 3
268    }
269
270    #[test]
271    fn test_with_multiple_consecutive_newlines() {
272        let input = "line1\n\n\nline4";
273        let line_numbers = LineNumbers::new(input);
274
275        assert_eq!(line_numbers.indexes, vec![5, 6, 7]);
276        assert_eq!(
277            line_numbers.get_char_info(5),
278            CharInfo {
279                line_number: 0,
280                column_number: 5,
281                last_newline: None
282            }
283        ); // at \n of line 1
284        assert_eq!(
285            line_numbers.get_char_info(6),
286            CharInfo {
287                line_number: 1,
288                column_number: 0,
289                last_newline: Some(5)
290            }
291        ); // at \n of first empty line
292        assert_eq!(
293            line_numbers.get_char_info(7),
294            CharInfo {
295                line_number: 2,
296                column_number: 0,
297                last_newline: Some(6)
298            }
299        ); // at \n of second empty line
300        assert_eq!(
301            line_numbers.get_char_info(8),
302            CharInfo {
303                line_number: 3,
304                column_number: 0,
305                last_newline: Some(7)
306            }
307        ); // on line 4
308    }
309
310    #[test]
311    fn test_trailing_newline() {
312        let input = "line1\nline2\n";
313        let line_numbers = LineNumbers::new(input);
314
315        assert_eq!(line_numbers.indexes, vec![5, 11]);
316        assert_eq!(
317            line_numbers.get_char_info(11),
318            CharInfo {
319                line_number: 1,
320                column_number: 5,
321                last_newline: Some(5)
322            }
323        ); // at the last \n (belongs to line 2)
324        assert_eq!(
325            line_numbers.get_char_info(12),
326            CharInfo {
327                line_number: 2,
328                column_number: 0,
329                last_newline: Some(11)
330            }
331        ); // after the last \n
332        assert_eq!(
333            line_numbers.get_char_info(100),
334            CharInfo {
335                line_number: 2,
336                column_number: 88,
337                last_newline: Some(11)
338            }
339        ); // after the last \n
340    }
341
342    #[test]
343    fn test_unicode_characters() {
344        let input = "こんにちは\n世界";
345        let line_numbers = LineNumbers::new(input);
346
347        assert_eq!(line_numbers.indexes, vec![5]);
348        assert_eq!(
349            line_numbers.get_char_info(3),
350            CharInfo {
351                line_number: 0,
352                column_number: 3,
353                last_newline: None
354            }
355        ); // middle of first line
356        assert_eq!(
357            line_numbers.get_char_info(4),
358            CharInfo {
359                line_number: 0,
360                column_number: 4,
361                last_newline: None
362            }
363        ); // end of first line
364        assert_eq!(
365            line_numbers.get_char_info(5),
366            CharInfo {
367                line_number: 0,
368                column_number: 5,
369                last_newline: None
370            }
371        ); // at \n
372        assert_eq!(
373            line_numbers.get_char_info(6),
374            CharInfo {
375                line_number: 1,
376                column_number: 0,
377                last_newline: Some(5)
378            }
379        ); // start of second line
380    }
381
382    #[test]
383    fn test_large_index() {
384        let input = "line1\nline2";
385        let line_numbers = LineNumbers::new(input);
386
387        // Even though the input is only 11 characters long, the method should work with larger indices
388        assert_eq!(
389            line_numbers.get_char_info(10000),
390            CharInfo {
391                line_number: 1,
392                column_number: 9994,
393                last_newline: Some(5)
394            }
395        );
396    }
397
398    #[test]
399    fn test_line_starting_with_newline() {
400        let input = "\nline2\nline3";
401        let line_numbers = LineNumbers::new(input);
402
403        assert_eq!(line_numbers.indexes, vec![0, 6]);
404        assert_eq!(
405            line_numbers.get_char_info(0),
406            CharInfo {
407                line_number: 0,
408                column_number: 0,
409                last_newline: None
410            }
411        ); // The first character is a newline
412        assert_eq!(
413            line_numbers.get_char_info(1),
414            CharInfo {
415                line_number: 1,
416                column_number: 0,
417                last_newline: Some(0)
418            }
419        ); // First char of line 2
420        assert_eq!(
421            line_numbers.get_char_info(3),
422            CharInfo {
423                line_number: 1,
424                column_number: 2,
425                last_newline: Some(0)
426            }
427        ); // Middle of line 2
428    }
429
430    #[test]
431    fn test_exact_boundary_indices() {
432        let input = "line1\nline2\nline3";
433        let line_numbers = LineNumbers::new(input);
434
435        // Test boundary indices
436        assert_eq!(line_numbers.indexes, vec![5, 11]);
437        assert_eq!(
438            line_numbers.get_char_info(4),
439            CharInfo {
440                line_number: 0,
441                column_number: 4,
442                last_newline: None
443            }
444        ); // Last character of line 1
445        assert_eq!(
446            line_numbers.get_char_info(5),
447            CharInfo {
448                line_number: 0,
449                column_number: 5,
450                last_newline: None
451            }
452        ); // Newline character of line 1
453        assert_eq!(
454            line_numbers.get_char_info(6),
455            CharInfo {
456                line_number: 1,
457                column_number: 0,
458                last_newline: Some(5)
459            }
460        ); // First character of line 2
461        assert_eq!(
462            line_numbers.get_char_info(10),
463            CharInfo {
464                line_number: 1,
465                column_number: 4,
466                last_newline: Some(5)
467            }
468        ); // Last character of line 2
469        assert_eq!(
470            line_numbers.get_char_info(11),
471            CharInfo {
472                line_number: 1,
473                column_number: 5,
474                last_newline: Some(5)
475            }
476        ); // Newline character of line 2
477        assert_eq!(
478            line_numbers.get_char_info(12),
479            CharInfo {
480                line_number: 2,
481                column_number: 0,
482                last_newline: Some(11)
483            }
484        ); // First character of line 3
485    }
486
487    #[test]
488    fn test_only_newlines() {
489        let input = "\n\n";
490        let line_numbers = LineNumbers::new(input);
491        // There are two newline characters at positions 0 and 1
492        assert_eq!(line_numbers.indexes, vec![0, 1]);
493        assert_eq!(
494            line_numbers.get_char_info(0),
495            CharInfo {
496                line_number: 0,
497                column_number: 0,
498                last_newline: None
499            }
500        ); // at first newline
501        assert_eq!(
502            line_numbers.get_char_info(1),
503            CharInfo {
504                line_number: 1,
505                column_number: 0,
506                last_newline: Some(0)
507            }
508        ); // at second newline
509        assert_eq!(
510            line_numbers.get_char_info(2),
511            CharInfo {
512                line_number: 2,
513                column_number: 0,
514                last_newline: Some(1)
515            }
516        ); // after second newline
517        assert_eq!(
518            line_numbers.get_char_info(3),
519            CharInfo {
520                line_number: 2,
521                column_number: 1,
522                last_newline: Some(1)
523            }
524        ); // beyond input
525    }
526
527    #[test]
528    fn test_crlf_line_endings() {
529        let input = "line1\r\nline2\r\nline3";
530        let line_numbers = LineNumbers::new(input);
531        // CRLF yields newline positions at the '\n' characters
532        // 'line1\r\n' => newline at index 6, 'line2\r\n' => newline at index 13
533        assert_eq!(line_numbers.indexes, vec![6, 13]);
534        assert_eq!(
535            line_numbers.get_char_info(6),
536            CharInfo {
537                line_number: 0,
538                column_number: 6,
539                last_newline: None
540            }
541        ); // at first '\n'
542        assert_eq!(
543            line_numbers.get_char_info(7),
544            CharInfo {
545                line_number: 1,
546                column_number: 0,
547                last_newline: Some(6)
548            }
549        ); // after first '\n'
550        assert_eq!(
551            line_numbers.get_char_info(13),
552            CharInfo {
553                line_number: 1,
554                column_number: 6,
555                last_newline: Some(6)
556            }
557        ); // at second '\n'
558        assert_eq!(
559            line_numbers.get_char_info(14),
560            CharInfo {
561                line_number: 2,
562                column_number: 0,
563                last_newline: Some(13)
564            }
565        ); // after second '\n'
566    }
567
568    #[test]
569    fn test_carriage_returns_only_not_supported() {
570        let input = "line1\rline2\rline3";
571        let line_numbers = LineNumbers::new(input);
572        // No \n present, so no newlines recorded
573        assert!(line_numbers.indexes.is_empty());
574        // All indices should map to line 0
575        assert_eq!(
576            line_numbers.get_char_info(0),
577            CharInfo {
578                line_number: 0,
579                column_number: 0,
580                last_newline: None
581            }
582        );
583        assert_eq!(
584            line_numbers.get_char_info(6),
585            CharInfo {
586                line_number: 0,
587                column_number: 6,
588                last_newline: None
589            }
590        );
591        assert_eq!(
592            line_numbers.get_char_info(100),
593            CharInfo {
594                line_number: 0,
595                column_number: 100,
596                last_newline: None
597            }
598        );
599    }
600
601    #[test]
602    fn test_emoji_characters() {
603        let input = "😀\n😃\n😄";
604        let line_numbers = LineNumbers::new(input);
605        // Newline characters at char indices 1 and 3
606        assert_eq!(line_numbers.indexes, vec![1, 3]);
607        // Test mapping
608        assert_eq!(
609            line_numbers.get_char_info(0),
610            CharInfo {
611                line_number: 0,
612                column_number: 0,
613                last_newline: None
614            }
615        ); // before first newline
616        assert_eq!(
617            line_numbers.get_char_info(1),
618            CharInfo {
619                line_number: 0,
620                column_number: 1,
621                last_newline: None
622            }
623        ); // at first newline
624        assert_eq!(
625            line_numbers.get_char_info(2),
626            CharInfo {
627                line_number: 1,
628                column_number: 0,
629                last_newline: Some(1)
630            }
631        ); // after first newline
632        assert_eq!(
633            line_numbers.get_char_info(3),
634            CharInfo {
635                line_number: 1,
636                column_number: 1,
637                last_newline: Some(1)
638            }
639        ); // at second newline
640        assert_eq!(
641            line_numbers.get_char_info(4),
642            CharInfo {
643                line_number: 2,
644                column_number: 0,
645                last_newline: Some(3)
646            }
647        ); // after second newline
648    }
649
650    #[test]
651    fn test_crlf_trailing_newline() {
652        let input = "line1\r\n";
653        let line_numbers = LineNumbers::new(input);
654        // Newline at '\n' index 6
655        assert_eq!(line_numbers.indexes, vec![6]);
656        assert_eq!(
657            line_numbers.get_char_info(6),
658            CharInfo {
659                line_number: 0,
660                column_number: 6,
661                last_newline: None
662            }
663        ); // at '\n'
664        assert_eq!(
665            line_numbers.get_char_info(7),
666            CharInfo {
667                line_number: 1,
668                column_number: 0,
669                last_newline: Some(6)
670            }
671        ); // after '\n'
672        assert_eq!(
673            line_numbers.get_char_info(100),
674            CharInfo {
675                line_number: 1,
676                column_number: 93,
677                last_newline: Some(6)
678            }
679        );
680    }
681
682    #[test]
683    fn test_only_crlfs() {
684        // Input composed solely of CRLF sequences
685        let input = "\r\n\r\n";
686        let line_numbers = LineNumbers::new(input);
687        // CRLF at indices 1 and 3
688        assert_eq!(line_numbers.indexes, vec![1, 3]);
689        // Test mapping
690        assert_eq!(
691            line_numbers.get_char_info(0),
692            CharInfo {
693                line_number: 0,
694                column_number: 0,
695                last_newline: None
696            }
697        ); // before first \n
698        assert_eq!(
699            line_numbers.get_char_info(1),
700            CharInfo {
701                line_number: 0,
702                column_number: 1,
703                last_newline: None
704            }
705        ); // at first \n
706        assert_eq!(
707            line_numbers.get_char_info(2),
708            CharInfo {
709                line_number: 1,
710                column_number: 0,
711                last_newline: Some(1)
712            }
713        ); // between \n and next CR
714        assert_eq!(
715            line_numbers.get_char_info(3),
716            CharInfo {
717                line_number: 1,
718                column_number: 1,
719                last_newline: Some(1)
720            }
721        ); // at second \n
722        assert_eq!(
723            line_numbers.get_char_info(4),
724            CharInfo {
725                line_number: 2,
726                column_number: 0,
727                last_newline: Some(3)
728            }
729        ); // after all newlines
730    }
731
732    #[test]
733    fn test_line_starting_with_crlf() {
734        // Input starting with a CRLF
735        let input = "\r\nfirst\r\nsecond";
736        let line_numbers = LineNumbers::new(input);
737        // CRLF at indices of '\n' chars: 1 and 8
738        assert_eq!(line_numbers.indexes, vec![1, 8]);
739        // Test mapping
740        assert_eq!(
741            line_numbers.get_char_info(0),
742            CharInfo {
743                line_number: 0,
744                column_number: 0,
745                last_newline: None
746            }
747        ); // at CR
748        assert_eq!(
749            line_numbers.get_char_info(1),
750            CharInfo {
751                line_number: 0,
752                column_number: 1,
753                last_newline: None
754            }
755        ); // at LF
756        assert_eq!(
757            line_numbers.get_char_info(2),
758            CharInfo {
759                line_number: 1,
760                column_number: 0,
761                last_newline: Some(1)
762            }
763        ); // 'f'
764        assert_eq!(
765            line_numbers.get_char_info(6),
766            CharInfo {
767                line_number: 1,
768                column_number: 4,
769                last_newline: Some(1)
770            }
771        ); // 't' of 'first'
772        assert_eq!(
773            line_numbers.get_char_info(7),
774            CharInfo {
775                line_number: 1,
776                column_number: 5,
777                last_newline: Some(1)
778            }
779        ); // before second LF
780        assert_eq!(
781            line_numbers.get_char_info(8),
782            CharInfo {
783                line_number: 1,
784                column_number: 6,
785                last_newline: Some(1)
786            }
787        ); // at second LF
788        assert_eq!(
789            line_numbers.get_char_info(9),
790            CharInfo {
791                line_number: 2,
792                column_number: 0,
793                last_newline: Some(8)
794            }
795        ); // 's' of 'second'
796    }
797}