1use line_span::LineSpans;
7use sevenmark_ast::Span;
8
9pub struct LineIndex {
15 line_starts: Vec<usize>,
18}
19
20impl LineIndex {
21 pub fn new(text: &str) -> Self {
23 let line_starts: Vec<usize> = text.line_spans().map(|span| span.range().start).collect();
24
25 if line_starts.is_empty() {
27 return Self {
28 line_starts: vec![0],
29 };
30 }
31
32 Self { line_starts }
33 }
34
35 pub fn byte_offset_to_position(&self, text: &str, offset: usize) -> (u32, u32) {
42 let offset = offset.min(text.len());
43
44 let line = self.line_starts.partition_point(|&start| start <= offset);
47 let line = line.saturating_sub(1);
50
51 let line_start = self.line_starts[line];
52 let character = utf16_len(&text[line_start..offset]);
53
54 (line as u32, character)
55 }
56
57 pub fn span_to_range(&self, text: &str, span: &Span) -> ((u32, u32), (u32, u32)) {
61 let start = self.byte_offset_to_position(text, span.start);
62 let end = self.byte_offset_to_position(text, span.end);
63 (start, end)
64 }
65
66 pub fn position_to_byte_offset(&self, text: &str, line: u32, character: u32) -> usize {
70 let line = line as usize;
71 if line >= self.line_starts.len() {
72 return text.len();
73 }
74
75 let line_start = self.line_starts[line];
76 let line_end = self
78 .line_starts
79 .get(line + 1)
80 .copied()
81 .unwrap_or(text.len());
82 let line_text = &text[line_start..line_end];
83
84 let mut utf16_count = 0u32;
86 let mut byte_offset = 0;
87 for ch in line_text.chars() {
88 if utf16_count >= character {
89 break;
90 }
91 utf16_count += ch.len_utf16() as u32;
92 byte_offset += ch.len_utf8();
93 }
94
95 line_start + byte_offset
96 }
97}
98
99#[inline]
104fn utf16_len(s: &str) -> u32 {
105 s.chars().map(|c| c.len_utf16() as u32).sum()
106}
107
108#[cfg(test)]
109mod tests {
110 use super::*;
111
112 #[test]
113 fn test_single_line_ascii() {
114 let text = "hello world";
115 let idx = LineIndex::new(text);
116 assert_eq!(idx.byte_offset_to_position(text, 0), (0, 0));
117 assert_eq!(idx.byte_offset_to_position(text, 5), (0, 5));
118 assert_eq!(idx.byte_offset_to_position(text, 11), (0, 11));
119 }
120
121 #[test]
122 fn test_multi_line_ascii() {
123 let text = "aaa\nbbb\nccc";
124 let idx = LineIndex::new(text);
125 assert_eq!(idx.byte_offset_to_position(text, 0), (0, 0));
127 assert_eq!(idx.byte_offset_to_position(text, 2), (0, 2));
128 assert_eq!(idx.byte_offset_to_position(text, 4), (1, 0));
130 assert_eq!(idx.byte_offset_to_position(text, 6), (1, 2));
131 assert_eq!(idx.byte_offset_to_position(text, 8), (2, 0));
133 assert_eq!(idx.byte_offset_to_position(text, 11), (2, 3));
134 }
135
136 #[test]
137 fn test_newline_boundary() {
138 let text = "ab\ncd\nef";
140 let idx = LineIndex::new(text);
141 assert_eq!(idx.byte_offset_to_position(text, 2), (0, 2)); assert_eq!(idx.byte_offset_to_position(text, 3), (1, 0)); assert_eq!(idx.byte_offset_to_position(text, 5), (1, 2)); assert_eq!(idx.byte_offset_to_position(text, 6), (2, 0)); }
146
147 #[test]
148 fn test_korean_utf8() {
149 let text = "한글";
151 let idx = LineIndex::new(text);
152 assert_eq!(idx.byte_offset_to_position(text, 0), (0, 0)); assert_eq!(idx.byte_offset_to_position(text, 3), (0, 1)); assert_eq!(idx.byte_offset_to_position(text, 6), (0, 2)); }
156
157 #[test]
158 fn test_emoji_surrogate_pair() {
159 let text = "a🚀b";
162 let idx = LineIndex::new(text);
163 assert_eq!(idx.byte_offset_to_position(text, 0), (0, 0)); assert_eq!(idx.byte_offset_to_position(text, 1), (0, 1)); assert_eq!(idx.byte_offset_to_position(text, 5), (0, 3)); assert_eq!(idx.byte_offset_to_position(text, 6), (0, 4)); }
168
169 #[test]
170 fn test_mixed_multiline_korean() {
171 let text = "한\n글";
173 let idx = LineIndex::new(text);
174 assert_eq!(idx.byte_offset_to_position(text, 0), (0, 0)); assert_eq!(idx.byte_offset_to_position(text, 3), (0, 1)); assert_eq!(idx.byte_offset_to_position(text, 4), (1, 0)); assert_eq!(idx.byte_offset_to_position(text, 7), (1, 1)); }
179
180 #[test]
181 fn test_empty_text() {
182 let text = "";
183 let idx = LineIndex::new(text);
184 assert_eq!(idx.byte_offset_to_position(text, 0), (0, 0));
185 }
186
187 #[test]
188 fn test_crlf() {
189 let text = "aa\r\nbb";
190 let idx = LineIndex::new(text);
191 assert_eq!(idx.byte_offset_to_position(text, 0), (0, 0)); assert_eq!(idx.byte_offset_to_position(text, 2), (0, 2)); assert_eq!(idx.byte_offset_to_position(text, 4), (1, 0)); }
195
196 #[test]
197 fn test_offset_clamped() {
198 let text = "abc";
199 let idx = LineIndex::new(text);
200 assert_eq!(idx.byte_offset_to_position(text, 100), (0, 3));
202 }
203
204 #[test]
205 fn test_span_to_range() {
206 let text = "aaa\nbbb\nccc";
207 let idx = LineIndex::new(text);
208 let span = Span::new(4, 7); let ((sl, sc), (el, ec)) = idx.span_to_range(text, &span);
210 assert_eq!((sl, sc), (1, 0));
211 assert_eq!((el, ec), (1, 3));
212 }
213
214 #[test]
215 fn test_span_across_lines() {
216 let text = "aaa\nbbb\nccc";
217 let idx = LineIndex::new(text);
218 let span = Span::new(2, 9); let ((sl, sc), (el, ec)) = idx.span_to_range(text, &span);
220 assert_eq!((sl, sc), (0, 2));
221 assert_eq!((el, ec), (2, 1));
222 }
223
224 #[test]
225 fn test_trailing_newline() {
226 let text = "abc\n";
227 let idx = LineIndex::new(text);
228 assert_eq!(idx.byte_offset_to_position(text, 3), (0, 3)); assert_eq!(idx.byte_offset_to_position(text, 4), (0, 4)); }
231
232 #[test]
233 fn test_multiple_empty_lines() {
234 let text = "a\n\n\nb";
235 let idx = LineIndex::new(text);
236 assert_eq!(idx.byte_offset_to_position(text, 0), (0, 0)); assert_eq!(idx.byte_offset_to_position(text, 2), (1, 0)); assert_eq!(idx.byte_offset_to_position(text, 3), (2, 0)); assert_eq!(idx.byte_offset_to_position(text, 4), (3, 0)); }
241
242 #[test]
245 fn test_pos_to_byte_ascii() {
246 let text = "aaa\nbbb\nccc";
247 let idx = LineIndex::new(text);
248 assert_eq!(idx.position_to_byte_offset(text, 0, 0), 0);
249 assert_eq!(idx.position_to_byte_offset(text, 0, 2), 2);
250 assert_eq!(idx.position_to_byte_offset(text, 1, 0), 4);
251 assert_eq!(idx.position_to_byte_offset(text, 1, 2), 6);
252 assert_eq!(idx.position_to_byte_offset(text, 2, 0), 8);
253 assert_eq!(idx.position_to_byte_offset(text, 2, 3), 11);
254 }
255
256 #[test]
257 fn test_pos_to_byte_korean() {
258 let text = "한글";
260 let idx = LineIndex::new(text);
261 assert_eq!(idx.position_to_byte_offset(text, 0, 0), 0);
262 assert_eq!(idx.position_to_byte_offset(text, 0, 1), 3); assert_eq!(idx.position_to_byte_offset(text, 0, 2), 6); }
265
266 #[test]
267 fn test_pos_to_byte_emoji() {
268 let text = "a🚀b";
270 let idx = LineIndex::new(text);
271 assert_eq!(idx.position_to_byte_offset(text, 0, 0), 0); assert_eq!(idx.position_to_byte_offset(text, 0, 1), 1); assert_eq!(idx.position_to_byte_offset(text, 0, 3), 5); assert_eq!(idx.position_to_byte_offset(text, 0, 4), 6); }
276
277 #[test]
278 fn test_pos_to_byte_roundtrip() {
279 let text = "Hello, 세계!\n🚀 rocket\nend";
280 let idx = LineIndex::new(text);
281
282 for byte_offset in [0, 1, 7, 10, 14, 15, 19, 23, 26] {
284 if byte_offset <= text.len() {
285 let (line, character) = idx.byte_offset_to_position(text, byte_offset);
286 let recovered = idx.position_to_byte_offset(text, line, character);
287 assert_eq!(
288 recovered, byte_offset,
289 "roundtrip failed for byte_offset={byte_offset}"
290 );
291 }
292 }
293 }
294
295 #[test]
296 fn test_pos_to_byte_beyond_end() {
297 let text = "abc";
298 let idx = LineIndex::new(text);
299 assert_eq!(idx.position_to_byte_offset(text, 5, 0), text.len());
301 assert_eq!(idx.position_to_byte_offset(text, 0, 100), 3);
303 }
304}