1use std::collections::HashSet;
4use std::ops::Range;
5
/// Returns the largest UTF-8 character boundary in `s` that is `<= byte_idx`.
///
/// Indices at or past the end of `s` yield `s.len()`. An index that falls
/// inside a multi-byte character is rounded down to that character's first
/// byte, so the result is always safe to use for slicing `s`.
fn find_char_boundary(s: &str, byte_idx: usize) -> usize {
    let capped = byte_idx.min(s.len());
    // Offset 0 is always a boundary, so the search cannot come up empty; the
    // fallback only exists to avoid an `unwrap`.
    (0..=capped)
        .rev()
        .find(|&candidate| s.is_char_boundary(candidate))
        .unwrap_or(0)
}
30
31fn byte_to_char_count(s: &str, byte_idx: usize) -> usize {
34 let safe_byte_idx = find_char_boundary(s, byte_idx);
35 s[..safe_byte_idx].chars().count() + 1 }
37
/// Byte-offset index over a borrowed text buffer.
///
/// The range-producing methods take 1-based line and column numbers (columns
/// count `char`s, not bytes) and return byte ranges into the buffer.
/// The code-block queries (`is_code_block`, `is_code_fence`,
/// `is_tilde_code_block`) take 0-based line indices instead.
#[derive(Debug)]
pub struct LineIndex<'a> {
    /// Byte offset of the first byte of every line; always begins with `0`
    /// and gains one entry after each `\n`.
    line_starts: Vec<usize>,
    /// The indexed text.
    content: &'a str,
    /// 0-based indices of the lines classified as code; populated by `new`.
    code_block_lines: Option<HashSet<usize>>,
}

impl<'a> LineIndex<'a> {
    /// Builds the index for `content`, recording line starts and classifying
    /// code-block lines up front.
    pub fn new(content: &'a str) -> Self {
        let mut line_starts = vec![0];
        // `\n` is a single ASCII byte, so the byte after each match starts a line.
        line_starts.extend(content.match_indices('\n').map(|(pos, _)| pos + 1));

        let mut index = Self {
            line_starts,
            content,
            code_block_lines: None,
        };
        index.compute_code_block_lines();
        index
    }

    /// Byte offset where the 0-based line `line_idx` starts, or the buffer
    /// length when there is no such line.
    fn line_start_or_end(&self, line_idx: usize) -> usize {
        self.line_starts
            .get(line_idx)
            .copied()
            .unwrap_or(self.content.len())
    }

    /// Text of the 0-based line `line_idx` without its `\n` / `\r\n`
    /// terminator, or `""` when the line does not exist.
    ///
    /// Yields the same text as `content.lines().nth(line_idx)` but slices via
    /// `line_starts` instead of rescanning the buffer on every call.
    fn line_text(&self, line_idx: usize) -> &'a str {
        let content: &'a str = self.content;
        let Some(&start) = self.line_starts.get(line_idx) else {
            return "";
        };
        let end = self
            .line_starts
            .get(line_idx + 1)
            .copied()
            .unwrap_or(content.len());
        let raw = &content[start..end];
        // A `\r` is only part of the terminator when it precedes a `\n`.
        match raw.strip_suffix('\n') {
            Some(body) => body.strip_suffix('\r').unwrap_or(body),
            None => raw,
        }
    }

    /// Byte offset inside `line` of the 0-based character column `char_col`,
    /// clamped to `line.len()` when the column lies past the end.
    fn char_col_to_byte(line: &str, char_col: usize) -> usize {
        line.char_indices()
            .nth(char_col)
            .map_or(line.len(), |(offset, _)| offset)
    }

    /// Returns the empty byte range located at 1-based (`line`, `column`).
    ///
    /// Columns past the end of the line clamp to the end of the line's text;
    /// lines past the end of the buffer clamp to the buffer length.
    pub fn line_col_to_byte_range(&self, line: usize, column: usize) -> Range<usize> {
        let line_idx = line.saturating_sub(1);
        let text = self.line_text(line_idx);
        let start =
            self.line_start_or_end(line_idx) + Self::char_col_to_byte(text, column.saturating_sub(1));
        start..start
    }

    /// Returns the byte range covering `length` characters starting at
    /// 1-based (`line`, `column`), clamped to the end of the line's text.
    pub fn line_col_to_byte_range_with_length(&self, line: usize, column: usize, length: usize) -> Range<usize> {
        let line_idx = line.saturating_sub(1);
        let base = self.line_start_or_end(line_idx);
        let text = self.line_text(line_idx);

        let first_col = column.saturating_sub(1);
        // Saturate so an oversized `length` clamps to end-of-line instead of overflowing.
        let last_col = first_col.saturating_add(length);

        let start = base + Self::char_col_to_byte(text, first_col);
        let end = base + Self::char_col_to_byte(text, last_col);
        start..end
    }

    /// Returns the byte range of 1-based `line` including its line terminator.
    pub fn whole_line_range(&self, line: usize) -> Range<usize> {
        let line_idx = line.saturating_sub(1);
        let start = self.line_start_or_end(line_idx);
        let end = self.line_start_or_end(line_idx + 1);
        start..end
    }

    /// Returns the byte range spanning 1-based `start_line` through `end_line`
    /// inclusive, terminators included.
    ///
    /// When `end_line` precedes `start_line` the result is the empty range at
    /// the start of `start_line` rather than an inverted range.
    pub fn multi_line_range(&self, start_line: usize, end_line: usize) -> Range<usize> {
        let start = self.line_start_or_end(start_line.saturating_sub(1));
        let end = self.line_start_or_end(end_line.saturating_sub(1) + 1);
        start..end.max(start)
    }

    /// Returns the byte range between 1-based character columns `start_col`
    /// (inclusive) and `end_col` (exclusive) on 1-based `line`.
    ///
    /// Both columns clamp to the end of the line's text, and the range is
    /// never inverted.
    pub fn line_text_range(&self, line: usize, start_col: usize, end_col: usize) -> Range<usize> {
        let line_idx = line.saturating_sub(1);
        let base = self.line_start_or_end(line_idx);
        let text = self.line_text(line_idx);

        let start_byte = Self::char_col_to_byte(text, start_col.saturating_sub(1));
        let end_byte = Self::char_col_to_byte(text, end_col.saturating_sub(1));

        (base + start_byte)..(base + end_byte.max(start_byte))
    }

    /// Returns the byte range of 1-based `line` excluding its line terminator.
    pub fn line_content_range(&self, line: usize) -> Range<usize> {
        let line_idx = line.saturating_sub(1);
        let start = self.line_start_or_end(line_idx);
        start..start + self.line_text(line_idx).len()
    }

    /// Returns the byte offset at which 1-based `line_num` starts, or `None`
    /// for line 0 and for lines past the end of the buffer.
    pub fn get_line_start_byte(&self, line_num: usize) -> Option<usize> {
        let line_idx = line_num.checked_sub(1)?;
        self.line_starts.get(line_idx).copied()
    }

    /// Reports whether the 0-based `line` was classified as part of a code
    /// block (fenced or indented).
    ///
    /// Falls back to a plain fence check if the classification is missing.
    pub fn is_code_block(&self, line: usize) -> bool {
        match &self.code_block_lines {
            Some(code_lines) => code_lines.contains(&line),
            None => self.is_code_fence(line),
        }
    }

    /// Reports whether the 0-based `line` starts (after trimming) with a
    /// backtick or tilde fence marker.
    pub fn is_code_fence(&self, line: usize) -> bool {
        let trimmed = self.line_text(line).trim();
        trimmed.starts_with("```") || trimmed.starts_with("~~~")
    }

    /// Reports whether the 0-based `line` starts (after trimming) with a
    /// tilde fence marker.
    pub fn is_tilde_code_block(&self, line: usize) -> bool {
        self.line_text(line).trim().starts_with("~~~")
    }

    /// Returns the indexed text.
    pub fn get_content(&self) -> &str {
        self.content
    }

    /// Classifies every line as code / non-code and stores the 0-based indices
    /// of the code lines in `code_block_lines`.
    ///
    /// A line counts as code when it is indented by four spaces or a tab, or
    /// when it lies inside a fenced block (fence lines included). Inside a
    /// fence whose info string is exactly `markdown`, the first nested
    /// ```` ```lang ```` / ```` ``` ```` pair is treated as content, so its
    /// bare closing fence does not terminate the outer block.
    fn compute_code_block_lines(&mut self) {
        /// State of the fenced block currently being scanned.
        struct OpenFence {
            marker: char,
            marker_len: usize,
            indent: usize,
            is_markdown: bool,
            nested_start: Option<usize>,
            nested_end: Option<usize>,
        }

        /// Splits `trimmed` into the length of its leading run of `marker`
        /// and the trimmed remainder. Markers are ASCII, so the byte
        /// difference equals the character count.
        fn split_fence(trimmed: &str, marker: char) -> (usize, &str) {
            let rest = trimmed.trim_start_matches(marker);
            (trimmed.len() - rest.len(), rest.trim())
        }

        let mut code_block_lines = HashSet::new();
        let mut open: Option<OpenFence> = None;

        for (i, line) in self.content.lines().enumerate() {
            let trimmed = line.trim();
            // Leading whitespace only: trailing spaces after a fence must not
            // count as indentation, or the closing fence would be rejected.
            let indent = line.len() - line.trim_start().len();

            // Indented code block: four leading spaces or a tab.
            if line.starts_with("    ") || line.starts_with('\t') {
                code_block_lines.insert(i);
                continue;
            }

            let Some(fence) = open.as_mut() else {
                let marker = if trimmed.starts_with("```") {
                    Some('`')
                } else if trimmed.starts_with("~~~") {
                    Some('~')
                } else {
                    None
                };

                if let Some(marker) = marker {
                    let (marker_len, info) = split_fence(trimmed, marker);
                    open = Some(OpenFence {
                        marker,
                        marker_len,
                        indent,
                        is_markdown: info == "markdown",
                        nested_start: None,
                        nested_end: None,
                    });
                    code_block_lines.insert(i);
                }
                continue;
            };

            // Everything inside an open fence, including the closing line, is code.
            code_block_lines.insert(i);

            if fence.is_markdown && trimmed.starts_with("```") {
                let (_, rest) = split_fence(trimmed, '`');
                if fence.nested_start.is_none() {
                    // A backtick fence with an info string opens the nested block.
                    if !rest.is_empty() {
                        fence.nested_start = Some(i);
                    }
                } else if fence.nested_end.is_none() && rest.is_empty() {
                    // The first bare fence afterwards closes the nested block.
                    fence.nested_end = Some(i);
                }
            }

            let (run, rest) = split_fence(trimmed, fence.marker);
            // `marker_len` is at least 3, so `run >= marker_len` also
            // guarantees this line really starts with a fence.
            let closes_block = run >= fence.marker_len
                && rest.is_empty()
                && indent <= fence.indent
                && fence.nested_end != Some(i);

            if closes_block {
                open = None;
            }
        }

        self.code_block_lines = Some(code_block_lines);
    }
}
346
/// Builds a `(start_line, start_col, end_line, end_col)` tuple for a span of
/// `length` columns that starts at `start_col` and stays on `line`.
pub fn calculate_single_line_range(line: usize, start_col: usize, length: usize) -> (usize, usize, usize, usize) {
    let end_col = start_col + length;
    (line, start_col, line, end_col)
}
351
/// Builds a `(start_line, start_col, end_line, end_col)` tuple covering
/// `line_content` from column 1 up to (but excluding) any trailing whitespace.
///
/// Columns are 1-based and counted in characters, matching the other range
/// helpers in this file, so multi-byte text does not inflate the end column.
pub fn calculate_line_range(line: usize, line_content: &str) -> (usize, usize, usize, usize) {
    let visible_chars = line_content.trim_end().chars().count();
    (line, 1, line, visible_chars + 1)
}
357
358pub fn calculate_match_range(
364 line: usize,
365 line_content: &str,
366 match_start: usize,
367 match_len: usize,
368) -> (usize, usize, usize, usize) {
369 let line_len = line_content.len();
371 if match_start > line_len {
372 let char_count = line_content.chars().count();
374 return (line, char_count + 1, line, char_count + 1);
375 }
376
377 let safe_match_start = find_char_boundary(line_content, match_start);
379 let safe_match_end_byte = find_char_boundary(line_content, (match_start + match_len).min(line_len));
380
381 let char_start = byte_to_char_count(line_content, safe_match_start);
383 let char_len = if safe_match_end_byte > safe_match_start {
384 line_content[safe_match_start..safe_match_end_byte].chars().count()
386 } else {
387 0
388 };
389 (line, char_start, line, char_start + char_len)
390}
391
392pub fn calculate_trailing_range(line: usize, line_content: &str, content_end: usize) -> (usize, usize, usize, usize) {
398 let safe_content_end = find_char_boundary(line_content, content_end);
400 let char_content_end = byte_to_char_count(line_content, safe_content_end);
401 let line_char_len = line_content.chars().count() + 1;
402 (line, char_content_end, line, line_char_len)
403}
404
405pub fn calculate_heading_range(line: usize, line_content: &str) -> (usize, usize, usize, usize) {
407 calculate_line_range(line, line_content)
408}
409
410pub fn calculate_emphasis_range(
416 line: usize,
417 line_content: &str,
418 start_pos: usize,
419 end_pos: usize,
420) -> (usize, usize, usize, usize) {
421 let safe_start_pos = find_char_boundary(line_content, start_pos);
423 let safe_end_pos = find_char_boundary(line_content, end_pos);
424 let char_start = byte_to_char_count(line_content, safe_start_pos);
425 let char_end = byte_to_char_count(line_content, safe_end_pos);
426 (line, char_start, line, char_end)
427}
428
429pub fn calculate_html_tag_range(
431 line: usize,
432 line_content: &str,
433 tag_start: usize,
434 tag_len: usize,
435) -> (usize, usize, usize, usize) {
436 calculate_match_range(line, line_content, tag_start, tag_len)
437}
438
439pub fn calculate_url_range(
441 line: usize,
442 line_content: &str,
443 url_start: usize,
444 url_len: usize,
445) -> (usize, usize, usize, usize) {
446 calculate_match_range(line, line_content, url_start, url_len)
447}
448
449pub fn calculate_list_marker_range(
451 line: usize,
452 line_content: &str,
453 marker_start: usize,
454 marker_len: usize,
455) -> (usize, usize, usize, usize) {
456 calculate_match_range(line, line_content, marker_start, marker_len)
457}
458
/// Builds a `(start_line, start_col, end_line, end_col)` tuple covering the
/// characters of `line_content` that lie beyond the first `limit` characters.
///
/// Columns are 1-based character columns. When the line has at most `limit`
/// characters the range is empty and sits just past the last character.
pub fn calculate_excess_range(line: usize, line_content: &str, limit: usize) -> (usize, usize, usize, usize) {
    let total_chars = line_content.chars().count();
    let first_excess_col = limit.min(total_chars) + 1;
    (line, first_excess_col, line, total_chars + 1)
}
465
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_single_line_range() {
        let (start_line, start_col, end_line, end_col) = calculate_single_line_range(5, 10, 3);
        assert_eq!(start_line, 5);
        assert_eq!(start_col, 10);
        assert_eq!(end_line, 5);
        assert_eq!(end_col, 13);
    }

    #[test]
    fn test_line_range() {
        let content = "# This is a heading ";
        let (start_line, start_col, end_line, end_col) = calculate_line_range(1, content);
        assert_eq!(start_line, 1);
        assert_eq!(start_col, 1);
        assert_eq!(end_line, 1);
        // 19 visible characters, so the exclusive end column is 20.
        assert_eq!(end_col, 20);
    }

    #[test]
    fn test_match_range() {
        let content = "Text <div>content</div> more";
        let tag_start = 5;
        let tag_len = 5;
        let (start_line, start_col, end_line, end_col) = calculate_match_range(1, content, tag_start, tag_len);
        assert_eq!(start_line, 1);
        assert_eq!(start_col, 6);
        assert_eq!(end_line, 1);
        assert_eq!(end_col, 11);
    }

    #[test]
    fn test_trailing_range() {
        // Build the three trailing spaces explicitly so the expected end
        // column cannot drift if whitespace in the literal is altered.
        let content = format!("Text content{}", " ".repeat(3));
        let content_end = 12;
        let (start_line, start_col, end_line, end_col) = calculate_trailing_range(1, &content, content_end);
        assert_eq!(start_line, 1);
        assert_eq!(start_col, 13);
        assert_eq!(end_line, 1);
        // 15 characters in total, so the exclusive end column is 16.
        assert_eq!(end_col, 16);
    }

    #[test]
    fn test_excess_range() {
        let content = "This line is too long for the limit";
        let limit = 20;
        let (start_line, start_col, end_line, end_col) = calculate_excess_range(1, content, limit);
        assert_eq!(start_line, 1);
        assert_eq!(start_col, 21);
        assert_eq!(end_line, 1);
        assert_eq!(end_col, 36);
    }

    #[test]
    fn test_whole_line_range() {
        let content = "Line 1\nLine 2\nLine 3";
        let line_index = LineIndex::new(content);

        // Whole-line ranges include the trailing newline where one exists.
        let range = line_index.whole_line_range(1);
        assert_eq!(range, 0..7);

        let range = line_index.whole_line_range(2);
        assert_eq!(range, 7..14);

        let range = line_index.whole_line_range(3);
        assert_eq!(range, 14..20);
    }

    #[test]
    fn test_line_content_range() {
        let content = "Line 1\nLine 2\nLine 3";
        let line_index = LineIndex::new(content);

        // Content ranges exclude the trailing newline.
        let range = line_index.line_content_range(1);
        assert_eq!(range, 0..6);

        let range = line_index.line_content_range(2);
        assert_eq!(range, 7..13);

        let range = line_index.line_content_range(3);
        assert_eq!(range, 14..20);
    }

    #[test]
    fn test_line_text_range() {
        let content = "Hello world\nAnother line";
        let line_index = LineIndex::new(content);

        let range = line_index.line_text_range(1, 1, 5);
        assert_eq!(range, 0..4);

        let range = line_index.line_text_range(2, 1, 7);
        assert_eq!(range, 12..18);

        // An end column past the end of the line clamps to the line's text.
        let range = line_index.line_text_range(1, 1, 100);
        assert_eq!(range, 0..11);
    }

    #[test]
    fn test_calculate_match_range_bounds_checking() {
        // Match start far beyond the end of the line.
        let line_content = "] not a link [";
        let (line, start_col, end_line, end_col) = calculate_match_range(121, line_content, 57, 10);
        assert_eq!(line, 121);
        assert_eq!(start_col, 15);
        assert_eq!(end_line, 121);
        assert_eq!(end_col, 15);

        // Match length running past the end of the line.
        let line_content = "short";
        let (line, start_col, end_line, end_col) = calculate_match_range(1, line_content, 2, 10);
        assert_eq!(line, 1);
        assert_eq!(start_col, 3);
        assert_eq!(end_line, 1);
        assert_eq!(end_col, 6);

        // Fully in-bounds match.
        let line_content = "normal text here";
        let (line, start_col, end_line, end_col) = calculate_match_range(5, line_content, 7, 4);
        assert_eq!(line, 5);
        assert_eq!(start_col, 8);
        assert_eq!(end_line, 5);
        assert_eq!(end_col, 12);

        // Zero-length match.
        let line_content = "test line";
        let (line, start_col, end_line, end_col) = calculate_match_range(10, line_content, 5, 0);
        assert_eq!(line, 10);
        assert_eq!(start_col, 6);
        assert_eq!(end_line, 10);
        assert_eq!(end_col, 6);
    }

    #[test]
    fn test_issue_154_korean_character_boundary() {
        let line_content = "- 2023 년 초 이후 주가 상승 +1,000% (10 배 상승) ";

        // Must not panic even if the byte offset lands inside a multi-byte character.
        let (line, start_col, end_line, end_col) = calculate_match_range(1, line_content, 19, 1);

        assert!(start_col > 0);
        assert_eq!(line, 1);
        assert_eq!(end_line, 1);
        assert!(end_col >= start_col);
    }

    #[test]
    fn test_calculate_match_range_korean() {
        let line_content = "안녕하세요";

        // Byte 3 is the start of the second character.
        let (line, start_col, end_line, end_col) = calculate_match_range(1, line_content, 3, 3);
        assert_eq!(line, 1);
        assert_eq!(start_col, 2);
        assert_eq!(end_line, 1);
        assert_eq!(end_col, 3);

        // Byte 4 is inside the second character and rounds down to byte 3.
        let (line, start_col, end_line, _end_col) = calculate_match_range(1, line_content, 4, 3);
        assert_eq!(line, 1);
        assert_eq!(start_col, 2);
        assert_eq!(end_line, 1);
    }

    #[test]
    fn test_calculate_match_range_chinese() {
        let line_content = "你好世界";
        let (line, start_col, end_line, end_col) = calculate_match_range(1, line_content, 6, 3);
        assert_eq!(line, 1);
        assert_eq!(start_col, 3);
        assert_eq!(end_line, 1);
        assert_eq!(end_col, 4);
    }

    #[test]
    fn test_calculate_match_range_japanese() {
        let line_content = "こんにちは";
        let (line, start_col, end_line, end_col) = calculate_match_range(1, line_content, 9, 3);
        assert_eq!(line, 1);
        assert_eq!(start_col, 4);
        assert_eq!(end_line, 1);
        assert_eq!(end_col, 5);
    }

    #[test]
    fn test_calculate_match_range_mixed_unicode() {
        let line_content = "Hello 世界";

        // The ASCII space at byte 5.
        let (line, start_col, end_line, end_col) = calculate_match_range(1, line_content, 5, 1);
        assert_eq!(line, 1);
        assert_eq!(start_col, 6);
        assert_eq!(end_line, 1);
        assert_eq!(end_col, 7);

        // The first CJK character at bytes 6..9.
        let (line, start_col, end_line, end_col) = calculate_match_range(1, line_content, 6, 3);
        assert_eq!(line, 1);
        assert_eq!(start_col, 7);
        assert_eq!(end_line, 1);
        assert_eq!(end_col, 8);
    }

    #[test]
    fn test_calculate_trailing_range_korean() {
        let line_content = "안녕하세요 ";
        let (line, start_col, end_line, end_col) = calculate_trailing_range(1, line_content, 15);
        assert_eq!(line, 1);
        assert!(start_col > 0);
        assert_eq!(end_line, 1);
        assert!(end_col > start_col);
    }

    #[test]
    fn test_calculate_emphasis_range_chinese() {
        let line_content = "这是**重要**的";
        let (line, start_col, end_line, end_col) = calculate_emphasis_range(1, line_content, 6, 12);
        assert_eq!(line, 1);
        assert!(start_col > 0);
        assert_eq!(end_line, 1);
        assert!(end_col > start_col);
    }

    #[test]
    fn test_line_col_to_byte_range_korean() {
        let content = "안녕하세요\nWorld";
        let line_index = LineIndex::new(content);

        let range = line_index.line_col_to_byte_range(1, 1);
        assert_eq!(range, 0..0);

        // Each Korean character here occupies three bytes.
        let range = line_index.line_col_to_byte_range(1, 2);
        assert_eq!(range, 3..3);

        let range = line_index.line_col_to_byte_range(1, 3);
        assert_eq!(range, 6..6);
    }

    #[test]
    fn test_line_col_to_byte_range_with_length_chinese() {
        let content = "你好世界\nTest";
        let line_index = LineIndex::new(content);

        let range = line_index.line_col_to_byte_range_with_length(1, 1, 2);
        assert_eq!(range, 0..6);

        let range = line_index.line_col_to_byte_range_with_length(1, 2, 1);
        assert_eq!(range, 3..6);
    }

    #[test]
    fn test_line_text_range_japanese() {
        let content = "こんにちは\nHello";
        let line_index = LineIndex::new(content);

        let range = line_index.line_text_range(1, 2, 4);
        assert_eq!(range, 3..9);
    }

    #[test]
    fn test_find_char_boundary_edge_cases() {
        let s = "안녕";

        assert_eq!(find_char_boundary(s, 0), 0);

        // Offsets inside the first character round down to 0.
        assert_eq!(find_char_boundary(s, 1), 0);
        assert_eq!(find_char_boundary(s, 2), 0);

        assert_eq!(find_char_boundary(s, 3), 3);

        // Offset inside the second character rounds down to 3.
        assert_eq!(find_char_boundary(s, 4), 3);

        // Out-of-range offsets clamp to the string length.
        assert_eq!(find_char_boundary(s, 100), s.len());
    }

    #[test]
    fn test_byte_to_char_count_unicode() {
        let s = "안녕하세요";

        assert_eq!(byte_to_char_count(s, 0), 1);
        assert_eq!(byte_to_char_count(s, 3), 2);
        assert_eq!(byte_to_char_count(s, 6), 3);
        assert_eq!(byte_to_char_count(s, 9), 4);
        assert_eq!(byte_to_char_count(s, 12), 5);
        assert_eq!(byte_to_char_count(s, 15), 6);
    }

    #[test]
    fn test_all_range_functions_with_emoji() {
        let line_content = "Hello 🎉 World 🌍";

        let (line, start_col, end_line, end_col) = calculate_match_range(1, line_content, 6, 4);
        assert_eq!(line, 1);
        assert!(start_col > 0);
        assert_eq!(end_line, 1);
        assert!(end_col > start_col);

        let (line, start_col, end_line, end_col) = calculate_trailing_range(1, line_content, 12);
        assert_eq!(line, 1);
        assert!(start_col > 0);
        assert_eq!(end_line, 1);
        assert!(end_col > start_col);

        let (line, start_col, end_line, end_col) = calculate_emphasis_range(1, line_content, 0, 5);
        assert_eq!(line, 1);
        assert_eq!(start_col, 1);
        assert_eq!(end_line, 1);
        assert!(end_col > start_col);
    }
}