1use std::collections::HashSet;
4use std::ops::Range;
5
/// Snaps `byte_idx` down to the nearest UTF-8 character boundary of `s`.
///
/// Indices at or past the end of the string are clamped to `s.len()`. An index that
/// falls inside a multi-byte character is moved back to the first byte of that
/// character; an index that is already on a boundary is returned unchanged.
fn find_char_boundary(s: &str, byte_idx: usize) -> usize {
    let total = s.len();
    if byte_idx >= total {
        return total;
    }

    // Scan backwards from the requested index. Offset 0 is always a boundary, so the
    // search cannot come up empty; the fallback only satisfies the type checker.
    (0..=byte_idx)
        .rev()
        .find(|&candidate| s.is_char_boundary(candidate))
        .unwrap_or(0)
}
30
31fn byte_to_char_count(s: &str, byte_idx: usize) -> usize {
34 let safe_byte_idx = find_char_boundary(s, byte_idx);
35 s[..safe_byte_idx].chars().count() + 1 }
37
/// Line-offset table over a borrowed document, plus the set of lines that belong to
/// Markdown code blocks.
///
/// Indexing conventions differ between method families:
/// * the `*_range` methods and [`LineIndex::get_line_start_byte`] take **1-based** line
///   (and column) numbers, with columns counted in characters, not bytes;
/// * [`LineIndex::is_code_block`], [`LineIndex::is_code_fence`] and
///   [`LineIndex::is_tilde_code_block`] take **0-based** line indices.
#[derive(Debug)]
pub struct LineIndex<'a> {
    /// Byte offset of the first byte of every line. Always starts with `0`; one extra
    /// entry is pushed after every `\n`, so a trailing newline yields a final entry
    /// equal to `content.len()`.
    line_starts: Vec<usize>,
    /// The indexed document.
    content: &'a str,
    /// 0-based indices of lines inside fenced or indented code blocks.
    code_block_lines: Option<HashSet<usize>>,
}

impl<'a> LineIndex<'a> {
    /// Builds the line table for `content` and precomputes its code-block lines.
    pub fn new(content: &'a str) -> Self {
        let line_starts = std::iter::once(0)
            .chain(content.match_indices('\n').map(|(newline, _)| newline + 1))
            .collect();

        let mut index = Self {
            line_starts,
            content,
            code_block_lines: None,
        };
        index.compute_code_block_lines();
        index
    }

    /// Returns the text of the 0-based line `idx` without its terminator (`\n` or
    /// `\r\n`), or `None` when the document has no such line.
    ///
    /// Mirrors `content.lines().nth(idx)` but runs in constant time by slicing with
    /// the precomputed `line_starts` table.
    fn line_at(&self, idx: usize) -> Option<&'a str> {
        let start = *self.line_starts.get(idx)?;
        if start >= self.content.len() {
            // Entry created by a trailing newline (or an empty document): `lines()`
            // does not report a line here either.
            return None;
        }
        let line = match self.line_starts.get(idx + 1) {
            Some(&next_start) => {
                // `next_start - 1` is the position of the terminating '\n'.
                let without_lf = &self.content[start..next_start - 1];
                without_lf.strip_suffix('\r').unwrap_or(without_lf)
            }
            // Last line without a terminator: kept verbatim, including a bare '\r'.
            None => &self.content[start..],
        };
        Some(line)
    }

    /// Text of the 0-based line `idx`, or `""` when it does not exist.
    fn line_or_empty(&self, idx: usize) -> &'a str {
        self.line_at(idx).unwrap_or("")
    }

    /// Byte offset where the 0-based line `idx` starts, or `content.len()` when the
    /// line is out of range.
    fn line_start_or_end(&self, idx: usize) -> usize {
        self.line_starts
            .get(idx)
            .copied()
            .unwrap_or(self.content.len())
    }

    /// Byte offset inside `line` of the 0-based character column `char_col`; columns at
    /// or past the end of the line map to `line.len()`.
    fn char_col_to_byte(line: &str, char_col: usize) -> usize {
        line.char_indices()
            .nth(char_col)
            .map_or(line.len(), |(byte, _)| byte)
    }

    /// Converts a 1-based `line`/`column` (column in characters) into an empty byte
    /// range positioned at that character.
    ///
    /// A `line` or `column` of `0` is treated like `1`. Columns past the end of the
    /// line clamp to the end of the line's text; lines past the end of the document
    /// clamp to `content.len()`.
    pub fn line_col_to_byte_range(&self, line: usize, column: usize) -> Range<usize> {
        let line_idx = line.saturating_sub(1);
        let offset = Self::char_col_to_byte(self.line_or_empty(line_idx), column.saturating_sub(1));
        let start = self.line_start_or_end(line_idx) + offset;
        start..start
    }

    /// Converts a 1-based `line`/`column` plus a `length` in characters into a byte
    /// range. Both ends are clamped to the end of the line's text.
    pub fn line_col_to_byte_range_with_length(&self, line: usize, column: usize, length: usize) -> Range<usize> {
        let line_idx = line.saturating_sub(1);
        let line_start = self.line_start_or_end(line_idx);
        let current_line = self.line_or_empty(line_idx);

        let start_char = column.saturating_sub(1);
        // Saturate so an oversized `length` clamps to the line end instead of overflowing.
        let end_char = start_char.saturating_add(length);

        let start_byte = Self::char_col_to_byte(current_line, start_char);
        let end_byte = Self::char_col_to_byte(current_line, end_char);
        (line_start + start_byte)..(line_start + end_byte)
    }

    /// Byte range of the 1-based `line` including its line terminator, i.e. up to the
    /// start of the next line (or the end of the document for the last line).
    pub fn whole_line_range(&self, line: usize) -> Range<usize> {
        let line_idx = line.saturating_sub(1);
        let start = self.line_start_or_end(line_idx);
        let end = self.line_start_or_end(line_idx + 1);
        start..end
    }

    /// Byte range between two 1-based character columns of the 1-based `line`.
    ///
    /// Both columns are clamped to the line's text. If `end_col` precedes `start_col`
    /// the result is an empty range at `start_col`.
    pub fn line_text_range(&self, line: usize, start_col: usize, end_col: usize) -> Range<usize> {
        let line_idx = line.saturating_sub(1);
        let line_start = self.line_start_or_end(line_idx);
        let current_line = self.line_or_empty(line_idx);

        let start_byte = Self::char_col_to_byte(current_line, start_col.saturating_sub(1));
        let end_byte = Self::char_col_to_byte(current_line, end_col.saturating_sub(1));

        (line_start + start_byte)..(line_start + end_byte.max(start_byte))
    }

    /// Byte range of the 1-based `line` excluding its line terminator.
    pub fn line_content_range(&self, line: usize) -> Range<usize> {
        let line_idx = line.saturating_sub(1);
        let line_start = self.line_start_or_end(line_idx);
        line_start..(line_start + self.line_or_empty(line_idx).len())
    }

    /// Byte offset at which the 1-based line `line_num` starts.
    ///
    /// Returns `None` for line `0` and for lines beyond the line table.
    pub fn get_line_start_byte(&self, line_num: usize) -> Option<usize> {
        let line_idx = line_num.checked_sub(1)?;
        self.line_starts.get(line_idx).copied()
    }

    /// Whether the **0-based** `line` lies inside a fenced or indented code block.
    ///
    /// Falls back to [`LineIndex::is_code_fence`] if the code-block set has not been
    /// computed (it always is after [`LineIndex::new`]).
    pub fn is_code_block(&self, line: usize) -> bool {
        match &self.code_block_lines {
            Some(code_block_lines) => code_block_lines.contains(&line),
            None => self.is_code_fence(line),
        }
    }

    /// Whether the **0-based** `line`, ignoring surrounding whitespace, starts with a
    /// backtick or tilde fence marker (three backticks or three tildes).
    pub fn is_code_fence(&self, line: usize) -> bool {
        self.line_at(line).is_some_and(|l| {
            let trimmed = l.trim();
            trimmed.starts_with("```") || trimmed.starts_with("~~~")
        })
    }

    /// Whether the **0-based** `line`, ignoring surrounding whitespace, starts with `~~~`.
    pub fn is_tilde_code_block(&self, line: usize) -> bool {
        self.line_at(line).is_some_and(|l| l.trim().starts_with("~~~"))
    }

    /// Returns the indexed document.
    pub fn get_content(&self) -> &str {
        self.content
    }

    /// Scans the document once and records the 0-based index of every line that is
    /// part of a code block: indented lines, fence lines, and everything between an
    /// opening fence and its matching closing fence.
    fn compute_code_block_lines(&mut self) {
        let content = self.content;
        let mut code_block_lines = HashSet::new();

        // State of the currently open fenced block; only meaningful while `in_block`.
        let mut in_block = false;
        let mut active_fence_type = ' ';
        let mut block_indent = 0;
        let mut block_fence_length = 0;
        // Inside a block whose info string is exactly "markdown", the first nested
        // backtick fence pair is tracked so its closing fence does not end the outer block.
        let mut in_markdown_block = false;
        let mut nested_fence_start: Option<usize> = None;
        let mut nested_fence_end: Option<usize> = None;

        for (i, line) in content.lines().enumerate() {
            // Indented code blocks need four leading spaces or a tab (CommonMark);
            // such lines are recorded and never take part in fence matching.
            if line.starts_with("    ") || line.starts_with('\t') {
                code_block_lines.insert(i);
                continue;
            }

            let trimmed = line.trim();
            // Leading whitespace only: trailing whitespace must not count as
            // indentation, otherwise a closing fence followed by spaces is rejected.
            let indent = line.len() - line.trim_start().len();

            if !in_block {
                let opening_marker = if trimmed.starts_with("```") {
                    Some('`')
                } else if trimmed.starts_with("~~~") {
                    Some('~')
                } else {
                    None
                };

                if let Some(marker) = opening_marker {
                    // Fence characters are ASCII, so the char count is also a byte offset.
                    let count = trimmed.chars().take_while(|&c| c == marker).count();
                    let info_string = trimmed[count..].trim();

                    in_block = true;
                    active_fence_type = marker;
                    block_indent = indent;
                    block_fence_length = count;
                    in_markdown_block = info_string == "markdown";
                    nested_fence_start = None;
                    nested_fence_end = None;

                    code_block_lines.insert(i);
                }
                continue;
            }

            code_block_lines.insert(i);

            if in_markdown_block && trimmed.starts_with("```") {
                let after_backticks = trimmed.trim_start_matches('`').trim();
                if nested_fence_start.is_none() {
                    // A nested block only starts at a fence that carries an info string.
                    if !after_backticks.is_empty() {
                        nested_fence_start = Some(i);
                    }
                } else if nested_fence_end.is_none() && after_backticks.is_empty() {
                    nested_fence_end = Some(i);
                }
            }

            // The opening fence has at least three markers, so requiring
            // `fence_len >= block_fence_length` also guarantees a real fence.
            let fence_len = trimmed.chars().take_while(|&c| c == active_fence_type).count();
            let is_valid_closing_fence = fence_len >= block_fence_length
                && trimmed[fence_len..].trim().is_empty()
                && indent <= block_indent;
            let is_nested_closing = nested_fence_end == Some(i);

            if is_valid_closing_fence && !is_nested_closing {
                in_block = false;
                in_markdown_block = false;
            }
        }

        self.code_block_lines = Some(code_block_lines);
    }
}
335
/// Builds a `(start_line, start_col, end_line, end_col)` tuple for a span that stays on
/// one line: it begins at `start_col` and ends `length` columns later.
pub fn calculate_single_line_range(line: usize, start_col: usize, length: usize) -> (usize, usize, usize, usize) {
    let end_col = start_col + length;
    (line, start_col, line, end_col)
}
340
/// Builds a `(start_line, start_col, end_line, end_col)` tuple covering a whole line
/// without its trailing whitespace.
///
/// The range starts at column 1 and ends one past the last non-whitespace character.
/// Columns are counted in characters (not bytes), matching the other range helpers in
/// this file, so multi-byte text yields the correct end column.
pub fn calculate_line_range(line: usize, line_content: &str) -> (usize, usize, usize, usize) {
    let visible_chars = line_content.trim_end().chars().count();
    (line, 1, line, visible_chars + 1)
}
346
347pub fn calculate_match_range(
353 line: usize,
354 line_content: &str,
355 match_start: usize,
356 match_len: usize,
357) -> (usize, usize, usize, usize) {
358 let line_len = line_content.len();
360 if match_start > line_len {
361 let char_count = line_content.chars().count();
363 return (line, char_count + 1, line, char_count + 1);
364 }
365
366 let safe_match_start = find_char_boundary(line_content, match_start);
368 let safe_match_end_byte = find_char_boundary(line_content, (match_start + match_len).min(line_len));
369
370 let char_start = byte_to_char_count(line_content, safe_match_start);
372 let char_len = if safe_match_end_byte > safe_match_start {
373 line_content[safe_match_start..safe_match_end_byte].chars().count()
375 } else {
376 0
377 };
378 (line, char_start, line, char_start + char_len)
379}
380
381pub fn calculate_trailing_range(line: usize, line_content: &str, content_end: usize) -> (usize, usize, usize, usize) {
387 let safe_content_end = find_char_boundary(line_content, content_end);
389 let char_content_end = byte_to_char_count(line_content, safe_content_end);
390 let line_char_len = line_content.chars().count() + 1;
391 (line, char_content_end, line, line_char_len)
392}
393
/// Computes the range tuple for a heading line.
///
/// Forwards `line` and `line_content` unchanged to `calculate_line_range` and returns
/// its `(start_line, start_col, end_line, end_col)` tuple as-is.
pub fn calculate_heading_range(line: usize, line_content: &str) -> (usize, usize, usize, usize) {
    calculate_line_range(line, line_content)
}
398
399pub fn calculate_emphasis_range(
405 line: usize,
406 line_content: &str,
407 start_pos: usize,
408 end_pos: usize,
409) -> (usize, usize, usize, usize) {
410 let safe_start_pos = find_char_boundary(line_content, start_pos);
412 let safe_end_pos = find_char_boundary(line_content, end_pos);
413 let char_start = byte_to_char_count(line_content, safe_start_pos);
414 let char_end = byte_to_char_count(line_content, safe_end_pos);
415 (line, char_start, line, char_end)
416}
417
/// Computes the range tuple for an HTML tag found in `line_content`.
///
/// Forwards its arguments unchanged to `calculate_match_range`, passing `tag_start` as
/// the match start and `tag_len` as the match length, and returns that function's
/// `(start_line, start_col, end_line, end_col)` tuple as-is.
pub fn calculate_html_tag_range(
    line: usize,
    line_content: &str,
    tag_start: usize,
    tag_len: usize,
) -> (usize, usize, usize, usize) {
    calculate_match_range(line, line_content, tag_start, tag_len)
}
427
/// Computes the range tuple for a URL found in `line_content`.
///
/// Forwards its arguments unchanged to `calculate_match_range`, passing `url_start` as
/// the match start and `url_len` as the match length, and returns that function's
/// `(start_line, start_col, end_line, end_col)` tuple as-is.
pub fn calculate_url_range(
    line: usize,
    line_content: &str,
    url_start: usize,
    url_len: usize,
) -> (usize, usize, usize, usize) {
    calculate_match_range(line, line_content, url_start, url_len)
}
437
/// Computes the range tuple for a list marker found in `line_content`.
///
/// Forwards its arguments unchanged to `calculate_match_range`, passing `marker_start`
/// as the match start and `marker_len` as the match length, and returns that function's
/// `(start_line, start_col, end_line, end_col)` tuple as-is.
pub fn calculate_list_marker_range(
    line: usize,
    line_content: &str,
    marker_start: usize,
    marker_len: usize,
) -> (usize, usize, usize, usize) {
    calculate_match_range(line, line_content, marker_start, marker_len)
}
447
/// Builds a `(start_line, start_col, end_line, end_col)` tuple for the part of a line
/// that extends beyond `limit` characters.
///
/// The range starts one column after `limit` (clamped to the line's character count)
/// and ends one past the last character, so a line within the limit yields an empty
/// range at its end.
pub fn calculate_excess_range(line: usize, line_content: &str, limit: usize) -> (usize, usize, usize, usize) {
    let total_chars = line_content.chars().count();
    let start_col = limit.min(total_chars) + 1;
    let end_col = total_chars + 1;
    (line, start_col, line, end_col)
}
454
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_single_line_range() {
        let (start_line, start_col, end_line, end_col) = calculate_single_line_range(5, 10, 3);
        assert_eq!(start_line, 5);
        assert_eq!(start_col, 10);
        assert_eq!(end_line, 5);
        assert_eq!(end_col, 13);
    }

    #[test]
    fn test_line_range() {
        // Trailing whitespace must not be part of the reported range.
        let content = "# This is a heading ";
        let (start_line, start_col, end_line, end_col) = calculate_line_range(1, content);
        assert_eq!(start_line, 1);
        assert_eq!(start_col, 1);
        assert_eq!(end_line, 1);
        assert_eq!(end_col, 20);
    }

    #[test]
    fn test_match_range() {
        let content = "Text <div>content</div> more";
        let tag_start = 5;
        let tag_len = 5;
        let (start_line, start_col, end_line, end_col) = calculate_match_range(1, content, tag_start, tag_len);
        assert_eq!(start_line, 1);
        assert_eq!(start_col, 6);
        assert_eq!(end_line, 1);
        assert_eq!(end_col, 11);
    }

    #[test]
    fn test_trailing_range() {
        // 12 bytes of text followed by exactly three trailing spaces (15 characters in
        // total). The padding is spelled out so the fixture cannot silently lose spaces.
        let content = format!("Text content{}", " ".repeat(3));
        let content_end = 12;
        let (start_line, start_col, end_line, end_col) = calculate_trailing_range(1, &content, content_end);
        assert_eq!(start_line, 1);
        assert_eq!(start_col, 13);
        assert_eq!(end_line, 1);
        assert_eq!(end_col, 16);
    }

    #[test]
    fn test_excess_range() {
        let content = "This line is too long for the limit";
        let limit = 20;
        let (start_line, start_col, end_line, end_col) = calculate_excess_range(1, content, limit);
        assert_eq!(start_line, 1);
        assert_eq!(start_col, 21);
        assert_eq!(end_line, 1);
        assert_eq!(end_col, 36);
    }

    #[test]
    fn test_whole_line_range() {
        let content = "Line 1\nLine 2\nLine 3";
        let line_index = LineIndex::new(content);

        // Includes the newline for every line except the last.
        let range = line_index.whole_line_range(1);
        assert_eq!(range, 0..7);

        let range = line_index.whole_line_range(2);
        assert_eq!(range, 7..14);

        let range = line_index.whole_line_range(3);
        assert_eq!(range, 14..20);
    }

    #[test]
    fn test_line_content_range() {
        let content = "Line 1\nLine 2\nLine 3";
        let line_index = LineIndex::new(content);

        // Excludes the newline.
        let range = line_index.line_content_range(1);
        assert_eq!(range, 0..6);

        let range = line_index.line_content_range(2);
        assert_eq!(range, 7..13);

        let range = line_index.line_content_range(3);
        assert_eq!(range, 14..20);
    }

    #[test]
    fn test_line_text_range() {
        let content = "Hello world\nAnother line";
        let line_index = LineIndex::new(content);

        let range = line_index.line_text_range(1, 1, 5);
        assert_eq!(range, 0..4);

        let range = line_index.line_text_range(2, 1, 7);
        assert_eq!(range, 12..18);

        // An end column past the line is clamped to the line's text.
        let range = line_index.line_text_range(1, 1, 100);
        assert_eq!(range, 0..11);
    }

    #[test]
    fn test_calculate_match_range_bounds_checking() {
        // Match start far beyond the end of the line.
        let line_content = "] not a link [";
        let (line, start_col, end_line, end_col) = calculate_match_range(121, line_content, 57, 10);
        assert_eq!(line, 121);
        assert_eq!(start_col, 15);
        assert_eq!(end_line, 121);
        assert_eq!(end_col, 15);

        // Match length running past the end of the line.
        let line_content = "short";
        let (line, start_col, end_line, end_col) = calculate_match_range(1, line_content, 2, 10);
        assert_eq!(line, 1);
        assert_eq!(start_col, 3);
        assert_eq!(end_line, 1);
        assert_eq!(end_col, 6);

        // Match fully inside the line.
        let line_content = "normal text here";
        let (line, start_col, end_line, end_col) = calculate_match_range(5, line_content, 7, 4);
        assert_eq!(line, 5);
        assert_eq!(start_col, 8);
        assert_eq!(end_line, 5);
        assert_eq!(end_col, 12);

        // Zero-length match.
        let line_content = "test line";
        let (line, start_col, end_line, end_col) = calculate_match_range(10, line_content, 5, 0);
        assert_eq!(line, 10);
        assert_eq!(start_col, 6);
        assert_eq!(end_line, 10);
        assert_eq!(end_col, 6);
    }

    #[test]
    fn test_issue_154_korean_character_boundary() {
        let line_content = "- 2023 년 초 이후 주가 상승 +1,000% (10 배 상승) ";

        // Byte 19 falls inside a multi-byte character; this must not panic.
        let (line, start_col, end_line, end_col) = calculate_match_range(1, line_content, 19, 1);

        assert!(start_col > 0);
        assert_eq!(line, 1);
        assert_eq!(end_line, 1);
        assert!(end_col >= start_col);
    }

    #[test]
    fn test_calculate_match_range_korean() {
        let line_content = "안녕하세요";

        // Byte 3 is the start of the second character.
        let (line, start_col, end_line, end_col) = calculate_match_range(1, line_content, 3, 3);
        assert_eq!(line, 1);
        assert_eq!(start_col, 2);
        assert_eq!(end_line, 1);
        assert_eq!(end_col, 3);

        // Byte 4 is inside the second character and snaps back to byte 3.
        let (line, start_col, end_line, _end_col) = calculate_match_range(1, line_content, 4, 3);
        assert_eq!(line, 1);
        assert_eq!(start_col, 2);
        assert_eq!(end_line, 1);
    }

    #[test]
    fn test_calculate_match_range_chinese() {
        let line_content = "你好世界";
        let (line, start_col, end_line, end_col) = calculate_match_range(1, line_content, 6, 3);
        assert_eq!(line, 1);
        assert_eq!(start_col, 3);
        assert_eq!(end_line, 1);
        assert_eq!(end_col, 4);
    }

    #[test]
    fn test_calculate_match_range_japanese() {
        let line_content = "こんにちは";
        let (line, start_col, end_line, end_col) = calculate_match_range(1, line_content, 9, 3);
        assert_eq!(line, 1);
        assert_eq!(start_col, 4);
        assert_eq!(end_line, 1);
        assert_eq!(end_col, 5);
    }

    #[test]
    fn test_calculate_match_range_mixed_unicode() {
        let line_content = "Hello 世界";

        // The ASCII space at byte 5.
        let (line, start_col, end_line, end_col) = calculate_match_range(1, line_content, 5, 1);
        assert_eq!(line, 1);
        assert_eq!(start_col, 6);
        assert_eq!(end_line, 1);
        assert_eq!(end_col, 7);

        // The first three-byte character at byte 6.
        let (line, start_col, end_line, end_col) = calculate_match_range(1, line_content, 6, 3);
        assert_eq!(line, 1);
        assert_eq!(start_col, 7);
        assert_eq!(end_line, 1);
        assert_eq!(end_col, 8);
    }

    #[test]
    fn test_calculate_trailing_range_korean() {
        let line_content = "안녕하세요 ";
        let (line, start_col, end_line, end_col) = calculate_trailing_range(1, line_content, 15);
        assert_eq!(line, 1);
        assert!(start_col > 0);
        assert_eq!(end_line, 1);
        assert!(end_col > start_col);
    }

    #[test]
    fn test_calculate_emphasis_range_chinese() {
        let line_content = "这是**重要**的";
        let (line, start_col, end_line, end_col) = calculate_emphasis_range(1, line_content, 6, 12);
        assert_eq!(line, 1);
        assert!(start_col > 0);
        assert_eq!(end_line, 1);
        assert!(end_col > start_col);
    }

    #[test]
    fn test_line_col_to_byte_range_korean() {
        let content = "안녕하세요\nWorld";
        let line_index = LineIndex::new(content);

        let range = line_index.line_col_to_byte_range(1, 1);
        assert_eq!(range, 0..0);

        let range = line_index.line_col_to_byte_range(1, 2);
        assert_eq!(range, 3..3);

        let range = line_index.line_col_to_byte_range(1, 3);
        assert_eq!(range, 6..6);
    }

    #[test]
    fn test_line_col_to_byte_range_with_length_chinese() {
        let content = "你好世界\nTest";
        let line_index = LineIndex::new(content);

        let range = line_index.line_col_to_byte_range_with_length(1, 1, 2);
        assert_eq!(range, 0..6);

        let range = line_index.line_col_to_byte_range_with_length(1, 2, 1);
        assert_eq!(range, 3..6);
    }

    #[test]
    fn test_line_text_range_japanese() {
        let content = "こんにちは\nHello";
        let line_index = LineIndex::new(content);

        let range = line_index.line_text_range(1, 2, 4);
        assert_eq!(range, 3..9);
    }

    #[test]
    fn test_find_char_boundary_edge_cases() {
        let s = "안녕";

        assert_eq!(find_char_boundary(s, 0), 0);
        assert_eq!(find_char_boundary(s, 1), 0);
        assert_eq!(find_char_boundary(s, 2), 0);
        assert_eq!(find_char_boundary(s, 3), 3);
        assert_eq!(find_char_boundary(s, 4), 3);
        assert_eq!(find_char_boundary(s, 100), s.len());
    }

    #[test]
    fn test_byte_to_char_count_unicode() {
        let s = "안녕하세요";

        assert_eq!(byte_to_char_count(s, 0), 1);
        assert_eq!(byte_to_char_count(s, 3), 2);
        assert_eq!(byte_to_char_count(s, 6), 3);
        assert_eq!(byte_to_char_count(s, 9), 4);
        assert_eq!(byte_to_char_count(s, 12), 5);
        assert_eq!(byte_to_char_count(s, 15), 6);
    }

    #[test]
    fn test_all_range_functions_with_emoji() {
        let line_content = "Hello 🎉 World 🌍";

        let (line, start_col, end_line, end_col) = calculate_match_range(1, line_content, 6, 4);
        assert_eq!(line, 1);
        assert!(start_col > 0);
        assert_eq!(end_line, 1);
        assert!(end_col > start_col);

        let (line, start_col, end_line, end_col) = calculate_trailing_range(1, line_content, 12);
        assert_eq!(line, 1);
        assert!(start_col > 0);
        assert_eq!(end_line, 1);
        assert!(end_col > start_col);

        let (line, start_col, end_line, end_col) = calculate_emphasis_range(1, line_content, 0, 5);
        assert_eq!(line, 1);
        assert_eq!(start_col, 1);
        assert_eq!(end_line, 1);
        assert!(end_col > start_col);
    }
}