1use crate::utils::element_cache::ElementCache;
4use std::collections::HashSet;
5use std::ops::Range;
6
/// Returns the largest UTF-8 character boundary in `s` that is `<= byte_idx`.
///
/// Indices at or past the end of the string are clamped to `s.len()`. An index
/// that falls inside a multi-byte character is moved back to the first byte of
/// that character.
fn find_char_boundary(s: &str, byte_idx: usize) -> usize {
    let len = s.len();
    if byte_idx >= len {
        return len;
    }

    // Offset 0 is always a boundary, so the search cannot come up empty.
    (0..=byte_idx)
        .rev()
        .find(|&candidate| s.is_char_boundary(candidate))
        .unwrap_or(0)
}
31
32fn byte_to_char_count(s: &str, byte_idx: usize) -> usize {
35 let safe_byte_idx = find_char_boundary(s, byte_idx);
36 s[..safe_byte_idx].chars().count() + 1 }
38
39#[derive(Debug)]
40pub struct LineIndex<'a> {
41 line_starts: Vec<usize>,
42 content: &'a str,
43 code_block_lines: Option<HashSet<usize>>,
44}
45
46impl<'a> LineIndex<'a> {
47 pub fn new(content: &'a str) -> Self {
48 let mut line_starts = vec![0];
49 let mut pos = 0;
50
51 for c in content.chars() {
52 pos += c.len_utf8();
53 if c == '\n' {
54 line_starts.push(pos);
55 }
56 }
57
58 let mut index = Self {
59 line_starts,
60 content,
61 code_block_lines: None,
62 };
63
64 index.compute_code_block_lines();
66
67 index
68 }
69
70 pub fn line_col_to_byte_range(&self, line: usize, column: usize) -> Range<usize> {
71 let line = line.saturating_sub(1);
72 let line_start = *self.line_starts.get(line).unwrap_or(&self.content.len());
73
74 let current_line = self.content.lines().nth(line).unwrap_or("");
75 let char_col = column.saturating_sub(1);
77 let char_count = current_line.chars().count();
78 let safe_char_col = char_col.min(char_count);
79
80 let byte_offset = current_line
82 .char_indices()
83 .nth(safe_char_col)
84 .map(|(idx, _)| idx)
85 .unwrap_or(current_line.len());
86
87 let start = line_start + byte_offset;
88 start..start
89 }
90
91 pub fn line_col_to_byte_range_with_length(&self, line: usize, column: usize, length: usize) -> Range<usize> {
98 let line = line.saturating_sub(1);
99 let line_start = *self.line_starts.get(line).unwrap_or(&self.content.len());
100 let line_end = self.line_starts.get(line + 1).copied().unwrap_or(self.content.len());
101 let mut current_line = &self.content[line_start..line_end];
102 if let Some(stripped) = current_line.strip_suffix('\n') {
103 current_line = stripped.strip_suffix('\r').unwrap_or(stripped);
104 }
105 if current_line.is_ascii() {
106 let line_len = current_line.len();
107 let start_byte = column.saturating_sub(1).min(line_len);
108 let end_byte = start_byte.saturating_add(length).min(line_len);
109 let start = line_start + start_byte;
110 let end = line_start + end_byte;
111 return start..end;
112 }
113 let char_col = column.saturating_sub(1);
115 let char_count = current_line.chars().count();
116 let safe_char_col = char_col.min(char_count);
117
118 let mut char_indices = current_line.char_indices();
120 let start_byte = char_indices
121 .nth(safe_char_col)
122 .map(|(idx, _)| idx)
123 .unwrap_or(current_line.len());
124
125 let end_char_col = (safe_char_col + length).min(char_count);
127 let end_byte = current_line
128 .char_indices()
129 .nth(end_char_col)
130 .map(|(idx, _)| idx)
131 .unwrap_or(current_line.len());
132
133 let start = line_start + start_byte;
134 let end = line_start + end_byte;
135 start..end
136 }
137
138 pub fn whole_line_range(&self, line: usize) -> Range<usize> {
141 let line_idx = line.saturating_sub(1);
142 let start = *self.line_starts.get(line_idx).unwrap_or(&self.content.len());
143 let end = self
144 .line_starts
145 .get(line_idx + 1)
146 .copied()
147 .unwrap_or(self.content.len());
148 start..end
149 }
150
151 pub fn multi_line_range(&self, start_line: usize, end_line: usize) -> Range<usize> {
154 let start_idx = start_line.saturating_sub(1);
155 let end_idx = end_line.saturating_sub(1);
156
157 let start = *self.line_starts.get(start_idx).unwrap_or(&self.content.len());
158 let end = self.line_starts.get(end_idx + 1).copied().unwrap_or(self.content.len());
159 start..end
160 }
161
162 pub fn line_text_range(&self, line: usize, start_col: usize, end_col: usize) -> Range<usize> {
169 let line_idx = line.saturating_sub(1);
170 let line_start = *self.line_starts.get(line_idx).unwrap_or(&self.content.len());
171
172 let current_line = self.content.lines().nth(line_idx).unwrap_or("");
174 let char_count = current_line.chars().count();
175
176 let start_char_col = start_col.saturating_sub(1).min(char_count);
178 let end_char_col = end_col.saturating_sub(1).min(char_count);
179
180 let mut char_indices = current_line.char_indices();
181 let start_byte = char_indices
182 .nth(start_char_col)
183 .map(|(idx, _)| idx)
184 .unwrap_or(current_line.len());
185
186 let end_byte = current_line
187 .char_indices()
188 .nth(end_char_col)
189 .map(|(idx, _)| idx)
190 .unwrap_or(current_line.len());
191
192 let start = line_start + start_byte;
193 let end = line_start + end_byte.max(start_byte);
194 start..end
195 }
196
197 pub fn line_content_range(&self, line: usize) -> Range<usize> {
200 let line_idx = line.saturating_sub(1);
201 let line_start = *self.line_starts.get(line_idx).unwrap_or(&self.content.len());
202
203 let current_line = self.content.lines().nth(line_idx).unwrap_or("");
204 let line_end = line_start + current_line.len();
205 line_start..line_end
206 }
207
208 pub fn get_line_start_byte(&self, line_num: usize) -> Option<usize> {
210 if line_num == 0 {
211 return None; }
213 self.line_starts.get(line_num - 1).cloned()
215 }
216
217 pub fn is_code_block(&self, line: usize) -> bool {
219 if let Some(ref code_block_lines) = self.code_block_lines {
220 code_block_lines.contains(&line)
221 } else {
222 self.is_code_fence(line)
224 }
225 }
226
227 pub fn is_code_fence(&self, line: usize) -> bool {
229 self.content.lines().nth(line).is_some_and(|l| {
230 let trimmed = l.trim();
231 trimmed.starts_with("```") || trimmed.starts_with("~~~")
232 })
233 }
234
235 pub fn is_tilde_code_block(&self, line: usize) -> bool {
237 self.content
238 .lines()
239 .nth(line)
240 .is_some_and(|l| l.trim().starts_with("~~~"))
241 }
242
243 pub fn get_content(&self) -> &str {
245 self.content
246 }
247
248 fn compute_code_block_lines(&mut self) {
250 let mut code_block_lines = HashSet::new();
251 let lines: Vec<&str> = self.content.lines().collect();
252
253 let mut in_block = false;
255 let mut active_fence_type = ' '; let mut block_indent = 0;
257 let mut block_fence_length = 0;
258 let mut in_markdown_block = false;
259 let mut nested_fence_start = None;
260 let mut nested_fence_end = None;
261
262 for (i, line) in lines.iter().enumerate() {
264 let trimmed = line.trim();
265 let indent = line.len() - trimmed.len();
266
267 if ElementCache::calculate_indentation_width_default(line) >= 4 {
269 code_block_lines.insert(i);
270 continue; }
272
273 if !in_block {
275 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
277 let char_type = if trimmed.starts_with("```") { '`' } else { '~' };
278 let count = trimmed.chars().take_while(|&c| c == char_type).count();
279 let info_string = if trimmed.len() > count {
280 trimmed[count..].trim()
281 } else {
282 ""
283 };
284
285 in_block = true;
287 active_fence_type = char_type;
288 block_indent = indent;
289 block_fence_length = count;
290 in_markdown_block = info_string == "markdown";
291 nested_fence_start = None;
292 nested_fence_end = None;
293
294 code_block_lines.insert(i);
295 }
296 } else {
297 code_block_lines.insert(i);
299
300 if in_markdown_block && nested_fence_start.is_none() && trimmed.starts_with("```") {
302 let count = trimmed.chars().take_while(|&c| c == '`').count();
304 let remaining = if trimmed.len() > count {
305 trimmed[count..].trim()
306 } else {
307 ""
308 };
309
310 if !remaining.is_empty() {
311 nested_fence_start = Some(i);
312 }
313 }
314
315 if in_markdown_block
317 && nested_fence_start.is_some()
318 && nested_fence_end.is_none()
319 && trimmed.starts_with("```")
320 && trimmed.trim_start_matches('`').trim().is_empty()
321 {
322 nested_fence_end = Some(i);
323 }
324
325 if trimmed.starts_with(&active_fence_type.to_string().repeat(3)) {
327 let count = trimmed.chars().take_while(|&c| c == active_fence_type).count();
328 let remaining = if trimmed.len() > count {
329 trimmed[count..].trim()
330 } else {
331 ""
332 };
333
334 let is_valid_closing_fence =
340 count >= block_fence_length && remaining.is_empty() && indent <= block_indent;
341
342 let is_nested_closing = nested_fence_end.is_some() && i == nested_fence_end.unwrap();
345
346 if is_valid_closing_fence && !is_nested_closing {
348 in_block = false;
349 in_markdown_block = false;
350 }
351 }
352 }
353 }
354
355 self.code_block_lines = Some(code_block_lines);
356 }
357}
358
/// Builds a `(start_line, start_col, end_line, end_col)` tuple for a span that
/// stays on one line, beginning at `start_col` and covering `length` columns.
pub fn calculate_single_line_range(line: usize, start_col: usize, length: usize) -> (usize, usize, usize, usize) {
    let end_col = start_col + length;
    (line, start_col, line, end_col)
}
363
/// Builds a `(start_line, start_col, end_line, end_col)` tuple covering
/// `line_content` from column 1 up to the end of its text, excluding trailing
/// whitespace.
///
/// Columns are 1-based character positions and the end column is exclusive.
/// Characters are counted rather than bytes, so lines containing multi-byte
/// characters get the same kind of column as the other range helpers here.
pub fn calculate_line_range(line: usize, line_content: &str) -> (usize, usize, usize, usize) {
    let trimmed_chars = line_content.trim_end().chars().count();
    (line, 1, line, trimmed_chars + 1)
}
369
370pub fn calculate_match_range(
376 line: usize,
377 line_content: &str,
378 match_start: usize,
379 match_len: usize,
380) -> (usize, usize, usize, usize) {
381 let line_len = line_content.len();
383 if match_start > line_len {
384 let char_count = line_content.chars().count();
386 return (line, char_count + 1, line, char_count + 1);
387 }
388
389 let safe_match_start = find_char_boundary(line_content, match_start);
391 let safe_match_end_byte = find_char_boundary(line_content, (match_start + match_len).min(line_len));
392
393 let char_start = byte_to_char_count(line_content, safe_match_start);
395 let char_len = if safe_match_end_byte > safe_match_start {
396 line_content[safe_match_start..safe_match_end_byte].chars().count()
398 } else {
399 0
400 };
401 (line, char_start, line, char_start + char_len)
402}
403
404pub fn calculate_trailing_range(line: usize, line_content: &str, content_end: usize) -> (usize, usize, usize, usize) {
410 let safe_content_end = find_char_boundary(line_content, content_end);
412 let char_content_end = byte_to_char_count(line_content, safe_content_end);
413 let line_char_len = line_content.chars().count() + 1;
414 (line, char_content_end, line, line_char_len)
415}
416
417pub fn calculate_heading_range(line: usize, line_content: &str) -> (usize, usize, usize, usize) {
419 calculate_line_range(line, line_content)
420}
421
422pub fn calculate_emphasis_range(
428 line: usize,
429 line_content: &str,
430 start_pos: usize,
431 end_pos: usize,
432) -> (usize, usize, usize, usize) {
433 let safe_start_pos = find_char_boundary(line_content, start_pos);
435 let safe_end_pos = find_char_boundary(line_content, end_pos);
436 let char_start = byte_to_char_count(line_content, safe_start_pos);
437 let char_end = byte_to_char_count(line_content, safe_end_pos);
438 (line, char_start, line, char_end)
439}
440
441pub fn calculate_html_tag_range(
443 line: usize,
444 line_content: &str,
445 tag_start: usize,
446 tag_len: usize,
447) -> (usize, usize, usize, usize) {
448 calculate_match_range(line, line_content, tag_start, tag_len)
449}
450
451pub fn calculate_url_range(
453 line: usize,
454 line_content: &str,
455 url_start: usize,
456 url_len: usize,
457) -> (usize, usize, usize, usize) {
458 calculate_match_range(line, line_content, url_start, url_len)
459}
460
461pub fn calculate_list_marker_range(
463 line: usize,
464 line_content: &str,
465 marker_start: usize,
466 marker_len: usize,
467) -> (usize, usize, usize, usize) {
468 calculate_match_range(line, line_content, marker_start, marker_len)
469}
470
/// Builds a `(start_line, start_col, end_line, end_col)` tuple covering the
/// part of `line_content` that lies beyond `limit` characters.
///
/// Columns are 1-based character positions with an exclusive end column. When
/// the line has no more than `limit` characters the range is empty and sits
/// just past the last character.
pub fn calculate_excess_range(line: usize, line_content: &str, limit: usize) -> (usize, usize, usize, usize) {
    // Counting characters walks the whole line, so do it once.
    let char_count = line_content.chars().count();
    let char_limit = limit.min(char_count);
    (line, char_limit + 1, line, char_count + 1)
}
477
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_single_line_range() {
        assert_eq!(calculate_single_line_range(5, 10, 3), (5, 10, 5, 13));
    }

    #[test]
    fn test_line_range() {
        let content = "# This is a heading ";
        // Trailing whitespace is excluded: 19 characters, exclusive end column 20.
        assert_eq!(calculate_line_range(1, content), (1, 1, 1, 20));
    }

    #[test]
    fn test_match_range() {
        let content = "Text <div>content</div> more";
        let tag_start = 5;
        let tag_len = 5;
        // "<div>" starts at byte 5, i.e. column 6, and spans five columns.
        assert_eq!(calculate_match_range(1, content, tag_start, tag_len), (1, 6, 1, 11));
    }

    #[test]
    fn test_trailing_range() {
        // Three trailing spaces after the 12-byte text "Text content".
        let content = "Text content   ";
        let content_end = 12;
        assert_eq!(calculate_trailing_range(1, content, content_end), (1, 13, 1, 16));
    }

    #[test]
    fn test_excess_range() {
        let content = "This line is too long for the limit";
        let limit = 20;
        // 35 characters in total; everything after column 20 is excess.
        assert_eq!(calculate_excess_range(1, content, limit), (1, 21, 1, 36));
    }

    #[test]
    fn test_whole_line_range() {
        let content = "Line 1\nLine 2\nLine 3";
        let line_index = LineIndex::new(content);

        // Whole-line ranges include the newline; the last line has none.
        assert_eq!(line_index.whole_line_range(1), 0..7);
        assert_eq!(line_index.whole_line_range(2), 7..14);
        assert_eq!(line_index.whole_line_range(3), 14..20);
    }

    #[test]
    fn test_line_content_range() {
        let content = "Line 1\nLine 2\nLine 3";
        let line_index = LineIndex::new(content);

        // Content ranges exclude the newline.
        assert_eq!(line_index.line_content_range(1), 0..6);
        assert_eq!(line_index.line_content_range(2), 7..13);
        assert_eq!(line_index.line_content_range(3), 14..20);
    }

    #[test]
    fn test_line_text_range() {
        let content = "Hello world\nAnother line";
        let line_index = LineIndex::new(content);

        assert_eq!(line_index.line_text_range(1, 1, 5), 0..4);
        assert_eq!(line_index.line_text_range(2, 1, 7), 12..18);
        // An end column past the line is clamped to the line's content.
        assert_eq!(line_index.line_text_range(1, 1, 100), 0..11);
    }

    #[test]
    fn test_calculate_match_range_bounds_checking() {
        // Match start far beyond the line: empty range just past the last character.
        assert_eq!(calculate_match_range(121, "] not a link [", 57, 10), (121, 15, 121, 15));

        // Match length running past the line: clamped to the line end.
        assert_eq!(calculate_match_range(1, "short", 2, 10), (1, 3, 1, 6));

        // Match entirely inside the line.
        assert_eq!(calculate_match_range(5, "normal text here", 7, 4), (5, 8, 5, 12));

        // Zero-length match.
        assert_eq!(calculate_match_range(10, "test line", 5, 0), (10, 6, 10, 6));
    }

    #[test]
    fn test_issue_154_korean_character_boundary() {
        let line_content = "- 2023 년 초 이후 주가 상승 +1,000% (10 배 상승) ";

        // Byte 19 falls inside a multi-byte character; this must not panic.
        let (line, start_col, end_line, end_col) = calculate_match_range(1, line_content, 19, 1);

        assert!(start_col > 0);
        assert_eq!(line, 1);
        assert_eq!(end_line, 1);
        assert!(end_col >= start_col);
    }

    #[test]
    fn test_calculate_match_range_korean() {
        let line_content = "안녕하세요";

        // Bytes 3..6 are exactly the second character.
        assert_eq!(calculate_match_range(1, line_content, 3, 3), (1, 2, 1, 3));

        // Byte 4 is inside the second character and is moved back to byte 3.
        let (line, start_col, end_line, _end_col) = calculate_match_range(1, line_content, 4, 3);
        assert_eq!(line, 1);
        assert_eq!(start_col, 2);
        assert_eq!(end_line, 1);
    }

    #[test]
    fn test_calculate_match_range_chinese() {
        let line_content = "你好世界";
        assert_eq!(calculate_match_range(1, line_content, 6, 3), (1, 3, 1, 4));
    }

    #[test]
    fn test_calculate_match_range_japanese() {
        let line_content = "こんにちは";
        assert_eq!(calculate_match_range(1, line_content, 9, 3), (1, 4, 1, 5));
    }

    #[test]
    fn test_calculate_match_range_mixed_unicode() {
        let line_content = "Hello 世界";

        // The ASCII space at byte 5.
        assert_eq!(calculate_match_range(1, line_content, 5, 1), (1, 6, 1, 7));

        // The first CJK character at bytes 6..9.
        assert_eq!(calculate_match_range(1, line_content, 6, 3), (1, 7, 1, 8));
    }

    #[test]
    fn test_calculate_trailing_range_korean() {
        let line_content = "안녕하세요 ";
        let (line, start_col, end_line, end_col) = calculate_trailing_range(1, line_content, 15);

        assert_eq!(line, 1);
        assert!(start_col > 0);
        assert_eq!(end_line, 1);
        assert!(end_col > start_col);
    }

    #[test]
    fn test_calculate_emphasis_range_chinese() {
        let line_content = "这是**重要**的";
        let (line, start_col, end_line, end_col) = calculate_emphasis_range(1, line_content, 6, 12);

        assert_eq!(line, 1);
        assert!(start_col > 0);
        assert_eq!(end_line, 1);
        assert!(end_col > start_col);
    }

    #[test]
    fn test_line_col_to_byte_range_korean() {
        let content = "안녕하세요\nWorld";
        let line_index = LineIndex::new(content);

        // Each Korean character here occupies three bytes.
        assert_eq!(line_index.line_col_to_byte_range(1, 1), 0..0);
        assert_eq!(line_index.line_col_to_byte_range(1, 2), 3..3);
        assert_eq!(line_index.line_col_to_byte_range(1, 3), 6..6);
    }

    #[test]
    fn test_line_col_to_byte_range_with_length_chinese() {
        let content = "你好世界\nTest";
        let line_index = LineIndex::new(content);

        assert_eq!(line_index.line_col_to_byte_range_with_length(1, 1, 2), 0..6);
        assert_eq!(line_index.line_col_to_byte_range_with_length(1, 2, 1), 3..6);
    }

    #[test]
    fn test_line_text_range_japanese() {
        let content = "こんにちは\nHello";
        let line_index = LineIndex::new(content);

        assert_eq!(line_index.line_text_range(1, 2, 4), 3..9);
    }

    #[test]
    fn test_find_char_boundary_edge_cases() {
        let s = "안녕";

        assert_eq!(find_char_boundary(s, 0), 0);
        // Bytes 1 and 2 are inside the first character.
        assert_eq!(find_char_boundary(s, 1), 0);
        assert_eq!(find_char_boundary(s, 2), 0);
        assert_eq!(find_char_boundary(s, 3), 3);
        // Byte 4 is inside the second character.
        assert_eq!(find_char_boundary(s, 4), 3);
        // Out-of-range indices clamp to the string length.
        assert_eq!(find_char_boundary(s, 100), s.len());
    }

    #[test]
    fn test_byte_to_char_count_unicode() {
        let s = "안녕하세요";

        // Every three bytes advance the 1-based column by one.
        assert_eq!(byte_to_char_count(s, 0), 1);
        assert_eq!(byte_to_char_count(s, 3), 2);
        assert_eq!(byte_to_char_count(s, 6), 3);
        assert_eq!(byte_to_char_count(s, 9), 4);
        assert_eq!(byte_to_char_count(s, 12), 5);
        assert_eq!(byte_to_char_count(s, 15), 6);
    }

    #[test]
    fn test_all_range_functions_with_emoji() {
        let line_content = "Hello 🎉 World 🌍";

        let (line, start_col, end_line, end_col) = calculate_match_range(1, line_content, 6, 4);
        assert_eq!(line, 1);
        assert!(start_col > 0);
        assert_eq!(end_line, 1);
        assert!(end_col > start_col);

        let (line, start_col, end_line, end_col) = calculate_trailing_range(1, line_content, 12);
        assert_eq!(line, 1);
        assert!(start_col > 0);
        assert_eq!(end_line, 1);
        assert!(end_col > start_col);

        let (line, start_col, end_line, end_col) = calculate_emphasis_range(1, line_content, 0, 5);
        assert_eq!(line, 1);
        assert_eq!(start_col, 1);
        assert_eq!(end_line, 1);
        assert!(end_col > start_col);
    }
}