1use crate::utils::element_cache::ElementCache;
4use std::collections::HashSet;
5use std::ops::Range;
6
/// Snaps `byte_idx` to the closest UTF-8 character boundary at or before it.
///
/// An index at or beyond the end of `s` yields `s.len()`. An index that lands inside a
/// multi-byte character is moved back to the first byte of that character.
fn find_char_boundary(s: &str, byte_idx: usize) -> usize {
    let len = s.len();
    if byte_idx >= len {
        return len;
    }

    // Offset 0 is always a character boundary, so the backwards search always succeeds;
    // the fallback only exists to keep the expression total.
    (0..=byte_idx)
        .rev()
        .find(|&candidate| s.is_char_boundary(candidate))
        .unwrap_or(0)
}
31
32fn byte_to_char_count(s: &str, byte_idx: usize) -> usize {
35 let safe_byte_idx = find_char_boundary(s, byte_idx);
36 s[..safe_byte_idx].chars().count() + 1 }
38
39#[derive(Debug)]
40pub struct LineIndex<'a> {
41 line_starts: Vec<usize>,
42 content: &'a str,
43 code_block_lines: Option<HashSet<usize>>,
44}
45
46impl<'a> LineIndex<'a> {
47 pub fn new(content: &'a str) -> Self {
48 let mut line_starts = vec![0];
49 let mut pos = 0;
50
51 for c in content.chars() {
52 pos += c.len_utf8();
53 if c == '\n' {
54 line_starts.push(pos);
55 }
56 }
57
58 let mut index = Self {
59 line_starts,
60 content,
61 code_block_lines: None,
62 };
63
64 index.compute_code_block_lines();
66
67 index
68 }
69
70 pub fn line_col_to_byte_range(&self, line: usize, column: usize) -> Range<usize> {
71 let line = line.saturating_sub(1);
72 let line_start = *self.line_starts.get(line).unwrap_or(&self.content.len());
73
74 let current_line = self.content.lines().nth(line).unwrap_or("");
75 let char_col = column.saturating_sub(1);
77 let char_count = current_line.chars().count();
78 let safe_char_col = char_col.min(char_count);
79
80 let byte_offset = current_line
82 .char_indices()
83 .nth(safe_char_col)
84 .map(|(idx, _)| idx)
85 .unwrap_or(current_line.len());
86
87 let start = line_start + byte_offset;
88 start..start
89 }
90
91 pub fn line_col_to_byte_range_with_length(&self, line: usize, column: usize, length: usize) -> Range<usize> {
98 let line = line.saturating_sub(1);
99 let line_start = *self.line_starts.get(line).unwrap_or(&self.content.len());
100
101 let current_line = self.content.lines().nth(line).unwrap_or("");
102 let char_col = column.saturating_sub(1);
104 let char_count = current_line.chars().count();
105 let safe_char_col = char_col.min(char_count);
106
107 let mut char_indices = current_line.char_indices();
109 let start_byte = char_indices
110 .nth(safe_char_col)
111 .map(|(idx, _)| idx)
112 .unwrap_or(current_line.len());
113
114 let end_char_col = (safe_char_col + length).min(char_count);
116 let end_byte = current_line
117 .char_indices()
118 .nth(end_char_col)
119 .map(|(idx, _)| idx)
120 .unwrap_or(current_line.len());
121
122 let start = line_start + start_byte;
123 let end = line_start + end_byte;
124 start..end
125 }
126
127 pub fn whole_line_range(&self, line: usize) -> Range<usize> {
130 let line_idx = line.saturating_sub(1);
131 let start = *self.line_starts.get(line_idx).unwrap_or(&self.content.len());
132 let end = self
133 .line_starts
134 .get(line_idx + 1)
135 .copied()
136 .unwrap_or(self.content.len());
137 start..end
138 }
139
140 pub fn multi_line_range(&self, start_line: usize, end_line: usize) -> Range<usize> {
143 let start_idx = start_line.saturating_sub(1);
144 let end_idx = end_line.saturating_sub(1);
145
146 let start = *self.line_starts.get(start_idx).unwrap_or(&self.content.len());
147 let end = self.line_starts.get(end_idx + 1).copied().unwrap_or(self.content.len());
148 start..end
149 }
150
151 pub fn line_text_range(&self, line: usize, start_col: usize, end_col: usize) -> Range<usize> {
158 let line_idx = line.saturating_sub(1);
159 let line_start = *self.line_starts.get(line_idx).unwrap_or(&self.content.len());
160
161 let current_line = self.content.lines().nth(line_idx).unwrap_or("");
163 let char_count = current_line.chars().count();
164
165 let start_char_col = start_col.saturating_sub(1).min(char_count);
167 let end_char_col = end_col.saturating_sub(1).min(char_count);
168
169 let mut char_indices = current_line.char_indices();
170 let start_byte = char_indices
171 .nth(start_char_col)
172 .map(|(idx, _)| idx)
173 .unwrap_or(current_line.len());
174
175 let end_byte = current_line
176 .char_indices()
177 .nth(end_char_col)
178 .map(|(idx, _)| idx)
179 .unwrap_or(current_line.len());
180
181 let start = line_start + start_byte;
182 let end = line_start + end_byte.max(start_byte);
183 start..end
184 }
185
186 pub fn line_content_range(&self, line: usize) -> Range<usize> {
189 let line_idx = line.saturating_sub(1);
190 let line_start = *self.line_starts.get(line_idx).unwrap_or(&self.content.len());
191
192 let current_line = self.content.lines().nth(line_idx).unwrap_or("");
193 let line_end = line_start + current_line.len();
194 line_start..line_end
195 }
196
197 pub fn get_line_start_byte(&self, line_num: usize) -> Option<usize> {
199 if line_num == 0 {
200 return None; }
202 self.line_starts.get(line_num - 1).cloned()
204 }
205
206 pub fn is_code_block(&self, line: usize) -> bool {
208 if let Some(ref code_block_lines) = self.code_block_lines {
209 code_block_lines.contains(&line)
210 } else {
211 self.is_code_fence(line)
213 }
214 }
215
216 pub fn is_code_fence(&self, line: usize) -> bool {
218 self.content.lines().nth(line).is_some_and(|l| {
219 let trimmed = l.trim();
220 trimmed.starts_with("```") || trimmed.starts_with("~~~")
221 })
222 }
223
224 pub fn is_tilde_code_block(&self, line: usize) -> bool {
226 self.content
227 .lines()
228 .nth(line)
229 .is_some_and(|l| l.trim().starts_with("~~~"))
230 }
231
232 pub fn get_content(&self) -> &str {
234 self.content
235 }
236
237 fn compute_code_block_lines(&mut self) {
239 let mut code_block_lines = HashSet::new();
240 let lines: Vec<&str> = self.content.lines().collect();
241
242 let mut in_block = false;
244 let mut active_fence_type = ' '; let mut block_indent = 0;
246 let mut block_fence_length = 0;
247 let mut in_markdown_block = false;
248 let mut nested_fence_start = None;
249 let mut nested_fence_end = None;
250
251 for (i, line) in lines.iter().enumerate() {
253 let trimmed = line.trim();
254 let indent = line.len() - trimmed.len();
255
256 if ElementCache::calculate_indentation_width_default(line) >= 4 {
258 code_block_lines.insert(i);
259 continue; }
261
262 if !in_block {
264 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
266 let char_type = if trimmed.starts_with("```") { '`' } else { '~' };
267 let count = trimmed.chars().take_while(|&c| c == char_type).count();
268 let info_string = if trimmed.len() > count {
269 trimmed[count..].trim()
270 } else {
271 ""
272 };
273
274 in_block = true;
276 active_fence_type = char_type;
277 block_indent = indent;
278 block_fence_length = count;
279 in_markdown_block = info_string == "markdown";
280 nested_fence_start = None;
281 nested_fence_end = None;
282
283 code_block_lines.insert(i);
284 }
285 } else {
286 code_block_lines.insert(i);
288
289 if in_markdown_block && nested_fence_start.is_none() && trimmed.starts_with("```") {
291 let count = trimmed.chars().take_while(|&c| c == '`').count();
293 let remaining = if trimmed.len() > count {
294 trimmed[count..].trim()
295 } else {
296 ""
297 };
298
299 if !remaining.is_empty() {
300 nested_fence_start = Some(i);
301 }
302 }
303
304 if in_markdown_block
306 && nested_fence_start.is_some()
307 && nested_fence_end.is_none()
308 && trimmed.starts_with("```")
309 && trimmed.trim_start_matches('`').trim().is_empty()
310 {
311 nested_fence_end = Some(i);
312 }
313
314 if trimmed.starts_with(&active_fence_type.to_string().repeat(3)) {
316 let count = trimmed.chars().take_while(|&c| c == active_fence_type).count();
317 let remaining = if trimmed.len() > count {
318 trimmed[count..].trim()
319 } else {
320 ""
321 };
322
323 let is_valid_closing_fence =
329 count >= block_fence_length && remaining.is_empty() && indent <= block_indent;
330
331 let is_nested_closing = nested_fence_end.is_some() && i == nested_fence_end.unwrap();
334
335 if is_valid_closing_fence && !is_nested_closing {
337 in_block = false;
338 in_markdown_block = false;
339 }
340 }
341 }
342 }
343
344 self.code_block_lines = Some(code_block_lines);
345 }
346}
347
/// Builds a `(start_line, start_col, end_line, end_col)` tuple for a span that stays on
/// `line`, starts at `start_col` and ends at `start_col + length`.
pub fn calculate_single_line_range(line: usize, start_col: usize, length: usize) -> (usize, usize, usize, usize) {
    let end_col = start_col + length;
    (line, start_col, line, end_col)
}
352
/// Builds a `(start_line, start_col, end_line, end_col)` tuple covering `line_content`
/// from column 1 up to the column just past its last non-whitespace character.
///
/// Trailing whitespace is excluded. The end column is counted in characters, matching
/// the other range helpers in this file, so multi-byte text does not inflate it.
pub fn calculate_line_range(line: usize, line_content: &str) -> (usize, usize, usize, usize) {
    // Count characters rather than bytes: columns are character-based.
    let visible_chars = line_content.trim_end().chars().count();
    (line, 1, line, visible_chars + 1)
}
358
359pub fn calculate_match_range(
365 line: usize,
366 line_content: &str,
367 match_start: usize,
368 match_len: usize,
369) -> (usize, usize, usize, usize) {
370 let line_len = line_content.len();
372 if match_start > line_len {
373 let char_count = line_content.chars().count();
375 return (line, char_count + 1, line, char_count + 1);
376 }
377
378 let safe_match_start = find_char_boundary(line_content, match_start);
380 let safe_match_end_byte = find_char_boundary(line_content, (match_start + match_len).min(line_len));
381
382 let char_start = byte_to_char_count(line_content, safe_match_start);
384 let char_len = if safe_match_end_byte > safe_match_start {
385 line_content[safe_match_start..safe_match_end_byte].chars().count()
387 } else {
388 0
389 };
390 (line, char_start, line, char_start + char_len)
391}
392
393pub fn calculate_trailing_range(line: usize, line_content: &str, content_end: usize) -> (usize, usize, usize, usize) {
399 let safe_content_end = find_char_boundary(line_content, content_end);
401 let char_content_end = byte_to_char_count(line_content, safe_content_end);
402 let line_char_len = line_content.chars().count() + 1;
403 (line, char_content_end, line, line_char_len)
404}
405
406pub fn calculate_heading_range(line: usize, line_content: &str) -> (usize, usize, usize, usize) {
408 calculate_line_range(line, line_content)
409}
410
411pub fn calculate_emphasis_range(
417 line: usize,
418 line_content: &str,
419 start_pos: usize,
420 end_pos: usize,
421) -> (usize, usize, usize, usize) {
422 let safe_start_pos = find_char_boundary(line_content, start_pos);
424 let safe_end_pos = find_char_boundary(line_content, end_pos);
425 let char_start = byte_to_char_count(line_content, safe_start_pos);
426 let char_end = byte_to_char_count(line_content, safe_end_pos);
427 (line, char_start, line, char_end)
428}
429
430pub fn calculate_html_tag_range(
432 line: usize,
433 line_content: &str,
434 tag_start: usize,
435 tag_len: usize,
436) -> (usize, usize, usize, usize) {
437 calculate_match_range(line, line_content, tag_start, tag_len)
438}
439
440pub fn calculate_url_range(
442 line: usize,
443 line_content: &str,
444 url_start: usize,
445 url_len: usize,
446) -> (usize, usize, usize, usize) {
447 calculate_match_range(line, line_content, url_start, url_len)
448}
449
450pub fn calculate_list_marker_range(
452 line: usize,
453 line_content: &str,
454 marker_start: usize,
455 marker_len: usize,
456) -> (usize, usize, usize, usize) {
457 calculate_match_range(line, line_content, marker_start, marker_len)
458}
459
/// Builds a `(start_line, start_col, end_line, end_col)` tuple for the part of
/// `line_content` that lies beyond `limit` characters.
///
/// The start column is `limit + 1`, capped at one past the last character; the end
/// column is one past the last character. A line within the limit gives an empty range.
pub fn calculate_excess_range(line: usize, line_content: &str, limit: usize) -> (usize, usize, usize, usize) {
    let total_chars = line_content.chars().count();
    let first_excess_col = limit.min(total_chars) + 1;
    (line, first_excess_col, line, total_chars + 1)
}
466
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_single_line_range() {
        assert_eq!(calculate_single_line_range(5, 10, 3), (5, 10, 5, 13));
    }

    #[test]
    fn test_line_range() {
        // Two trailing spaces that must not be part of the range.
        let content = "# This is a heading  ";
        // 19 visible characters, so the exclusive end column is 20.
        assert_eq!(calculate_line_range(1, content), (1, 1, 1, 20));
    }

    #[test]
    fn test_match_range() {
        let content = "Text <div>content</div> more";
        // "<div>" starts at byte 5 and is 5 bytes long.
        assert_eq!(calculate_match_range(1, content, 5, 5), (1, 6, 1, 11));
    }

    #[test]
    fn test_trailing_range() {
        // Three trailing spaces: the content ends at byte 12 and the line is 15
        // characters long, so the range runs from column 13 to column 16.
        let content = "Text content   ";
        assert_eq!(calculate_trailing_range(1, content, 12), (1, 13, 1, 16));
    }

    #[test]
    fn test_excess_range() {
        // 35 characters with a limit of 20.
        let content = "This line is too long for the limit";
        assert_eq!(calculate_excess_range(1, content, 20), (1, 21, 1, 36));
    }

    #[test]
    fn test_whole_line_range() {
        let line_index = LineIndex::new("Line 1\nLine 2\nLine 3");

        // Whole-line ranges include the newline, except on the unterminated last line.
        assert_eq!(line_index.whole_line_range(1), 0..7);
        assert_eq!(line_index.whole_line_range(2), 7..14);
        assert_eq!(line_index.whole_line_range(3), 14..20);
    }

    #[test]
    fn test_line_content_range() {
        let line_index = LineIndex::new("Line 1\nLine 2\nLine 3");

        // Content ranges exclude the newline.
        assert_eq!(line_index.line_content_range(1), 0..6);
        assert_eq!(line_index.line_content_range(2), 7..13);
        assert_eq!(line_index.line_content_range(3), 14..20);
    }

    #[test]
    fn test_line_text_range() {
        let line_index = LineIndex::new("Hello world\nAnother line");

        assert_eq!(line_index.line_text_range(1, 1, 5), 0..4);
        assert_eq!(line_index.line_text_range(2, 1, 7), 12..18);
        // An end column past the line is clamped to the line's length.
        assert_eq!(line_index.line_text_range(1, 1, 100), 0..11);
    }

    #[test]
    fn test_calculate_match_range_bounds_checking() {
        // Start far beyond the line: empty range just past the last character.
        assert_eq!(
            calculate_match_range(121, "] not a link [", 57, 10),
            (121, 15, 121, 15)
        );
        // Match runs past the line end: the end is clamped.
        assert_eq!(calculate_match_range(1, "short", 2, 10), (1, 3, 1, 6));
        // Ordinary in-bounds match.
        assert_eq!(calculate_match_range(5, "normal text here", 7, 4), (5, 8, 5, 12));
        // Zero-length match.
        assert_eq!(calculate_match_range(10, "test line", 5, 0), (10, 6, 10, 6));
    }

    #[test]
    fn test_issue_154_korean_character_boundary() {
        let line_content = "- 2023 년 초 이후 주가 상승 +1,000% (10 배 상승) ";

        // Byte 19 may fall inside a multi-byte character; this must not panic.
        let (line, start_col, end_line, end_col) = calculate_match_range(1, line_content, 19, 1);

        assert!(start_col > 0);
        assert_eq!(line, 1);
        assert_eq!(end_line, 1);
        assert!(end_col >= start_col);
    }

    #[test]
    fn test_calculate_match_range_korean() {
        // Every character is 3 bytes long.
        let line_content = "안녕하세요";

        assert_eq!(calculate_match_range(1, line_content, 3, 3), (1, 2, 1, 3));

        // Byte 4 is inside the second character and snaps back to its start.
        let (line, start_col, end_line, _end_col) = calculate_match_range(1, line_content, 4, 3);
        assert_eq!((line, start_col, end_line), (1, 2, 1));
    }

    #[test]
    fn test_calculate_match_range_chinese() {
        assert_eq!(calculate_match_range(1, "你好世界", 6, 3), (1, 3, 1, 4));
    }

    #[test]
    fn test_calculate_match_range_japanese() {
        assert_eq!(calculate_match_range(1, "こんにちは", 9, 3), (1, 4, 1, 5));
    }

    #[test]
    fn test_calculate_match_range_mixed_unicode() {
        let line_content = "Hello 世界";

        // The ASCII space at byte 5.
        assert_eq!(calculate_match_range(1, line_content, 5, 1), (1, 6, 1, 7));
        // The first CJK character at bytes 6..9.
        assert_eq!(calculate_match_range(1, line_content, 6, 3), (1, 7, 1, 8));
    }

    #[test]
    fn test_calculate_trailing_range_korean() {
        let line_content = "안녕하세요 ";

        let (line, start_col, end_line, end_col) = calculate_trailing_range(1, line_content, 15);
        assert_eq!(line, 1);
        assert!(start_col > 0);
        assert_eq!(end_line, 1);
        assert!(end_col > start_col);
    }

    #[test]
    fn test_calculate_emphasis_range_chinese() {
        let line_content = "这是**重要**的";

        let (line, start_col, end_line, end_col) = calculate_emphasis_range(1, line_content, 6, 12);
        assert_eq!(line, 1);
        assert!(start_col > 0);
        assert_eq!(end_line, 1);
        assert!(end_col > start_col);
    }

    #[test]
    fn test_line_col_to_byte_range_korean() {
        let line_index = LineIndex::new("안녕하세요\nWorld");

        // Character columns map to 3-byte steps.
        assert_eq!(line_index.line_col_to_byte_range(1, 1), 0..0);
        assert_eq!(line_index.line_col_to_byte_range(1, 2), 3..3);
        assert_eq!(line_index.line_col_to_byte_range(1, 3), 6..6);
    }

    #[test]
    fn test_line_col_to_byte_range_with_length_chinese() {
        let line_index = LineIndex::new("你好世界\nTest");

        assert_eq!(line_index.line_col_to_byte_range_with_length(1, 1, 2), 0..6);
        assert_eq!(line_index.line_col_to_byte_range_with_length(1, 2, 1), 3..6);
    }

    #[test]
    fn test_line_text_range_japanese() {
        let line_index = LineIndex::new("こんにちは\nHello");

        assert_eq!(line_index.line_text_range(1, 2, 4), 3..9);
    }

    #[test]
    fn test_find_char_boundary_edge_cases() {
        // Two 3-byte characters.
        let s = "안녕";

        assert_eq!(find_char_boundary(s, 0), 0);
        assert_eq!(find_char_boundary(s, 1), 0);
        assert_eq!(find_char_boundary(s, 2), 0);
        assert_eq!(find_char_boundary(s, 3), 3);
        assert_eq!(find_char_boundary(s, 4), 3);
        assert_eq!(find_char_boundary(s, 100), s.len());
    }

    #[test]
    fn test_byte_to_char_count_unicode() {
        let s = "안녕하세요";

        // Each 3-byte step advances the 1-based column by one.
        for (column_offset, byte_idx) in (0..=15).step_by(3).enumerate() {
            assert_eq!(byte_to_char_count(s, byte_idx), column_offset + 1);
        }
    }

    #[test]
    fn test_all_range_functions_with_emoji() {
        let line_content = "Hello 🎉 World 🌍";

        let (line, start_col, end_line, end_col) = calculate_match_range(1, line_content, 6, 4);
        assert_eq!(line, 1);
        assert!(start_col > 0);
        assert_eq!(end_line, 1);
        assert!(end_col > start_col);

        let (line, start_col, end_line, end_col) = calculate_trailing_range(1, line_content, 12);
        assert_eq!(line, 1);
        assert!(start_col > 0);
        assert_eq!(end_line, 1);
        assert!(end_col > start_col);

        let (line, start_col, end_line, end_col) = calculate_emphasis_range(1, line_content, 0, 5);
        assert_eq!(line, 1);
        assert_eq!(start_col, 1);
        assert_eq!(end_line, 1);
        assert!(end_col > start_col);
    }
}