perl_position_tracking/
/// Converts a byte `offset` into `text` to a zero-based `(line, column)` pair,
/// where the column is measured in UTF-16 code units.
///
/// Lines are delimited by `'\n'`; a `'\r'` in front of the newline is counted
/// as an ordinary character of its line.
///
/// Edge cases:
/// * An offset past the end of `text` is clamped to `text.len()`, so it yields
///   exactly the same position as the end-of-text offset (including the
///   "new empty line" position when `text` ends with a newline).
/// * An offset that falls inside a multi-byte character is snapped back to
///   the start of that character.
/// * An offset directly after a trailing newline reports column 0 of the
///   following (empty) line.
///
/// Line or column values that do not fit in `u32` saturate at `u32::MAX`.
pub fn offset_to_utf16_line_col(text: &str, offset: usize) -> (u32, u32) {
    // Clamp first so that every out-of-range offset behaves like end-of-text.
    let mut end = offset.min(text.len());

    // Snap back to the previous character boundary; index 0 is always a
    // boundary, so this loop terminates.
    while !text.is_char_boundary(end) {
        end -= 1;
    }

    let before = &text[..end];

    // The line index is the number of newlines that precede the offset.
    let line = before.bytes().filter(|&byte| byte == b'\n').count();

    // The current line starts right after the last newline before the offset.
    let line_start = before.rfind('\n').map_or(0, |newline| newline + 1);
    let col = before[line_start..].encode_utf16().count();

    let saturate = |value: usize| value.min(u32::MAX as usize) as u32;
    (saturate(line), saturate(col))
}
48
/// Converts a zero-based `(line, col)` position, with `col` measured in
/// UTF-16 code units, back to a byte offset into `text`.
///
/// Lines are delimited by `'\n'`; a `'\r'` in front of the newline is counted
/// as an ordinary character of its line.
///
/// Edge cases:
/// * A column that lands in the middle of a surrogate pair maps to the start
///   of that character.
/// * A column past the end of the line is clamped to the end of the line's
///   content, i.e. the byte offset of its terminating `'\n'` (or the end of
///   `text` for an unterminated last line).
/// * A line index past the last line yields `text.len()`.
pub fn utf16_line_col_to_offset(text: &str, line: u32, col: u32) -> usize {
    let wanted_line = line as usize;
    // Count in `usize` so adding a character's width can never overflow `u32`.
    let wanted_col = col as usize;

    let mut line_start = 0usize;
    for (index, segment) in text.split_inclusive('\n').enumerate() {
        if index != wanted_line {
            line_start += segment.len();
            continue;
        }

        let mut units = 0usize;
        for (byte_idx, ch) in segment.char_indices() {
            let units_after = units + ch.len_utf16();
            // `wanted_col >= units` holds here, so this single comparison
            // covers both "exactly at this character" and "inside it".
            if wanted_col < units_after {
                return line_start + byte_idx;
            }
            units = units_after;
        }

        // Column is beyond the line: clamp to just before the newline.
        let content_len = segment.strip_suffix('\n').unwrap_or(segment).len();
        return line_start + content_len;
    }

    text.len()
}
80
#[cfg(test)]
mod tests {
    use super::{offset_to_utf16_line_col, utf16_line_col_to_offset};

    /// Checks every `(offset, expected (line, col))` pair against `text`.
    fn check_offsets(text: &str, cases: &[(usize, (u32, u32))]) {
        for &(offset, expected) in cases {
            assert_eq!(offset_to_utf16_line_col(text, offset), expected);
        }
    }

    /// Checks every `((line, col), expected offset)` pair against `text`.
    fn check_positions(text: &str, cases: &[((u32, u32), usize)]) {
        for &((line, col), expected) in cases {
            assert_eq!(utf16_line_col_to_offset(text, line, col), expected);
        }
    }

    #[test]
    fn offset_to_utf16_clamps_mid_codepoint_offsets_to_previous_boundary() {
        // Offsets 1..=3 all sit inside the four-byte emoji.
        check_offsets("💖z", &[(1, (0, 0)), (2, (0, 0)), (3, (0, 0))]);
    }

    #[test]
    fn offset_to_utf16_handles_multibyte_and_surrogate_pairs() {
        let text = "aé💖z";
        check_offsets(
            text,
            &[
                (0, (0, 0)),
                (1, (0, 1)),
                (3, (0, 2)),
                (7, (0, 4)),
                (text.len(), (0, 5)),
            ],
        );
    }

    #[test]
    fn offset_to_utf16_clamps_out_of_bounds_to_last_position() {
        let text = "alpha\nbeta";
        check_offsets(text, &[(text.len() + 25, (1, 4))]);
    }

    #[test]
    fn offset_to_utf16_reports_new_empty_line_for_terminal_newline() {
        let text = "one\ntwo\n";
        check_offsets(text, &[(text.len(), (2, 0))]);
    }

    #[test]
    fn utf16_line_col_to_offset_handles_split_surrogate_column() {
        check_positions(
            "x💖y",
            &[
                ((0, 0), 0),
                ((0, 1), 1),
                ((0, 2), 1),
                ((0, 3), 5),
                ((0, 4), 6),
            ],
        );
    }

    #[test]
    fn utf16_line_col_to_offset_clamps_when_column_or_line_is_too_large() {
        let text = "abc\nw💡";
        check_positions(text, &[((1, 99), text.len()), ((99, 0), text.len())]);
    }

    #[test]
    fn utf16_helpers_handle_crlf_and_surrogates_together() {
        let text = "a\r\nb💖c\r\n";
        check_offsets(text, &[(3, (1, 0)), (8, (1, 3))]);
        check_positions(text, &[((1, 2), 4), ((1, 3), 8)]);
    }
}