perl_symbol_cursor/
/// Kind of Perl symbol found at a cursor position, as indicated by its sigil.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CursorSymbolKind {
    /// A name introduced by the `$` sigil.
    Scalar,
    /// A name introduced by the `@` sigil.
    Array,
    /// A name introduced by the `%` sigil.
    Hash,
    /// A name introduced by the `&` sigil, or a bare name with no sigil at all.
    Subroutine,
}

/// Extracts the identifier under the cursor together with its [`CursorSymbolKind`].
///
/// `position` is an index into the *characters* of `source` (not a byte offset).
///
/// * When the cursor is on an identifier character (alphanumeric or `_`), the
///   whole identifier is returned, no matter where inside it the cursor sits.
///   Its kind comes from the sigil directly in front of the identifier, and
///   falls back to [`CursorSymbolKind::Subroutine`] when there is none.
/// * When the cursor is on a sigil (`$`, `@`, `%`, `&`), the identifier that
///   immediately follows it is returned with that sigil's kind.
///
/// Returns `None` when `position` is out of range or no identifier is found.
pub fn extract_symbol_from_source(
    position: usize,
    source: &str,
) -> Option<(String, CursorSymbolKind)> {
    fn is_identifier_char(c: char) -> bool {
        c.is_alphanumeric() || c == '_'
    }

    fn kind_for_sigil(c: char) -> Option<CursorSymbolKind> {
        match c {
            '$' => Some(CursorSymbolKind::Scalar),
            '@' => Some(CursorSymbolKind::Array),
            '%' => Some(CursorSymbolKind::Hash),
            '&' => Some(CursorSymbolKind::Subroutine),
            _ => None,
        }
    }

    let chars: Vec<char> = source.chars().collect();
    let cursor = *chars.get(position)?;

    let (sigil, name_start) = if is_identifier_char(cursor) {
        // Walk back to the first character of the identifier so that a cursor
        // in the middle of a name still yields the complete name and its sigil.
        let mut start = position;
        while start > 0 && is_identifier_char(chars[start - 1]) {
            start -= 1;
        }
        let sigil = start
            .checked_sub(1)
            .and_then(|before| kind_for_sigil(chars[before]));
        (sigil, start)
    } else {
        // The cursor is on the sigil itself (anything else is not a symbol):
        // the name starts right after it.
        (Some(kind_for_sigil(cursor)?), position + 1)
    };

    let name: String = chars[name_start..]
        .iter()
        .copied()
        .take_while(|&c| is_identifier_char(c))
        .collect();

    if name.is_empty() {
        return None;
    }
    Some((name, sigil.unwrap_or(CursorSymbolKind::Subroutine)))
}
62
/// Returns the half-open range `(start, end)` of the symbol at `position`.
///
/// `position` and both returned bounds are indices into the *characters* of
/// `source` (not byte offsets).
///
/// * Cursor on an identifier character (alphanumeric or `_`): the range covers
///   the whole identifier plus a directly preceding sigil (`$`, `@`, `%`, `&`),
///   wherever in the identifier the cursor sits.
/// * Cursor on a sigil that is immediately followed by an identifier: the
///   range covers that sigil and the identifier.
/// * Any other cursor: an empty range ending at `position` is returned; its
///   start is moved back by one when the preceding character is a sigil.
///
/// Returns `None` only when `position` is out of range.
pub fn get_symbol_range_at_position(position: usize, source: &str) -> Option<(usize, usize)> {
    fn is_identifier_char(c: char) -> bool {
        c.is_alphanumeric() || c == '_'
    }

    fn is_sigil(c: char) -> bool {
        matches!(c, '$' | '@' | '%' | '&')
    }

    let chars: Vec<char> = source.chars().collect();
    let cursor = *chars.get(position)?;

    let (start, scan_from) = if is_identifier_char(cursor) {
        // Find the first character of the identifier, then pull in its sigil.
        let mut word_start = position;
        while word_start > 0 && is_identifier_char(chars[word_start - 1]) {
            word_start -= 1;
        }
        let start = match word_start.checked_sub(1) {
            Some(before) if is_sigil(chars[before]) => before,
            _ => word_start,
        };
        (start, position)
    } else if is_sigil(cursor)
        && matches!(chars.get(position + 1), Some(&next) if is_identifier_char(next))
    {
        // Cursor sits on the sigil of a symbol: the name begins right after it.
        (position, position + 1)
    } else {
        // Not on a symbol. Keep reporting an empty range here (widened over a
        // preceding sigil) so callers that expect `Some` see no change.
        let start = match position.checked_sub(1) {
            Some(before) if is_sigil(chars[before]) => before,
            _ => position,
        };
        return Some((start, position));
    };

    let name_len = chars[scan_from..]
        .iter()
        .take_while(|&&c| is_identifier_char(c))
        .count();

    Some((start, scan_from + name_len))
}
89
/// Reports whether `byte` may appear in a module-style token: an ASCII
/// letter, an ASCII digit, `_`, or `:`.
#[inline]
pub fn is_modchar(byte: u8) -> bool {
    matches!(byte, b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'_' | b':')
}
95
/// Converts a UTF-16 column within `line_text` into a byte offset.
///
/// The result is always a character boundary of `line_text`:
///
/// * a column that falls on or inside a character maps to the byte offset at
///   which that character starts, so a column pointing at the second half of
///   a surrogate pair resolves to the start of that character;
/// * a column at or past the end of the line maps to `line_text.len()`.
#[inline]
pub fn byte_offset_utf16(line_text: &str, col_utf16: usize) -> usize {
    let mut units = 0usize;
    for (offset, ch) in line_text.char_indices() {
        // `ch` occupies the UTF-16 columns `units..units_after`.
        let units_after = units + ch.len_utf16();
        if col_utf16 < units_after {
            return offset;
        }
        units = units_after;
    }
    line_text.len()
}
108
109pub fn token_under_cursor(text: &str, line: usize, col_utf16: usize) -> Option<String> {
111 let line_text = text.lines().nth(line)?;
112 let byte_pos = byte_offset_utf16(line_text, col_utf16);
113 let bytes = line_text.as_bytes();
114
115 if byte_pos >= bytes.len() {
116 return None;
117 }
118
119 let mut start = byte_pos;
120 let mut end = byte_pos;
121
122 while start > 0 && is_modchar(bytes[start - 1]) {
123 start -= 1;
124 }
125 if start > 0 && matches!(bytes[start - 1], b'$' | b'@' | b'%' | b'&' | b'*') {
126 start -= 1;
127 }
128
129 while end < bytes.len() && is_modchar(bytes[end]) {
130 end += 1;
131 }
132
133 Some(line_text[start..end].to_string())
134}
135
136pub fn is_word_boundary(text: &[u8], pos: usize, word_len: usize) -> bool {
138 if pos > 0 && is_modchar(text[pos - 1]) {
139 return false;
140 }
141
142 let end_pos = pos + word_len;
143 if end_pos < text.len() && is_modchar(text[end_pos]) {
144 return false;
145 }
146
147 true
148}
149
#[cfg(test)]
mod tests {
    use super::{byte_offset_utf16, is_word_boundary, token_under_cursor};

    /// A `::`-qualified module name is returned as one token.
    #[test]
    fn token_under_cursor_extracts_perl_module_token() {
        let token = token_under_cursor("use Demo::Worker;\n", 0, 8);
        assert_eq!(token.as_deref(), Some("Demo::Worker"));
    }

    /// The sigil in front of a variable name is part of the token.
    #[test]
    fn token_under_cursor_supports_sigils() {
        let token = token_under_cursor("my $value = 1;\n", 0, 5);
        assert_eq!(token.as_deref(), Some("$value"));
    }

    /// The emoji takes two UTF-16 units and four UTF-8 bytes.
    #[test]
    fn utf16_col_to_byte_offset_handles_surrogate_pairs() {
        let line = "A😀B";
        let expectations = [(0, 0), (1, 1), (3, 5), (4, 6)];
        for (col_utf16, byte_offset) in expectations {
            assert_eq!(byte_offset_utf16(line, col_utf16), byte_offset);
        }
    }

    /// A word glued to identifier bytes is rejected; a space-delimited one passes.
    #[test]
    fn word_boundary_detects_embedded_word() {
        let word_len = "Demo::Worker".len();
        assert!(!is_word_boundary(b"fooDemo::Workerbar", 3, word_len));
        assert!(is_word_boundary(b" Demo::Worker ", 1, word_len));
    }
}