1use tower_lsp::lsp_types::Position;
2
3pub(crate) fn utf16_pos_to_byte(source: &str, position: Position) -> usize {
5 let mut byte_off = 0usize;
6 for (line_idx, line) in source.split('\n').enumerate() {
7 let line_content = line.strip_suffix('\r').unwrap_or(line);
10 if line_idx == position.line as usize {
11 let mut col_utf16 = 0u32;
12 for ch in line_content.chars() {
13 if col_utf16 >= position.character {
14 break;
15 }
16 col_utf16 += ch.len_utf16() as u32;
17 byte_off += ch.len_utf8();
18 }
19 return byte_off;
20 }
21 byte_off += line.len() + 1; }
23 byte_off
24}
25
/// Returns `true` when `query` matches `candidate` either as a
/// case-insensitive prefix or as an abbreviation of its word starts.
///
/// An empty query matches everything. For the abbreviation form, each
/// lower-cased query character must equal, in order, the lower-cased character
/// at a word boundary of `candidate`. A boundary is the first character, any
/// character directly after `_`, or an uppercase character directly after a
/// lowercase one.
pub(crate) fn fuzzy_camel_match(query: &str, candidate: &str) -> bool {
    if query.is_empty() {
        return true;
    }

    let needle = query.to_lowercase();
    if candidate.to_lowercase().starts_with(needle.as_str()) {
        return true;
    }

    // Walk the candidate once, consuming a query character each time a
    // boundary character matches the next one we are waiting for.
    let mut wanted = needle.chars().peekable();
    let mut previous: Option<char> = None;
    for current in candidate.chars() {
        let next_wanted = match wanted.peek() {
            Some(&c) => c,
            None => break,
        };
        let at_boundary = match previous {
            None | Some('_') => true,
            Some(p) => current.is_uppercase() && p.is_lowercase(),
        };
        if at_boundary && current.to_lowercase().next() == Some(next_wanted) {
            wanted.next();
        }
        previous = Some(current);
    }

    wanted.peek().is_none()
}
68
/// Builds a sort key for `label` relative to `query`.
///
/// The key is the lower-cased label prefixed with `'0'` when the label starts
/// with the lower-cased query, and with `'1'` otherwise, so prefix matches sort
/// ahead of all other labels.
pub(crate) fn camel_sort_key(query: &str, label: &str) -> String {
    let folded_label = label.to_lowercase();
    let bucket = if folded_label.starts_with(query.to_lowercase().as_str()) {
        '0'
    } else {
        '1'
    };

    let mut key = String::with_capacity(folded_label.len() + 1);
    key.push(bucket);
    key.push_str(&folded_label);
    key
}
81
/// Reports whether `name` appears in the fixed table of PHP built-in names
/// below.
///
/// The comparison is exact (case-sensitive); every table entry is lower-case.
/// The table is kept in ascending byte order because the lookup is a binary
/// search; debug builds verify that ordering on each call.
pub(crate) fn is_php_builtin(name: &str) -> bool {
    const BUILTINS: &[&str] = &[
        "abs",
        "acos",
        "addslashes",
        "array_chunk",
        "array_combine",
        "array_diff",
        "array_fill",
        "array_fill_keys",
        "array_filter",
        "array_flip",
        "array_intersect",
        "array_key_exists",
        "array_keys",
        "array_map",
        "array_merge",
        "array_pad",
        "array_pop",
        "array_push",
        "array_reduce",
        "array_replace",
        "array_reverse",
        "array_search",
        "array_shift",
        "array_slice",
        "array_splice",
        "array_unique",
        "array_unshift",
        "array_values",
        "array_walk",
        "array_walk_recursive",
        "arsort",
        "asin",
        "asort",
        "atan",
        "atan2",
        "base64_decode",
        "base64_encode",
        "basename",
        "boolval",
        "call_user_func",
        "call_user_func_array",
        "ceil",
        "checkdate",
        "class_exists",
        "closedir",
        "compact",
        "constant",
        "copy",
        "cos",
        "date",
        "date_add",
        "date_create",
        "date_diff",
        "date_format",
        "date_sub",
        "define",
        "defined",
        "die",
        "dirname",
        "empty",
        "exit",
        "exp",
        "explode",
        "extract",
        "fclose",
        "feof",
        "fgets",
        "file_exists",
        "file_get_contents",
        "file_put_contents",
        "floatval",
        "floor",
        "fmod",
        "fopen",
        "fputs",
        "fread",
        "fseek",
        "ftell",
        "function_exists",
        "get_class",
        "get_parent_class",
        "gettype",
        "glob",
        "hash",
        "header",
        "headers_sent",
        "htmlentities",
        "htmlspecialchars",
        "http_build_query",
        "implode",
        "in_array",
        "intdiv",
        "interface_exists",
        "intval",
        "is_a",
        "is_array",
        "is_bool",
        "is_callable",
        "is_dir",
        "is_double",
        "is_file",
        "is_finite",
        "is_float",
        "is_infinite",
        "is_int",
        "is_integer",
        "is_long",
        "is_nan",
        "is_null",
        "is_numeric",
        "is_object",
        "is_readable",
        "is_string",
        "is_subclass_of",
        "is_writable",
        "isset",
        "join",
        "json_decode",
        "json_encode",
        "krsort",
        "ksort",
        "lcfirst",
        "list",
        "log",
        "ltrim",
        "max",
        "md5",
        "method_exists",
        "microtime",
        "min",
        "mkdir",
        "mktime",
        "mt_rand",
        "nl2br",
        "number_format",
        "ob_end_clean",
        "ob_get_clean",
        "ob_start",
        "opendir",
        "parse_str",
        "parse_url",
        "pathinfo",
        "pi",
        "pow",
        "preg_match",
        "preg_match_all",
        "preg_quote",
        "preg_replace",
        "preg_split",
        "print_r",
        "printf",
        "property_exists",
        "rand",
        "random_int",
        "rawurldecode",
        "rawurlencode",
        "readdir",
        "realpath",
        "rename",
        "rewind",
        "rmdir",
        "round",
        "rsort",
        "rtrim",
        "scandir",
        "serialize",
        "session_destroy",
        "session_start",
        "setcookie",
        "settype",
        "sha1",
        "sin",
        "sleep",
        "sort",
        "sprintf",
        "sqrt",
        "str_contains",
        "str_ends_with",
        "str_pad",
        "str_repeat",
        "str_replace",
        "str_split",
        "str_starts_with",
        "str_word_count",
        "strcasecmp",
        "strcmp",
        "strip_tags",
        "stripslashes",
        "stristr",
        "strlen",
        "strncasecmp",
        "strncmp",
        "strpos",
        "strrpos",
        "strstr",
        "strtolower",
        "strtotime",
        "strtoupper",
        "strval",
        "substr",
        "substr_count",
        "substr_replace",
        "tan",
        "time",
        "trim",
        "uasort",
        "ucfirst",
        "ucwords",
        "uksort",
        "unlink",
        "unserialize",
        "unset",
        "urldecode",
        "urlencode",
        "usleep",
        "usort",
        "var_dump",
        "var_export",
        "vsprintf",
    ];

    // A binary search over an unsorted table silently misses entries, so
    // debug builds fail loudly if someone inserts a name out of order.
    debug_assert!(
        !BUILTINS.windows(2).any(|pair| pair[0] > pair[1]),
        "BUILTINS must be sorted for binary_search"
    );

    BUILTINS
        .binary_search_by(|known| known.cmp(&name))
        .is_ok()
}
313
/// Builds a php.net documentation URL for `name`.
///
/// Every `_` in `name` is replaced with `-` and the result is appended to
/// `https://www.php.net/function.`.
pub(crate) fn php_doc_url(name: &str) -> String {
    const BASE: &str = "https://www.php.net/function.";

    let mut url = String::with_capacity(BASE.len() + name.len());
    url.push_str(BASE);
    url.extend(name.chars().map(|c| if c == '_' { '-' } else { c }));
    url
}
320
/// Converts a UTF-16 code-unit offset within `s` into a byte offset.
///
/// Returns the byte index of the first character whose UTF-16 start offset is
/// at least `utf16_offset`. An offset that lands inside a surrogate pair
/// therefore maps to the character after it, and an offset at or past the end
/// of the string maps to `s.len()`.
pub(crate) fn utf16_offset_to_byte(s: &str, utf16_offset: usize) -> usize {
    s.char_indices()
        // Pair each character's byte index with the number of UTF-16 units
        // that precede it.
        .scan(0usize, |units_so_far, (byte_idx, ch)| {
            let units_before = *units_so_far;
            *units_so_far += ch.len_utf16();
            Some((byte_idx, units_before))
        })
        .find(|&(_, units_before)| units_before >= utf16_offset)
        .map_or(s.len(), |(byte_idx, _)| byte_idx)
}
337
/// Converts a byte offset within `s` into a UTF-16 code-unit offset.
///
/// Returns the number of UTF-16 code units in the prefix of `s` that ends at
/// `byte_offset`. Offsets past the end are clamped to `s.len()`. An offset that
/// falls inside a multi-byte character is rounded down to the start of that
/// character, so only whole characters before the offset are counted and the
/// function never panics.
pub(crate) fn byte_to_utf16(s: &str, byte_offset: usize) -> u32 {
    let mut end = byte_offset.min(s.len());
    // Slicing at a non-boundary index panics; step back to the nearest
    // boundary. Index 0 is always a boundary, so this terminates.
    while !s.is_char_boundary(end) {
        end -= 1;
    }
    s[..end].chars().map(|c| c.len_utf16() as u32).sum()
}
349
/// Splits a comma-separated parameter/argument list into trimmed pieces.
///
/// Only top-level commas split: commas nested inside `()`, `[]` or `{}` are
/// ignored, and so is everything inside a single- or double-quoted string
/// literal (a backslash inside a literal escapes the next character). This
/// keeps defaults such as `$sep = ','` or `$open = "("` in one piece.
///
/// Empty pieces between two commas are kept; an empty piece after the final
/// comma (or an entirely blank input) is dropped. An unterminated string
/// literal swallows the rest of the input into the last piece.
pub(crate) fn split_params(s: &str) -> Vec<&str> {
    let mut parts = Vec::new();
    let mut depth = 0i32;
    let mut start = 0;
    // `Some(q)` while inside a string literal opened with quote character `q`.
    let mut quote: Option<char> = None;
    let mut escaped = false;

    for (i, ch) in s.char_indices() {
        if let Some(q) = quote {
            if escaped {
                // The character after a backslash is always literal.
                escaped = false;
            } else if ch == '\\' {
                escaped = true;
            } else if ch == q {
                quote = None;
            }
            continue;
        }
        match ch {
            '\'' | '"' => quote = Some(ch),
            '(' | '[' | '{' => depth += 1,
            ')' | ']' | '}' => depth -= 1,
            ',' if depth == 0 => {
                parts.push(s[start..i].trim());
                // ',' is one byte, so `i + 1` is a valid char boundary.
                start = i + 1;
            }
            _ => {}
        }
    }

    let last = s[start..].trim();
    if !last.is_empty() {
        parts.push(last);
    }
    parts
}
375
376pub(crate) fn word_at(source: &str, position: Position) -> Option<String> {
378 let raw = source.split('\n').nth(position.line as usize)?;
382 let line = raw.strip_suffix('\r').unwrap_or(raw);
383 let char_offset = position.character as usize;
384
385 let chars: Vec<char> = line.chars().collect();
386
387 let mut utf16_len = 0usize;
388 let mut char_pos = 0usize;
389 for ch in &chars {
390 if utf16_len >= char_offset {
391 break;
392 }
393 utf16_len += ch.len_utf16();
394 char_pos += 1;
395 }
396
397 let total_utf16: usize = chars.iter().map(|c| c.len_utf16()).sum();
398 if char_offset > total_utf16 {
399 return None;
400 }
401
402 let is_word = |c: char| c.is_alphanumeric() || c == '_' || c == '$' || c == '\\';
403
404 let mut left = char_pos;
405 while left > 0 && is_word(chars[left - 1]) {
406 left -= 1;
407 }
408
409 let mut right = char_pos;
410 while right < chars.len() && is_word(chars[right]) {
411 right += 1;
412 }
413
414 if left == right {
415 return None;
416 }
417
418 let word: String = chars[left..right].iter().collect();
419 if word.is_empty() { None } else { Some(word) }
420}
421
422pub(crate) fn selected_text_range(source: &str, range: tower_lsp::lsp_types::Range) -> String {
427 let lines: Vec<&str> = source.lines().collect();
428 if range.start.line == range.end.line {
429 let line = match lines.get(range.start.line as usize) {
430 Some(l) => l,
431 None => return String::new(),
432 };
433 let start = utf16_offset_to_byte(line, range.start.character as usize);
434 let end = utf16_offset_to_byte(line, range.end.character as usize);
435 line[start..end].to_string()
436 } else {
437 let mut result = String::new();
438 for i in range.start.line..=range.end.line {
439 let line = match lines.get(i as usize) {
440 Some(l) => *l,
441 None => break,
442 };
443 if i == range.start.line {
444 let start = utf16_offset_to_byte(line, range.start.character as usize);
445 result.push_str(&line[start..]);
446 } else if i == range.end.line {
447 let end = utf16_offset_to_byte(line, range.end.character as usize);
448 result.push_str(&line[..end]);
449 } else {
450 result.push_str(line);
451 }
452 if i < range.end.line {
453 result.push('\n');
454 }
455 }
456 result
457 }
458}
459
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an LSP position from a zero-based line and a UTF-16 column.
    fn at(line: u32, character: u32) -> Position {
        Position { line, character }
    }

    #[test]
    fn byte_to_utf16_ascii() {
        let units = byte_to_utf16("hello", 3);
        assert_eq!(units, 3);
    }

    #[test]
    fn byte_to_utf16_multibyte_bmp() {
        // 'é' occupies two UTF-8 bytes (3..5) but a single UTF-16 unit.
        let text = "café";
        for &(byte_offset, expected_units) in &[(0usize, 0u32), (3, 3), (5, 4)] {
            assert_eq!(byte_to_utf16(text, byte_offset), expected_units);
        }
    }

    #[test]
    fn byte_to_utf16_surrogate_pair() {
        // The emoji occupies four UTF-8 bytes (1..5) and two UTF-16 units.
        let text = "a😀b";
        for &(byte_offset, expected_units) in &[(1usize, 1u32), (5, 3), (6, 4)] {
            assert_eq!(byte_to_utf16(text, byte_offset), expected_units);
        }
    }

    #[test]
    fn byte_to_utf16_past_end_clamps() {
        let units = byte_to_utf16("hi", 100);
        assert_eq!(units, 2);
    }

    #[test]
    fn utf16_offset_to_byte_ascii() {
        let byte_offset = utf16_offset_to_byte("hello", 3);
        assert_eq!(byte_offset, 3);
    }

    #[test]
    fn utf16_offset_to_byte_surrogate_pair() {
        // One unit for 'a', two for the emoji; byte 5 is where 'b' starts.
        let text = "a😀b";
        for &(utf16_offset, expected_byte) in &[(1usize, 1usize), (3, 5)] {
            assert_eq!(utf16_offset_to_byte(text, utf16_offset), expected_byte);
        }
    }

    #[test]
    fn byte_to_utf16_and_back_roundtrip() {
        // Every character start must survive bytes -> UTF-16 -> bytes.
        let text = "café 😀 world";
        for byte_idx in text.char_indices().map(|(idx, _)| idx) {
            let units = byte_to_utf16(text, byte_idx) as usize;
            assert_eq!(utf16_offset_to_byte(text, units), byte_idx);
        }
    }

    #[test]
    fn word_at_last_line_with_trailing_newline() {
        let src = "<?php\necho strlen($x);\n";

        // Column 6 sits inside "strlen" on the second line.
        let found = word_at(src, at(1, 6));
        assert_eq!(
            found.as_deref(),
            Some("strlen"),
            "word_at must work on lines before the trailing newline"
        );

        // The empty piece after the final '\n' is only exercised, not checked:
        // the result is deliberately discarded.
        let _ = word_at(src, at(2, 0));
    }

    #[test]
    fn word_at_crlf_line_endings() {
        let src = "<?php\r\nfunction foo() {}\r\n";

        // Column 9 is the 'f' of "foo" on the second line.
        let found = word_at(src, at(1, 9));
        assert_eq!(
            found.as_deref(),
            Some("foo"),
            "word_at must handle CRLF line endings"
        );
    }

    #[test]
    fn is_php_builtin_asin_recognized() {
        let expectations: [(&str, &str); 6] = [
            ("asin", "asin must be recognised as a PHP builtin"),
            ("atan", "atan must be recognised as a PHP builtin"),
            ("krsort", "krsort must be recognised as a PHP builtin"),
            ("strcasecmp", "strcasecmp must be recognised as a PHP builtin"),
            ("strncasecmp", "strncasecmp must be recognised as a PHP builtin"),
            ("strip_tags", "strip_tags must be recognised as a PHP builtin"),
        ];
        for &(name, message) in &expectations {
            assert!(is_php_builtin(name), "{}", message);
        }
    }
}