Skip to main content

php_lsp/
util.rs

1use tower_lsp::lsp_types::{Location, Position, Range, Url};
2
/// A fuzzy-match query with its lowercase form computed once.
///
/// Matching loops over thousands of candidates (workspace symbols, completion
/// filtering) previously lowercased the query — and the candidate, into fresh
/// `String`s — per candidate. Build a `FuzzyQuery` once per request instead;
/// the per-candidate checks below allocate nothing (char-iterator comparisons,
/// Unicode-lowercase-correct like the old code).
pub(crate) struct FuzzyQuery {
    /// Unicode-lowercased copy of the query text, produced once by
    /// [`FuzzyQuery::new`] and reused for every candidate comparison.
    lower: String,
}
13
14impl FuzzyQuery {
15    pub(crate) fn new(query: &str) -> Self {
16        FuzzyQuery {
17            lower: query.to_lowercase(),
18        }
19    }
20
21    /// Returns `true` if the query matches `candidate` using
22    /// camelCase/underscore abbreviation rules (no substring fallback).
23    ///
24    /// Rules (applied in order, first match wins):
25    /// 1. `candidate` starts with the query (case-insensitive prefix match).
26    /// 2. Every character of the query matches a camelCase word boundary or
27    ///    character after `_` / `$` in the candidate.
28    ///
29    /// Examples:
30    /// - `"GRF"` matches `"getRecentFiles"`
31    /// - `"str_r"` matches `"str_replace"` (boundary after `_`)
32    ///
33    /// See [`Self::symbol_match`] for the variant that also accepts
34    /// substrings, which is appropriate for workspace symbol search but not
35    /// for completions.
36    pub(crate) fn camel_match(&self, candidate: &str) -> bool {
37        if self.lower.is_empty() {
38            return true;
39        }
40        // Rule 1: plain prefix
41        if starts_with_at(candidate, &self.lower, 0) {
42            return true;
43        }
44        // Rule 2: camel / underscore abbreviation
45        self.camel_abbrev(candidate)
46    }
47
48    /// Like [`Self::camel_match`] but also accepts contiguous substrings as a
49    /// fallback.  Use for workspace symbol search (where "Controller" should
50    /// match "BlogController") but NOT for completions (substring produces too
51    /// many hits).
52    pub(crate) fn symbol_match(&self, candidate: &str) -> bool {
53        if self.camel_match(candidate) {
54            return true;
55        }
56        // Substring fallback: "Controller" matches "BlogController".
57        // char_indices keeps the scan allocation-free; candidates are short
58        // identifiers, so the O(n·m) window walk beats two String allocations.
59        candidate
60            .char_indices()
61            .any(|(i, _)| starts_with_at(candidate, &self.lower, i))
62    }
63
64    /// Core camel/underscore abbreviation check against the lowercased query.
65    fn camel_abbrev(&self, candidate: &str) -> bool {
66        let mut query = self.lower.chars().peekable();
67        let mut prev: Option<char> = None;
68        for cc in candidate.chars() {
69            let Some(&qc) = query.peek() else {
70                return true;
71            };
72            // A "word boundary" in the candidate is: position 0, after '_' or
73            // '$', or an uppercase letter after a lowercase letter (camelCase
74            // transition).
75            let is_boundary = match prev {
76                None => true,
77                Some('_') | Some('$') => true,
78                Some(p) => cc.is_uppercase() && p.is_lowercase(),
79            };
80            if is_boundary && cc.to_lowercase().next() == Some(qc) {
81                query.next();
82            }
83            prev = Some(cc);
84        }
85        query.peek().is_none()
86    }
87}
88
/// Allocation-free, case-insensitive prefix test.
///
/// Returns `true` when the text of `candidate` beginning at byte offset `at`
/// starts with `query_lower`. The candidate is Unicode-lowercased character by
/// character on the fly; `query_lower` must already be lowercase. `at` has to
/// lie on a character boundary of `candidate` (slicing panics otherwise).
fn starts_with_at(candidate: &str, query_lower: &str, at: usize) -> bool {
    let mut folded = candidate[at..].chars().flat_map(char::to_lowercase);
    // Every query character must be matched by the next lowercased candidate
    // character; running out of candidate yields `None` and fails the match.
    query_lower
        .chars()
        .all(|expected| folded.next() == Some(expected))
}
102
103/// One-shot wrapper around [`FuzzyQuery::camel_match`]. Prefer building a
104/// [`FuzzyQuery`] outside the loop when matching many candidates.
105pub(crate) fn fuzzy_camel_match(query: &str, candidate: &str) -> bool {
106    FuzzyQuery::new(query).camel_match(candidate)
107}
108
109/// One-shot wrapper around [`FuzzyQuery::symbol_match`]. Prefer building a
110/// [`FuzzyQuery`] outside the loop when matching many candidates.
111pub(crate) fn fuzzy_symbol_match(query: &str, candidate: &str) -> bool {
112    FuzzyQuery::new(query).symbol_match(candidate)
113}
114
/// Build a sort key that ranks prefix matches ahead of every other match.
///
/// The key is the lowercased `label` with a one-character rank in front:
/// `'0'` when the lowercased label starts with the lowercased `query`, `'1'`
/// otherwise. Keys compare as plain strings, so a smaller key means higher
/// priority. Intended for labels that already passed [`fuzzy_camel_match`],
/// in which case the `'1'` rank corresponds to abbreviation-only matches.
pub(crate) fn camel_sort_key(query: &str, label: &str) -> String {
    let needle = query.to_lowercase();
    let lowered = label.to_lowercase();
    let rank = if lowered.starts_with(&needle) {
        '0'
    } else {
        '1'
    };
    let mut key = String::with_capacity(lowered.len() + 1);
    key.push(rank);
    key.push_str(&lowered);
    key
}
127
/// Report whether `name` appears in the table of known PHP built-in
/// functions.
///
/// The comparison is exact (case-sensitive, no trimming). Hover uses this to
/// decide whether a php.net link can be generated.
pub(crate) fn is_php_builtin(name: &str) -> bool {
    // Must stay in ascending byte order: the lookup below is a binary search.
    const BUILTINS: &[&str] = &[
        "abs",
        "acos",
        "addslashes",
        "array_chunk",
        "array_combine",
        "array_diff",
        "array_fill",
        "array_fill_keys",
        "array_filter",
        "array_flip",
        "array_intersect",
        "array_key_exists",
        "array_keys",
        "array_map",
        "array_merge",
        "array_pad",
        "array_pop",
        "array_push",
        "array_reduce",
        "array_replace",
        "array_reverse",
        "array_search",
        "array_shift",
        "array_slice",
        "array_splice",
        "array_unique",
        "array_unshift",
        "array_values",
        "array_walk",
        "array_walk_recursive",
        "arsort",
        "asin",
        "asort",
        "atan",
        "atan2",
        "base64_decode",
        "base64_encode",
        "basename",
        "boolval",
        "call_user_func",
        "call_user_func_array",
        "ceil",
        "checkdate",
        "class_exists",
        "closedir",
        "compact",
        "constant",
        "copy",
        "cos",
        "date",
        "date_add",
        "date_create",
        "date_diff",
        "date_format",
        "date_sub",
        "define",
        "defined",
        "die",
        "dirname",
        "empty",
        "exit",
        "exp",
        "explode",
        "extract",
        "fclose",
        "feof",
        "fgets",
        "file_exists",
        "file_get_contents",
        "file_put_contents",
        "floatval",
        "floor",
        "fmod",
        "fopen",
        "fputs",
        "fread",
        "fseek",
        "ftell",
        "function_exists",
        "get_class",
        "get_parent_class",
        "gettype",
        "glob",
        "hash",
        "header",
        "headers_sent",
        "htmlentities",
        "htmlspecialchars",
        "http_build_query",
        "implode",
        "in_array",
        "intdiv",
        "interface_exists",
        "intval",
        "is_a",
        "is_array",
        "is_bool",
        "is_callable",
        "is_dir",
        "is_double",
        "is_file",
        "is_finite",
        "is_float",
        "is_infinite",
        "is_int",
        "is_integer",
        "is_long",
        "is_nan",
        "is_null",
        "is_numeric",
        "is_object",
        "is_readable",
        "is_string",
        "is_subclass_of",
        "is_writable",
        "isset",
        "join",
        "json_decode",
        "json_encode",
        "krsort",
        "ksort",
        "lcfirst",
        "list",
        "log",
        "ltrim",
        "max",
        "md5",
        "method_exists",
        "microtime",
        "min",
        "mkdir",
        "mktime",
        "mt_rand",
        "nl2br",
        "number_format",
        "ob_end_clean",
        "ob_get_clean",
        "ob_start",
        "opendir",
        "parse_str",
        "parse_url",
        "pathinfo",
        "pi",
        "pow",
        "preg_match",
        "preg_match_all",
        "preg_quote",
        "preg_replace",
        "preg_split",
        "print_r",
        "printf",
        "property_exists",
        "rand",
        "random_int",
        "rawurldecode",
        "rawurlencode",
        "readdir",
        "realpath",
        "rename",
        "rewind",
        "rmdir",
        "round",
        "rsort",
        "rtrim",
        "scandir",
        "serialize",
        "session_destroy",
        "session_start",
        "setcookie",
        "settype",
        "sha1",
        "sin",
        "sleep",
        "sort",
        "sprintf",
        "sqrt",
        "str_contains",
        "str_ends_with",
        "str_pad",
        "str_repeat",
        "str_replace",
        "str_split",
        "str_starts_with",
        "str_word_count",
        "strcasecmp",
        "strcmp",
        "strip_tags",
        "stripslashes",
        "stristr",
        "strlen",
        "strncasecmp",
        "strncmp",
        "strpos",
        "strrpos",
        "strstr",
        "strtolower",
        "strtotime",
        "strtoupper",
        "strval",
        "substr",
        "substr_count",
        "substr_replace",
        "tan",
        "time",
        "trim",
        "uasort",
        "ucfirst",
        "ucwords",
        "uksort",
        "unlink",
        "unserialize",
        "unset",
        "urldecode",
        "urlencode",
        "usleep",
        "usort",
        "var_dump",
        "var_export",
        "vsprintf",
    ];
    // Debug builds verify the ordering invariant by comparing each entry with
    // its successor; an out-of-order entry would make the search miss names.
    debug_assert!(
        BUILTINS
            .iter()
            .zip(BUILTINS.iter().skip(1))
            .all(|(earlier, later)| earlier <= later),
        "BUILTINS must be sorted for binary_search"
    );
    BUILTINS
        .binary_search_by(|entry| entry.cmp(&name))
        .is_ok()
}
359
/// Produce the php.net manual URL for the built-in function `name`.
///
/// The manual's URL slugs use dashes where the function name has
/// underscores, so every `_` in `name` is emitted as `-`.
pub(crate) fn php_doc_url(name: &str) -> String {
    let mut url = String::from("https://www.php.net/function.");
    url.extend(name.chars().map(|c| if c == '_' { '-' } else { c }));
    url
}
366
/// Translate a UTF-16 code-unit offset into the matching UTF-8 byte offset
/// within `s`.
///
/// LSP columns count UTF-16 code units while Rust strings are indexed by
/// UTF-8 bytes. The result is the byte index of the first character whose
/// preceding text spans at least `utf16_offset` code units, so it always lies
/// on a character boundary (an offset pointing into the middle of a surrogate
/// pair resolves to the character after the pair). Offsets beyond the end of
/// the string yield `s.len()`.
pub(crate) fn utf16_offset_to_byte(s: &str, utf16_offset: usize) -> usize {
    // Running total of UTF-16 units contributed by characters already passed.
    let mut units_seen = 0usize;
    s.char_indices()
        .find(|&(_, ch)| {
            let reached = units_seen >= utf16_offset;
            units_seen += ch.len_utf16();
            reached
        })
        .map_or(s.len(), |(byte_idx, _)| byte_idx)
}
383
384/// Convert an LSP `Position` (line + UTF-16 character column) into a byte
385/// offset in `text`. Out-of-range lines clamp to `text.len()`; out-of-range
386/// columns clamp to the end of the line (before its `\n`). Used by
387/// incremental text sync.
388pub(crate) fn position_to_byte_offset(text: &str, pos: Position) -> usize {
389    let mut line_start = 0usize;
390    for _ in 0..pos.line {
391        match text[line_start..].find('\n') {
392            Some(i) => line_start += i + 1,
393            None => return text.len(),
394        }
395    }
396    let line_end = text[line_start..]
397        .find('\n')
398        .map_or(text.len(), |i| line_start + i);
399    line_start + utf16_offset_to_byte(&text[line_start..line_end], pos.character as usize)
400}
401
402/// Apply one LSP incremental content change (replace `range` with `new_text`)
403/// to `text`. A malformed range whose end precedes its start degrades to an
404/// insertion at `start`.
405pub(crate) fn apply_content_change(text: &mut String, range: Range, new_text: &str) {
406    let start = position_to_byte_offset(text, range.start);
407    let end = position_to_byte_offset(text, range.end).max(start);
408    text.replace_range(start..end, new_text);
409}
410
/// Convert a UTF-8 byte offset into a UTF-16 code unit count.
///
/// LSP `Position.character` is measured in UTF-16 code units.  Given a string
/// and a byte offset into it, this returns how many UTF-16 units precede that
/// offset — which is the correct LSP character value.
///
/// The offset is made safe before use: values past the end of `s` clamp to
/// `s.len()`, and an offset that falls inside a multi-byte character is
/// rounded down to the start of that character, so only whole characters
/// before the offset are counted. This function therefore never panics.
pub(crate) fn byte_to_utf16(s: &str, byte_offset: usize) -> u32 {
    let mut end = byte_offset.min(s.len());
    // Slicing at a non-boundary would panic; step back to the nearest
    // boundary. Terminates because offset 0 is always a boundary.
    while !s.is_char_boundary(end) {
        end -= 1;
    }
    s[..end].chars().map(|c| c.len_utf16() as u32).sum()
}
422
/// Split a parameter list string on commas, respecting bracket nesting and
/// quoted string literals.
///
/// This avoids splitting inside default values like `array $x = [1, 2, 3]`
/// or `string $sep = ','`. Inside a single- or double-quoted literal, commas
/// and brackets are treated as ordinary text, and a backslash escapes the
/// character that follows it (so `'it\'s'` stays one literal). An unterminated
/// literal swallows the rest of the input into the final parameter.
///
/// Each returned slice is trimmed of leading/trailing whitespace. A trailing
/// segment that is empty after trimming is omitted, so `""` yields no
/// parameters and a trailing comma adds none.
pub(crate) fn split_params(s: &str) -> Vec<&str> {
    let mut parts = Vec::new();
    let mut depth = 0i32;
    let mut start = 0;
    // `Some(q)` while scanning inside a literal opened by quote character `q`.
    let mut quote: Option<char> = None;
    // Set right after a backslash inside a literal; the next char is literal.
    let mut escaped = false;
    for (i, ch) in s.char_indices() {
        if let Some(open) = quote {
            if escaped {
                escaped = false;
            } else if ch == '\\' {
                escaped = true;
            } else if ch == open {
                quote = None;
            }
            continue;
        }
        match ch {
            '\'' | '"' => quote = Some(ch),
            '(' | '[' | '{' => depth += 1,
            ')' | ']' | '}' => depth -= 1,
            ',' if depth == 0 => {
                parts.push(s[start..i].trim());
                // ',' is a single byte, so the next segment starts at i + 1.
                start = i + 1;
            }
            _ => {}
        }
    }
    let last = s[start..].trim();
    if !last.is_empty() {
        parts.push(last);
    }
    parts
}
448
/// Locate the identifier touching the cursor on `line`.
///
/// `char_offset` is the cursor column in UTF-16 code units. The result is a
/// half-open `(start, end)` range expressed in *character* indices (not bytes,
/// not UTF-16 units) covering the maximal run of word characters — letters,
/// digits, `_`, `$` and `\` — that contains or is adjacent to the cursor.
/// Returns `None` when the column lies beyond the end of the line or when no
/// word character touches the cursor.
fn char_range_for_word(line: &str, char_offset: usize) -> Option<(usize, usize)> {
    let chars: Vec<char> = line.chars().collect();

    // Advance character by character until the UTF-16 column is covered.
    let mut units = 0usize;
    let mut cursor = 0usize;
    while cursor < chars.len() && units < char_offset {
        units += chars[cursor].len_utf16();
        cursor += 1;
    }
    // Still short after consuming every character: the column is off the line.
    if units < char_offset {
        return None;
    }

    let is_word = |c: char| c.is_alphanumeric() || matches!(c, '_' | '$' | '\\');
    // Grow the range outwards from the cursor in both directions.
    let left = cursor
        - chars[..cursor]
            .iter()
            .rev()
            .take_while(|&&c| is_word(c))
            .count();
    let right = cursor + chars[cursor..].iter().take_while(|&&c| is_word(c)).count();
    (left != right).then_some((left, right))
}
480
481pub(crate) fn word_at_position(source: &str, position: Position) -> Option<String> {
482    // Use split('\n') rather than lines() so that a trailing newline produces a
483    // final empty entry — lines() silently drops it, causing word_at_position to return
484    // None for any cursor on the last line of a normally-saved PHP file.
485    let raw = source.split('\n').nth(position.line as usize)?;
486    let line = raw.strip_suffix('\r').unwrap_or(raw);
487    let char_offset = position.character as usize;
488    let chars: Vec<char> = line.chars().collect();
489    let (left, right) = char_range_for_word(line, char_offset)?;
490    let word: String = chars[left..right].iter().collect();
491    if word.is_empty() { None } else { Some(word) }
492}
493
494/// Return the LSP `Range` of the word (identifier) under the cursor.
495/// Uses the same word-boundary rules as `word_at_position`.
496pub(crate) fn word_range_at(source: &str, position: Position) -> Option<Range> {
497    let raw = source.split('\n').nth(position.line as usize)?;
498    let line = raw.strip_suffix('\r').unwrap_or(raw);
499    let char_offset = position.character as usize;
500    let chars: Vec<char> = line.chars().collect();
501    let (left, right) = char_range_for_word(line, char_offset)?;
502    let start_col = chars[..left]
503        .iter()
504        .map(|c| c.len_utf16() as u32)
505        .sum::<u32>();
506    let end_col = chars[..right]
507        .iter()
508        .map(|c| c.len_utf16() as u32)
509        .sum::<u32>();
510    Some(Range {
511        start: Position {
512            line: position.line,
513            character: start_col,
514        },
515        end: Position {
516            line: position.line,
517            character: end_col,
518        },
519    })
520}
521
522/// Extract the source text covered by an LSP `Range`.
523///
524/// `Range` positions use UTF-16 code-unit offsets; this function converts them
525/// correctly before slicing the UTF-8 source string.
526pub(crate) fn selected_text_range(source: &str, range: tower_lsp::lsp_types::Range) -> String {
527    let lines: Vec<&str> = source.lines().collect();
528    if range.start.line == range.end.line {
529        let line = match lines.get(range.start.line as usize) {
530            Some(l) => l,
531            None => return String::new(),
532        };
533        let start = utf16_offset_to_byte(line, range.start.character as usize);
534        let end = utf16_offset_to_byte(line, range.end.character as usize);
535        line[start..end].to_string()
536    } else {
537        let mut result = String::new();
538        for i in range.start.line..=range.end.line {
539            let line = match lines.get(i as usize) {
540                Some(l) => *l,
541                None => break,
542            };
543            if i == range.start.line {
544                let start = utf16_offset_to_byte(line, range.start.character as usize);
545                result.push_str(&line[start..]);
546            } else if i == range.end.line {
547                let end = utf16_offset_to_byte(line, range.end.character as usize);
548                result.push_str(&line[..end]);
549            } else {
550                result.push_str(line);
551            }
552            if i < range.end.line {
553                result.push('\n');
554            }
555        }
556        result
557    }
558}
559
/// Measure `s` in UTF-16 code units.
///
/// LSP `Position.character` values are expressed in UTF-16 units, so this is
/// the width to use when computing columns.
pub fn utf16_code_units(s: &str) -> u32 {
    s.chars()
        .fold(0u32, |units, c| units + c.len_utf16() as u32)
}
565
/// Drop one leading `$` from a variable name; names without the sigil are
/// returned unchanged.
///
/// Variables appear as `$var` in source but are stored as `var` in symbol
/// tables, so lookups need the bare form.
pub fn strip_variable_sigil(word: &str) -> &str {
    match word.strip_prefix('$') {
        Some(bare) => bare,
        None => word,
    }
}
571
/// Extract the final segment of a PHP fully-qualified name — the text after
/// the last `\`, or the whole input when it contains no `\`.
///
/// `"\App\Service\Foo"` → `"Foo"`, `"Foo"` → `"Foo"`, `""` → `""`.
pub(crate) fn fqn_short_name(fqn: &str) -> &str {
    match fqn.rfind('\\') {
        // `\` is one byte, so the short name starts right after it.
        Some(separator) => &fqn[separator + 1..],
        None => fqn,
    }
}
577
578/// Build a zero-width LSP `Range` at the start of `line` (character 0).
579/// Used for index-backed features where only line-level precision is available.
580pub(crate) fn zero_width_range(line: u32) -> Range {
581    let pos = Position { line, character: 0 };
582    Range {
583        start: pos,
584        end: pos,
585    }
586}
587
588/// Build a `Location` pointing to the start of `line` in `uri` (character 0).
589pub(crate) fn zero_width_location(uri: &Url, line: u32) -> Location {
590    Location {
591        uri: uri.clone(),
592        range: zero_width_range(line),
593    }
594}
595
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for an LSP position.
    fn at(line: u32, character: u32) -> Position {
        Position { line, character }
    }

    /// Shorthand for an LSP range given as (start line, start col, end line,
    /// end col).
    fn span(sl: u32, sc: u32, el: u32, ec: u32) -> Range {
        Range {
            start: at(sl, sc),
            end: at(el, ec),
        }
    }

    #[test]
    fn byte_to_utf16_ascii() {
        assert_eq!(byte_to_utf16("hello", 3), 3);
    }

    #[test]
    fn byte_to_utf16_multibyte_bmp() {
        // U+00E9 "é" occupies two UTF-8 bytes but one UTF-16 code unit.
        let text = "café";
        // (byte offset, expected UTF-16 units): start, after "caf", whole string.
        for (byte_offset, expected) in [(0, 0), (3, 3), (5, 4)] {
            assert_eq!(byte_to_utf16(text, byte_offset), expected);
        }
    }

    #[test]
    fn byte_to_utf16_surrogate_pair() {
        // U+1F600 "😀" occupies four UTF-8 bytes and a UTF-16 surrogate pair.
        let text = "a😀b";
        // (byte offset, expected UTF-16 units): after "a", after "a😀", whole string.
        for (byte_offset, expected) in [(1, 1), (5, 3), (6, 4)] {
            assert_eq!(byte_to_utf16(text, byte_offset), expected);
        }
    }

    #[test]
    fn byte_to_utf16_past_end_clamps() {
        assert_eq!(byte_to_utf16("hi", 100), 2);
    }

    #[test]
    fn utf16_offset_to_byte_ascii() {
        assert_eq!(utf16_offset_to_byte("hello", 3), 3);
    }

    #[test]
    fn utf16_offset_to_byte_surrogate_pair() {
        // In "a😀b", UTF-16 offset 1 is the emoji's first byte (1) and offset 3
        // is the byte right after the emoji (5).
        let text = "a😀b";
        for (utf16_offset, expected) in [(1, 1), (3, 5)] {
            assert_eq!(utf16_offset_to_byte(text, utf16_offset), expected);
        }
    }

    #[test]
    fn position_to_byte_offset_basic() {
        let text = "<?php\necho 1;\n";
        for (pos, expected) in [
            (at(0, 0), 0),
            (at(0, 5), 5),
            (at(1, 0), 6),
            (at(1, 4), 10),
            // A column beyond the line stops just before the newline.
            (at(0, 99), 5),
            // A line beyond the text resolves to the text length.
            (at(9, 0), text.len()),
        ] {
            assert_eq!(position_to_byte_offset(text, pos), expected);
        }
    }

    #[test]
    fn position_to_byte_offset_multibyte() {
        // The emoji is a single char: 4 UTF-8 bytes, 2 UTF-16 units.
        let text = "a😀b\nx";
        for (pos, expected) in [(at(0, 1), 1), (at(0, 3), 5), (at(1, 0), 7), (at(1, 1), 8)] {
            assert_eq!(position_to_byte_offset(text, pos), expected);
        }
    }

    #[test]
    fn apply_content_change_replaces_inserts_deletes() {
        // (initial text, range, replacement, expected text)
        let cases = [
            // Replacement within a line.
            (
                "<?php\necho one;\n",
                span(1, 5, 1, 8),
                "two",
                "<?php\necho two;\n",
            ),
            // Pure insertion (empty range).
            ("ab\ncd\n", span(1, 1, 1, 1), "X", "ab\ncXd\n"),
            // Deletion across a newline (range ends at the next line's start).
            ("ab\ncd\nef\n", span(0, 2, 1, 0), "", "abcd\nef\n"),
            // Malformed range (end before start) degrades to an insertion.
            ("abc", span(0, 2, 0, 1), "X", "abXc"),
        ];
        for (initial, range, replacement, expected) in cases {
            let mut text = String::from(initial);
            apply_content_change(&mut text, range, replacement);
            assert_eq!(text, expected);
        }
    }

    #[test]
    fn byte_to_utf16_and_back_roundtrip() {
        let text = "café 😀 world";
        // Every character boundary must survive bytes → UTF-16 → bytes.
        for (byte_idx, _) in text.char_indices() {
            let units = byte_to_utf16(text, byte_idx) as usize;
            assert_eq!(utf16_offset_to_byte(text, units), byte_idx);
        }
    }

    #[test]
    fn word_at_last_line_with_trailing_newline() {
        // Saved files normally end in a newline. `lines()` would drop the
        // empty entry after it, which used to make lookups on the last line
        // come back empty.
        let src = "<?php\necho strlen($x);\n";
        // Cursor inside "strlen" on line 1.
        let found = word_at_position(src, at(1, 6));
        assert_eq!(
            found.as_deref(),
            Some("strlen"),
            "word_at_position must work on lines before the trailing newline"
        );
        // The empty line created by the trailing newline: the result is
        // expected to be None, but the requirement here is only "no panic".
        let _ = word_at_position(src, at(2, 0));
    }

    #[test]
    fn word_at_crlf_line_endings() {
        let src = "<?php\r\nfunction foo() {}\r\n";
        // Cursor at the start of "foo".
        let found = word_at_position(src, at(1, 9));
        assert_eq!(
            found.as_deref(),
            Some("foo"),
            "word_at_position must handle CRLF line endings"
        );
    }

    #[test]
    fn is_php_builtin_asin_recognized() {
        // Regression: "asin" was once out of order in BUILTINS, so the binary
        // search missed it. The neighbours checked here sit at similarly
        // delicate positions in the ordering.
        let expectations = [
            ("asin", "asin must be recognised as a PHP builtin"),
            ("atan", "atan must be recognised as a PHP builtin"),
            ("krsort", "krsort must be recognised as a PHP builtin"),
            ("strcasecmp", "strcasecmp must be recognised as a PHP builtin"),
            (
                "strncasecmp",
                "strncasecmp must be recognised as a PHP builtin",
            ),
            ("strip_tags", "strip_tags must be recognised as a PHP builtin"),
        ];
        for (name, message) in expectations {
            assert!(is_php_builtin(name), "{}", message);
        }
    }

    #[test]
    fn fuzzy_camel_match_prefix() {
        for query in ["Blog", "blog"] {
            assert!(fuzzy_camel_match(query, "BlogController"));
        }
    }

    #[test]
    fn fuzzy_camel_match_abbreviation() {
        for (query, candidate) in [
            ("BC", "BlogController"),
            ("GRF", "getRecentFiles"),
            // Boundary after '_'.
            ("str_r", "str_replace"),
        ] {
            assert!(fuzzy_camel_match(query, candidate));
        }
    }

    #[test]
    fn fuzzy_camel_match_no_substring() {
        // Completions rely on fuzzy_camel_match rejecting plain substrings.
        for query in ["Controller", "xyz"] {
            assert!(!fuzzy_camel_match(query, "BlogController"));
        }
    }

    #[test]
    fn fuzzy_symbol_match_substring_fallback() {
        for (query, candidate) in [
            // Workspace symbol search accepts substrings…
            ("Controller", "BlogController"),
            ("controller", "BlogController"),
            ("controller", "UserController"),
            // …and still honours prefix and camel abbreviation matches.
            ("Blog", "BlogController"),
            ("BC", "BlogController"),
        ] {
            assert!(fuzzy_symbol_match(query, candidate));
        }
        // Unrelated text still fails.
        assert!(!fuzzy_symbol_match("xyz", "BlogController"));
    }
}
805}