use tower_lsp::lsp_types::Position;
/// Converts an LSP-style `position` (zero-based line, UTF-16 column) into a
/// byte offset within `source`.
///
/// Lines are separated by `'\n'`; a trailing `'\r'` on the target line is not
/// counted as part of the line's content.
///
/// * If `position.character` lies beyond the end of the line, the offset of the
///   end of the line's content is returned.
/// * If `position.character` falls inside a character that needs two UTF-16
///   units, the offset just after that character is returned.
/// * If `position.line` lies beyond the last line, `source.len()` is returned,
///   so the result is always a valid index into `source`.
pub(crate) fn utf16_pos_to_byte(source: &str, position: Position) -> usize {
    let target_line = position.line as usize;
    // Byte offset at which the line currently being inspected starts.
    let mut line_start = 0usize;

    for (line_idx, line) in source.split('\n').enumerate() {
        if line_idx == target_line {
            let content = line.strip_suffix('\r').unwrap_or(line);
            let mut col_utf16 = 0u32;
            let mut col_bytes = 0usize;
            for ch in content.chars() {
                if col_utf16 >= position.character {
                    break;
                }
                col_utf16 += ch.len_utf16() as u32;
                col_bytes += ch.len_utf8();
            }
            return line_start + col_bytes;
        }
        // `+ 1` accounts for the '\n' that `split` removed.
        line_start += line.len() + 1;
    }

    // The requested line does not exist. The running total above would be
    // `source.len() + 1` here (one phantom '\n' after the final segment), which
    // is out of bounds, so clamp to the end of the text instead.
    source.len()
}
/// Reports whether `query` matches `candidate` either as a case-insensitive
/// prefix or as an abbreviation built from word-boundary characters.
///
/// A boundary character is the first character of `candidate`, any character
/// that directly follows `'_'`, or an uppercase character directly preceded by
/// a lowercase one. The lowercased query characters must match boundary
/// characters in order, case-insensitively. An empty `query` matches
/// everything.
pub(crate) fn fuzzy_camel_match(query: &str, candidate: &str) -> bool {
    if query.is_empty() {
        return true;
    }

    let needle = query.to_lowercase();
    if candidate.to_lowercase().starts_with(&needle) {
        return true;
    }

    // Query characters that still have to be matched, in order.
    let mut pending = needle.chars().peekable();
    // The candidate character seen on the previous iteration, if any.
    let mut previous: Option<char> = None;

    for current in candidate.chars() {
        let wanted = match pending.peek() {
            Some(&c) => c,
            None => break,
        };
        let at_boundary = match previous {
            None => true,
            Some(p) => p == '_' || (current.is_uppercase() && p.is_lowercase()),
        };
        if at_boundary && current.to_lowercase().next() == Some(wanted) {
            pending.next();
        }
        previous = Some(current);
    }

    pending.peek().is_none()
}
/// Builds a sort key for `label` relative to `query`.
///
/// The key is the lowercased label prefixed with `'0'` when the lowercased
/// label starts with the lowercased query, and with `'1'` otherwise, so prefix
/// matches order before all other labels.
pub(crate) fn camel_sort_key(query: &str, label: &str) -> String {
    let folded_label = label.to_lowercase();
    let bucket = if folded_label.starts_with(&query.to_lowercase()) {
        '0'
    } else {
        '1'
    };

    let mut key = String::with_capacity(folded_label.len() + 1);
    key.push(bucket);
    key.push_str(&folded_label);
    key
}
/// Reports whether `name` is one of the PHP built-in names in the table below.
///
/// The comparison is exact and case-sensitive. The lookup is a binary search,
/// so the table must stay sorted in byte order (note that `'_'` sorts before
/// every lowercase letter, and digits sort before `'_'`).
pub(crate) fn is_php_builtin(name: &str) -> bool {
    // Entries are grouped by initial letter purely for readability; the
    // overall sequence is what the binary search relies on.
    #[rustfmt::skip]
    const BUILTINS: &[&str] = &[
        // a
        "abs", "acos", "addslashes",
        "array_chunk", "array_combine", "array_diff", "array_fill", "array_fill_keys",
        "array_filter", "array_flip", "array_intersect", "array_key_exists", "array_keys",
        "array_map", "array_merge", "array_pad", "array_pop", "array_push",
        "array_reduce", "array_replace", "array_reverse", "array_search", "array_shift",
        "array_slice", "array_splice", "array_unique", "array_unshift", "array_values",
        "array_walk", "array_walk_recursive",
        "arsort", "asin", "asort", "atan", "atan2",
        // b
        "base64_decode", "base64_encode", "basename", "boolval",
        // c
        "call_user_func", "call_user_func_array", "ceil", "checkdate", "class_exists",
        "closedir", "compact", "constant", "copy", "cos",
        // d
        "date", "date_add", "date_create", "date_diff", "date_format", "date_sub",
        "define", "defined", "die", "dirname",
        // e
        "empty", "exit", "exp", "explode", "extract",
        // f
        "fclose", "feof", "fgets", "file_exists", "file_get_contents",
        "file_put_contents", "floatval", "floor", "fmod", "fopen",
        "fputs", "fread", "fseek", "ftell", "function_exists",
        // g
        "get_class", "get_parent_class", "gettype", "glob",
        // h
        "hash", "header", "headers_sent", "htmlentities", "htmlspecialchars",
        "http_build_query",
        // i
        "implode", "in_array", "intdiv", "interface_exists", "intval",
        "is_a", "is_array", "is_bool", "is_callable", "is_dir", "is_double",
        "is_file", "is_finite", "is_float", "is_infinite", "is_int", "is_integer",
        "is_long", "is_nan", "is_null", "is_numeric", "is_object", "is_readable",
        "is_string", "is_subclass_of", "is_writable", "isset",
        // j
        "join", "json_decode", "json_encode",
        // k
        "krsort", "ksort",
        // l
        "lcfirst", "list", "log", "ltrim",
        // m
        "max", "md5", "method_exists", "microtime", "min", "mkdir", "mktime", "mt_rand",
        // n
        "nl2br", "number_format",
        // o
        "ob_end_clean", "ob_get_clean", "ob_start", "opendir",
        // p
        "parse_str", "parse_url", "pathinfo", "pi", "pow",
        "preg_match", "preg_match_all", "preg_quote", "preg_replace", "preg_split",
        "print_r", "printf", "property_exists",
        // r
        "rand", "random_int", "rawurldecode", "rawurlencode", "readdir", "realpath",
        "rename", "rewind", "rmdir", "round", "rsort", "rtrim",
        // s
        "scandir", "serialize", "session_destroy", "session_start", "setcookie",
        "settype", "sha1", "sin", "sleep", "sort", "sprintf", "sqrt",
        "str_contains", "str_ends_with", "str_pad", "str_repeat", "str_replace",
        "str_split", "str_starts_with", "str_word_count",
        "strcasecmp", "strcmp", "strip_tags", "stripslashes", "stristr", "strlen",
        "strncasecmp", "strncmp", "strpos", "strrpos", "strstr",
        "strtolower", "strtotime", "strtoupper", "strval",
        "substr", "substr_count", "substr_replace",
        // t
        "tan", "time", "trim",
        // u
        "uasort", "ucfirst", "ucwords", "uksort", "unlink", "unserialize", "unset",
        "urldecode", "urlencode", "usleep", "usort",
        // v
        "var_dump", "var_export", "vsprintf",
    ];

    // Debug builds verify the ordering invariant on every call so that a badly
    // placed new entry is caught by the test suite.
    debug_assert!(
        BUILTINS.windows(2).all(|pair| pair[0] <= pair[1]),
        "BUILTINS must be sorted for binary_search"
    );

    matches!(BUILTINS.binary_search(&name), Ok(_))
}
/// Builds the php.net manual URL for the function `name`.
///
/// Every underscore in `name` is replaced with a hyphen before the name is
/// appended to the `function.` URL prefix.
pub(crate) fn php_doc_url(name: &str) -> String {
    let slug: String = name
        .chars()
        .map(|c| if c == '_' { '-' } else { c })
        .collect();
    format!("https://www.php.net/function.{}", slug)
}
/// Converts a UTF-16 code-unit offset within `s` into a byte offset.
///
/// Returns the byte index of the first character that starts at or after
/// `utf16_offset` UTF-16 units. An offset that falls inside a two-unit
/// character therefore resolves to the byte just after that character, and an
/// offset beyond the end of the string resolves to `s.len()`.
pub(crate) fn utf16_offset_to_byte(s: &str, utf16_offset: usize) -> usize {
    // UTF-16 units occupied by the characters before the one being examined.
    let mut units_before = 0usize;

    s.char_indices()
        .find(|&(_, ch)| {
            let reached = units_before >= utf16_offset;
            units_before += ch.len_utf16();
            reached
        })
        .map_or(s.len(), |(byte_idx, _)| byte_idx)
}
/// Converts a byte offset within `s` into the equivalent number of UTF-16 code
/// units.
///
/// `byte_offset` is clamped to `s.len()`. If it falls inside a multi-byte
/// character it is rounded down to the start of that character, so the
/// function never panics on an arbitrary offset.
pub(crate) fn byte_to_utf16(s: &str, byte_offset: usize) -> u32 {
    let mut end = byte_offset.min(s.len());
    // Slicing at a non-boundary index panics; step back to the nearest
    // boundary. Index 0 is always a boundary, so this terminates.
    while !s.is_char_boundary(end) {
        end -= 1;
    }

    s[..end].chars().map(|c| c.len_utf16() as u32).sum()
}
/// Splits a parameter list on its top-level commas and returns the trimmed
/// pieces as slices of `s`.
///
/// A comma is a separator only when it is outside every `()`, `[]`, `{}` pair
/// and outside every single- or double-quoted string literal. Inside a string
/// literal a backslash escapes the following character, so an escaped quote
/// does not end the literal.
///
/// Pieces between two separators are kept even when empty (preserving the
/// positional index of later parameters); an empty piece after the final
/// separator is dropped. A closing bracket without a matching opener is
/// ignored for nesting purposes, so it cannot suppress later splits.
pub(crate) fn split_params(s: &str) -> Vec<&str> {
    let mut parts = Vec::new();
    let mut depth = 0u32;
    let mut start = 0;
    // The quote character that opened the string literal we are inside, if any.
    let mut open_quote: Option<char> = None;
    // True when the previous character inside a literal was an unescaped '\'.
    let mut escaped = false;

    for (i, ch) in s.char_indices() {
        if let Some(quote) = open_quote {
            if escaped {
                escaped = false;
            } else if ch == '\\' {
                escaped = true;
            } else if ch == quote {
                open_quote = None;
            }
            continue;
        }

        match ch {
            '\'' | '"' => open_quote = Some(ch),
            '(' | '[' | '{' => depth += 1,
            // Saturate so that a stray closer does not push the depth below
            // zero and hide every subsequent top-level comma.
            ')' | ']' | '}' => depth = depth.saturating_sub(1),
            ',' if depth == 0 => {
                parts.push(s[start..i].trim());
                start = i + 1;
            }
            _ => {}
        }
    }

    let last = s[start..].trim();
    if !last.is_empty() {
        parts.push(last);
    }
    parts
}
/// Returns the identifier-like word touching `position` in `source`, if any.
///
/// `position.line` selects a `'\n'`-separated line (a trailing `'\r'` is
/// ignored) and `position.character` is a UTF-16 column within it. A word is a
/// maximal run of alphanumeric characters, `'_'`, `'$'` and `'\\'`; the run may
/// extend to the left of the column, to the right of it, or both.
///
/// Returns `None` when the line does not exist, when the column lies beyond
/// the end of the line, or when no word character is adjacent to the column.
pub(crate) fn word_at(source: &str, position: Position) -> Option<String> {
    fn is_word_char(c: char) -> bool {
        c.is_alphanumeric() || c == '_' || c == '$' || c == '\\'
    }

    let text = source.split('\n').nth(position.line as usize)?;
    let text = text.strip_suffix('\r').unwrap_or(text);
    let glyphs: Vec<char> = text.chars().collect();
    let target_units = position.character as usize;

    // Advance through the line until `target_units` UTF-16 units are covered;
    // `cursor` ends up as the index of the character at the requested column.
    let mut units = 0usize;
    let mut cursor = 0usize;
    while cursor < glyphs.len() && units < target_units {
        units += glyphs[cursor].len_utf16();
        cursor += 1;
    }
    if units < target_units {
        // The whole line was consumed without reaching the column.
        return None;
    }

    // Grow the span outwards from the cursor to the nearest non-word
    // character (or the line edge) on each side.
    let left = glyphs[..cursor]
        .iter()
        .rposition(|&c| !is_word_char(c))
        .map_or(0, |idx| idx + 1);
    let right = glyphs[cursor..]
        .iter()
        .position(|&c| !is_word_char(c))
        .map_or(glyphs.len(), |offset| cursor + offset);

    if left == right {
        None
    } else {
        Some(glyphs[left..right].iter().collect())
    }
}
/// Extracts the text of `source` covered by `range`.
///
/// Line numbers index the lines produced by `str::lines` (so `\n` and `\r\n`
/// terminators are stripped), and the `character` fields are UTF-16 columns
/// converted with `utf16_offset_to_byte`. Lines of a multi-line selection are
/// joined with `'\n'`.
///
/// Returns an empty string when the start line does not exist or when the
/// range is inverted (its end precedes its start). If the document ends before
/// `range.end.line`, the selection stops at the last available line.
pub(crate) fn selected_text_range(source: &str, range: tower_lsp::lsp_types::Range) -> String {
    let first_line = range.start.line as usize;
    let last_line = range.end.line as usize;
    let start_col = range.start.character as usize;
    let end_col = range.end.character as usize;

    if first_line == last_line {
        return match source.lines().nth(first_line) {
            Some(line) => {
                let start = utf16_offset_to_byte(line, start_col);
                let end = utf16_offset_to_byte(line, end_col);
                // Checked slicing: an inverted range (start > end) yields an
                // empty selection instead of panicking.
                line.get(start..end).unwrap_or("").to_string()
            }
            None => String::new(),
        };
    }

    let mut result = String::new();
    let selected = source
        .lines()
        .enumerate()
        .skip(first_line)
        .take_while(|&(idx, _)| idx <= last_line);

    for (idx, line) in selected {
        if idx == first_line {
            let start = utf16_offset_to_byte(line, start_col);
            result.push_str(&line[start..]);
        } else if idx == last_line {
            let end = utf16_offset_to_byte(line, end_col);
            result.push_str(&line[..end]);
        } else {
            result.push_str(line);
        }
        if idx < last_line {
            result.push('\n');
        }
    }
    result
}
#[cfg(test)]
mod tests {
    use super::*;

    // --- byte_to_utf16 -----------------------------------------------------

    #[test]
    fn byte_to_utf16_ascii() {
        assert_eq!(byte_to_utf16("hello", 3), 3);
    }

    /// 'é' takes two bytes in UTF-8 but a single UTF-16 unit.
    #[test]
    fn byte_to_utf16_multibyte_bmp() {
        let s = "café";
        assert_eq!(byte_to_utf16(s, 0), 0);
        assert_eq!(byte_to_utf16(s, 3), 3);
        assert_eq!(byte_to_utf16(s, 5), 4);
    }

    /// '😀' takes four bytes in UTF-8 and two UTF-16 units (a surrogate pair).
    #[test]
    fn byte_to_utf16_surrogate_pair() {
        let s = "a😀b";
        assert_eq!(byte_to_utf16(s, 1), 1);
        assert_eq!(byte_to_utf16(s, 5), 3);
        assert_eq!(byte_to_utf16(s, 6), 4);
    }

    #[test]
    fn byte_to_utf16_past_end_clamps() {
        assert_eq!(byte_to_utf16("hi", 100), 2);
    }

    // --- utf16_offset_to_byte ----------------------------------------------

    #[test]
    fn utf16_offset_to_byte_ascii() {
        assert_eq!(utf16_offset_to_byte("hello", 3), 3);
    }

    #[test]
    fn utf16_offset_to_byte_surrogate_pair() {
        let s = "a😀b";
        assert_eq!(utf16_offset_to_byte(s, 1), 1);
        assert_eq!(utf16_offset_to_byte(s, 3), 5);
    }

    /// Converting every character boundary to UTF-16 and back must be lossless.
    #[test]
    fn byte_to_utf16_and_back_roundtrip() {
        let s = "café 😀 world";
        for (byte_idx, _) in s.char_indices() {
            let utf16 = byte_to_utf16(s, byte_idx) as usize;
            assert_eq!(utf16_offset_to_byte(s, utf16), byte_idx);
        }
    }

    // --- word_at -----------------------------------------------------------

    #[test]
    fn word_at_last_line_with_trailing_newline() {
        let src = "<?php\necho strlen($x);\n";

        let pos = Position {
            line: 1,
            character: 6,
        };
        let w = word_at(src, pos);
        assert_eq!(
            w.as_deref(),
            Some("strlen"),
            "word_at must work on lines before the trailing newline"
        );

        // The trailing newline produces one final, empty line. Looking up a
        // word there must neither panic nor invent a word.
        let last_line = Position {
            line: 2,
            character: 0,
        };
        assert_eq!(
            word_at(src, last_line),
            None,
            "the empty line after the trailing newline contains no word"
        );
    }

    #[test]
    fn word_at_crlf_line_endings() {
        let src = "<?php\r\nfunction foo() {}\r\n";
        let pos = Position {
            line: 1,
            character: 9,
        };
        let w = word_at(src, pos);
        assert_eq!(
            w.as_deref(),
            Some("foo"),
            "word_at must handle CRLF line endings"
        );
    }

    // --- is_php_builtin ----------------------------------------------------

    /// Spot-checks entries from several regions of the sorted table, including
    /// names whose position depends on how '_' sorts relative to letters.
    #[test]
    fn is_php_builtin_asin_recognized() {
        assert!(
            is_php_builtin("asin"),
            "asin must be recognised as a PHP builtin"
        );
        assert!(
            is_php_builtin("atan"),
            "atan must be recognised as a PHP builtin"
        );
        assert!(
            is_php_builtin("krsort"),
            "krsort must be recognised as a PHP builtin"
        );
        assert!(
            is_php_builtin("strcasecmp"),
            "strcasecmp must be recognised as a PHP builtin"
        );
        assert!(
            is_php_builtin("strncasecmp"),
            "strncasecmp must be recognised as a PHP builtin"
        );
        assert!(
            is_php_builtin("strip_tags"),
            "strip_tags must be recognised as a PHP builtin"
        );
    }
}