use regex::Regex;
/// Reduces a function signature to a canonical form for structural comparison.
///
/// Steps, applied in order:
/// 1. collapse every run of whitespace into a single space (and trim the ends),
/// 2. drop a trailing comma before a closing parenthesis,
/// 3. shorten `a::b::Name` paths to their last segment,
/// 4. remove `mut ` keywords,
/// 5. strip the return type, then the parameter types.
pub(crate) fn normalize_signature(sig: &str) -> String {
    let mut normalized = sig.split_whitespace().collect::<Vec<_>>().join(" ");

    // (pattern, replacement) pairs; the order matters because each rewrite
    // operates on the output of the previous one.
    let rewrites: [(&str, &str); 3] = [
        (r",\s*\)", ")"),
        (r"\b(?:\w+::)+(\w+)", "$1"),
        (r"\bmut\s+", ""),
    ];
    for &(pattern, replacement) in &rewrites {
        normalized = Regex::new(pattern)
            .unwrap()
            .replace_all(&normalized, replacement)
            .into_owned();
    }

    strip_param_types(&strip_return_type(&normalized))
}
/// Rewrites the parameter list of `sig`, replacing every comma-separated entry
/// with the token chosen by `extract_param_name`. Text before and after the
/// parameter list is kept verbatim.
///
/// The parameter list is the first parenthesized group that is not a Rust
/// visibility restriction (`pub(crate)`, `pub(super)`, `pub(self)`,
/// `pub(in path)`); without that exclusion the `(crate)` of a `pub(crate) fn`
/// would be mistaken for the parameters and the real ones left untouched.
///
/// The signature is returned unchanged when it has no such group, when the
/// group is never closed, or when the group is empty.
///
/// Entries are split on every `,`, including commas nested inside a
/// parameter's own type or default value.
fn strip_param_types(sig: &str) -> String {
    // True when the `(` at byte offset `open` begins a visibility restriction
    // such as `pub(crate)` rather than a parameter list.
    fn opens_visibility_restriction(sig: &str, open: usize) -> bool {
        let before = sig[..open].trim_end();
        if !before.ends_with("pub") {
            return false;
        }
        // `pub` has to be a whole word, not the tail of a longer identifier.
        let word_start = before.len() - "pub".len();
        let glued_to_identifier = before[..word_start]
            .chars()
            .next_back()
            .map_or(false, |c| c.is_alphanumeric() || c == '_');
        if glued_to_identifier {
            return false;
        }
        let inner = sig[open + 1..].trim_start();
        if inner.starts_with("in ") {
            return true;
        }
        ["crate", "super", "self"]
            .iter()
            .any(|&kw| inner.starts_with(kw) && inner[kw.len()..].trim_start().starts_with(')'))
    }

    let open = match sig
        .match_indices('(')
        .map(|(pos, _)| pos)
        .find(|&pos| !opens_visibility_restriction(sig, pos))
    {
        Some(pos) => pos,
        None => return sig.to_string(),
    };

    // Locate the `)` that balances `open`, skipping nested parentheses.
    let after_open = &sig[open + 1..];
    let mut depth = 1;
    let mut close_offset = None;
    for (i, ch) in after_open.char_indices() {
        match ch {
            '(' => depth += 1,
            ')' => {
                depth -= 1;
                if depth == 0 {
                    close_offset = Some(i);
                    break;
                }
            }
            _ => {}
        }
    }
    let close_offset = match close_offset {
        Some(offset) => offset,
        None => return sig.to_string(),
    };

    let params_str = &after_open[..close_offset];
    if params_str.trim().is_empty() {
        return sig.to_string();
    }

    let normalized_params: Vec<String> = params_str
        .split(',')
        .map(|param| {
            let param = param.trim();
            if param.is_empty() {
                // Keep an empty slot so the number of entries is unchanged.
                String::new()
            } else {
                extract_param_name(param)
            }
        })
        .collect();

    // `prefix` ends with the opening paren, `suffix` starts with the closing one.
    let prefix = &sig[..=open];
    let suffix = &sig[open + 1 + close_offset..];
    format!("{}{}{}", prefix, normalized_params.join(", "), suffix)
}
/// Reduces one parameter declaration to the single token kept in a normalized
/// signature.
///
/// * `self`, `&self`, `&mut self` and `mut self` all become `&self`.
/// * When a colon appears after the first whitespace-separated token
///   (`name : Type`, `qualifier name: Type`), the token in front of the colon
///   is returned.
/// * Otherwise the last token is returned (`array $config` -> `$config`). If
///   some token starts with `...` but the last one does not, `...` is
///   prepended to it.
/// * Blank input yields an empty string.
///
/// NOTE(review): when the colon is glued to the *first* token (`name: Type`)
/// the `colon_pos > 0` guard skips the colon rule and the last token, i.e. the
/// type, is returned. The `strip_return_type_preserves_params` test in this
/// file expects `FooArgs` to survive normalization, so that case is left as
/// is here; confirm whether returning `name` is what is actually wanted.
fn extract_param_name(param: &str) -> String {
    let trimmed = param.trim();
    match trimmed {
        "self" | "&self" | "&mut self" | "mut self" => return "&self".to_string(),
        _ => {}
    }

    let tokens: Vec<&str> = trimmed.split_whitespace().collect();
    let last = match tokens.last() {
        Some(token) => *token,
        None => return String::new(),
    };

    // A bare ":" also ends with ':', so one predicate covers both spellings.
    if let Some(colon_pos) = tokens.iter().position(|t| t.ends_with(':')) {
        if colon_pos > 0 {
            let colon_token = tokens[colon_pos];
            // The bare-colon case has to be tested first: checking
            // `ends_with(':')` first would match ":" too and produce an empty
            // name for `name : Type`.
            let name = if colon_token == ":" {
                tokens[colon_pos - 1]
            } else {
                &colon_token[..colon_token.len() - 1]
            };
            return name.to_string();
        }
    }

    let has_spread = tokens.len() > 1 && tokens.iter().any(|t| t.starts_with("..."));
    if has_spread && !last.starts_with("...") {
        return format!("...{}", last);
    }
    last.to_string()
}
/// Removes a return-type annotation (`: Type` or `-> Type`) from a signature,
/// returning everything up to and including the parenthesis that closes the
/// parameter list.
///
/// The closing parenthesis is found by scanning left to right for the first
/// `)` that balances all earlier `(` and sits outside any `<...>` group, and
/// that is followed (after optional whitespace) by `:` or `->`. Looking only
/// at the last `)` in the string breaks on return types that contain
/// parentheses themselves, e.g. `-> Result<(), Error>` or
/// `-> Box<dyn Fn(i32) -> i32>`.
///
/// If the scan finds nothing (for example because brackets are unbalanced),
/// the last `)` in the string is checked as a fallback. Signatures without a
/// recognizable return type are returned unchanged.
fn strip_return_type(sig: &str) -> String {
    // True when `rest` (the text after a `)`) begins a return-type annotation.
    fn starts_return_type(rest: &str) -> bool {
        let rest = rest.trim_start();
        rest.starts_with(':') || rest.starts_with("->")
    }

    let mut paren_depth: usize = 0;
    let mut angle_depth: usize = 0;
    let mut prev: Option<char> = None;
    for (idx, ch) in sig.char_indices() {
        match ch {
            '(' => paren_depth += 1,
            ')' => {
                paren_depth = paren_depth.saturating_sub(1);
                if paren_depth == 0 && angle_depth == 0 && starts_return_type(&sig[idx + 1..]) {
                    return sig[..=idx].to_string();
                }
            }
            '<' => angle_depth += 1,
            // The `>` of `->` and `=>` is an arrow, not a closing angle bracket.
            '>' if prev != Some('-') && prev != Some('=') => {
                angle_depth = angle_depth.saturating_sub(1);
            }
            _ => {}
        }
        prev = Some(ch);
    }

    // Fallback: judge by the last `)` alone.
    if let Some(paren_pos) = sig.rfind(')') {
        if starts_return_type(&sig[paren_pos + 1..]) {
            return sig[..=paren_pos].to_string();
        }
    }
    sig.to_string()
}
/// Normalizes `sig` with `normalize_signature` and splits the result into
/// tokens.
///
/// * A run of alphanumeric characters and underscores forms one token.
/// * `->` forms one token.
/// * Whitespace separates tokens and is never emitted.
/// * Every other character is a token on its own.
///
/// A `-` is held back until the next character is seen so that it can merge
/// with a following `>`. If identifier characters follow instead, they are
/// appended to the pending `-` (so `-1` is a single token).
pub(crate) fn tokenize_signature(sig: &str) -> Vec<String> {
    let sig = normalize_signature(sig);
    let mut tokens = Vec::new();
    let mut current = String::new();

    for ch in sig.chars() {
        if ch.is_alphanumeric() || ch == '_' {
            current.push(ch);
            continue;
        }

        // Complete a pending `-` into `->`. This has to happen before
        // `current` is flushed; flushing first would emit `-` on its own and
        // the arrow could never be assembled.
        if ch == '>' && current == "-" {
            current.push(ch);
            tokens.push(std::mem::take(&mut current));
            continue;
        }

        // Any other separator ends the token collected so far.
        if !current.is_empty() {
            tokens.push(std::mem::take(&mut current));
        }

        if ch.is_whitespace() {
            continue;
        }
        if ch == '-' {
            // Hold the dash: it may be the first half of `->`.
            current.push(ch);
        } else {
            tokens.push(ch.to_string());
        }
    }

    if !current.is_empty() {
        tokens.push(current);
    }
    tokens
}
/// Builds the positional skeleton shared by a group of tokenized signatures.
///
/// Returns `None` when `tokenized_sigs` is empty or when the token lists do
/// not all have the same length. Otherwise the result has one entry per token
/// position: `Some(token)` where every signature has the same token at that
/// position, `None` where at least one signature differs.
pub(crate) fn compute_signature_skeleton(
    tokenized_sigs: &[Vec<String>],
) -> Option<Vec<Option<String>>> {
    let (reference, others) = tokenized_sigs.split_first()?;

    let width = reference.len();
    if others.iter().any(|tokens| tokens.len() != width) {
        return None;
    }

    // Every list has `width` tokens, so indexing `others` by `idx` is in bounds.
    let skeleton = reference
        .iter()
        .enumerate()
        .map(|(idx, token)| {
            if others.iter().all(|tokens| &tokens[idx] == token) {
                Some(token.clone())
            } else {
                None
            }
        })
        .collect();
    Some(skeleton)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that the normalized form of `sig` contains `needle`.
    fn expect_present(sig: &str, needle: &str, context: &str) {
        let normalized = normalize_signature(sig);
        assert!(normalized.contains(needle), "{}: {}", context, normalized);
    }

    /// Asserts that the normalized form of `sig` does not contain `needle`.
    fn expect_absent(sig: &str, needle: &str, context: &str) {
        let normalized = normalize_signature(sig);
        assert!(!normalized.contains(needle), "{}: {}", context, normalized);
    }

    /// Asserts that both signatures tokenize to the same number of tokens.
    fn expect_same_token_count(left_sig: &str, right_sig: &str, context: &str) {
        let left = tokenize_signature(left_sig);
        let right = tokenize_signature(right_sig);
        assert_eq!(
            left.len(),
            right.len(),
            "{}: {:?} vs {:?}",
            context,
            left,
            right
        );
    }

    /// Asserts that a skeleton can be computed for the given signatures.
    fn expect_skeleton(sigs: &[&str], context: &str) {
        let tokenized: Vec<Vec<String>> = sigs.iter().map(|sig| tokenize_signature(sig)).collect();
        assert!(
            compute_signature_skeleton(&tokenized).is_some(),
            "{}",
            context
        );
    }

    // --- return types -----------------------------------------------------

    #[test]
    fn strip_return_type_php_void() {
        expect_absent(
            "public function register(): void",
            "void",
            "Return type should be stripped",
        );
    }

    #[test]
    fn strip_return_type_php_nullable() {
        expect_absent(
            "public function get_items(): ?array",
            "array",
            "Return type should be stripped",
        );
    }

    #[test]
    fn strip_return_type_rust() {
        expect_absent(
            "pub fn run(args: FooArgs) -> CmdResult<FooOutput>",
            "CmdResult",
            "Return type should be stripped",
        );
    }

    // Pins `FooArgs` as the token that remains in the parameter list.
    #[test]
    fn strip_return_type_preserves_params() {
        expect_present(
            "pub fn run(args: FooArgs)",
            "FooArgs",
            "Params should be preserved",
        );
    }

    #[test]
    fn same_tokens_with_and_without_return_type() {
        expect_same_token_count(
            "public function register(): void",
            "public function register()",
            "Token count should match regardless of return type",
        );
    }

    #[test]
    fn rust_return_type_stripped_in_skeleton() {
        expect_skeleton(
            &[
                "pub fn run(args: RunArgs) -> Result<Output>",
                "pub fn run(args: RunArgs)",
            ],
            "Skeleton should compute successfully after return type stripping",
        );
    }

    #[test]
    fn tokenize_preserves_function_name() {
        let tokens = tokenize_signature("pub fn do_stuff(x: i32)");
        assert!(tokens.contains(&"do_stuff".to_string()));
    }

    // --- parameter types --------------------------------------------------

    #[test]
    fn php_prefix_type_stripped() {
        let sig = "public function execute(array $config)";
        expect_absent(sig, "array", "PHP type hint should be stripped");
        expect_present(sig, "$config", "Param name preserved");
    }

    #[test]
    fn php_class_type_stripped() {
        let sig = "public function handle(WP_REST_Request $request)";
        expect_absent(sig, "WP_REST_Request", "PHP class type should be stripped");
        expect_present(sig, "$request", "Param name preserved");
    }

    #[test]
    fn php_typed_and_untyped_same_tokens() {
        expect_same_token_count(
            "public function execute(array $config)",
            "public function execute($config)",
            "Token count should match regardless of type hints",
        );
    }

    #[test]
    fn php_class_typed_and_untyped_same_tokens() {
        expect_same_token_count(
            "public function handle(WP_REST_Request $request)",
            "public function handle($request)",
            "Token count should match",
        );
    }

    #[test]
    fn php_multiple_params_types_stripped() {
        expect_same_token_count(
            "public function execute(array $config, WP_REST_Request $request)",
            "public function execute($config, $request)",
            "Token count should match with multiple params",
        );
    }

    #[test]
    fn rust_postfix_type_stripped() {
        expect_same_token_count(
            "pub fn run(args: RunArgs)",
            "pub fn run(args)",
            "Rust type annotation should be stripped",
        );
    }

    #[test]
    fn rust_self_param_normalized() {
        expect_same_token_count(
            "pub fn run(&self, args: RunArgs)",
            "pub fn run(&mut self, args: RunArgs)",
            "&self and &mut self should normalize the same",
        );
    }

    #[test]
    fn empty_params_unchanged() {
        expect_present(
            "public function register()",
            "()",
            "Empty params should stay empty",
        );
    }

    #[test]
    fn php_nullable_type_stripped() {
        expect_same_token_count(
            "public function get(?string $name)",
            "public function get($name)",
            "Nullable type should be stripped",
        );
    }

    #[test]
    fn skeleton_matches_with_type_differences() {
        expect_skeleton(
            &[
                "public function execute(array $config)",
                "public function execute($config)",
            ],
            "Skeleton should compute despite type hint differences",
        );
    }
}