use regex::Regex;
use std::collections::HashSet;
use std::sync::OnceLock;
/// Process-wide cache for the identifier-matching regex.
///
/// The cell starts empty; `extract_identifiers_outside_strings` fills it on first use
/// via `get_or_init`, so the pattern is compiled at most once.
static VAR_REGEX: OnceLock<Regex> = OnceLock::new();
/// Shortens `text` to at most 100 characters for display.
///
/// Text with 100 characters or fewer is returned unchanged. Longer text is cut after
/// its 100th character and `...` is appended. Length is measured in `char`s rather than
/// bytes, so the cut always falls on a UTF-8 character boundary.
pub(super) fn truncate_snippet(text: &str) -> String {
    const CHAR_LIMIT: usize = 100;

    // The character at index CHAR_LIMIT exists only when the text is longer than the
    // limit; its byte offset is exactly where the kept prefix ends.
    match text.char_indices().nth(CHAR_LIMIT) {
        Some((cut, _)) => {
            let mut snippet = String::with_capacity(cut + 3);
            snippet.push_str(&text[..cut]);
            snippet.push_str("...");
            snippet
        }
        None => text.to_string(),
    }
}
/// Collects the identifiers in `expr` that do not begin inside a quoted string literal.
///
/// Both single- and double-quoted literals are recognised. A backslash causes the
/// character after it to be skipped, so an escaped quote does not terminate a literal.
/// Names for which `is_python_keyword` returns `true` are dropped. The result is a set,
/// so a name that occurs several times is reported once.
pub(super) fn extract_identifiers_outside_strings(expr: &str) -> HashSet<String> {
    let identifier_re = VAR_REGEX.get_or_init(|| {
        Regex::new(r"\b([a-zA-Z_][a-zA-Z0-9_]*)\b").expect("VAR_REGEX should be valid")
    });

    // `quoted[i]` is true when the character starting at byte offset `i` is a quote
    // delimiter or lies between an opening quote and its matching closing quote.
    let mut quoted = vec![false; expr.len()];
    // The delimiter of the literal currently being scanned, if any.
    let mut open_quote: Option<char> = None;
    // Set by a backslash: the next character is consumed without being interpreted.
    let mut skip_next = false;

    for (pos, ch) in expr.char_indices() {
        let within_literal = open_quote.is_some();

        if skip_next {
            skip_next = false;
            quoted[pos] = within_literal;
            continue;
        }

        match (ch, open_quote) {
            ('\\', _) => {
                skip_next = true;
                quoted[pos] = within_literal;
            }
            // A quote character outside any literal opens a new one.
            ('\'' | '"', None) => {
                open_quote = Some(ch);
                quoted[pos] = true;
            }
            // Only the same delimiter that opened the literal can close it.
            (delim, Some(opener)) if delim == opener => {
                open_quote = None;
                quoted[pos] = true;
            }
            _ => quoted[pos] = within_literal,
        }
    }

    identifier_re
        .captures_iter(expr)
        .filter_map(|caps| caps.get(1))
        // Keep a match only when its first byte is in range and not marked as quoted.
        .filter(|m| quoted.get(m.start()) == Some(&false))
        .map(|m| m.as_str())
        .filter(|name| !is_python_keyword(name))
        .map(str::to_string)
        .collect()
}
/// Reports whether `name` is one of the parameters declared by `lambda_expr`.
///
/// `lambda_expr` must start with the `lambda` keyword; the parameter list is the text
/// between that keyword and the first `:`. The list is split on commas, and each entry
/// is compared to `name` after stripping surrounding whitespace, any leading `*` / `**`
/// and any `=default` suffix.
///
/// Returns `false` when:
/// - `name` is empty,
/// - `lambda_expr` does not start with `lambda` as a whole word (for example
///   `lambda_var: ...` or `lambdax: ...`),
/// - there is no `:` after the keyword,
/// - no declared parameter equals `name`.
pub(super) fn is_lambda_parameter(lambda_expr: &str, name: &str) -> bool {
    // An empty name would otherwise match the empty parameter list of `lambda: ...`
    // and the bare `*` separator.
    if name.is_empty() {
        return false;
    }

    let Some(after_lambda) = lambda_expr.strip_prefix("lambda") else {
        return false;
    };

    // Enforce a keyword boundary: if an identifier character follows, the input starts
    // with a longer name such as `lambda_var`, not with the `lambda` keyword.
    let continues_identifier = after_lambda
        .chars()
        .next()
        .is_some_and(|c| c.is_alphanumeric() || c == '_');
    if continues_identifier {
        return false;
    }

    let Some((params_section, _body)) = after_lambda.split_once(':') else {
        return false;
    };

    params_section.split(',').any(|param| {
        // `*args` / `**kwargs` declare `args` / `kwargs`.
        let bare = param.trim().trim_start_matches('*');
        // `x=1` declares `x`; only the part before the first `=` is the name.
        let bare = bare.split_once('=').map_or(bare, |(lhs, _)| lhs).trim();
        bare == name
    })
}
/// Reports whether `name` is reserved and therefore must not be treated as a
/// user-defined identifier.
///
/// The reserved set consists of the Python keywords (including `True`, `False`, `None`
/// and the `async` / `await` keywords) plus a fixed list of math function names
/// (`abs`, `sin`, `cos`, `tan`, `asin`, `acos`, `atan`, `atan2`, `sqrt`, `floor`,
/// `ceil`). Matching is exact and case-sensitive.
pub(super) fn is_python_keyword(name: &str) -> bool {
    matches!(
        name,
        // Python keywords.
        "True"
            | "False"
            | "None"
            | "and"
            | "or"
            | "not"
            | "is"
            | "in"
            | "if"
            | "else"
            | "elif"
            | "for"
            | "while"
            | "lambda"
            | "def"
            | "class"
            | "return"
            | "yield"
            | "try"
            | "except"
            | "finally"
            | "raise"
            | "with"
            | "as"
            | "import"
            | "from"
            | "pass"
            | "break"
            | "continue"
            | "global"
            | "nonlocal"
            | "assert"
            | "del"
            | "async"
            | "await"
            // Math function names that are also excluded from identifier results.
            | "abs"
            | "sin"
            | "cos"
            | "tan"
            | "asin"
            | "acos"
            | "atan"
            | "atan2"
            | "sqrt"
            | "floor"
            | "ceil"
    )
}
/// Reports whether `name` is a single bare identifier.
///
/// A simple identifier is non-empty, starts with an alphabetic character or `_`,
/// continues with only alphanumeric characters or `_`, and is not rejected by
/// `is_python_keyword`.
pub(super) fn is_simple_identifier(name: &str) -> bool {
    let mut chars = name.chars();

    // The empty string has no first character and is never an identifier.
    let Some(first) = chars.next() else {
        return false;
    };

    let valid_start = first.is_alphabetic() || first == '_';
    let valid_rest = chars.all(|c| c.is_alphanumeric() || c == '_');

    valid_start && valid_rest && !is_python_keyword(name)
}
// Unit tests for the helpers in this file: identifier extraction, snippet truncation,
// lambda-parameter detection and simple-identifier validation.
#[cfg(test)]
mod tests {
use super::*;
// --- extract_identifiers_outside_strings ---
// Plain arithmetic: every operand is reported.
#[test]
fn test_extract_identifiers_basic() {
let expr = "x + y * z";
let ids = extract_identifiers_outside_strings(expr);
assert_eq!(ids.len(), 3);
assert!(ids.contains("x"));
assert!(ids.contains("y"));
assert!(ids.contains("z"));
}
// A single-quoted subscript key is a string literal, not an identifier.
#[test]
fn test_extract_identifiers_single_quote_string() {
let expr = "data['initial_positions']";
let ids = extract_identifiers_outside_strings(expr);
assert_eq!(ids.len(), 1);
assert!(ids.contains("data"));
assert!(
!ids.contains("initial_positions"),
"String literal should be skipped"
);
}
// Same as above, with a double-quoted key.
#[test]
fn test_extract_identifiers_double_quote_string() {
let expr = r#"data["key_name"]"#;
let ids = extract_identifiers_outside_strings(expr);
assert_eq!(ids.len(), 1);
assert!(ids.contains("data"));
assert!(
!ids.contains("key_name"),
"String literal should be skipped"
);
}
// A backslash-escaped single quote must not end the single-quoted literal.
#[test]
fn test_extract_identifiers_escaped_quote_single() {
let expr = r"message['can\'t']";
let ids = extract_identifiers_outside_strings(expr);
assert_eq!(ids.len(), 1);
assert!(ids.contains("message"));
assert!(!ids.contains("can"));
assert!(!ids.contains("t"));
}
// A backslash-escaped double quote must not end the double-quoted literal.
#[test]
fn test_extract_identifiers_escaped_quote_double() {
let expr = r#"data["quote\"inside"]"#;
let ids = extract_identifiers_outside_strings(expr);
assert_eq!(ids.len(), 1);
assert!(ids.contains("data"));
assert!(!ids.contains("quote"));
assert!(!ids.contains("inside"));
}
// Single- and double-quoted literals in the same expression are both skipped.
#[test]
fn test_extract_identifiers_mixed_quotes() {
let expr = r#"func('single') + other("double")"#;
let ids = extract_identifiers_outside_strings(expr);
assert_eq!(ids.len(), 2);
assert!(ids.contains("func"));
assert!(ids.contains("other"));
assert!(!ids.contains("single"));
assert!(!ids.contains("double"));
}
// A quote of the other kind inside a literal is ordinary content and does not
// open or close anything.
#[test]
fn test_extract_identifiers_quote_inside_different_quote() {
let expr = r#"data["it's"]"#;
let ids = extract_identifiers_outside_strings(expr);
assert_eq!(ids.len(), 1);
assert!(ids.contains("data"));
assert!(!ids.contains("it"));
assert!(!ids.contains("s"));
let expr2 = r#"data['"quoted"']"#;
let ids2 = extract_identifiers_outside_strings(expr2);
assert_eq!(ids2.len(), 1);
assert!(ids2.contains("data"));
assert!(!ids2.contains("quoted"));
}
// Both the callee name and the argument name are reported.
#[test]
fn test_extract_identifiers_function_call() {
let expr = "load_yaml(filename)";
let ids = extract_identifiers_outside_strings(expr);
assert_eq!(ids.len(), 2);
assert!(ids.contains("load_yaml"));
assert!(ids.contains("filename"));
}
// Call followed by chained string subscripts: only the call parts are reported.
#[test]
fn test_extract_identifiers_complex_expression() {
let expr = "load_yaml(file)['initial_positions']['joint1']";
let ids = extract_identifiers_outside_strings(expr);
assert_eq!(ids.len(), 2);
assert!(ids.contains("load_yaml"));
assert!(ids.contains("file"));
assert!(
!ids.contains("initial_positions"),
"String literal in brackets"
);
assert!(!ids.contains("joint1"), "String literal in brackets");
}
// Names rejected by is_python_keyword are filtered out of the result.
#[test]
fn test_extract_identifiers_python_keywords_filtered() {
let expr = "if x and y or z";
let ids = extract_identifiers_outside_strings(expr);
assert_eq!(ids.len(), 3);
assert!(ids.contains("x"));
assert!(ids.contains("y"));
assert!(ids.contains("z"));
assert!(!ids.contains("if"));
assert!(!ids.contains("and"));
assert!(!ids.contains("or"));
}
// Empty input yields an empty set.
#[test]
fn test_extract_identifiers_empty_string() {
let expr = "";
let ids = extract_identifiers_outside_strings(expr);
assert_eq!(ids.len(), 0);
}
// An expression made only of string literals yields no identifiers.
#[test]
fn test_extract_identifiers_only_strings() {
let expr = "'hello' + \"world\"";
let ids = extract_identifiers_outside_strings(expr);
assert_eq!(ids.len(), 0, "Only string literals, no identifiers");
}
// The result is a set, so a repeated name is counted once.
#[test]
fn test_extract_identifiers_multiple_occurrences() {
let expr = "x + x * x";
let ids = extract_identifiers_outside_strings(expr);
assert_eq!(ids.len(), 1, "Set should deduplicate");
assert!(ids.contains("x"));
}
// Leading and surrounding underscores are part of the identifier.
#[test]
fn test_extract_identifiers_underscore_names() {
let expr = "_private + __dunder__ + normal_name";
let ids = extract_identifiers_outside_strings(expr);
assert_eq!(ids.len(), 3);
assert!(ids.contains("_private"));
assert!(ids.contains("__dunder__"));
assert!(ids.contains("normal_name"));
}
// Numeric literals are not identifiers.
#[test]
fn test_extract_identifiers_numbers_excluded() {
let expr = "x + 123 + y";
let ids = extract_identifiers_outside_strings(expr);
assert_eq!(ids.len(), 2);
assert!(ids.contains("x"));
assert!(ids.contains("y"));
}
// Empty literals open and close immediately; scanning continues normally after them.
#[test]
fn test_extract_identifiers_empty_quotes() {
let expr = "data[''] + other[\"\"]";
let ids = extract_identifiers_outside_strings(expr);
assert_eq!(ids.len(), 2);
assert!(ids.contains("data"));
assert!(ids.contains("other"));
}
// Two literals with no separator between them are both skipped.
#[test]
fn test_extract_identifiers_adjacent_strings() {
let expr = "'first''second'";
let ids = extract_identifiers_outside_strings(expr);
assert_eq!(ids.len(), 0, "Adjacent string literals");
}
// An escaped backslash right before the closing quote must not escape that quote,
// so `x` after the literal is still seen as an identifier.
#[test]
fn test_extract_identifiers_backslash_at_end_of_string() {
let expr = r"data['path\\'] + x";
let ids = extract_identifiers_outside_strings(expr);
assert!(ids.contains("data"));
assert!(ids.contains("x"));
assert!(!ids.contains("path"));
}
// --- truncate_snippet ---
// Text under the limit is returned unchanged.
#[test]
fn test_truncate_snippet_short_string() {
let text = "Short string";
assert_eq!(truncate_snippet(text), "Short string");
}
// Exactly 100 characters is still within the limit.
#[test]
fn test_truncate_snippet_exactly_100_chars() {
let text = "a".repeat(100);
assert_eq!(truncate_snippet(&text), text);
}
// 150 ASCII characters: the result is at most 100 bytes of text plus the 3-byte "...".
#[test]
fn test_truncate_snippet_longer_than_100_chars() {
let text = "a".repeat(150);
let result = truncate_snippet(&text);
assert!(result.ends_with("..."));
assert!(result.len() <= 103); }
// A multi-byte character sits near the cut point; the kept prefix must end on a
// character boundary.
#[test]
fn test_truncate_snippet_utf8_boundary() {
let mut text = "a".repeat(98);
text.push_str("🦀"); text.push_str(&"x".repeat(10));
let result = truncate_snippet(&text);
assert!(result.ends_with("..."));
assert!(result.is_char_boundary(result.len() - 3));
}
// 120 multi-byte characters: exactly 100 characters remain before the ellipsis.
#[test]
fn test_truncate_snippet_counts_chars_not_bytes() {
let text = "🦀".repeat(120); let result = truncate_snippet(&text);
assert!(result.ends_with("..."));
let result_without_ellipsis = &result[..result.len() - 3];
assert_eq!(result_without_ellipsis.chars().count(), 100);
}
// --- is_lambda_parameter ---
// One parameter: matches its own name only.
#[test]
fn test_is_lambda_parameter_single_param() {
assert!(is_lambda_parameter("lambda x: x + 1", "x"));
assert!(!is_lambda_parameter("lambda x: x + 1", "y"));
}
// Every entry of a comma-separated parameter list is recognised.
#[test]
fn test_is_lambda_parameter_multiple_params() {
assert!(is_lambda_parameter("lambda a, b, c: a + b", "a"));
assert!(is_lambda_parameter("lambda a, b, c: a + b", "b"));
assert!(is_lambda_parameter("lambda a, b, c: a + b", "c"));
assert!(!is_lambda_parameter("lambda a, b, c: a + b", "d"));
}
// Whitespace around commas and before the colon is ignored.
#[test]
fn test_is_lambda_parameter_with_spaces() {
assert!(is_lambda_parameter("lambda x , y : x + y", "x"));
assert!(is_lambda_parameter("lambda x , y : x + y", "y"));
}
// Expressions that do not start with `lambda` never match.
#[test]
fn test_is_lambda_parameter_not_lambda() {
assert!(!is_lambda_parameter("x + y", "x"));
assert!(!is_lambda_parameter("def foo(x): return x", "x"));
}
// Inputs where `lambda` is only part of a longer word must not report these names.
#[test]
fn test_is_lambda_parameter_word_boundary() {
assert!(!is_lambda_parameter("notlambda x: x", "x"));
assert!(!is_lambda_parameter("lambda_var: something", "var"));
}
// Without a colon there is no parameter list to inspect.
#[test]
fn test_is_lambda_parameter_no_colon() {
assert!(!is_lambda_parameter("lambda x", "x"));
}
// Parameter names with leading underscores are matched verbatim.
#[test]
fn test_is_lambda_parameter_underscore_names() {
assert!(is_lambda_parameter("lambda _x, __y: _x + __y", "_x"));
assert!(is_lambda_parameter("lambda _x, __y: _x + __y", "__y"));
}
// --- is_simple_identifier ---
// Letters, digits after the first character, and underscores are all accepted.
#[test]
fn test_is_simple_identifier_valid() {
assert!(is_simple_identifier("x"));
assert!(is_simple_identifier("my_var"));
assert!(is_simple_identifier("var123"));
assert!(is_simple_identifier("_private"));
assert!(is_simple_identifier("__dunder__"));
}
// Operators, subscripts, attribute access, a leading digit, the empty string and
// hyphens all disqualify the input.
#[test]
fn test_is_simple_identifier_invalid() {
assert!(!is_simple_identifier("x + y"));
assert!(!is_simple_identifier("data['key']"));
assert!(!is_simple_identifier("obj.attr"));
assert!(!is_simple_identifier("123abc")); assert!(!is_simple_identifier("")); assert!(!is_simple_identifier("x-y")); }
// Names rejected by is_python_keyword are not simple identifiers.
#[test]
fn test_is_simple_identifier_keywords() {
assert!(!is_simple_identifier("True"));
assert!(!is_simple_identifier("False"));
assert!(!is_simple_identifier("None"));
assert!(!is_simple_identifier("lambda"));
assert!(!is_simple_identifier("if"));
}
// Integer and float literals are not identifiers.
#[test]
fn test_is_simple_identifier_numbers() {
assert!(!is_simple_identifier("123"));
assert!(!is_simple_identifier("3.14"));
}
}