use super::scan_state::ScanState;
use std::collections::HashSet;
/// Variable names that `extract_variables` always adds to its result,
/// whether or not they occur in the query text.
// NOTE(review): these look like jq's built-in `$ENV` and `$__loc__` — confirm.
const BUILTIN_VARIABLES: &[&str] = &["$ENV", "$__loc__"];
/// Returns every variable name known for `query`, de-duplicated and sorted.
///
/// The result is the union of the names found by `extract_user_variables`
/// and the entries of `BUILTIN_VARIABLES`. Each name keeps its leading `$`.
pub fn extract_variables(query: &str) -> Vec<String> {
    // Funnel both sources through a set so a name defined more than once
    // (or a user variable that shadows a built-in) is reported only once.
    let unique: HashSet<String> = extract_user_variables(query)
        .into_iter()
        .chain(BUILTIN_VARIABLES.iter().map(|&name| name.to_string()))
        .collect();

    // A hash set has no stable iteration order; sort for deterministic output.
    let mut sorted: Vec<String> = unique.into_iter().collect();
    sorted.sort();
    sorted
}
/// Collects the variable names introduced by `as` / `label` in `query`.
///
/// The query is walked one character at a time while a `ScanState` is advanced
/// alongside it. A position is only inspected for a keyword when the state
/// *before* consuming that character was not inside a string. Names are
/// returned in the order they are found and may contain duplicates.
fn extract_user_variables(query: &str) -> Vec<String> {
    let chars: Vec<char> = query.chars().collect();
    let mut found = Vec::new();
    let mut state = ScanState::Normal;
    let mut cursor = 0;

    while let Some(&current) = chars.get(cursor) {
        // The state must be advanced for every visited character, including
        // those that turn out to be inside a string.
        let state_before = state;
        state = state.advance(current);

        let definition = if state_before.is_in_string() {
            None
        } else if is_variable_definition_keyword_at(&chars, cursor) {
            extract_variables_after_keyword(&chars, cursor)
        } else {
            None
        };

        match definition {
            Some((names, resume_at)) => {
                found.extend(names);
                // Jump past the binding that was just consumed.
                cursor = resume_at;
            }
            None => cursor += 1,
        }
    }

    found
}
fn is_variable_definition_keyword_at(chars: &[char], pos: usize) -> bool {
is_keyword_at(chars, pos, "as") || is_keyword_at(chars, pos, "label")
}
/// Reports whether `keyword` occurs at index `pos` of `chars` as a whole word.
///
/// A match requires that
/// * the character before `pos` (if any) is not an identifier character,
/// * the characters starting at `pos` spell `keyword` exactly, and
/// * the character after the keyword (if any) is not an identifier character.
///
/// Returns `false` when `pos` lies beyond the end of `chars` instead of
/// panicking. The comparison is done in place, without allocating.
fn is_keyword_at(chars: &[char], pos: usize, keyword: &str) -> bool {
    // `get` yields `None` for `pos > chars.len()`, which can never be a match.
    let Some(tail) = chars.get(pos..) else {
        return false;
    };

    // Reject matches that are merely the tail of a longer identifier ("has").
    let preceded_by_identifier = pos
        .checked_sub(1)
        .and_then(|before| chars.get(before))
        .is_some_and(|&c| is_identifier_char(c));
    if preceded_by_identifier {
        return false;
    }

    // Walk the keyword and the input in lock-step; running out of input
    // before the keyword ends counts as a mismatch.
    let mut remaining = tail.iter();
    if !keyword
        .chars()
        .all(|expected| remaining.next() == Some(&expected))
    {
        return false;
    }

    // Reject matches that are merely the head of a longer identifier ("ask").
    !remaining.next().is_some_and(|&c| is_identifier_char(c))
}
/// Parses the binding target that follows the keyword starting at `keyword_pos`.
///
/// The keyword is taken to be `label` when that matches at `keyword_pos`, and
/// `as` otherwise. After skipping whitespace, the next character selects the
/// form of the target:
/// * `$` — a single variable,
/// * `[` — an array destructuring pattern,
/// * `{` — an object destructuring pattern.
///
/// Returns the extracted names and the index at which scanning should resume,
/// or `None` when no target of one of these forms follows.
fn extract_variables_after_keyword(
    chars: &[char],
    keyword_pos: usize,
) -> Option<(Vec<String>, usize)> {
    let keyword_len = if is_keyword_at(chars, keyword_pos, "label") {
        "label".len()
    } else {
        "as".len()
    };
    let target = skip_whitespace(chars, keyword_pos + keyword_len);

    // `?` covers the case where the keyword is the last thing in the input.
    match *chars.get(target)? {
        '$' => extract_single_variable(chars, target).map(|(name, end)| (vec![name], end)),
        '[' => extract_array_destructure_variables(chars, target),
        '{' => extract_object_destructure_variables(chars, target),
        _ => None,
    }
}
/// Reads a `$name` token that starts at index `pos` of `chars`.
///
/// Returns the name including its leading `$`, together with the index just
/// past the last name character. Returns `None` when `pos` is out of range,
/// when the character at `pos` is not `$`, or when no identifier character
/// follows the `$`.
fn extract_single_variable(chars: &[char], pos: usize) -> Option<(String, usize)> {
    if chars.get(pos) != Some(&'$') {
        return None;
    }

    let name_start = pos + 1;
    let name_len = chars[name_start..]
        .iter()
        .take_while(|&&c| is_identifier_char(c))
        .count();
    if name_len == 0 {
        // A lone `$` is not a variable.
        return None;
    }

    let end = name_start + name_len;
    Some((chars[pos..end].iter().collect(), end))
}
/// Collects every `$name` inside the bracketed pattern that opens at `pos`.
///
/// `chars[pos]` must be `[`; otherwise `None` is returned. Nested `[` / `]`
/// pairs are tracked so scanning stops after the bracket matching the opening
/// one. If the input ends first, scanning stops at the end of `chars`.
///
/// Returns the names in order of appearance and the index where scanning
/// stopped.
fn extract_array_destructure_variables(chars: &[char], pos: usize) -> Option<(Vec<String>, usize)> {
    if chars.get(pos) != Some(&'[') {
        return None;
    }

    let mut names = Vec::new();
    let mut nesting = 1usize;
    let mut cursor = pos + 1;

    while nesting > 0 {
        let Some(&current) = chars.get(cursor) else {
            break;
        };

        if current == '$' {
            if let Some((name, after_name)) = extract_single_variable(chars, cursor) {
                names.push(name);
                // Continue right after the name without the extra step below.
                cursor = after_name;
                continue;
            }
        } else if current == '[' {
            nesting += 1;
        } else if current == ']' {
            nesting -= 1;
        }

        cursor += 1;
    }

    Some((names, cursor))
}
/// Collects every `$name` inside the braced pattern that opens at `pos`.
///
/// `chars[pos]` must be `{`; otherwise `None` is returned. Nested `{` / `}`
/// pairs are tracked so scanning stops after the brace matching the opening
/// one. If the input ends first, scanning stops at the end of `chars`.
///
/// Returns the names in order of appearance and the index where scanning
/// stopped.
fn extract_object_destructure_variables(
    chars: &[char],
    pos: usize,
) -> Option<(Vec<String>, usize)> {
    if chars.get(pos) != Some(&'{') {
        return None;
    }

    let mut names = Vec::new();
    let mut nesting = 1usize;
    let mut cursor = pos + 1;

    while nesting > 0 {
        let Some(&current) = chars.get(cursor) else {
            break;
        };

        if current == '$' {
            if let Some((name, after_name)) = extract_single_variable(chars, cursor) {
                names.push(name);
                // Continue right after the name without the extra step below.
                cursor = after_name;
                continue;
            }
        } else if current == '{' {
            nesting += 1;
        } else if current == '}' {
            nesting -= 1;
        }

        cursor += 1;
    }

    Some((names, cursor))
}
/// Returns the index of the first non-whitespace character at or after `pos`.
///
/// Yields `chars.len()` when only whitespace remains, and `pos` unchanged when
/// it already lies at or beyond the end of `chars`.
fn skip_whitespace(chars: &[char], pos: usize) -> usize {
    chars.get(pos..).map_or(pos, |tail| {
        pos + tail.iter().take_while(|c| c.is_whitespace()).count()
    })
}
/// Reports whether `ch` may appear in an identifier: an underscore or any
/// character for which `char::is_alphanumeric` is true.
fn is_identifier_char(ch: char) -> bool {
    matches!(ch, '_') || ch.is_alphanumeric()
}
// Unit tests for this module are kept in the file named by `#[path]` and are
// compiled only for test builds.
#[cfg(test)]
#[path = "variable_extractor_tests.rs"]
mod variable_extractor_tests;