use crate::value::PerlValue;
use once_cell::sync::Lazy;
use regex::Regex;
use thiserror::Error;
/// Errors reported while parsing textual Perl variable assignments and values.
#[derive(Debug, Error)]
pub enum VariableParseError {
/// The input did not match any recognized shape; carries the offending text.
#[error("unrecognized variable format: {0}")]
UnrecognizedFormat(String),
/// A value was nested deeper than the parser allows; carries the configured limit.
#[error("maximum nesting depth exceeded ({0})")]
MaxDepthExceeded(usize),
/// A quoted string was too short to contain both of its delimiters.
#[error("unterminated string literal")]
UnterminatedString,
/// A collection was not closed.
/// NOTE(review): no code visible in this file constructs this variant.
#[error("unterminated collection")]
UnterminatedCollection,
/// A regular expression failed to compile; converts from `regex::Error` via `#[from]`.
/// NOTE(review): no code visible in this file constructs this variant.
#[error("regex error: {0}")]
RegexError(#[from] regex::Error),
}
/// Matches a whole `<sigil><name> = <value>` line. `name` captures the sigil
/// (`$`, `@` or `%`) together with word characters and colons; `value` captures
/// everything after the `=`.
static SCALAR_VAR_RE: Lazy<Result<Regex, regex::Error>> =
Lazy::new(|| Regex::new(r"^\s*(?P<name>[\$\@\%][\w:]+)\s*=\s*(?P<value>.*?)$"));
/// Matches exactly the text `undef`.
static UNDEF_RE: Lazy<Result<Regex, regex::Error>> = Lazy::new(|| Regex::new(r"^undef$"));
/// Matches an optionally negative run of decimal digits.
static INTEGER_RE: Lazy<Result<Regex, regex::Error>> = Lazy::new(|| Regex::new(r"^-?\d+$"));
/// Matches an optionally negative decimal number: digits with an optional
/// fraction, or a bare fraction such as `.5`, followed by an optional exponent.
static NUMBER_RE: Lazy<Result<Regex, regex::Error>> =
Lazy::new(|| Regex::new(r"^-?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?$"));
/// Matches one complete single- or double-quoted string literal, allowing
/// backslash-escaped characters inside the quotes.
///
/// The alternation is grouped and anchored at both ends so that text which
/// merely *starts* with a quoted string (for example `'a' . 'b'`) is not
/// classified as a single literal and stripped of its outer quotes.
static QUOTED_STRING_RE: Lazy<Result<Regex, regex::Error>> =
    Lazy::new(|| Regex::new(r#"^(?:'(?:[^'\\]|\\.)*'|"(?:[^"\\]|\\.)*")$"#));
/// Matches a stringified array reference such as `ARRAY(0x1234abcd)`.
static ARRAY_REF_RE: Lazy<Result<Regex, regex::Error>> =
Lazy::new(|| Regex::new(r"^ARRAY\(0x[0-9a-fA-F]+\)$"));
/// Matches a stringified hash reference such as `HASH(0x1234abcd)`.
static HASH_REF_RE: Lazy<Result<Regex, regex::Error>> =
Lazy::new(|| Regex::new(r"^HASH\(0x[0-9a-fA-F]+\)$"));
/// Matches a stringified code reference such as `CODE(0x1234abcd)`.
static CODE_REF_RE: Lazy<Result<Regex, regex::Error>> =
Lazy::new(|| Regex::new(r"^CODE\(0x[0-9a-fA-F]+\)$"));
/// Matches a stringified object of the form `Class::Name=TYPE(0x...)`, where
/// TYPE is one of ARRAY, HASH, SCALAR or GLOB. Captures `class` and `type`.
static OBJECT_RE: Lazy<Result<Regex, regex::Error>> = Lazy::new(|| {
Regex::new(r"^(?P<class>[\w:]+)=(?P<type>ARRAY|HASH|SCALAR|GLOB)\(0x[0-9a-fA-F]+\)$")
});
/// Matches a glob written as `*name`; captures `name` without the leading `*`.
static GLOB_RE: Lazy<Result<Regex, regex::Error>> =
Lazy::new(|| Regex::new(r"^\*(?P<name>[\w:]+)$"));
/// Matches text with an optional `(?flags:` prefix and an optional trailing `)`,
/// capturing `flags` and `pattern`.
/// NOTE(review): presumably intended for stringified `qr//` values -- confirm.
// The lint is silenced because no code visible in this file reads this static.
#[allow(dead_code)]
static REGEX_RE: Lazy<Result<Regex, regex::Error>> =
Lazy::new(|| Regex::new(r"^(?:\(\?(?P<flags>[xism-]*)(?:-[xism]+)?:)?(?P<pattern>.*?)\)?$"));
fn scalar_var_re() -> Option<&'static Regex> {
SCALAR_VAR_RE.as_ref().ok()
}
fn undef_re() -> Option<&'static Regex> {
UNDEF_RE.as_ref().ok()
}
fn integer_re() -> Option<&'static Regex> {
INTEGER_RE.as_ref().ok()
}
fn number_re() -> Option<&'static Regex> {
NUMBER_RE.as_ref().ok()
}
fn quoted_string_re() -> Option<&'static Regex> {
QUOTED_STRING_RE.as_ref().ok()
}
fn array_ref_re() -> Option<&'static Regex> {
ARRAY_REF_RE.as_ref().ok()
}
fn hash_ref_re() -> Option<&'static Regex> {
HASH_REF_RE.as_ref().ok()
}
fn code_ref_re() -> Option<&'static Regex> {
CODE_REF_RE.as_ref().ok()
}
fn object_re() -> Option<&'static Regex> {
OBJECT_RE.as_ref().ok()
}
fn glob_re() -> Option<&'static Regex> {
GLOB_RE.as_ref().ok()
}
/// Parses textual Perl variable assignments and values into `PerlValue`s.
#[derive(Debug)]
pub struct VariableParser {
    /// Deepest nesting level accepted before parsing fails with
    /// `VariableParseError::MaxDepthExceeded`.
    max_depth: usize,
}

impl Default for VariableParser {
    /// Builds a parser with the same nesting limit as `VariableParser::new()`.
    ///
    /// A derived `Default` would set the limit to 0, which rejects every
    /// nested value, so the limit is spelled out here. Keep it in sync with
    /// `new()`.
    fn default() -> Self {
        Self { max_depth: 50 }
    }
}
impl VariableParser {
#[must_use]
pub fn new() -> Self {
Self { max_depth: 50 }
}
#[must_use]
pub fn with_max_depth(mut self, depth: usize) -> Self {
self.max_depth = depth;
self
}
pub fn parse_assignment(&self, line: &str) -> Result<(String, PerlValue), VariableParseError> {
let re = scalar_var_re()
.ok_or_else(|| VariableParseError::UnrecognizedFormat(line.to_string()))?;
if let Some(caps) = re.captures(line) {
let name = caps
.name("name")
.ok_or_else(|| VariableParseError::UnrecognizedFormat(line.to_string()))?
.as_str()
.to_string();
let value_str = caps
.name("value")
.ok_or_else(|| VariableParseError::UnrecognizedFormat(line.to_string()))?
.as_str();
let value = self.parse_value(value_str, 0)?;
Ok((name, value))
} else {
Err(VariableParseError::UnrecognizedFormat(line.to_string()))
}
}
pub fn parse_value(&self, text: &str, depth: usize) -> Result<PerlValue, VariableParseError> {
if depth > self.max_depth {
return Err(VariableParseError::MaxDepthExceeded(self.max_depth));
}
let text = text.trim();
if undef_re().is_some_and(|re| re.is_match(text)) {
return Ok(PerlValue::Undef);
}
if integer_re().is_some_and(|re| re.is_match(text)) {
if let Ok(i) = text.parse::<i64>() {
return Ok(PerlValue::Integer(i));
}
}
if number_re().is_some_and(|re| re.is_match(text)) {
if let Ok(n) = text.parse::<f64>() {
return Ok(PerlValue::Number(n));
}
}
if quoted_string_re().is_some_and(|re| re.is_match(text)) {
let unquoted = self.unquote_string(text)?;
return Ok(PerlValue::Scalar(unquoted));
}
if array_ref_re().is_some_and(|re| re.is_match(text)) {
return Ok(PerlValue::Array(vec![]));
}
if hash_ref_re().is_some_and(|re| re.is_match(text)) {
return Ok(PerlValue::Hash(vec![]));
}
if code_ref_re().is_some_and(|re| re.is_match(text)) {
return Ok(PerlValue::Code { name: None });
}
if let Some(caps) = object_re().and_then(|re| re.captures(text)) {
let class = caps
.name("class")
.ok_or_else(|| VariableParseError::UnrecognizedFormat(text.to_string()))?
.as_str()
.to_string();
let type_str = caps
.name("type")
.ok_or_else(|| VariableParseError::UnrecognizedFormat(text.to_string()))?
.as_str();
let inner = match type_str {
"ARRAY" => PerlValue::Array(vec![]),
"HASH" => PerlValue::Hash(vec![]),
_ => PerlValue::Scalar(String::new()),
};
return Ok(PerlValue::Object { class, value: Box::new(inner) });
}
if let Some(caps) = glob_re().and_then(|re| re.captures(text)) {
let name = caps
.name("name")
.ok_or_else(|| VariableParseError::UnrecognizedFormat(text.to_string()))?
.as_str()
.to_string();
return Ok(PerlValue::Glob(name));
}
if text.starts_with('(') && text.ends_with(')') {
return self.parse_array_literal(text, depth);
}
if text.starts_with('[') && text.ends_with(']') {
return self.parse_array_literal(text, depth);
}
if text.starts_with('{') && text.ends_with('}') {
return self.parse_hash_literal(text, depth);
}
Ok(PerlValue::Scalar(text.to_string()))
}
fn parse_array_literal(
&self,
text: &str,
depth: usize,
) -> Result<PerlValue, VariableParseError> {
let inner = &text[1..text.len() - 1];
if inner.trim().is_empty() {
return Ok(PerlValue::Array(vec![]));
}
let elements = self.split_elements(inner)?;
let parsed: Result<Vec<PerlValue>, _> =
elements.iter().map(|e| self.parse_value(e, depth + 1)).collect();
Ok(PerlValue::Array(parsed?))
}
fn parse_hash_literal(
&self,
text: &str,
depth: usize,
) -> Result<PerlValue, VariableParseError> {
let inner = &text[1..text.len() - 1];
if inner.trim().is_empty() {
return Ok(PerlValue::Hash(vec![]));
}
let elements = self.split_elements(inner)?;
let mut pairs = Vec::new();
for element in elements {
if let Some((key, value)) = element.split_once("=>") {
let key = self.unquote_key(key.trim());
let value = self.parse_value(value.trim(), depth + 1)?;
pairs.push((key, value));
} else {
let key = self.unquote_key(element.trim());
pairs.push((key, PerlValue::Undef));
}
}
Ok(PerlValue::Hash(pairs))
}
fn split_elements(&self, text: &str) -> Result<Vec<String>, VariableParseError> {
let mut elements = Vec::new();
let mut current = String::new();
let mut paren_depth: u32 = 0;
let mut bracket_depth: u32 = 0;
let mut brace_depth: u32 = 0;
let mut in_string = false;
let mut string_char = ' ';
let mut escape_next = false;
for ch in text.chars() {
if escape_next {
current.push(ch);
escape_next = false;
continue;
}
if ch == '\\' {
current.push(ch);
escape_next = true;
continue;
}
if in_string {
current.push(ch);
if ch == string_char {
in_string = false;
}
continue;
}
match ch {
'"' | '\'' => {
current.push(ch);
in_string = true;
string_char = ch;
}
'(' => {
current.push(ch);
paren_depth += 1;
}
')' => {
current.push(ch);
paren_depth = paren_depth.saturating_sub(1);
}
'[' => {
current.push(ch);
bracket_depth += 1;
}
']' => {
current.push(ch);
bracket_depth = bracket_depth.saturating_sub(1);
}
'{' => {
current.push(ch);
brace_depth += 1;
}
'}' => {
current.push(ch);
brace_depth = brace_depth.saturating_sub(1);
}
',' if paren_depth == 0 && bracket_depth == 0 && brace_depth == 0 => {
let trimmed = current.trim().to_string();
if !trimmed.is_empty() {
elements.push(trimmed);
}
current = String::new();
}
_ => {
current.push(ch);
}
}
}
let trimmed = current.trim().to_string();
if !trimmed.is_empty() {
elements.push(trimmed);
}
Ok(elements)
}
fn unquote_string(&self, text: &str) -> Result<String, VariableParseError> {
if text.len() < 2 {
return Err(VariableParseError::UnterminatedString);
}
let first = text.chars().next();
let last = text.chars().next_back();
match (first, last) {
(Some('"'), Some('"')) | (Some('\''), Some('\'')) => {
let inner = &text[1..text.len() - 1];
Ok(self.unescape_string(inner))
}
_ => Ok(text.to_string()),
}
}
fn unquote_key(&self, text: &str) -> String {
if text.len() >= 2 {
let first = text.chars().next();
let last = text.chars().next_back();
match (first, last) {
(Some('"'), Some('"')) | (Some('\''), Some('\'')) => {
return self.unescape_string(&text[1..text.len() - 1]);
}
_ => {}
}
}
text.to_string()
}
fn unescape_string(&self, text: &str) -> String {
let mut result = String::with_capacity(text.len());
let mut chars = text.chars().peekable();
while let Some(ch) = chars.next() {
if ch == '\\' {
match chars.next() {
Some('n') => result.push('\n'),
Some('r') => result.push('\r'),
Some('t') => result.push('\t'),
Some('\\') => result.push('\\'),
Some('"') => result.push('"'),
Some('\'') => result.push('\''),
Some(other) => {
result.push('\\');
result.push(other);
}
None => result.push('\\'),
}
} else {
result.push(ch);
}
}
result
}
pub fn parse_variables(&self, output: &str) -> Vec<(String, PerlValue)> {
output.lines().filter_map(|line| self.parse_assignment(line).ok()).collect()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `input` as a top-level value using a default-configured parser.
    fn parse(input: &str) -> Result<PerlValue, VariableParseError> {
        VariableParser::new().parse_value(input, 0)
    }

    /// Parses `line` as an assignment using a default-configured parser.
    fn assign(line: &str) -> Result<(String, PerlValue), VariableParseError> {
        VariableParser::new().parse_assignment(line)
    }

    #[test]
    fn test_parse_undef() {
        assert!(matches!(parse("undef"), Ok(PerlValue::Undef)));
    }

    #[test]
    fn test_parse_integer() {
        assert!(matches!(parse("42"), Ok(PerlValue::Integer(42))));
        assert!(matches!(parse("-17"), Ok(PerlValue::Integer(-17))));
    }

    #[test]
    fn test_parse_number() {
        assert!(matches!(parse("3.25"), Ok(PerlValue::Number(n)) if (n - 3.25).abs() < 0.001));
        assert!(matches!(parse("1.5e10"), Ok(PerlValue::Number(_))));
    }

    #[test]
    fn test_parse_quoted_string() {
        assert!(matches!(parse("\"hello\""), Ok(PerlValue::Scalar(s)) if s == "hello"));
        assert!(matches!(parse("'world'"), Ok(PerlValue::Scalar(s)) if s == "world"));
    }

    #[test]
    fn test_parse_string_with_escapes() {
        // The escape sequence must be decoded into a real newline.
        let parsed = parse("\"line1\\nline2\"");
        assert!(matches!(parsed, Ok(PerlValue::Scalar(s)) if s.contains('\n')));
    }

    #[test]
    fn test_parse_array_reference() {
        assert!(matches!(parse("ARRAY(0x1234abcd)"), Ok(PerlValue::Array(_))));
    }

    #[test]
    fn test_parse_hash_reference() {
        assert!(matches!(parse("HASH(0x5678abcd)"), Ok(PerlValue::Hash(_))));
    }

    #[test]
    fn test_parse_code_reference() {
        assert!(matches!(parse("CODE(0xdeadbeef)"), Ok(PerlValue::Code { name: None })));
    }

    #[test]
    fn test_parse_object() {
        let parsed = parse("My::Class=HASH(0x1234)");
        assert!(matches!(parsed, Ok(PerlValue::Object { class, .. }) if class == "My::Class"));
    }

    #[test]
    fn test_parse_glob() {
        let parsed = parse("*main::foo");
        assert!(matches!(parsed, Ok(PerlValue::Glob(name)) if name == "main::foo"));
    }

    #[test]
    fn test_parse_array_literal() {
        // Both delimiter styles yield the same three-element array.
        for literal in ["(1, 2, 3)", "[1, 2, 3]"] {
            assert!(matches!(parse(literal), Ok(PerlValue::Array(arr)) if arr.len() == 3));
        }
        assert!(matches!(parse("()"), Ok(PerlValue::Array(arr)) if arr.is_empty()));
    }

    #[test]
    fn test_parse_hash_literal() {
        let populated = parse("{foo => 1, bar => 2}");
        assert!(matches!(populated, Ok(PerlValue::Hash(pairs)) if pairs.len() == 2));
        assert!(matches!(parse("{}"), Ok(PerlValue::Hash(pairs)) if pairs.is_empty()));
    }

    #[test]
    fn test_parse_assignment() {
        let scalar = assign("$x = 42");
        assert!(matches!(scalar, Ok((name, PerlValue::Integer(42))) if name == "$x"));
        let array = assign("@arr = (1, 2, 3)");
        assert!(matches!(array, Ok((name, PerlValue::Array(_))) if name == "@arr"));
        let hash = assign("%hash = {a => 1}");
        assert!(matches!(hash, Ok((name, PerlValue::Hash(_))) if name == "%hash"));
    }

    #[test]
    fn test_parse_variables_multi_line() {
        let vars = VariableParser::new().parse_variables("$x = 1\n$y = 2\n$z = \"hello\"");
        // Comparing the whole name list checks both the count and the order.
        let names: Vec<&str> = vars.iter().map(|(name, _)| name.as_str()).collect();
        assert_eq!(names, ["$x", "$y", "$z"]);
    }

    #[test]
    fn test_max_depth_exceeded() {
        let shallow = VariableParser::new().with_max_depth(2);
        let outcome = shallow.parse_value("(((1)))", 0);
        assert!(matches!(outcome, Err(VariableParseError::MaxDepthExceeded(_))));
    }

    #[test]
    fn test_parse_nested_structure() {
        let parsed = parse("{arr => [1, 2], hash => {a => 1}}");
        assert!(matches!(parsed, Ok(PerlValue::Hash(pairs)) if pairs.len() == 2));
    }
}