1use crate::PerlValue;
7use once_cell::sync::Lazy;
8use regex::Regex;
9use thiserror::Error;
10
11#[derive(Debug, Error)]
13pub enum VariableParseError {
14 #[error("unrecognized variable format: {0}")]
16 UnrecognizedFormat(String),
17
18 #[error("maximum nesting depth exceeded ({0})")]
20 MaxDepthExceeded(usize),
21
22 #[error("unterminated string literal")]
24 UnterminatedString,
25
26 #[error("unterminated collection")]
28 UnterminatedCollection,
29
30 #[error("regex error: {0}")]
32 RegexError(#[from] regex::Error),
33}
34
35static SCALAR_VAR_RE: Lazy<Result<Regex, regex::Error>> =
40 Lazy::new(|| Regex::new(r"^\s*(?P<name>[\$\@\%][\w:]+)\s*=\s*(?P<value>.*?)$"));
41
42static UNDEF_RE: Lazy<Result<Regex, regex::Error>> = Lazy::new(|| Regex::new(r"^undef$"));
43
44static INTEGER_RE: Lazy<Result<Regex, regex::Error>> = Lazy::new(|| Regex::new(r"^-?\d+$"));
45
46static NUMBER_RE: Lazy<Result<Regex, regex::Error>> =
47 Lazy::new(|| Regex::new(r"^-?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?$"));
48
49static QUOTED_STRING_RE: Lazy<Result<Regex, regex::Error>> =
50 Lazy::new(|| Regex::new(r#"^'(?:[^'\\]|\\.)*'|^"(?:[^"\\]|\\.)*""#));
51
52static ARRAY_REF_RE: Lazy<Result<Regex, regex::Error>> =
53 Lazy::new(|| Regex::new(r"^ARRAY\(0x[0-9a-fA-F]+\)$"));
54
55static HASH_REF_RE: Lazy<Result<Regex, regex::Error>> =
56 Lazy::new(|| Regex::new(r"^HASH\(0x[0-9a-fA-F]+\)$"));
57
58static CODE_REF_RE: Lazy<Result<Regex, regex::Error>> =
59 Lazy::new(|| Regex::new(r"^CODE\(0x[0-9a-fA-F]+\)$"));
60
61static OBJECT_RE: Lazy<Result<Regex, regex::Error>> = Lazy::new(|| {
62 Regex::new(r"^(?P<class>[\w:]+)=(?P<type>ARRAY|HASH|SCALAR|GLOB)\(0x[0-9a-fA-F]+\)$")
63});
64
65static GLOB_RE: Lazy<Result<Regex, regex::Error>> =
66 Lazy::new(|| Regex::new(r"^\*(?P<name>[\w:]+)$"));
67
68#[allow(dead_code)]
70static REGEX_RE: Lazy<Result<Regex, regex::Error>> =
71 Lazy::new(|| Regex::new(r"^(?:\(\?(?P<flags>[xism-]*)(?:-[xism]+)?:)?(?P<pattern>.*?)\)?$"));
72
73fn scalar_var_re() -> Option<&'static Regex> {
75 SCALAR_VAR_RE.as_ref().ok()
76}
77fn undef_re() -> Option<&'static Regex> {
78 UNDEF_RE.as_ref().ok()
79}
80fn integer_re() -> Option<&'static Regex> {
81 INTEGER_RE.as_ref().ok()
82}
83fn number_re() -> Option<&'static Regex> {
84 NUMBER_RE.as_ref().ok()
85}
86fn quoted_string_re() -> Option<&'static Regex> {
87 QUOTED_STRING_RE.as_ref().ok()
88}
89fn array_ref_re() -> Option<&'static Regex> {
90 ARRAY_REF_RE.as_ref().ok()
91}
92fn hash_ref_re() -> Option<&'static Regex> {
93 HASH_REF_RE.as_ref().ok()
94}
95fn code_ref_re() -> Option<&'static Regex> {
96 CODE_REF_RE.as_ref().ok()
97}
98fn object_re() -> Option<&'static Regex> {
99 OBJECT_RE.as_ref().ok()
100}
101fn glob_re() -> Option<&'static Regex> {
102 GLOB_RE.as_ref().ok()
103}
104
/// Nesting limit applied by `Default`; mirrors the value `VariableParser::new` uses.
const DEFAULT_MAX_DEPTH: usize = 50;

/// Parser that turns textual Perl values and assignment lines into `PerlValue`s.
#[derive(Debug)]
pub struct VariableParser {
    /// Deepest nesting level `parse_value` accepts before reporting `MaxDepthExceeded`.
    max_depth: usize,
}

impl Default for VariableParser {
    /// Builds a parser with the same nesting limit as `VariableParser::new`.
    ///
    /// A derived `Default` would set the limit to `0`, so a default-constructed parser
    /// would reject every element of any collection.
    fn default() -> Self {
        Self { max_depth: DEFAULT_MAX_DEPTH }
    }
}
114
115impl VariableParser {
116 #[must_use]
118 pub fn new() -> Self {
119 Self { max_depth: 50 }
120 }
121
122 #[must_use]
124 pub fn with_max_depth(mut self, depth: usize) -> Self {
125 self.max_depth = depth;
126 self
127 }
128
129 pub fn parse_assignment(&self, line: &str) -> Result<(String, PerlValue), VariableParseError> {
143 let re = scalar_var_re()
144 .ok_or_else(|| VariableParseError::UnrecognizedFormat(line.to_string()))?;
145 if let Some(caps) = re.captures(line) {
146 let name = caps
147 .name("name")
148 .ok_or_else(|| VariableParseError::UnrecognizedFormat(line.to_string()))?
149 .as_str()
150 .to_string();
151 let value_str = caps
152 .name("value")
153 .ok_or_else(|| VariableParseError::UnrecognizedFormat(line.to_string()))?
154 .as_str();
155 let value = self.parse_value(value_str, 0)?;
156 Ok((name, value))
157 } else {
158 Err(VariableParseError::UnrecognizedFormat(line.to_string()))
159 }
160 }
161
162 pub fn parse_value(&self, text: &str, depth: usize) -> Result<PerlValue, VariableParseError> {
176 if depth > self.max_depth {
177 return Err(VariableParseError::MaxDepthExceeded(self.max_depth));
178 }
179
180 let text = text.trim();
181
182 if undef_re().is_some_and(|re| re.is_match(text)) {
184 return Ok(PerlValue::Undef);
185 }
186
187 if integer_re().is_some_and(|re| re.is_match(text)) {
189 if let Ok(i) = text.parse::<i64>() {
190 return Ok(PerlValue::Integer(i));
191 }
192 }
193
194 if number_re().is_some_and(|re| re.is_match(text)) {
196 if let Ok(n) = text.parse::<f64>() {
197 return Ok(PerlValue::Number(n));
198 }
199 }
200
201 if quoted_string_re().is_some_and(|re| re.is_match(text)) {
203 let unquoted = self.unquote_string(text)?;
204 return Ok(PerlValue::Scalar(unquoted));
205 }
206
207 if array_ref_re().is_some_and(|re| re.is_match(text)) {
209 return Ok(PerlValue::Array(vec![]));
210 }
211
212 if hash_ref_re().is_some_and(|re| re.is_match(text)) {
214 return Ok(PerlValue::Hash(vec![]));
215 }
216
217 if code_ref_re().is_some_and(|re| re.is_match(text)) {
219 return Ok(PerlValue::Code { name: None });
220 }
221
222 if let Some(caps) = object_re().and_then(|re| re.captures(text)) {
224 let class = caps
225 .name("class")
226 .ok_or_else(|| VariableParseError::UnrecognizedFormat(text.to_string()))?
227 .as_str()
228 .to_string();
229 let type_str = caps
230 .name("type")
231 .ok_or_else(|| VariableParseError::UnrecognizedFormat(text.to_string()))?
232 .as_str();
233 let inner = match type_str {
234 "ARRAY" => PerlValue::Array(vec![]),
235 "HASH" => PerlValue::Hash(vec![]),
236 _ => PerlValue::Scalar(String::new()),
237 };
238 return Ok(PerlValue::Object { class, value: Box::new(inner) });
239 }
240
241 if let Some(caps) = glob_re().and_then(|re| re.captures(text)) {
243 let name = caps
244 .name("name")
245 .ok_or_else(|| VariableParseError::UnrecognizedFormat(text.to_string()))?
246 .as_str()
247 .to_string();
248 return Ok(PerlValue::Glob(name));
249 }
250
251 if text.starts_with('(') && text.ends_with(')') {
253 return self.parse_array_literal(text, depth);
254 }
255
256 if text.starts_with('[') && text.ends_with(']') {
258 return self.parse_array_literal(text, depth);
259 }
260
261 if text.starts_with('{') && text.ends_with('}') {
263 return self.parse_hash_literal(text, depth);
264 }
265
266 Ok(PerlValue::Scalar(text.to_string()))
268 }
269
270 fn parse_array_literal(
272 &self,
273 text: &str,
274 depth: usize,
275 ) -> Result<PerlValue, VariableParseError> {
276 let inner = &text[1..text.len() - 1];
278
279 if inner.trim().is_empty() {
280 return Ok(PerlValue::Array(vec![]));
281 }
282
283 let elements = self.split_elements(inner)?;
284 let parsed: Result<Vec<PerlValue>, _> =
285 elements.iter().map(|e| self.parse_value(e, depth + 1)).collect();
286
287 Ok(PerlValue::Array(parsed?))
288 }
289
290 fn parse_hash_literal(
292 &self,
293 text: &str,
294 depth: usize,
295 ) -> Result<PerlValue, VariableParseError> {
296 let inner = &text[1..text.len() - 1];
298
299 if inner.trim().is_empty() {
300 return Ok(PerlValue::Hash(vec![]));
301 }
302
303 let elements = self.split_elements(inner)?;
304 let mut pairs = Vec::new();
305
306 for element in elements {
307 if let Some((key, value)) = element.split_once("=>") {
308 let key = self.unquote_key(key.trim());
309 let value = self.parse_value(value.trim(), depth + 1)?;
310 pairs.push((key, value));
311 } else {
312 let key = self.unquote_key(element.trim());
314 pairs.push((key, PerlValue::Undef));
315 }
316 }
317
318 Ok(PerlValue::Hash(pairs))
319 }
320
321 fn split_elements(&self, text: &str) -> Result<Vec<String>, VariableParseError> {
323 let mut elements = Vec::new();
324 let mut current = String::new();
325 let mut paren_depth: u32 = 0;
326 let mut bracket_depth: u32 = 0;
327 let mut brace_depth: u32 = 0;
328 let mut in_string = false;
329 let mut string_char = ' ';
330 let mut escape_next = false;
331
332 for ch in text.chars() {
333 if escape_next {
334 current.push(ch);
335 escape_next = false;
336 continue;
337 }
338
339 if ch == '\\' {
340 current.push(ch);
341 escape_next = true;
342 continue;
343 }
344
345 if in_string {
346 current.push(ch);
347 if ch == string_char {
348 in_string = false;
349 }
350 continue;
351 }
352
353 match ch {
354 '"' | '\'' => {
355 current.push(ch);
356 in_string = true;
357 string_char = ch;
358 }
359 '(' => {
360 current.push(ch);
361 paren_depth += 1;
362 }
363 ')' => {
364 current.push(ch);
365 paren_depth = paren_depth.saturating_sub(1);
366 }
367 '[' => {
368 current.push(ch);
369 bracket_depth += 1;
370 }
371 ']' => {
372 current.push(ch);
373 bracket_depth = bracket_depth.saturating_sub(1);
374 }
375 '{' => {
376 current.push(ch);
377 brace_depth += 1;
378 }
379 '}' => {
380 current.push(ch);
381 brace_depth = brace_depth.saturating_sub(1);
382 }
383 ',' if paren_depth == 0 && bracket_depth == 0 && brace_depth == 0 => {
384 let trimmed = current.trim().to_string();
385 if !trimmed.is_empty() {
386 elements.push(trimmed);
387 }
388 current = String::new();
389 }
390 _ => {
391 current.push(ch);
392 }
393 }
394 }
395
396 let trimmed = current.trim().to_string();
398 if !trimmed.is_empty() {
399 elements.push(trimmed);
400 }
401
402 Ok(elements)
403 }
404
405 fn unquote_string(&self, text: &str) -> Result<String, VariableParseError> {
407 if text.len() < 2 {
408 return Err(VariableParseError::UnterminatedString);
409 }
410
411 let first = text.chars().next();
412 let last = text.chars().next_back();
413
414 match (first, last) {
415 (Some('"'), Some('"')) | (Some('\''), Some('\'')) => {
416 let inner = &text[1..text.len() - 1];
417 Ok(self.unescape_string(inner))
418 }
419 _ => Ok(text.to_string()),
420 }
421 }
422
423 fn unquote_key(&self, text: &str) -> String {
425 if text.len() >= 2 {
426 let first = text.chars().next();
427 let last = text.chars().next_back();
428
429 match (first, last) {
430 (Some('"'), Some('"')) | (Some('\''), Some('\'')) => {
431 return self.unescape_string(&text[1..text.len() - 1]);
432 }
433 _ => {}
434 }
435 }
436 text.to_string()
437 }
438
439 fn unescape_string(&self, text: &str) -> String {
441 let mut result = String::with_capacity(text.len());
442 let mut chars = text.chars().peekable();
443
444 while let Some(ch) = chars.next() {
445 if ch == '\\' {
446 match chars.next() {
447 Some('n') => result.push('\n'),
448 Some('r') => result.push('\r'),
449 Some('t') => result.push('\t'),
450 Some('\\') => result.push('\\'),
451 Some('"') => result.push('"'),
452 Some('\'') => result.push('\''),
453 Some(other) => {
454 result.push('\\');
455 result.push(other);
456 }
457 None => result.push('\\'),
458 }
459 } else {
460 result.push(ch);
461 }
462 }
463
464 result
465 }
466
467 pub fn parse_variables(&self, output: &str) -> Vec<(String, PerlValue)> {
477 output.lines().filter_map(|line| self.parse_assignment(line).ok()).collect()
478 }
479}
480
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `text` as a top-level value with a freshly constructed parser.
    fn parse(text: &str) -> Result<PerlValue, VariableParseError> {
        VariableParser::new().parse_value(text, 0)
    }

    /// Parses `line` as an assignment with a freshly constructed parser.
    fn assign(line: &str) -> Result<(String, PerlValue), VariableParseError> {
        VariableParser::new().parse_assignment(line)
    }

    #[test]
    fn test_parse_undef() {
        assert!(matches!(parse("undef"), Ok(PerlValue::Undef)));
    }

    #[test]
    fn test_parse_integer() {
        assert!(matches!(parse("42"), Ok(PerlValue::Integer(42))));
        assert!(matches!(parse("-17"), Ok(PerlValue::Integer(-17))));
    }

    #[test]
    fn test_parse_number() {
        assert!(matches!(parse("3.25"), Ok(PerlValue::Number(n)) if (n - 3.25).abs() < 0.001));
        assert!(matches!(parse("1.5e10"), Ok(PerlValue::Number(_))));
    }

    #[test]
    fn test_parse_quoted_string() {
        assert!(matches!(parse("\"hello\""), Ok(PerlValue::Scalar(s)) if s == "hello"));
        assert!(matches!(parse("'world'"), Ok(PerlValue::Scalar(s)) if s == "world"));
    }

    #[test]
    fn test_parse_string_with_escapes() {
        let parsed = parse("\"line1\\nline2\"");
        assert!(matches!(parsed, Ok(PerlValue::Scalar(s)) if s.contains('\n')));
    }

    #[test]
    fn test_parse_array_reference() {
        assert!(matches!(parse("ARRAY(0x1234abcd)"), Ok(PerlValue::Array(_))));
    }

    #[test]
    fn test_parse_hash_reference() {
        assert!(matches!(parse("HASH(0x5678abcd)"), Ok(PerlValue::Hash(_))));
    }

    #[test]
    fn test_parse_code_reference() {
        assert!(matches!(parse("CODE(0xdeadbeef)"), Ok(PerlValue::Code { name: None })));
    }

    #[test]
    fn test_parse_object() {
        let parsed = parse("My::Class=HASH(0x1234)");
        assert!(matches!(parsed, Ok(PerlValue::Object { class, .. }) if class == "My::Class"));
    }

    #[test]
    fn test_parse_glob() {
        let parsed = parse("*main::foo");
        assert!(matches!(parsed, Ok(PerlValue::Glob(name)) if name == "main::foo"));
    }

    #[test]
    fn test_parse_array_literal() {
        assert!(matches!(parse("(1, 2, 3)"), Ok(PerlValue::Array(arr)) if arr.len() == 3));
        assert!(matches!(parse("[1, 2, 3]"), Ok(PerlValue::Array(arr)) if arr.len() == 3));
        assert!(matches!(parse("()"), Ok(PerlValue::Array(arr)) if arr.is_empty()));
    }

    #[test]
    fn test_parse_hash_literal() {
        let populated = parse("{foo => 1, bar => 2}");
        assert!(matches!(populated, Ok(PerlValue::Hash(pairs)) if pairs.len() == 2));

        assert!(matches!(parse("{}"), Ok(PerlValue::Hash(pairs)) if pairs.is_empty()));
    }

    #[test]
    fn test_parse_assignment() {
        let scalar = assign("$x = 42");
        assert!(matches!(scalar, Ok((name, PerlValue::Integer(42))) if name == "$x"));

        let array = assign("@arr = (1, 2, 3)");
        assert!(matches!(array, Ok((name, PerlValue::Array(_))) if name == "@arr"));

        let hash = assign("%hash = {a => 1}");
        assert!(matches!(hash, Ok((name, PerlValue::Hash(_))) if name == "%hash"));
    }

    #[test]
    fn test_parse_variables_multi_line() {
        let vars = VariableParser::new().parse_variables("$x = 1\n$y = 2\n$z = \"hello\"");

        let names: Vec<&str> = vars.iter().map(|(name, _)| name.as_str()).collect();
        assert_eq!(names, ["$x", "$y", "$z"]);
    }

    #[test]
    fn test_max_depth_exceeded() {
        // Three nested lists put the innermost scalar at depth 3, above the limit of 2.
        let shallow = VariableParser::new().with_max_depth(2);
        let outcome = shallow.parse_value("(((1)))", 0);
        assert!(matches!(outcome, Err(VariableParseError::MaxDepthExceeded(_))));
    }

    #[test]
    fn test_parse_nested_structure() {
        let parsed = parse("{arr => [1, 2], hash => {a => 1}}");
        assert!(matches!(parsed, Ok(PerlValue::Hash(pairs)) if pairs.len() == 2));
    }
}