use super::tokens::Token;
use super::chumsky_parser::parse_expression_chumsky;
use crate::units::parse_unit;
/// Parses a line reference such as `line1` or `LINE42` into a zero-based index.
///
/// The word `line` is matched case-insensitively and must be followed
/// immediately by one or more ASCII digits giving a one-based line number.
///
/// Returns `None` when:
/// - the `line` prefix is missing,
/// - the suffix is empty or contains anything other than ASCII digits
///   (so signed forms such as `line+1` and `line-1` are both rejected),
/// - the number does not fit in a `usize`, or
/// - the number is zero (line numbers start at 1).
pub fn parse_line_reference(text: &str) -> Option<usize> {
    const PREFIX: &str = "line";

    // `get` yields `None` for short input and for a cut that would land inside
    // a multi-byte character, so this never panics on arbitrary text.
    let prefix = text.get(..PREFIX.len())?;
    if !prefix.eq_ignore_ascii_case(PREFIX) {
        return None;
    }

    // The prefix is pure ASCII, so this index is a valid char boundary.
    let digits = &text[PREFIX.len()..];

    // `usize::from_str` tolerates a leading `+`; insist on digits only so that
    // `line+1` is not silently treated as a reference to line 1.
    if digits.is_empty() || !digits.bytes().all(|b| b.is_ascii_digit()) {
        return None;
    }

    // Convert from one-based to zero-based; `line0` has no valid index.
    digits.parse::<usize>().ok()?.checked_sub(1)
}
/// Runs `parse_expression_chumsky` on `expr` and returns the resulting tokens.
///
/// Returns `None` when the parser reports an error or when it succeeds but
/// produces an empty token list; otherwise returns `Some` with the tokens.
pub fn tokenize_with_units(expr: &str) -> Option<Vec<Token>> {
    parse_expression_chumsky(expr)
        .ok()
        .filter(|tokens| !tokens.is_empty())
}
/// Extracts the math expressions embedded in free-form `text`.
///
/// The search proceeds in two phases:
///
/// 1. The whole trimmed input is tried first. If it parses into at least one
///    token it is returned as the only result. If it does not parse and
///    `is_obviously_invalid_single_expression` flags it, the result is empty.
/// 2. Otherwise the text is cut at `;`, `:`, `!`, `?` and newlines, and every
///    contiguous run of whitespace-separated words inside each segment is
///    tried as a candidate (words re-joined with single spaces).
///
/// Accepted candidates are ordered longest first (by byte length, ties keep
/// discovery order), and any candidate that is a proper substring of an
/// already-kept, different expression is dropped.
pub fn find_math_expression(text: &str) -> Vec<String> {
    use super::chumsky_parser::parse_expression_chumsky;

    // A candidate counts only when the parser succeeds with a non-empty token list.
    let parses = |candidate: &str| {
        matches!(parse_expression_chumsky(candidate), Ok(tokens) if !tokens.is_empty())
    };

    // Phase 1: the input as a single expression.
    let whole = text.trim();
    if !whole.is_empty() {
        if parses(whole) {
            return vec![whole.to_string()];
        }
        if is_obviously_invalid_single_expression(whole) {
            return Vec::new();
        }
    }

    // Phase 2: every contiguous word window inside each separator-delimited segment.
    let is_separator = |c: char| matches!(c, ';' | ':' | '!' | '?' | '\n');
    let mut found: Vec<String> = Vec::new();
    for segment in text.split(is_separator) {
        // Empty or whitespace-only segments yield no words and thus no candidates.
        let words: Vec<&str> = segment.split_whitespace().collect();
        for start in 0..words.len() {
            for end in (start + 1)..=words.len() {
                let candidate = words[start..end].join(" ");
                if parses(&candidate) {
                    found.push(candidate);
                }
            }
        }
    }

    // Stable sort, longest first, so containing expressions are seen before
    // the shorter ones they contain.
    found.sort_by_key(|expr| std::cmp::Reverse(expr.len()));

    let mut kept: Vec<String> = Vec::new();
    for expr in found {
        let covered = kept
            .iter()
            .any(|longer| longer != &expr && longer.contains(expr.as_str()));
        if !covered {
            kept.push(expr);
        }
    }
    kept
}
fn is_obviously_invalid_single_expression(text: &str) -> bool {
let words: Vec<&str> = text.split_whitespace().collect();
if words.len() == 2 {
if let Ok(_) = words[0].replace(",", "").parse::<f64>() {
if words[1].chars().all(|c| c.is_ascii_alphabetic()) && parse_unit(words[1]).is_none() {
return true;
}
}
}
if words.len() == 4 && words[2].to_lowercase() == "to" {
if let Ok(_) = words[0].replace(",", "").parse::<f64>() {
if words[1].chars().all(|c| c.is_ascii_alphabetic()) && parse_unit(words[1]).is_none() {
return true;
}
}
}
false
}
/// Checks whether `expr` looks like a well-formed math expression.
///
/// This is a hand-written character scan, independent of the full parser.
/// It recognises:
/// - numbers made of ASCII digits, `.` and `,`, optionally followed by a unit
///   accepted by `parse_unit`;
/// - the binary operators `+ - * /` (a `-` is also allowed where an operand
///   is expected);
/// - parentheses;
/// - line references accepted by `parse_line_reference`;
/// - the conversion keywords `to` and `in` (case-insensitive), which require
///   another operand or unit after them.
///
/// Only the space character is skipped as whitespace. Scanning stops at the
/// first character or word that is not recognised, and the verdict is then
/// based on what was scanned up to that point.
///
/// Returns `true` when the trimmed input is non-empty, parentheses are
/// balanced, at least one number or line reference was seen, and the scanned
/// portion does not end while an operand is still expected (for example after
/// an operator, an opening parenthesis, or a conversion keyword).
pub fn is_valid_math_expression(expr: &str) -> bool {
    let expr = expr.trim();
    if expr.is_empty() {
        return false;
    }

    // Scanned words are ASCII-only, so an ASCII case-insensitive compare suffices.
    let is_conversion_keyword =
        |word: &str| word.eq_ignore_ascii_case("to") || word.eq_ignore_ascii_case("in");

    let mut has_number = false;
    let mut paren_count = 0;
    // True while an operand is expected: at the start and after an operator,
    // an opening parenthesis, or a conversion keyword.
    let mut prev_was_operator = true;
    let chars: Vec<char> = expr.chars().collect();
    let mut i = 0;
    while i < chars.len() {
        let ch = chars[i];
        match ch {
            ' ' => {
                i += 1;
            }
            '0'..='9' => {
                has_number = true;
                prev_was_operator = false;
                // Consume the numeric literal, including decimal points and
                // thousands separators.
                while i < chars.len()
                    && (chars[i].is_ascii_digit() || chars[i] == '.' || chars[i] == ',')
                {
                    i += 1;
                }
                while i < chars.len() && chars[i] == ' ' {
                    i += 1;
                }
                // Look ahead for a word directly after the number.
                if i < chars.len() && chars[i].is_ascii_alphabetic() {
                    let unit_start = i;
                    while i < chars.len() && (chars[i].is_ascii_alphabetic() || chars[i] == '/') {
                        i += 1;
                    }
                    let unit_str: String = chars[unit_start..i].iter().collect();
                    if parse_unit(&unit_str).is_none() {
                        if is_conversion_keyword(&unit_str) {
                            // A conversion keyword needs a target after it,
                            // exactly as in the bare-word branch below, so a
                            // trailing `5 to` must not be accepted.
                            prev_was_operator = true;
                        } else {
                            // Not a unit: rewind so the bare-word branch
                            // decides what to do with this word.
                            i = unit_start;
                        }
                    }
                }
            }
            '.' => {
                // A number may not start with a bare decimal point.
                if prev_was_operator {
                    return false;
                }
                i += 1;
            }
            '+' | '-' | '*' | '/' => {
                // Two operators in a row are invalid, except for a `-` in
                // operand position.
                if prev_was_operator && ch != '-' {
                    return false;
                }
                prev_was_operator = true;
                i += 1;
            }
            '(' => {
                paren_count += 1;
                prev_was_operator = true;
                i += 1;
            }
            ')' => {
                paren_count -= 1;
                // A closing parenthesis without a matching opener.
                if paren_count < 0 {
                    return false;
                }
                prev_was_operator = false;
                i += 1;
            }
            _ => {
                if !ch.is_ascii_alphabetic() {
                    // Unrecognised character: stop scanning.
                    break;
                }
                let word_start = i;
                while i < chars.len()
                    && (chars[i].is_ascii_alphabetic()
                        || chars[i].is_ascii_digit()
                        || chars[i] == '/')
                {
                    i += 1;
                }
                let word: String = chars[word_start..i].iter().collect();
                if is_conversion_keyword(&word) {
                    prev_was_operator = true;
                } else if parse_line_reference(&word).is_some() {
                    // A line reference stands in for a number.
                    has_number = true;
                    prev_was_operator = false;
                } else if parse_unit(&word).is_some() {
                    prev_was_operator = false;
                } else {
                    // Unrecognised word: stop scanning.
                    break;
                }
            }
        }
    }
    paren_count == 0 && has_number && !prev_was_operator
}
#[cfg(test)]
mod parser_tests {
    use super::*;

    /// Asserts that `is_valid_math_expression` returns `expected` for every input, in order.
    fn expect_validity(expected: bool, inputs: &[&str]) {
        for &input in inputs {
            assert_eq!(
                is_valid_math_expression(input),
                expected,
                "unexpected validity verdict for {:?}",
                input
            );
        }
    }

    /// Asserts that `find_math_expression(text)` contains `expected` for every `(text, expected)` pair.
    fn expect_extracted(cases: &[(&str, &str)]) {
        for &(text, expected) in cases {
            let found = find_math_expression(text);
            assert!(
                found.iter().any(|expr| expr.as_str() == expected),
                "expected {:?} among the expressions found in {:?}, got {:?}",
                expected,
                text,
                found
            );
        }
    }

    #[test]
    fn test_parse_line_reference() {
        // One-based references map to zero-based indices, regardless of case.
        let accepted = [
            ("line1", 0),
            ("line2", 1),
            ("line10", 9),
            ("line999", 998),
            ("LINE1", 0),
            ("Line2", 1),
            ("LiNe3", 2),
        ];
        for &(input, index) in accepted.iter() {
            assert_eq!(parse_line_reference(input), Some(index), "input {:?}", input);
        }

        let rejected = ["line0", "line", "line-1", "linea", "notline1", "", "1line"];
        for &input in rejected.iter() {
            assert_eq!(parse_line_reference(input), None, "input {:?}", input);
        }
    }

    #[test]
    fn test_tokenize_with_units_basic() {
        let plain = tokenize_with_units("42").expect("\"42\" should tokenize");
        assert!(matches!(plain.as_slice(), [Token::Number(42.0)]));

        let with_unit = tokenize_with_units("5 GiB").expect("\"5 GiB\" should tokenize");
        assert!(matches!(
            with_unit.as_slice(),
            [Token::NumberWithUnit(5.0, _)]
        ));

        let sum = tokenize_with_units("2 + 3").expect("\"2 + 3\" should tokenize");
        assert!(matches!(
            sum.as_slice(),
            [Token::Number(2.0), Token::Plus, Token::Number(3.0)]
        ));
    }

    #[test]
    fn test_tokenize_with_units_invalid() {
        for &input in ["invalid text", "1 + 2)", "1 invalidunit", ""].iter() {
            assert!(
                tokenize_with_units(input).is_none(),
                "expected no tokens for {:?}",
                input
            );
        }
    }

    #[test]
    fn test_find_math_expression_basic() {
        expect_extracted(&[
            ("The value is 42", "42"),
            ("Calculate 2 + 3 for the result", "2 + 3"),
            ("We need 5 GiB of memory", "5 GiB"),
        ]);
    }

    #[test]
    fn test_find_math_expression_complex() {
        expect_extracted(&[(
            "The calculation (5 + 3) * 2 gives us the answer",
            "(5 + 3) * 2",
        )]);

        let stats = find_math_expression("Server stats: 16 GiB RAM, 100 QPS, 50 TB storage");
        assert!(stats.iter().any(|expr| expr.as_str() == "16 GiB"));
        assert!(!stats.is_empty());

        let references = find_math_expression("Use line1 + line2 for the total");
        assert!(!references.is_empty());
    }

    #[test]
    fn test_find_math_expression_edge_cases() {
        assert!(find_math_expression("").is_empty());
        assert!(find_math_expression("Just some text without numbers").is_empty());
        assert!(!find_math_expression("First: 1 + 2, Second: 3 * 4, Third: 5 / 6").is_empty());
        assert!(!find_math_expression("Calculate 1 + 2 + 3").is_empty());
    }

    #[test]
    fn test_is_valid_math_expression() {
        expect_validity(
            true,
            &[
                "42",
                "2 + 3",
                "(1 + 2) * 3",
                "5 GiB + 10 MiB",
                "line1 * 2",
                "1 TiB to GiB",
                "24 MiB * 32 in KiB",
            ],
        );
        expect_validity(
            false,
            &[
                "",
                "invalid text",
                "1 +",
                "+ 2",
                "1 + + 2",
                "(1 + 2",
                "1 + 2)",
            ],
        );
        expect_validity(true, &["0", "-5", "1.5", "1,000", "1,000,000.50"]);
    }

    #[test]
    fn test_is_valid_math_expression_units() {
        expect_validity(
            true,
            &[
                "5GiB",
                "5 GiB",
                "10.5 MB/s",
                "100 QPS",
                "1 hour",
                "8 bit",
                "1 GiB to MiB",
                "24 MiB * 32 in KiB",
                "100 QPS to req/min",
                "1 gib TO mib",
                "1 GIB to MIB",
            ],
        );
    }

    #[test]
    fn test_is_valid_math_expression_operators() {
        expect_validity(
            true,
            &[
                "1 + 2",
                "5 - 3",
                "4 * 6",
                "8 / 2",
                "1 + 2 - 3",
                "2 * 3 + 4",
                "10 / 2 - 1",
                "(1 + 2) * 3",
                "1 + (2 * 3)",
                "((1 + 2) * 3) - 4",
            ],
        );
        expect_validity(false, &["1 + * 2", "* 1 + 2", "1 + 2 *"]);
    }

    #[test]
    fn test_is_valid_math_expression_line_references() {
        expect_validity(
            true,
            &[
                "line1",
                "line10",
                "line1 + line2",
                "line1 * 2",
                "(line1 + line2) / 2",
                "line1 + 5 GiB",
                "line1 to MiB",
                "line1 + line2 in KiB",
                "LINE1",
                "Line2",
                "LiNe3 + LiNe4",
            ],
        );
    }

    #[test]
    fn test_whitespace_handling() {
        expect_validity(
            true,
            &[
                " 1 + 2 ",
                "1 + 2",
                "1\t+\t2",
                "1+2",
                "5 GiB",
                "5GiB",
                "1 GiB to MiB",
                "1 GiB to MiB",
            ],
        );
    }

    #[test]
    fn test_find_math_expression_extraction() {
        expect_extracted(&[
            ("42 plus more text", "42"),
            ("2 + 3 equals five", "2 + 3"),
            ("5 GiB of storage", "5 GiB"),
            ("(1 + 2) * 3 is the result", "(1 + 2) * 3"),
            ("10 GiB/s transfer rate", "10 GiB/s"),
            ("1 GiB to MiB conversion", "1 GiB to MiB"),
        ]);
    }
}