use std::collections::BTreeMap;
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct WordProblemNormalization {
pub expression: String,
pub reasoning_steps: Vec<String>,
pub result_label: Option<String>,
}
#[derive(Debug, Clone, PartialEq, Eq)]
enum BoxRule {
Known(i64),
Multiple { factor: i64, source: String },
Add { source: String, addend: i64 },
}
fn fibonacci_value(n: u32) -> u64 {
if n == 0 {
return 0;
}
let (mut previous, mut current) = (0u64, 1u64);
for _ in 1..n {
let next = previous + current;
previous = current;
current = next;
}
current
}
fn parse_ordinal(token: &str) -> Option<u32> {
let token = token.trim_matches(|c: char| !c.is_alphanumeric());
if token.is_empty() {
return None;
}
let digits: String = token.chars().take_while(char::is_ascii_digit).collect();
if !digits.is_empty() {
let suffix = &token[digits.len()..];
if suffix.is_empty() || matches!(suffix, "st" | "nd" | "rd" | "th") {
return digits.parse().ok();
}
return None;
}
Some(match token.to_lowercase().as_str() {
"first" => 1,
"second" => 2,
"third" => 3,
"fourth" => 4,
"fifth" => 5,
"sixth" => 6,
"seventh" => 7,
"eighth" => 8,
"ninth" => 9,
"tenth" => 10,
_ => return None,
})
}
fn bare_word(token: &str) -> String {
token
.trim_matches(|c: char| !c.is_alphanumeric())
.to_lowercase()
}
fn resolve_fibonacci_references(text: &str) -> String {
if !text.to_lowercase().contains("fibonacci") {
return text.to_owned();
}
let tokens: Vec<&str> = text.split_whitespace().collect();
let mut out: Vec<String> = Vec::new();
let mut index = 0;
while index < tokens.len() {
if let Some(n) = parse_ordinal(tokens[index]) {
if tokens
.get(index + 1)
.is_some_and(|next| bare_word(next) == "fibonacci")
{
if out.last().is_some_and(|last| bare_word(last) == "the") {
out.pop();
}
out.push(fibonacci_value(n).to_string());
index += 2;
if tokens.get(index).is_some_and(|next| {
matches!(bare_word(next).as_str(), "number" | "term" | "sequence")
}) {
index += 1;
}
continue;
}
}
out.push(tokens[index].to_owned());
index += 1;
}
out.join(" ")
}
fn split_sentences(text: &str) -> Vec<String> {
let chars: Vec<char> = text.chars().collect();
let mut sentences = Vec::new();
let mut current = String::new();
for (index, &ch) in chars.iter().enumerate() {
if ch == '.'
&& chars
.get(index + 1)
.map_or(true, |next| next.is_whitespace())
{
let sentence = current.trim().to_owned();
if !sentence.is_empty() {
sentences.push(sentence);
}
current.clear();
continue;
}
current.push(ch);
}
let sentence = current.trim().to_owned();
if !sentence.is_empty() {
sentences.push(sentence);
}
sentences
}
fn sentence_words(sentence: &str) -> Vec<String> {
sentence
.split(|ch: char| !ch.is_alphanumeric())
.filter(|token| !token.is_empty())
.map(str::to_lowercase)
.collect()
}
fn parse_int_token(token: &str) -> Option<i64> {
if let Ok(value) = token.parse::<i64>() {
return Some(value);
}
match token {
"zero" => Some(0),
"one" | "a" | "an" => Some(1),
"two" => Some(2),
"three" => Some(3),
"four" => Some(4),
"five" => Some(5),
"six" => Some(6),
"seven" => Some(7),
"eight" => Some(8),
"nine" => Some(9),
"ten" => Some(10),
"eleven" => Some(11),
"twelve" => Some(12),
"thirteen" => Some(13),
"fourteen" => Some(14),
"fifteen" => Some(15),
"sixteen" => Some(16),
"seventeen" => Some(17),
"eighteen" => Some(18),
"nineteen" => Some(19),
"twenty" => Some(20),
_ => None,
}
}
fn canonical_box_id(token: &str) -> Option<String> {
let cleaned = token
.trim_matches(|ch: char| !ch.is_alphanumeric())
.to_ascii_uppercase();
(!cleaned.is_empty() && cleaned.chars().count() <= 3).then_some(cleaned)
}
fn parse_declared_box_count(words: &[String]) -> Option<usize> {
if words.len() >= 4
&& words[0] == "i"
&& words[1] == "have"
&& matches!(words[3].as_str(), "box" | "boxes")
{
let count = parse_int_token(&words[2])?;
usize::try_from(count).ok()
} else {
None
}
}
fn parse_box_rule(words: &[String]) -> Option<(String, BoxRule, Option<String>)> {
let mut index = usize::from(words.first().is_some_and(|word| word == "if"));
if words.get(index)? != "box" {
return None;
}
let target = canonical_box_id(words.get(index + 1)?)?;
if words.get(index + 2)? != "has" {
return None;
}
index += 3;
if words.get(index).is_some_and(|word| word == "twice")
&& words.get(index + 1).is_some_and(|word| word == "as")
&& words.get(index + 2).is_some_and(|word| word == "many")
&& words.get(index + 4).is_some_and(|word| word == "as")
&& words.get(index + 5).is_some_and(|word| word == "box")
{
let item = words.get(index + 3).cloned();
let source = canonical_box_id(words.get(index + 6)?)?;
return Some((target, BoxRule::Multiple { factor: 2, source }, item));
}
let value = parse_int_token(words.get(index)?)?;
if words.get(index + 1).is_some_and(|word| word == "more")
&& words.get(index + 3).is_some_and(|word| word == "than")
&& words.get(index + 4).is_some_and(|word| word == "box")
{
let item = words.get(index + 2).cloned();
let source = canonical_box_id(words.get(index + 5)?)?;
return Some((
target,
BoxRule::Add {
source,
addend: value,
},
item,
));
}
let item = words.get(index + 1).cloned();
Some((target, BoxRule::Known(value), item))
}
fn resolve_box_value(
id: &str,
rules: &BTreeMap<String, BoxRule>,
memo: &mut BTreeMap<String, i64>,
stack: &mut Vec<String>,
reasoning_steps: &mut Vec<String>,
result_label: &str,
) -> Option<i64> {
if let Some(value) = memo.get(id) {
return Some(*value);
}
if stack.iter().any(|existing| existing == id) {
return None;
}
let rule = rules.get(id)?;
stack.push(id.to_owned());
let value =
match rule {
BoxRule::Known(value) => {
reasoning_steps.push(format!("Box {id} = {value} {result_label}."));
Some(*value)
}
BoxRule::Multiple { factor, source } => {
resolve_box_value(source, rules, memo, stack, reasoning_steps, result_label)
.and_then(|source_value| {
let value = source_value.checked_mul(*factor)?;
reasoning_steps.push(format!(
"Box {id} = {factor} * {source_value} = {value} {result_label}."
));
Some(value)
})
}
BoxRule::Add { source, addend } => {
resolve_box_value(source, rules, memo, stack, reasoning_steps, result_label)
.and_then(|source_value| {
let value = source_value.checked_add(*addend)?;
reasoning_steps.push(format!(
"Box {id} = {source_value} + {addend} = {value} {result_label}."
));
Some(value)
})
}
};
stack.pop();
let value = value?;
memo.insert(id.to_owned(), value);
Some(value)
}
fn normalize_box_total_problem(text: &str) -> Option<WordProblemNormalization> {
let lower = text.to_lowercase();
if !(lower.contains("box")
&& lower.contains("how many")
&& lower.contains("total")
&& (lower.contains("twice as many") || lower.contains("more") && lower.contains("than")))
{
return None;
}
let mut declared_count = None;
let mut rules = BTreeMap::new();
let mut result_label = None;
for sentence in split_sentences(text) {
let words = sentence_words(&sentence);
if words.is_empty() {
continue;
}
declared_count = declared_count.or_else(|| parse_declared_box_count(&words));
if let Some((target, rule, item)) = parse_box_rule(&words) {
if let Some(item) = item.filter(|value| !matches!(value.as_str(), "box" | "boxes")) {
result_label = Some(item);
}
rules.insert(target, rule);
}
}
if rules.len() < 2 {
return None;
}
if let Some(count) = declared_count {
if rules.len() < count {
return None;
}
}
let result_label = result_label.unwrap_or_else(|| String::from("items"));
let mut memo = BTreeMap::new();
let mut reasoning_steps = Vec::new();
let ids = rules.keys().cloned().collect::<Vec<_>>();
for id in &ids {
resolve_box_value(
id,
&rules,
&mut memo,
&mut Vec::new(),
&mut reasoning_steps,
&result_label,
)?;
}
let values = ids
.iter()
.map(|id| memo.get(id).copied())
.collect::<Option<Vec<_>>>()?;
if values.is_empty() {
return None;
}
let expression = values
.iter()
.map(ToString::to_string)
.collect::<Vec<_>>()
.join(" + ");
reasoning_steps.push(format!("Total = {expression} {result_label}."));
Some(WordProblemNormalization {
expression,
reasoning_steps,
result_label: Some(result_label),
})
}
#[must_use]
pub fn normalize_word_problem_detailed(expression: &str) -> Option<WordProblemNormalization> {
let trimmed = expression.trim();
if trimmed.is_empty() {
return None;
}
if let Some(normalized) = normalize_box_total_problem(trimmed) {
return Some(normalized);
}
let arithmetic: Vec<String> = split_sentences(trimmed)
.into_iter()
.filter(|sentence| {
!sentence.is_empty()
&& (sentence.chars().any(|c| c.is_ascii_digit()) || sentence.contains('%'))
})
.collect();
if arithmetic.is_empty() {
return None;
}
let mut working = resolve_fibonacci_references(&arithmetic.join(". "));
for (phrase, symbol) in [
(" and multiply it by ", " * "),
(" and multiply by ", " * "),
(" multiply it by ", " * "),
(" multiplied by ", " * "),
(" multiply by ", " * "),
(" and divide it by ", " / "),
(" and divide by ", " / "),
(" divide it by ", " / "),
(" divided by ", " / "),
(" divide by ", " / "),
] {
let lower = working.to_lowercase();
if let Some(position) = lower.find(phrase) {
working = format!(
"{}{symbol}{}",
&working[..position],
&working[position + phrase.len()..]
);
}
}
let working = working.split_whitespace().collect::<Vec<_>>().join(" ");
if working.is_empty() || working.eq_ignore_ascii_case(trimmed) {
return None;
}
Some(WordProblemNormalization {
expression: working,
reasoning_steps: Vec::new(),
result_label: None,
})
}