use std::cmp::min;
use num_bigint::BigUint;
use num_traits::{ToPrimitive, Zero};
use sha2::{Digest, Sha256};
use crate::{
Dictionary, Memo128, Memo128Error, ACTION_BITS, CHECKSUM_BITS, CHUNK_BITS, NUM_CHUNKS,
OBJECT_BITS, OUTCOME_BITS, SETTING_BITS,
};
type ComponentIndices = (usize, usize, usize, usize, usize);
pub fn levenshtein_distance(s1: &str, s2: &str) -> usize {
if s1.is_empty() {
return s2.len();
}
if s2.is_empty() {
return s1.len();
}
let s1_chars: Vec<char> = s1.chars().collect();
let s2_chars: Vec<char> = s2.chars().collect();
let s1_len = s1_chars.len();
let s2_len = s2_chars.len();
let mut dp = vec![vec![0; s2_len + 1]; s1_len + 1];
for (i, item) in dp.iter_mut().enumerate().take(s1_len + 1) {
item[0] = i;
}
for j in 0..=s2_len {
dp[0][j] = j;
}
for i in 1..=s1_len {
for j in 1..=s2_len {
let cost = if s1_chars[i - 1] == s2_chars[j - 1] {
0
} else {
1
};
dp[i][j] = min(
min(
dp[i - 1][j] + 1, dp[i][j - 1] + 1, ),
dp[i - 1][j - 1] + cost, );
}
}
dp[s1_len][s2_len]
}
pub struct FuzzyMemo128 {
memo128: Memo128,
max_levenshtein_distance: usize,
}
impl FuzzyMemo128 {
pub fn new(max_levenshtein_distance: usize) -> Result<Self, Memo128Error> {
Ok(FuzzyMemo128 {
memo128: Memo128::new()?,
max_levenshtein_distance,
})
}
fn calculate_checksum(&self, data: &[u8]) -> u8 {
let mut hasher = Sha256::new();
hasher.update(data);
let result = hasher.finalize();
(result[0] >> 1) & 0x7F
}
fn find_fuzzy_matches(
&self,
text: &str,
dictionary: &Dictionary,
max_distance: usize,
) -> Vec<(usize, usize, usize)> {
let mut matches = Vec::new();
let min_len = if text.len() > max_distance {
text.len() - max_distance
} else {
0
};
let max_len = text.len() + max_distance;
for (idx, entry) in dictionary.entries.iter().enumerate() {
if entry.len() < min_len || entry.len() > max_len {
continue;
}
let distance = levenshtein_distance(text, entry);
if distance <= max_distance {
matches.push((idx, distance, entry.len()));
}
}
matches.sort_by(|&(_, dist_a, len_a), &(_, dist_b, len_b)| {
match dist_a.cmp(&dist_b) {
std::cmp::Ordering::Equal => {
len_b.cmp(&len_a) }
ordering => ordering,
}
});
matches
}
fn fuzzy_parse_sentence(&self, sentence: &str) -> Vec<ComponentIndices> {
let mut results = Vec::new();
let character_dict = &self.memo128.get_character_dict();
let setting_dict = &self.memo128.get_setting_dict();
let action_dict = &self.memo128.get_action_dict();
let object_dict = &self.memo128.get_object_dict();
let outcome_dict = &self.memo128.get_outcome_dict();
fn find_sequences<'a>(
sentence: &'a str,
component_idx: usize,
current_indices: &mut Vec<usize>,
results: &mut Vec<ComponentIndices>,
dictionaries: &[&&'a Dictionary],
max_distance: usize,
fuzzy_memo: &'a FuzzyMemo128,
) {
if component_idx == 5 {
if sentence.trim().is_empty() {
results.push((
current_indices[0],
current_indices[1],
current_indices[2],
current_indices[3],
current_indices[4],
));
}
return;
}
let dictionary = dictionaries[component_idx];
let max_segment_len = if component_idx == 4 {
sentence.len()
} else {
min(
sentence.len(),
match component_idx {
0 => 50, 1 => 50, 2 => 30, 3 => 40, _ => 100, },
)
};
let mut segment_points = Vec::new();
if component_idx == 4 {
segment_points.push(sentence.len());
} else {
let space_positions: Vec<usize> = sentence
.char_indices()
.filter(|&(_, c)| c == ' ')
.map(|(i, _)| i)
.collect();
if !space_positions.is_empty() {
for &pos in space_positions.iter().take(min(5, space_positions.len())) {
segment_points.push(pos);
}
if space_positions.len() >= 2 {
for i in (2..min(10, space_positions.len())).step_by(2) {
segment_points.push(space_positions[i]);
}
}
if sentence.len() <= max_segment_len {
segment_points.push(sentence.len());
}
segment_points.sort();
segment_points.dedup();
} else {
if sentence.len() <= 10 {
segment_points = (1..=min(max_segment_len, sentence.len())).collect();
} else {
for len in (5..=min(max_segment_len, sentence.len())).step_by(5) {
segment_points.push(len);
}
}
}
}
if segment_points.len() > 15 {
let mut limited_points = Vec::with_capacity(15);
let step = segment_points.len() / 15;
for i in (0..segment_points.len()).step_by(step.max(1)) {
limited_points.push(segment_points[i]);
}
if let Some(&last) = segment_points.last() {
if limited_points.last() != Some(&last) {
limited_points.push(last);
}
}
segment_points = limited_points;
}
for &k in &segment_points {
if k == 0 || k > sentence.len() {
continue;
}
let prefix = &sentence[..k];
let matches = fuzzy_memo.find_fuzzy_matches(prefix, dictionary, max_distance);
for (idx, _, _) in matches.into_iter().take(5) {
current_indices.push(idx);
let remainder = if k < sentence.len() {
if sentence[k..].starts_with(' ') {
&sentence[k + 1..]
} else {
&sentence[k..]
}
} else {
""
};
find_sequences(
remainder,
component_idx + 1,
current_indices,
results,
dictionaries,
max_distance,
fuzzy_memo,
);
current_indices.pop();
if results.len() >= 50 {
return;
}
}
}
}
let dictionaries = [
character_dict,
setting_dict,
action_dict,
object_dict,
outcome_dict,
];
let mut current_indices = Vec::with_capacity(5);
find_sequences(
sentence,
0,
&mut current_indices,
&mut results,
&dictionaries[..],
self.max_levenshtein_distance,
self,
);
results
}
pub fn fuzzy_decode(&self, input_sentences: &[String]) -> Result<Vec<String>, Memo128Error> {
if input_sentences.len() != NUM_CHUNKS {
return Err(Memo128Error::ParsingError(format!(
"Expected exactly {} sentences, got {}",
NUM_CHUNKS,
input_sentences.len()
)));
}
let normalized_sentences: Vec<String> = input_sentences
.iter()
.map(|s| {
let trimmed = s.trim();
let mut normalized = String::with_capacity(trimmed.len());
let mut last_was_space = false;
for c in trimmed.chars() {
if c.is_whitespace() {
if !last_was_space {
normalized.push(' ');
last_was_space = true;
}
} else {
normalized.push(c);
last_was_space = false;
}
}
normalized
})
.collect();
let mut sentence_candidates: Vec<Vec<ComponentIndices>> = Vec::new();
for (i, sentence) in normalized_sentences.iter().enumerate() {
if sentence.is_empty() {
return Err(Memo128Error::ParsingError(format!(
"Sentence {} is empty after normalization",
i + 1
)));
}
let candidates = self.fuzzy_parse_sentence(sentence);
if candidates.is_empty() {
return Err(Memo128Error::ParsingError(format!(
"No fuzzy matches found for sentence {}: {}",
i + 1,
sentence
)));
}
let max_candidates = 50;
let limited_candidates = if candidates.len() > max_candidates {
candidates[0..max_candidates].to_vec()
} else {
candidates
};
sentence_candidates.push(limited_candidates);
}
let mut valid_hex_results = Vec::new();
let total_combinations: usize = sentence_candidates
.iter()
.map(|candidates| candidates.len())
.product();
if total_combinations > 1_000_000 {
return Err(Memo128Error::ParsingError(format!(
"Too many possible combinations ({}) to check efficiently. Try increasing the fuzzy matching threshold.",
total_combinations
)));
}
self.check_candidates(
&sentence_candidates,
0,
&mut Vec::with_capacity(NUM_CHUNKS),
&mut valid_hex_results,
);
valid_hex_results.sort();
let max_results = 100;
if valid_hex_results.len() > max_results {
valid_hex_results.truncate(max_results);
}
Ok(valid_hex_results)
}
fn check_candidates(
&self,
sentence_candidates: &[Vec<ComponentIndices>],
sentence_idx: usize,
current_combo: &mut Vec<ComponentIndices>,
valid_hex_results: &mut Vec<String>,
) {
if sentence_idx == sentence_candidates.len() {
let reconstructed_135_num = self.reconstruct_number(current_combo);
let checksum_mask = BigUint::from((1u16 << CHECKSUM_BITS) - 1);
let checksum_bits_decoded = (&reconstructed_135_num & &checksum_mask).to_u8().unwrap();
let data_num_decoded = &reconstructed_135_num >> CHECKSUM_BITS;
let data_bytes_decoded = data_num_decoded.to_bytes_be();
let mut padded_bytes = vec![0; 16];
let offset = 16 - data_bytes_decoded.len();
padded_bytes[offset..].copy_from_slice(&data_bytes_decoded);
let checksum_bits_calculated = self.calculate_checksum(&padded_bytes);
if checksum_bits_decoded == checksum_bits_calculated {
let hex_result = Memo128::bytes_to_hex(&padded_bytes);
if !valid_hex_results.contains(&hex_result) {
valid_hex_results.push(hex_result);
}
}
return;
}
for &candidate in &sentence_candidates[sentence_idx] {
current_combo.push(candidate);
self.check_candidates(
sentence_candidates,
sentence_idx + 1,
current_combo,
valid_hex_results,
);
current_combo.pop();
}
}
fn reconstruct_number(&self, component_combos: &[ComponentIndices]) -> BigUint {
let mut reconstructed_135_num = BigUint::zero();
for &(idx_c, idx_s, idx_a, idx_o, idx_k) in component_combos {
let chunk_value = (BigUint::from(idx_c)
<< (SETTING_BITS + ACTION_BITS + OBJECT_BITS + OUTCOME_BITS))
| (BigUint::from(idx_s) << (ACTION_BITS + OBJECT_BITS + OUTCOME_BITS))
| (BigUint::from(idx_a) << (OBJECT_BITS + OUTCOME_BITS))
| (BigUint::from(idx_o) << OUTCOME_BITS)
| BigUint::from(idx_k);
reconstructed_135_num = (reconstructed_135_num << CHUNK_BITS) | chunk_value;
}
reconstructed_135_num
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_levenshtein_distance() {
assert_eq!(levenshtein_distance("kitten", "kitten"), 0);
assert_eq!(levenshtein_distance("kitten", "sitting"), 3);
assert_eq!(levenshtein_distance("", "abc"), 3);
assert_eq!(levenshtein_distance("abc", ""), 3);
assert_eq!(levenshtein_distance("", ""), 0);
assert_eq!(levenshtein_distance("a", "b"), 1);
assert_eq!(levenshtein_distance("ab", "abc"), 1);
assert_eq!(levenshtein_distance("abc", "abcd"), 1);
assert_eq!(levenshtein_distance("abcd", "abc"), 1);
}
}