use crate::prelude::*;
use crate::textgen::FullGen;
/// Generator that produces anagrams of a phrase from a vocabulary file.
pub(crate) struct Anagram {
    file_name: String, // path to the vocabulary file (word + frequency columns)
    phrase: String,    // phrase to anagram, as supplied by the user
    full: bool,        // true: search and rank solutions; false: list candidate words only
}
impl Anagram {
    /// Parses a `file,phrase` spec into an `Anagram` generator.
    ///
    /// # Errors
    /// Fails when `spec` contains no comma separator.
    pub(crate) fn new(spec: &str, full: bool) -> Result<Self> {
        match spec.split_once(',') {
            Some((file_name, phrase)) => Ok(Self {
                file_name: file_name.to_string(),
                phrase: phrase.to_string(),
                full,
            }),
            None => err!("Anagram spec must be file,phrase"),
        }
    }
}
impl FullGen for Anagram {
    /// Streams anagram output to `w`.
    ///
    /// In `full` mode up to 10_000 ranked solutions are printed; otherwise
    /// `max_display` is 0, which makes `do_anagram` list candidate words only.
    fn write(&mut self, w: &mut dyn Write) -> Result<()> {
        let max_display = if self.full { 10000 } else { 0 };
        // Third argument is the minimum accepted word length.
        do_anagram(&self.phrase, max_display, 1, &self.file_name, w)
    }
}
/// Bit-packed representation of a phrase's remaining letter counts.
type Quad = u128;
/// Ranking score for a solution; higher is better.
type Score = i64;
/// Bits available in a `Quad` for the packed per-letter fields.
const MASK_BITS: usize = 128;
/// Hard cap on recorded solutions.
const MAX_MATCHES: usize = 100_000;
/// Hard cap on candidate words kept for the search.
const MAX_CAND: usize = 4_000;
/// Maximum number of words in one solution.
const MAX_SOL: usize = 64;
/// Letters in the (lowercase ASCII) alphabet.
const ALPHABET: usize = 26;
/// Score histogram size; scores are clamped to `[0, SCORE_MAX]`.
const SCORE_BUCKETS: usize = 1000;
const SCORE_MAX: Score = 999;
const DEFAULT_SCORE_ALGORITHM: ScoreAlgorithm = ScoreAlgorithm::FrequencyWeighted;
/// Strategy used to rank complete anagram solutions.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum ScoreAlgorithm {
    /// Sum of squared word lengths — favors long words.
    Square,
    /// `SCORE_MAX` minus one point per extra word — favors short solutions.
    FewestWords,
    /// Weighted blend of word frequency and word count.
    FrequencyWeighted,
}
/// Per-letter bookkeeping for the bit-packed phrase mask.
#[derive(Clone, Copy, Default)]
struct Letter {
    frequency: u32, // occurrences of this letter in the phrase
    shift: u32,     // bit offset of this letter's field within a Quad
    bits: Quad,     // unshifted all-ones value mask for the field
}
/// A candidate dictionary word in packed form.
#[derive(Clone, Copy, Default)]
struct Word {
    mask: Quad,       // packed letter counts of the word
    text_idx: usize,  // index of the entry in the dictionary
    length: usize,    // number of letters in the word
    frequency: usize, // corpus frequency from the dictionary
}
/// One recorded solution: its space-joined words and its score.
#[derive(Clone)]
struct OneWordRec {
    words: Vec<u8>,
    score: Score,
}
/// One vocabulary entry: a lowercase ASCII word and its corpus frequency.
#[derive(Clone, Debug)]
struct DictEntry {
    word: Vec<u8>,
    frequency: usize,
}
/// The loaded vocabulary plus the largest frequency observed, used to
/// normalize frequency-weighted scores.
#[derive(Clone, Debug)]
struct Dictionary {
    entries: Vec<DictEntry>,
    max_frequency_seen: usize,
}
/// All state for one anagram search.
struct Solver {
    original_phrase: String,              // phrase exactly as supplied
    normalized_phrase: Vec<u8>,           // lowercase ASCII letters only
    al_phrase: [Letter; ALPHABET],        // per-letter counts and bit-field layout
    ach_by_frequency: [usize; ALPHABET],  // letter indices; rarest-first after sort_candidates
    au_global_frequency: [u32; ALPHABET], // accumulated candidate letter counts (u32::MAX for letters absent from the phrase)
    aq_main_mask: Quad,                   // packed letter counts of the whole phrase
    aq_main_sign: Quad,                   // guard bit above each letter field
    cch_phrase_length: usize,             // letters not yet covered by the solution stack
    candidates: Box<[Word]>,              // fixed-capacity candidate table
    candidate_len: usize,                 // live prefix of `candidates`
    solution_text_indices: [usize; MAX_SOL], // dictionary indices of words on the stack
    solution_lengths: [usize; MAX_SOL],      // lengths of words on the stack
    solution_frequencies: [usize; MAX_SOL],  // frequencies of words on the stack
    solution_depth: usize,                // current solution-stack depth
    score_counts: [usize; SCORE_BUCKETS], // histogram of clamped scores
    total_scores: usize,                  // count of scores at or above min_score (see set_score)
    total_attempts: usize,                // every score ever submitted
    min_score: Score,                     // adaptive display cutoff
    words_found: usize,                   // solutions recorded
    max_display: usize,                   // result budget (0 = list candidates only)
    score_algorithm: ScoreAlgorithm,
    max_dictionary_frequency: usize,      // largest frequency seen in the dictionary
    records: Vec<OneWordRec>,             // recorded solutions awaiting display
    do_quit: bool,                        // cancellation flag checked by the search; not set within this file
}
impl Solver {
/// Creates a solver with empty state and default limits.
fn new() -> Self {
    // Identity letter order; `sort_candidates` later reorders by rarity.
    let ach_by_frequency = std::array::from_fn(|i| i);
    Self {
        original_phrase: String::new(),
        normalized_phrase: Vec::new(),
        al_phrase: [Letter::default(); ALPHABET],
        ach_by_frequency,
        au_global_frequency: [0; ALPHABET],
        aq_main_mask: 0,
        aq_main_sign: 0,
        cch_phrase_length: 0,
        candidates: vec![Word::default(); MAX_CAND].into_boxed_slice(),
        candidate_len: 0,
        solution_text_indices: [0; MAX_SOL],
        solution_lengths: [0; MAX_SOL],
        solution_frequencies: [0; MAX_SOL],
        solution_depth: 0,
        score_counts: [0; SCORE_BUCKETS],
        total_scores: 0,
        total_attempts: 0,
        min_score: 0,
        words_found: 0,
        max_display: 1000,
        score_algorithm: DEFAULT_SCORE_ALGORITHM,
        max_dictionary_frequency: 0,
        records: Vec::new(),
        do_quit: false,
    }
}
/// Selects the scoring strategy used to rank solutions.
const fn set_score_algorithm(&mut self, score_algorithm: ScoreAlgorithm) {
    self.score_algorithm = score_algorithm;
}
/// Records the dictionary's largest word frequency, the normalization
/// denominator for frequency-weighted scoring.
const fn set_max_dictionary_frequency(&mut self, max_dictionary_frequency: usize) {
    self.max_dictionary_frequency = max_dictionary_frequency;
}
/// Stores `phrase` and rebuilds the letter masks from its normalized form.
///
/// # Errors
/// Fails when the phrase needs more mask bits than a `Quad` provides.
fn set_phrase(&mut self, phrase: &str) -> Result<()> {
    self.original_phrase.clear();
    self.original_phrase.push_str(phrase);
    // Same content as self.original_phrase at this point.
    let normalized = normalize_phrase(phrase);
    self.build_mask(&normalized)?;
    self.normalized_phrase = normalized;
    Ok(())
}
/// Records `score` in the histogram and reports whether it still qualifies
/// for display.
///
/// `min_score` is an adaptive cutoff: once more than `max_display` results
/// sit strictly above the cutoff bucket, that bucket is discarded and the
/// cutoff rises, bounding the number of retained results.
fn set_score(&mut self, mut score: Score) -> bool {
    self.total_attempts += 1;
    // Clamp into the histogram's valid range [0, SCORE_MAX].
    score = score.clamp(0, SCORE_MAX);
    // After the clamp the conversion cannot fail; unwrap_or is defensive.
    self.score_counts[usize::try_from(score).unwrap_or(SCORE_BUCKETS - 1)] += 1;
    if score >= self.min_score {
        self.total_scores += 1;
    }
    // Invariant: total_scores == sum of score_counts[min_score..], so this
    // subtraction cannot underflow.
    if self.total_scores - self.score_counts[self.min_score as usize] > self.max_display {
        let removed = self.score_counts[self.min_score as usize];
        if removed > 0 {
            self.total_scores -= removed;
        }
        self.min_score += 1;
    }
    score >= self.min_score
}
/// Packs the phrase's letter counts into `aq_main_mask`.
///
/// Each letter present in the phrase gets a bit field wide enough to hold
/// its count, plus one extra guard ("sign") bit above it. Subtracting a
/// word's mask then borrows into a guard bit exactly when that word uses a
/// letter more times than the remaining phrase allows; `aq_main_sign` holds
/// all the guard bits for that check.
///
/// `phrase` must already be normalized to lowercase ASCII letters.
///
/// # Errors
/// Fails when the fields do not fit in `MASK_BITS` bits.
fn build_mask(&mut self, phrase: &[u8]) -> Result<()> {
    self.al_phrase = [Letter::default(); ALPHABET];
    self.aq_main_mask = 0;
    self.aq_main_sign = 0;
    self.cch_phrase_length = phrase.len();
    for &b in phrase {
        // Safe: normalize_phrase only emits b'a'..=b'z'.
        let idx = (b - b'a') as usize;
        self.al_phrase[idx].frequency += 1;
    }
    let mut bits_used = 0usize;
    for i in 0..ALPHABET {
        if self.al_phrase[i].frequency == 0 {
            // Letters absent from the phrase get no field and are pushed to
            // the end of the pivot order (ascending sort on this array).
            self.au_global_frequency[i] = u32::MAX;
            continue;
        }
        self.au_global_frequency[i] = 0;
        // Find the smallest power of two strictly greater than the count;
        // the field needs that many bits including the guard bit.
        let mut bits_needed = 1usize;
        let mut q_need: Quad = 1;
        while Quad::from(self.al_phrase[i].frequency) >= q_need {
            bits_needed += 1;
            q_need <<= 1;
        }
        if bits_used + bits_needed > MASK_BITS {
            return err!("Quad not large enough");
        }
        self.al_phrase[i].bits = q_need - 1;
        let sign_bit = q_need << bits_used;
        self.aq_main_sign |= sign_bit;
        self.aq_main_mask |= Quad::from(self.al_phrase[i].frequency) << bits_used;
        self.al_phrase[i].shift = bits_used as u32;
        bits_used += bits_needed;
    }
    Ok(())
}
/// Converts a dictionary entry into a packed candidate `Word`, rejecting
/// words that use any letter more often than the phrase does.
///
/// Accepted words also bump `au_global_frequency`, which later drives the
/// rarest-letter-first pivot order. Note the counts are accumulated even
/// when the candidate table is already full.
#[expect(clippy::needless_range_loop)]
fn build_word(&mut self, entry: &DictEntry, text_idx: usize) {
    let mut freq = [0u8; ALPHABET];
    let mut length = 0usize;
    for b in &entry.word {
        debug_assert!(b.is_ascii_lowercase());
        let i = (b - b'a') as usize;
        freq[i] = freq[i].saturating_add(1);
        if u32::from(freq[i]) > self.al_phrase[i].frequency {
            // Word needs more of letter `i` than the phrase provides.
            return;
        }
        length += 1;
    }
    let mut mask = 0;
    for i in 0..ALPHABET {
        // Letters absent from the phrase sit at u32::MAX and saturate,
        // staying last in the pivot order.
        self.au_global_frequency[i] =
            self.au_global_frequency[i].saturating_add(u32::from(freq[i]));
        let letter = self.al_phrase[i];
        mask |= Quad::from(freq[i]) << letter.shift;
    }
    if self.candidate_len < MAX_CAND {
        self.candidates[self.candidate_len] =
            Word { mask, text_idx, length, frequency: entry.frequency };
        self.candidate_len += 1;
    }
}
/// Rebuilds the candidate table from `dict`, keeping at most `MAX_CAND`
/// words short enough to fit inside the current phrase.
fn add_words(&mut self, dict: &[DictEntry]) {
    self.candidate_len = 0;
    for (text_idx, entry) in dict.iter().enumerate() {
        if self.candidate_len >= MAX_CAND {
            break;
        }
        if entry.word.len() > self.cch_phrase_length {
            continue;
        }
        self.build_word(entry, text_idx);
    }
}
/// Orders pivot letters so the rarest ones (lowest accumulated candidate
/// frequency) are tried first.
fn sort_candidates(&mut self) {
    let freq = self.au_global_frequency;
    self.ach_by_frequency.sort_unstable_by_key(|&letter| freq[letter]);
}
/// Writes every candidate word to `w`, one per line.
fn dump_candidates(&mut self, dict: &[DictEntry], w: &mut dyn Write) -> Result<()> {
    for word in &self.candidates[..self.candidate_len] {
        w.write_all(&dict[word.text_idx].word)?;
        w.write_all(b"\n")?;
    }
    Ok(())
}
/// Records the current solution stack as one result line, provided its
/// score still clears the adaptive display cutoff.
fn dump_words(&mut self, dict: &[DictEntry]) {
    if self.records.len() >= MAX_MATCHES {
        return;
    }
    let score = self.calculate_score(
        &self.solution_lengths[..self.solution_depth],
        &self.solution_frequencies[..self.solution_depth],
        self.max_dictionary_frequency,
    );
    if self.set_score(score) {
        // Space-join the stacked words; capacity covers letters + separators.
        let mut phrase = Vec::with_capacity(self.normalized_phrase.len() + self.solution_depth);
        for depth in 0..self.solution_depth {
            if depth > 0 {
                phrase.push(b' ');
            }
            let text_idx = self.solution_text_indices[depth];
            phrase.extend(&dict[text_idx].word);
        }
        self.words_found += 1;
        self.records.push(OneWordRec { words: phrase, score });
    }
}
/// Scores the current solution using the configured algorithm.
fn calculate_score(
    &self,
    solution_lengths: &[usize],
    solution_frequencies: &[usize],
    max_dictionary_frequency: usize,
) -> Score {
    // All three scorers share a signature, so dispatch via a fn pointer.
    let scorer = match self.score_algorithm {
        ScoreAlgorithm::Square => Self::calculate_square_score,
        ScoreAlgorithm::FewestWords => Self::calculate_fewest_words_score,
        ScoreAlgorithm::FrequencyWeighted => Self::calculate_frequency_weighted_score,
    };
    scorer(solution_lengths, solution_frequencies, max_dictionary_frequency)
}
/// Sum of squared word lengths; long words score disproportionately higher.
fn calculate_square_score(
    solution_lengths: &[usize],
    _solution_frequencies: &[usize],
    _max_dictionary_frequency: usize,
) -> Score {
    solution_lengths
        .iter()
        .map(|&len_usize| {
            let len = Score::try_from(len_usize).unwrap_or(Score::MAX);
            len * len
        })
        .sum()
}
/// `SCORE_MAX` minus one point per word beyond the first — a single-word
/// solution scores the maximum.
fn calculate_fewest_words_score(
    solution_lengths: &[usize],
    _solution_frequencies: &[usize],
    _max_dictionary_frequency: usize,
) -> Score {
    let penalty =
        Score::try_from(solution_lengths.len().saturating_sub(1)).unwrap_or(SCORE_MAX);
    SCORE_MAX.saturating_sub(penalty)
}
/// Blends three components, each normalized to `0..=SCORE_MAX`:
/// the rarest word's frequency (weight 7), the average frequency (weight 2),
/// and a fewer-words bonus (weight 1).
fn calculate_frequency_weighted_score(
    solution_lengths: &[usize],
    solution_frequencies: &[usize],
    max_dictionary_frequency: usize,
) -> Score {
    if solution_lengths.is_empty() || solution_frequencies.is_empty() {
        return 0;
    }
    // Guard against a zero denominator from a frequency-less dictionary.
    let max_seen = max_dictionary_frequency.max(1);
    let min_freq = *solution_frequencies.iter().min().unwrap_or(&0);
    let sum_freq: usize = solution_frequencies.iter().sum();
    let avg_freq = sum_freq / solution_frequencies.len();
    let score_max_usize = usize::try_from(SCORE_MAX).unwrap_or(0);
    let min_norm =
        Score::try_from((min_freq * score_max_usize) / max_seen).unwrap_or(SCORE_MAX);
    let avg_norm =
        Score::try_from((avg_freq * score_max_usize) / max_seen).unwrap_or(SCORE_MAX);
    let words = solution_lengths.len();
    let word_score = if words <= 1 {
        SCORE_MAX
    } else {
        // Linear penalty: the deeper the solution stack, the smaller the bonus.
        let penalty = Score::try_from(((words - 1) * score_max_usize) / (MAX_SOL - 1))
            .unwrap_or(SCORE_MAX);
        SCORE_MAX.saturating_sub(penalty)
    };
    (7 * min_norm + 2 * avg_norm + word_score) / 10
}
fn display_words(&mut self, w: &mut dyn Write) -> Result<()> {
let mut x = self.words_found;
if x > self.max_display {
x = self.max_display;
}
self.records.sort_by(|a, b| match b.score.cmp(&a.score) {
Ordering::Equal => a.words.cmp(&b.words),
ord => ord,
});
for rec in self.records.iter().take(x) {
w.write_all(&rec.words)?;
writeln!(w, "\t{}", rec.score)?;
}
Ok(())
}
/// Recursive search entry point.
///
/// Picks the next pivot letter — scanning in rarest-first order from
/// `i_letter` — that still occurs in `phrase_mask`, then hands off to the
/// hot loop, which only extends the solution with candidates containing
/// that pivot letter. Restricting the scan this way prunes branches that
/// could never place the rare letter.
fn find_anagram(
    &mut self,
    phrase_mask: Quad,
    start: usize,
    mut i_letter: usize,
    dict: &[DictEntry],
) {
    if self.records.len() >= MAX_MATCHES || self.do_quit {
        return;
    }
    let pivot_mask = loop {
        if i_letter >= ALPHABET {
            // No letter of the phrase remains uncovered.
            return;
        }
        let letter_idx = self.ach_by_frequency[i_letter];
        let l = &self.al_phrase[letter_idx];
        let mask = l.bits << l.shift;
        if (phrase_mask & mask) != 0 {
            break mask;
        }
        i_letter += 1;
    };
    self.find_anagram_hot(phrase_mask, start, i_letter, pivot_mask, dict);
}
/// Hot inner loop: scans `candidates[start..candidate_len]` for words that
/// fit the remaining phrase and contain the pivot letter.
///
/// Words whose mask subtraction would borrow into a guard bit are skipped;
/// words lacking the pivot letter are swapped behind `end` so this branch
/// never rescans them; fitting words are pushed onto the solution stack and
/// the search recurses on the remaining letters.
fn find_anagram_hot(
    &mut self,
    phrase_mask: Quad,
    start: usize,
    i_letter: usize,
    pivot_mask: Quad,
    dict: &[DictEntry],
) {
    let mut idx = start;
    let mut end = self.candidate_len;
    let cand_ptr = self.candidates.as_mut_ptr();
    debug_assert!(end <= MAX_CAND, "candidate_len must stay within MAX_CAND");
    while idx < end {
        if self.records.len() >= MAX_MATCHES || self.do_quit {
            return;
        }
        debug_assert!(idx < self.candidate_len, "idx must stay in active candidate range");
        debug_assert!(end <= self.candidate_len, "end boundary must stay in candidate range");
        // SAFETY: idx < end <= candidate_len <= MAX_CAND, the allocation length.
        let word_mask = unsafe { (*cand_ptr.add(idx)).mask };
        // Wrapping subtraction: a borrow into any guard bit (aq_main_sign)
        // means the word over-consumes some letter.
        let next_phrase_mask = phrase_mask.wrapping_sub(word_mask);
        if (next_phrase_mask & self.aq_main_sign) != 0 {
            idx += 1;
            continue;
        }
        if (word_mask & pivot_mask) == 0 {
            // Word lacks the pivot letter: retire it behind `end`.
            // SAFETY: idx < end, so idx and end - 1 are both in bounds.
            unsafe {
                std::ptr::swap(cand_ptr.add(idx), cand_ptr.add(end - 1));
            }
            end -= 1;
            continue;
        }
        if self.solution_depth >= MAX_SOL {
            return;
        }
        // SAFETY: idx is still inside the live candidate range (see asserts).
        let chosen = unsafe { *cand_ptr.add(idx) };
        let word_len = chosen.length;
        self.solution_text_indices[self.solution_depth] = chosen.text_idx;
        self.solution_lengths[self.solution_depth] = word_len;
        self.solution_frequencies[self.solution_depth] = chosen.frequency;
        self.solution_depth += 1;
        self.cch_phrase_length -= word_len;
        if self.cch_phrase_length > 0 {
            // Recursion restarts at idx so the same word may be reused.
            self.find_anagram(next_phrase_mask, idx, i_letter, dict);
            // The recursion may have reordered candidates beyond our local
            // partition, so rescan up to the true end.
            end = self.candidate_len;
        } else {
            self.dump_words(dict);
        }
        self.cch_phrase_length += word_len;
        self.solution_depth -= 1;
        idx += 1;
    }
}
}
/// Loads the vocabulary file: one word plus a frequency column per line.
///
/// Words must be lowercase ASCII. Entries shorter than `min_length` are
/// skipped but still contribute to `max_frequency_seen`.
///
/// # Errors
/// Fails on an empty file or a word containing non-lowercase bytes.
fn read_dict(path: &str, min_length: usize) -> Result<Dictionary> {
    let mode = TextFileMode::default();
    let mut file = Reader::new_open(path, &mode)?;
    let mut words = Vec::new();
    let mut max_frequency_seen = 0usize;
    if file.is_done() {
        return err!("Anagram Vocab file must not be empty.");
    }
    loop {
        // Column 0 is the word, column 1 its corpus frequency.
        let word = file.curr_line().get(0);
        let freq = file.curr_line().get(1);
        if !word.iter().all(|b: &u8| b.is_ascii_lowercase()) {
            return err!("invalid dictionary entry : {}", String::from_utf8_lossy(word));
        }
        // NOTE(review): truncating float -> usize cast; assumes frequencies
        // are non-negative and in range — confirm against the file format.
        let frequency = freq.to_f64().0 as usize;
        max_frequency_seen = max_frequency_seen.max(frequency);
        if word.len() >= min_length {
            words.push(DictEntry { word: word.to_vec(), frequency });
        }
        // get_line advances the reader; presumably returns true at end of
        // input — confirm against the Reader API.
        if file.get_line()? {
            break;
        }
    }
    Ok(Dictionary { entries: words, max_frequency_seen })
}
/// Lowercases `phrase` and strips everything except ASCII letters.
fn normalize_phrase(phrase: &str) -> Vec<u8> {
    phrase
        .bytes()
        .filter(u8::is_ascii_alphabetic)
        .map(|b| b.to_ascii_lowercase())
        .collect()
}
/// Runs the full anagram pipeline: load the dictionary, build the candidate
/// table, then either list candidates (`max_return == 0`) or search and
/// print up to `max_return` ranked solutions.
fn do_anagram(
    phrase: &str,
    max_return: usize,
    min_length: usize,
    dict_name: &str,
    w: &mut dyn Write,
) -> Result<()> {
    let dictionary = read_dict(dict_name, min_length)?;
    let dict = &dictionary.entries;
    let mut solver = Solver::new();
    solver.max_display = max_return;
    // The ANAGRAM_SCORE environment variable can override the algorithm.
    let score_algorithm = score_algorithm_from_env()?;
    solver.set_score_algorithm(score_algorithm);
    solver.set_max_dictionary_frequency(dictionary.max_frequency_seen);
    solver.set_phrase(phrase)?;
    solver.add_words(dict);
    if solver.candidate_len > 0 && solver.cch_phrase_length > 0 {
        if solver.max_display == 0 {
            solver.dump_candidates(dict, w)?;
        } else {
            solver.sort_candidates();
            let main_mask = solver.aq_main_mask;
            solver.find_anagram(main_mask, 0, 0, dict);
            solver.display_words(w)?;
        }
    }
    Ok(())
}
/// Maps a user-supplied algorithm name to its `ScoreAlgorithm` variant.
///
/// # Errors
/// Fails on any name other than the three documented values.
fn parse_score_algorithm(value: &str) -> Result<ScoreAlgorithm> {
    match value {
        "square" => Ok(ScoreAlgorithm::Square),
        "fewest_words" => Ok(ScoreAlgorithm::FewestWords),
        "frequency_weighted" => Ok(ScoreAlgorithm::FrequencyWeighted),
        _ => err!(
            "invalid ANAGRAM_SCORE value `{value}`; expected one of: square, fewest_words, frequency_weighted"
        ),
    }
}
/// Reads the scoring algorithm from the `ANAGRAM_SCORE` environment
/// variable, falling back to the default when it is unset.
///
/// # Errors
/// Returns an error when the variable holds an unknown algorithm name or
/// is not valid UTF-8.
fn score_algorithm_from_env() -> Result<ScoreAlgorithm> {
    match std::env::var("ANAGRAM_SCORE") {
        Ok(value) => parse_score_algorithm(&value),
        Err(std::env::VarError::NotPresent) => Ok(DEFAULT_SCORE_ALGORITHM),
        // Pass the literal directly, matching the other err! call sites;
        // the previous `.to_string()` allocation was redundant.
        Err(std::env::VarError::NotUnicode(_)) => {
            err!("invalid ANAGRAM_SCORE: value is not valid UTF-8")
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::process;

    // Punctuation is stripped by normalization, so only 3 letters remain.
    #[test]
    fn build_mask_counts_ascii_letters_only() -> Result<()> {
        let mut solver = Solver::new();
        let normalized = normalize_phrase("Aa!b?");
        solver.build_mask(&normalized)?;
        assert_eq!(solver.cch_phrase_length, 3);
        assert_eq!(solver.al_phrase[0].frequency, 2);
        assert_eq!(solver.al_phrase[1].frequency, 1);
        assert_eq!(solver.al_phrase[2].frequency, 0);
        Ok(())
    }

    // Out-of-range scores land in the boundary histogram buckets.
    #[test]
    fn set_score_clamps_to_valid_range() {
        let mut solver = Solver::new();
        assert!(solver.set_score(-5));
        assert!(solver.set_score(SCORE_MAX + 500));
        assert_eq!(solver.score_counts[0], 1);
        assert_eq!(solver.score_counts[SCORE_BUCKETS - 1], 1);
    }

    // The rarest word's frequency carries the most weight (7/10).
    #[test]
    fn frequency_weighted_scoring_emphasizes_lowest_frequency() {
        let mut solver = Solver::new();
        solver.set_score_algorithm(ScoreAlgorithm::FrequencyWeighted);
        solver.set_max_dictionary_frequency(1_000_000);
        let low_min = solver.calculate_score(&[4, 4, 4], &[900_000, 900_000, 100_000], 1_000_000);
        let high_min = solver.calculate_score(&[4, 4, 4], &[500_000, 500_000, 500_000], 1_000_000);
        assert!(high_min > low_min);
    }

    #[test]
    fn frequency_weighted_scoring_prefers_fewer_words() {
        let mut solver = Solver::new();
        solver.set_score_algorithm(ScoreAlgorithm::FrequencyWeighted);
        solver.set_max_dictionary_frequency(1_000_000);
        let two_words = solver.calculate_score(&[5, 5], &[600_000, 600_000], 1_000_000);
        let four_words = solver.calculate_score(&[5, 5, 5, 5], &[600_000; 4], 1_000_000);
        assert!(two_words > four_words);
    }

    #[test]
    fn parse_score_algorithm_accepts_all_known_values() {
        assert_eq!(parse_score_algorithm("square").unwrap(), ScoreAlgorithm::Square);
        assert_eq!(parse_score_algorithm("fewest_words").unwrap(), ScoreAlgorithm::FewestWords);
        assert_eq!(
            parse_score_algorithm("frequency_weighted").unwrap(),
            ScoreAlgorithm::FrequencyWeighted
        );
    }

    #[test]
    fn parse_score_algorithm_rejects_unknown_values() {
        assert!(parse_score_algorithm("nope").is_err());
    }

    #[test]
    fn normalize_phrase_keeps_only_lowercase_ascii_letters() {
        let normalized = normalize_phrase("Ru-th! 42 ANNE");
        assert_eq!(normalized, b"ruthanne");
    }

    // Uses the process id to keep concurrent test runs from colliding on
    // the same temp file.
    #[test]
    fn read_dict_rejects_non_lowercase_ascii_entries() {
        let path = format!("/tmp/anagram_rust_test_{}_{}.txt", process::id(), "invalid_dict");
        std::fs::write(&path, "good\t60000\nBad\t60000\n").expect("write test dictionary");
        read_dict(&path, 1).expect_err("dictionary should be rejected");
        drop(std::fs::remove_file(&path));
    }
}