hangman_solver 0.6.0

#![allow(clippy::unwrap_used)]
#![forbid(unsafe_code)]

use easy_parallel::Parallel;
use inflector::Inflector;
use itertools::Itertools;
use std::env;
use std::fs;
use std::fs::File;
use std::io;
use std::io::BufRead;
use std::io::BufReader;
use std::iter::Iterator;
use std::path::{Path, PathBuf};
use std::time::Instant;
use unicode_segmentation::UnicodeSegmentation;

/// Signature of the per-word conversion hook stored in `WordsData::conv`:
/// it takes an owned word and returns the (possibly rewritten) word.
type StrConv = fn(String) -> String;

/// Opens `path` and returns an iterator over the lines of the file.
///
/// Line terminators are removed by `BufRead::lines`. Iteration ends at the
/// first line that cannot be read (for example because it is not valid
/// UTF-8); that error is dropped and the remaining lines are not yielded.
///
/// # Errors
///
/// Returns the `io::Error` produced when the file cannot be opened.
fn read_lines_of_file(
    path: &Path,
) -> Result<impl Iterator<Item = String>, io::Error> {
    let lines = BufReader::new(File::open(path)?).lines();

    Ok(lines.map_while(|line| line.ok()))
}

/// Description of one word list: where it is read from, the language name
/// it is published under, and the conversion applied to every word.
#[derive(Clone, Eq, PartialEq, Debug)]
struct WordsData {
    /// Path of the source word-list file, stored as a string.
    pub path: String,
    /// Language name; the generated file name and enum variant derive from it.
    pub lang: String,
    /// Conversion applied to each lowercased word while reading the list.
    pub conv: StrConv,
}

impl WordsData {
    fn clone_with_lang(&self, lang: String) -> Self {
        Self::new(self.path.clone(), lang, self.conv)
    }

    fn new(path: String, lang: String, conv: StrConv) -> Self {
        Self { path, lang, conv }
    }

    fn from_path(path: &Path) -> Self {
        let lang: &str = path
            .file_name()
            .unwrap()
            .to_str()
            .unwrap()
            .trim_end_matches(".txt");

        let path_str: &str = path.to_str().unwrap();

        Self::new(String::from(path_str), String::from(lang), |string| string)
    }

    fn read_lines(&self) -> impl Iterator<Item = String> {
        read_lines_of_file(Path::new(self.path.as_str()))
            .expect("Reading file should not fail.")
            .filter(|word| !word.is_empty())
            .map(|word| word.to_lowercase())
            .map(self.conv)
    }

    fn enum_name(&self) -> String {
        self.lang.replace('-', "_").to_pascal_case()
    }

    fn out_file_name(&self) -> String {
        format!("{}.txt.rs", self.lang)
    }

    fn dest_path(&self) -> PathBuf {
        get_out_dir_joined(self.out_file_name())
    }
}
/// Joins `path` onto the directory named by the `OUT_DIR` environment
/// variable.
///
/// # Panics
///
/// Panics if `OUT_DIR` is unset or is not valid Unicode.
fn get_out_dir_joined(path: String) -> PathBuf {
    let mut joined = PathBuf::from(env::var("OUT_DIR").unwrap());
    joined.push(path);
    joined
}

/// Generates the Rust source fragment that holds every word of one language.
///
/// The word list is read through `WordsData::read_lines`, paired with each
/// word's `char` count, sorted and deduplicated. The output written to
/// `WordsData::dest_path` is a single `match length { ... }` expression with
/// one arm per char count. Each arm evaluates to a tuple of the padded word
/// width (the byte length of the longest word in the group) and a string
/// literal that concatenates all words of the group, each one preceded by
/// NUL characters so that it occupies exactly the padded width. A final `_`
/// arm yields an empty literal.
///
/// Two `cargo:warning` lines are printed: the largest
/// `padded width * word count` product seen, and the elapsed time.
///
/// # Panics
///
/// Panics if a word's grapheme count differs from its `char` count, if a
/// line consists of anything other than exactly one Unicode word, or if the
/// output file cannot be written.
fn write_words_data(words_data: &WordsData) {
    const END_OF_CASE: &str = "\"),\n";

    let start = Instant::now();

    let lang = words_data.lang.as_str();
    let mut words: Vec<(usize, String)> = words_data
        .read_lines()
        .map(|word| (word.chars().count(), word))
        .collect();

    // Sorting by (char count, word) makes equal-length words adjacent, which
    // is what `chunk_by` below relies on.
    words.sort_unstable();
    words.dedup();

    let mut max_string_lit_len: usize = 0;

    let mut output = String::from("match length {");
    for chunk in
        words.chunk_by(|(length_a, _), (length_b, _)| *length_a == *length_b)
    {
        let char_count = chunk.first().expect("needs to have first").0;
        let max_word_byte_count: usize = chunk
            .iter()
            .map(|(_, word)| word.len())
            .max()
            .expect("word group needs to have max length");

        max_string_lit_len =
            max_string_lit_len.max(max_word_byte_count * chunk.len());

        let start_of_case = format!(
            "{char_count} => (NonZeroUsize::MIN.saturating_add({max_word_byte_count} - 1), \""
        );
        output.reserve(
            max_word_byte_count * chunk.len()
                + start_of_case.len()
                + END_OF_CASE.len(),
        );
        output.push_str(&start_of_case);

        for (_, word) in chunk {
            assert_eq!(
                word.graphemes(true).count(),
                char_count,
                "{lang}: {word} has graphemes",
            );
            assert_eq!(
                word.unicode_words().count(),
                1,
                "{lang}: {word} is multiple words",
            );

            // Pad in front of the word so every entry has the same byte width.
            for _ in 0..(max_word_byte_count - word.len()) {
                output.push('\0');
            }
            for ch in word.chars() {
                // The word ends up inside a normal (non-raw) string literal:
                // a bare `"` would terminate it and a bare `\` would start an
                // escape sequence, so both need a leading backslash. The
                // escape only exists in the source text; the compiled string
                // still contains a single byte for each.
                if matches!(ch, '"' | '\\') {
                    output.push('\\');
                }
                output.push(ch);
            }
        }
        output.push_str(END_OF_CASE);
    }
    output.push_str("_ => (NonZeroUsize::MIN, \"\")}");
    fs::write(words_data.dest_path(), output).unwrap();

    println!("cargo:warning={max_string_lit_len}");

    println!(
        "cargo:warning=-- write_words_data {lang} bytes {:?}",
        start.elapsed()
    );
}

/// Lowercase characters that `replace_umlauts` transliterates.
const UMLAUTS: [char; 4] = ['ß', 'ä', 'ö', 'ü'];
/// ASCII replacement for the character at the same index in `UMLAUTS`.
const ASCII_UMLAUT_REPLACEMENTS: [&str; 4] = ["ss", "ae", "oe", "ue"];

/// Returns `true` if `string` contains at least one character of `UMLAUTS`.
#[inline]
// The owned `String` parameter lets this function be handed directly to
// `Iterator::any` over an iterator that yields owned words.
#[allow(clippy::needless_pass_by_value)]
fn str_contains_umlaut(string: String) -> bool {
    string.chars().any(|ch| UMLAUTS.contains(&ch))
}

/// Replaces every character of `UMLAUTS` in `string` with its counterpart
/// from `ASCII_UMLAUT_REPLACEMENTS`; all other characters are kept as they
/// are (uppercase umlauts included).
///
/// The string is processed in a single pass. None of the replacements
/// contains an umlaut itself, so one pass yields the same result as
/// replacing repeatedly until no umlaut is left.
fn replace_umlauts(string: String) -> String {
    // Fast path: hand the input back without allocating when there is
    // nothing to replace.
    if !string.chars().any(|ch| UMLAUTS.contains(&ch)) {
        return string;
    }

    // Each umlaut is two bytes in UTF-8 and each replacement is two ASCII
    // bytes, so the result has the same byte length as the input.
    let mut replaced = String::with_capacity(string.len());
    for ch in string.chars() {
        let ascii = UMLAUTS
            .iter()
            .zip(ASCII_UMLAUT_REPLACEMENTS.iter())
            .find(|(umlaut, _)| **umlaut == ch)
            .map(|(_, replacement)| *replacement);

        match ascii {
            Some(replacement) => replaced.push_str(replacement),
            None => replaced.push(ch),
        }
    }
    replaced
}

/// Relative path of the directory that `main` scans for word-list files.
const WORDS_DIR: &str = "./words/";

/// Build-script entry point.
///
/// Scans `WORDS_DIR` (skipping the entry named `LICENSE`), builds one
/// `WordsData` per file, and for every list that contains an umlaut registers
/// two languages: `<lang>_umlauts` with the words unchanged and `<lang>` with
/// the umlauts transliterated by `replace_umlauts`. The per-language source
/// fragments are then written in parallel by `write_words_data`, and finally
/// `language.rs` is written to `OUT_DIR` with the `Language` enum and its
/// lookup functions.
///
/// Progress and timing are reported through `cargo:warning` lines.
///
/// # Panics
///
/// Panics if the words directory or an entry in it cannot be read, if a file
/// name is not valid UTF-8, or if `language.rs` cannot be written.
fn main() {
    let now = Instant::now();
    println!("cargo:warning=start main {:?}", now.elapsed());
    println!("cargo:rerun-if-changed={WORDS_DIR}");
    let paths = fs::read_dir(WORDS_DIR).unwrap();

    let mut words_vec: Vec<WordsData> = vec![];

    for dir_entry in paths {
        let p = dir_entry.unwrap().path();
        let path = p.as_path();
        if path.file_name().unwrap().to_str().unwrap() == "LICENSE" {
            continue;
        }
        println!("cargo:rerun-if-changed={}", path.display());

        let mut data: WordsData = WordsData::from_path(path);
        if data.read_lines().any(str_contains_umlaut) {
            // Keep the original spelling available under `<lang>_umlauts`
            // and let the plain language name use the ASCII transliteration.
            words_vec
                .push(data.clone_with_lang(format!("{}_umlauts", data.lang)));
            data.conv = replace_umlauts;
        }
        words_vec.push(data);
    }

    // `read_dir` order is platform dependent; sorting keeps the generated
    // code stable between builds.
    words_vec.sort_by(|w1, w2| w1.lang.cmp(&w2.lang));

    let words_vec = words_vec;

    println!("cargo:warning=before write_words_data {:?}", now.elapsed());
    Parallel::new().each(&words_vec, write_words_data).run();
    println!("cargo:warning=after write_words_data {:?}", now.elapsed());

    let language_count = words_vec.len();

    fs::write(
        get_out_dir_joined(String::from("language.rs")),
        format!(
            r###"#[cfg_attr(feature = "pyo3", pyclass(eq))]
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
pub enum Language {{
    {}
}}

impl Language {{
    #[must_use]
    pub const fn read_words(self, length: usize) -> WordSequence {{
        let (padded_length, words): (NonZeroUsize, &'static str) = match self {{
            {}
        }};
        WordSequence::new(length, words, padded_length)
    }}

    #[inline]
    #[must_use]
    pub const fn all() -> [Self; {language_count}] {{
        [
            {}
        ]
    }}

    #[inline]
    #[must_use]
    pub fn from_string(string: &str) -> Option<Self> {{
        match string.to_lowercase().replace('-', "_").as_str() {{
            {},
            _ => None,
        }}
    }}

    #[allow(clippy::needless_pass_by_value)]
    #[allow(clippy::trivially_copy_pass_by_ref)]
    #[must_use]
    #[inline]
    pub const fn name(&self) -> &'static str {{
        match self {{
            {}
        }}
    }}
}}

#[cfg(feature = "pyo3")]
#[pymethods]
impl Language {{
    #[staticmethod]
    #[must_use]
    const fn values() -> [Self; {language_count}] {{
        Self::all()
    }}

    #[allow(clippy::trivially_copy_pass_by_ref)]
    #[getter]
    #[must_use]
    const fn value(&self) -> &'static str {{
        self.name()
    }}

    #[staticmethod]
    #[pyo3(signature = (name, default = None))]
    pub fn parse_string(name: &str, default: Option<Self>) -> PyResult<Self> {{
        Self::from_string(name)
            .or(default)
            .ok_or_else(|| UnknownLanguageError::new_err(name.to_owned()))
    }}
}}
"###,
            // Enum variants.
            words_vec.iter().map(WordsData::enum_name).join(",\n"),
            // `read_words` arms: include the per-language fragment.
            words_vec
                .iter()
                .map(|data| format!(
                    "Self::{} => include!(concat!(env!(\"OUT_DIR\"), \"/{}\"))",
                    data.enum_name(),
                    data.out_file_name()
                ))
                .join("\n,"),
            // Elements of `all()`.
            words_vec
                .iter()
                .map(|data| format!("Self::{}", data.enum_name()))
                .join(", "),
            // `from_string` arms. The generated function lowercases its
            // input and replaces `-` with `_` before matching, so the keys
            // must be normalised the same way; otherwise a language whose
            // file name contains `-` or an uppercase letter could never be
            // matched.
            words_vec
                .iter()
                .map(|data| format!(
                    "\"{}\" => Some(Self::{})",
                    data.lang.to_lowercase().replace('-', "_"),
                    data.enum_name()
                ))
                .join(",\n"),
            // `name()` arms: report the language name exactly as stored.
            words_vec
                .iter()
                .map(|data| format!(
                    "Self::{} => \"{}\"",
                    data.enum_name(),
                    data.lang
                ))
                .join(",\n"),
        ),
    )
    .unwrap();
    println!("cargo:warning=end main {:?}", now.elapsed());
}