use anyhow::Result;
use std::collections::HashMap;
/// Boltzmann constant in joules per kelvin.
const K_BOLTZMANN: f64 = 1.380649e-23;

/// Temperature, in kelvin, at which the per-bit energy is evaluated.
const TEMPERATURE: f64 = 293.15;

/// Returns `K_BOLTZMANN * TEMPERATURE * ln 2`, the energy attributed to one bit.
fn energy_per_bit() -> f64 {
    // `ln` is not usable in a `const` initializer, hence a function.
    let thermal_energy = K_BOLTZMANN * TEMPERATURE;
    thermal_energy * 2_f64.ln()
}
/// How far `Angel::bless` may go when altering text.
///
/// The discriminants 0-3 are the numeric values accepted by
/// `BlessingLevel::from_i32`. Each level above `Strict` also runs every
/// lower level's pass first.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BlessingLevel {
/// The text is returned unchanged.
Strict = 0,
/// Only the minor pass runs (spacing, typo dictionary, blank lines, punctuation).
MinorBlessings = 1,
/// The minor pass followed by the harmony (wiki/markup) pass.
Harmony = 2,
/// The minor and harmony passes followed by the seeded creative pass.
Creative = 3,
}
impl BlessingLevel {
    /// Converts a numeric level into the matching variant.
    ///
    /// # Errors
    ///
    /// Returns an error naming the rejected value when `level` is not in `0..=3`.
    pub fn from_i32(level: i32) -> Result<Self> {
        let parsed = match level {
            0 => Self::Strict,
            1 => Self::MinorBlessings,
            2 => Self::Harmony,
            3 => Self::Creative,
            other => {
                return Err(anyhow::anyhow!(
                    "Invalid blessing level: {}. Must be 0-3.",
                    other
                ));
            }
        };
        Ok(parsed)
    }

    /// Returns the upper-case identifier of this level.
    pub fn name(&self) -> &'static str {
        match *self {
            Self::Strict => "STRICT",
            Self::MinorBlessings => "MINOR_BLESSINGS",
            Self::Harmony => "HARMONY",
            Self::Creative => "CREATIVE",
        }
    }

    /// Returns a one-line, human-readable summary of what this level does.
    pub fn description(&self) -> &'static str {
        match *self {
            Self::Strict => "Bit-perfect reconstruction (for Hutter Prize)",
            Self::MinorBlessings => "Fix typos, spacing, obvious errors",
            Self::Harmony => "Wikipedia structure fixes, template harmonization",
            Self::Creative => "Training data augmentation, semantic variations",
        }
    }
}
/// Bookkeeping produced by one `Angel::bless` call.
#[derive(Debug, Default)]
pub struct BlessingStats {
/// Length of the input text in bytes (`str::len`).
pub original_length: usize,
/// Length of the blessed output in bytes (`str::len`).
pub blessed_length: usize,
/// Counter incremented by the blessing passes as their rules fire.
pub blessings_applied: usize,
/// Set by `calculate_thermodynamics` to `blessings_applied` as a float.
pub entropy_added: f64,
/// Set by `calculate_thermodynamics` to `entropy_added * energy_per_bit()`.
pub energy_added: f64,
}
impl BlessingStats {
    /// Derives `entropy_added` and `energy_added` from `blessings_applied`.
    ///
    /// Each counted blessing contributes one unit of entropy, and the energy
    /// is that entropy multiplied by `energy_per_bit()`.
    pub fn calculate_thermodynamics(&mut self) {
        let entropy = self.blessings_applied as f64;
        self.entropy_added = entropy;
        self.energy_added = entropy * energy_per_bit();
    }

    /// Returns `blessed_length - original_length` as a signed value;
    /// negative when the text shrank.
    pub fn size_delta(&self) -> i64 {
        let after = self.blessed_length as i64;
        let before = self.original_length as i64;
        after - before
    }
}
/// Applies text "blessings" at a configured `BlessingLevel`.
pub struct Angel {
/// Selects which passes `bless` runs.
level: BlessingLevel,
/// Maps a misspelling to its correction; used by the minor pass.
typo_dict: HashMap<String, String>,
/// Initial state of the pseudo-random generator used by the creative pass.
seed: u64,
}
impl Angel {
    /// Creates an angel for `level` with a seed of 0.
    pub fn new(level: BlessingLevel) -> Self {
        Self::with_seed(level, 0)
    }

    /// Creates an angel for `level` whose creative pass starts from `seed`.
    pub fn with_seed(level: BlessingLevel, seed: u64) -> Self {
        Self {
            level,
            typo_dict: Self::create_typo_dictionary(),
            seed,
        }
    }

    /// Builds the misspelling -> correction table used by the minor pass.
    fn create_typo_dictionary() -> HashMap<String, String> {
        const TYPOS: [(&str, &str); 8] = [
            ("teh", "the"),
            ("recieve", "receive"),
            ("occured", "occurred"),
            ("seperate", "separate"),
            ("definately", "definitely"),
            ("wierd", "weird"),
            ("accomodate", "accommodate"),
            ("beleive", "believe"),
        ];
        TYPOS
            .iter()
            .map(|&(typo, fix)| (typo.to_string(), fix.to_string()))
            .collect()
    }

    /// Blesses `text` according to the configured level.
    ///
    /// Returns the blessed text together with statistics: byte lengths before
    /// and after, the number of blessings counted by the passes, and the
    /// derived entropy/energy figures. `Strict` returns the text unchanged;
    /// every higher level runs all lower passes first.
    ///
    /// # Errors
    ///
    /// The current implementation never fails; the `Result` is part of the
    /// public signature.
    pub fn bless(&self, text: &str) -> Result<(String, BlessingStats)> {
        let mut stats = BlessingStats {
            original_length: text.len(),
            ..Default::default()
        };
        let blessed = match self.level {
            BlessingLevel::Strict => text.to_string(),
            BlessingLevel::MinorBlessings => self.apply_minor_blessings(text, &mut stats),
            BlessingLevel::Harmony => {
                let minor = self.apply_minor_blessings(text, &mut stats);
                self.apply_harmony_blessings(&minor, &mut stats)
            }
            BlessingLevel::Creative => {
                let minor = self.apply_minor_blessings(text, &mut stats);
                let harmony = self.apply_harmony_blessings(&minor, &mut stats);
                self.apply_creative_blessings(&harmony, &mut stats)
            }
        };
        stats.blessed_length = blessed.len();
        stats.calculate_thermodynamics();
        Ok((blessed, stats))
    }

    /// Minor pass: spacing, dictionary typos, blank lines, punctuation.
    ///
    /// Counting: one blessing per collapsed run of spaces, one per dictionary
    /// entry that matched, one per blank-line collapsing sweep, and one per
    /// punctuation rule that changed the text.
    fn apply_minor_blessings(&self, text: &str, stats: &mut BlessingStats) -> String {
        // Collapse every run of two or more spaces to a single space.
        let (mut result, collapsed_runs) = Self::collapse_space_runs(text);
        stats.blessings_applied += collapsed_runs;

        // Whole words only, so "teh" inside "statehood" is left alone.
        for (typo, correct) in &self.typo_dict {
            if let Some(fixed) = Self::replace_whole_words(&result, typo, correct, usize::MAX) {
                result = fixed;
                stats.blessings_applied += 1;
            }
        }

        // Limit consecutive newlines to two (at most one blank line).
        while result.contains("\n\n\n") {
            result = result.replace("\n\n\n", "\n\n");
            stats.blessings_applied += 1;
        }

        // Remove a space placed directly before sentence punctuation.
        const PUNCTUATION_FIXES: [(&str, &str); 4] =
            [(" .", "."), (" ,", ","), (" !", "!"), (" ?", "?")];
        for &(spaced, tight) in PUNCTUATION_FIXES.iter() {
            if result.contains(spaced) {
                result = result.replace(spaced, tight);
                stats.blessings_applied += 1;
            }
        }
        result
    }

    /// Harmony pass: wiki/markup normalisation.
    ///
    /// Each rule below adds one blessing when it changed the text.
    fn apply_harmony_blessings(&self, text: &str, stats: &mut BlessingStats) -> String {
        let mut result = text.to_string();

        // Canonical capitalisation of category links and template calls.
        const CASE_FIXES: [(&str, &str); 3] = [
            ("[[category:", "[[Category:"),
            ("[[CATEGORY:", "[[Category:"),
            ("{{template ", "{{Template "),
        ];
        for &(wrong, right) in CASE_FIXES.iter() {
            if result.contains(wrong) {
                result = result.replace(wrong, right);
                stats.blessings_applied += 1;
            }
        }

        // Collapse extra spaces next to link brackets, but keep one so the
        // link is not glued to the neighbouring word.
        if Self::replace_until_stable(&mut result, "]]  ", "]] ") {
            stats.blessings_applied += 1;
        }
        if Self::replace_until_stable(&mut result, "  [[", " [[") {
            stats.blessings_applied += 1;
        }

        // Exactly one space after heading hashes. The single pattern covers
        // all heading depths because only the last `#` touches the spaces.
        if Self::replace_until_stable(&mut result, "#  ", "# ") {
            stats.blessings_applied += 1;
        }

        // Put a space after a line-leading bullet marker only when missing.
        let (with_bullets, bullets_changed) = Self::space_after_bullets(&result);
        if bullets_changed {
            result = with_bullets;
            stats.blessings_applied += 1;
        }
        result
    }

    /// Creative pass: seeded, low-probability phrase substitutions.
    ///
    /// For every candidate phrase the generator is advanced once; when the
    /// roll (`state % 100`) is below 5, the first whole-word occurrence of the
    /// phrase is replaced and one blessing is counted.
    fn apply_creative_blessings(&self, text: &str, stats: &mut BlessingStats) -> String {
        const VARIATIONS: [(&str, &str); 5] = [
            ("is a", "is an example of"),
            ("the", "this"),
            ("and", "as well as"),
            ("but", "however"),
            ("also", "additionally"),
        ];
        let mut result = text.to_string();
        let mut state = self.seed;
        for &(from, to) in VARIATIONS.iter() {
            // Linear congruential step, reduced modulo 2^31. The generator is
            // advanced for every candidate, whether or not the phrase occurs.
            state = state.wrapping_mul(1103515245).wrapping_add(12345) % (1 << 31);
            if state % 100 >= 5 {
                continue;
            }
            if let Some(varied) = Self::replace_whole_words(&result, from, to, 1) {
                result = varied;
                stats.blessings_applied += 1;
            }
        }
        result
    }

    /// Collapses each run of two or more spaces into one space.
    ///
    /// Returns the new text and the number of runs that were shortened.
    fn collapse_space_runs(text: &str) -> (String, usize) {
        let mut out = String::with_capacity(text.len());
        let mut collapsed_runs = 0;
        let mut spaces_in_run = 0usize;
        for ch in text.chars() {
            if ch == ' ' {
                spaces_in_run += 1;
                if spaces_in_run == 2 {
                    collapsed_runs += 1;
                }
                if spaces_in_run > 1 {
                    continue;
                }
            } else {
                spaces_in_run = 0;
            }
            out.push(ch);
        }
        (out, collapsed_runs)
    }

    /// Replaces up to `limit` occurrences of `from` that are not adjacent to
    /// an alphanumeric character on either side.
    ///
    /// Returns `None` when nothing was replaced (including when `from` is empty).
    fn replace_whole_words(text: &str, from: &str, to: &str, limit: usize) -> Option<String> {
        if from.is_empty() {
            return None;
        }
        let is_boundary = |neighbour: Option<char>| neighbour.map_or(true, |c| !c.is_alphanumeric());
        let mut out = String::with_capacity(text.len());
        let mut cursor = 0;
        let mut replaced = 0;
        for (start, matched) in text.match_indices(from) {
            if replaced == limit {
                break;
            }
            let end = start + matched.len();
            let before = text[..start].chars().next_back();
            let after = text[end..].chars().next();
            if is_boundary(before) && is_boundary(after) {
                out.push_str(&text[cursor..start]);
                out.push_str(to);
                cursor = end;
                replaced += 1;
            }
        }
        if replaced == 0 {
            return None;
        }
        out.push_str(&text[cursor..]);
        Some(out)
    }

    /// Replaces `from` with `to` repeatedly until `from` no longer occurs.
    ///
    /// `to` must not contain `from`, otherwise this would never terminate.
    /// Returns whether the text changed.
    fn replace_until_stable(text: &mut String, from: &str, to: &str) -> bool {
        let mut changed = false;
        while text.contains(from) {
            *text = text.replace(from, to);
            changed = true;
        }
        changed
    }

    /// Inserts a space after a `*` or `-` that directly follows a newline,
    /// unless the marker is already followed by whitespace, by the same
    /// marker character (`**`, `---`), or by the end of the text.
    ///
    /// Returns the new text and whether anything was inserted.
    fn space_after_bullets(text: &str) -> (String, bool) {
        let mut out = String::with_capacity(text.len() + 8);
        let mut changed = false;
        let mut previous = None;
        let mut chars = text.chars().peekable();
        while let Some(ch) = chars.next() {
            out.push(ch);
            let is_marker = previous == Some('\n') && (ch == '*' || ch == '-');
            if is_marker {
                if let Some(&next) = chars.peek() {
                    if !next.is_whitespace() && next != ch {
                        out.push(' ');
                        changed = true;
                    }
                }
            }
            previous = Some(ch);
        }
        (out, changed)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Every valid numeric level parses; anything else is rejected.
    #[test]
    fn test_blessing_level_parsing() {
        assert_eq!(BlessingLevel::from_i32(0).unwrap(), BlessingLevel::Strict);
        assert_eq!(
            BlessingLevel::from_i32(1).unwrap(),
            BlessingLevel::MinorBlessings
        );
        assert_eq!(BlessingLevel::from_i32(2).unwrap(), BlessingLevel::Harmony);
        assert_eq!(BlessingLevel::from_i32(3).unwrap(), BlessingLevel::Creative);
        assert!(BlessingLevel::from_i32(4).is_err());
    }

    /// Strict mode must return the input untouched, double space included.
    #[test]
    fn test_strict_mode() {
        let angel = Angel::new(BlessingLevel::Strict);
        // The input deliberately contains two consecutive spaces.
        let input = "This has  double spaces";
        let (output, stats) = angel.bless(input).unwrap();
        assert_eq!(output, input);
        assert_eq!(stats.blessings_applied, 0);
    }

    /// The minor pass fixes the typo and leaves no double space behind.
    #[test]
    fn test_minor_blessings() {
        let angel = Angel::new(BlessingLevel::MinorBlessings);
        // The input deliberately contains two consecutive spaces.
        let input = "This has  double spaces and teh is a typo";
        let (output, stats) = angel.bless(input).unwrap();
        assert!(output.contains("the is a typo"));
        // Checks for a *double* space; single spaces must of course remain.
        assert!(!output.contains("  "));
        assert!(stats.blessings_applied > 0);
    }

    /// The harmony pass canonicalises category link capitalisation.
    #[test]
    fn test_harmony_blessings() {
        let angel = Angel::new(BlessingLevel::Harmony);
        let input = "[[category:test]] and [[CATEGORY:other]]";
        let (output, stats) = angel.bless(input).unwrap();
        assert!(output.contains("[[Category:test]]"));
        assert!(output.contains("[[Category:other]]"));
        assert!(stats.blessings_applied >= 2);
    }

    /// Entropy mirrors the blessing count and energy is positive once any
    /// blessing was applied.
    #[test]
    fn test_thermodynamics() {
        let angel = Angel::new(BlessingLevel::MinorBlessings);
        let input = "This has typos and teh recieve";
        let (_output, stats) = angel.bless(input).unwrap();
        assert!(stats.energy_added > 0.0);
        assert_eq!(stats.entropy_added, stats.blessings_applied as f64);
    }
}