use crate::world::fact_check_lang::{contains_word, Lang, Msg, Weather};
use crate::world::proposals::PlaceLink;
use crate::world::types::magic::{CheckContext, MagicLedger};
/// Index of known places, used to find which of them a piece of text mentions.
pub struct Gazetteer {
    /// Every place that can be matched against text, in the order supplied.
    places: Vec<PlaceLink>,
}

impl Gazetteer {
    /// Builds a gazetteer over the given places.
    pub fn new(places: Vec<PlaceLink>) -> Self {
        Gazetteer { places }
    }

    /// Returns the places mentioned in `text`, matching with English name variants.
    ///
    /// Shorthand for [`Gazetteer::mentioned_in_lang`] with [`Lang::En`].
    pub fn mentioned_in(&self, text: &str) -> Vec<&PlaceLink> {
        self.mentioned_in_lang(text, Lang::En)
    }

    /// Returns every place for which at least one name variant (as produced by
    /// `name_variants` for `lang`) occurs as a word in the lowercased `text`.
    ///
    /// Results keep the order in which the places were handed to [`Gazetteer::new`].
    pub fn mentioned_in_lang(&self, text: &str, lang: Lang) -> Vec<&PlaceLink> {
        use crate::world::fact_check_lang::name_variants;

        let haystack = text.to_lowercase();
        let mut hits = Vec::new();
        for place in &self.places {
            let variants = name_variants(&place.name, lang);
            if variants.iter().any(|variant| contains_word(&haystack, variant)) {
                hits.push(place);
            }
        }
        hits
    }
}
/// Converts the landmarks declared in a world definition's `geography` section
/// into [`PlaceLink`]s.
///
/// Returns an empty list when the definition has no geography. Landmarks whose
/// name is blank after trimming are skipped. Each resulting place gets a nil
/// `place_id` and `0, 0` coordinates; `biome` and `climate_zone` are both copied
/// from the landmark's `climate_zone`, and `hydrology_basis` from its `kind`.
pub fn declared_places(def: &crate::world::types::WorldDefinition) -> Vec<PlaceLink> {
    let landmarks = match def.geography.as_ref() {
        Some(geography) => &geography.landmarks,
        None => return Vec::new(),
    };

    let mut places = Vec::new();
    for landmark in landmarks.iter() {
        if landmark.name.trim().is_empty() {
            continue;
        }
        places.push(PlaceLink {
            place_id: uuid::Uuid::nil(),
            name: landmark.name.clone(),
            biome: landmark.climate_zone.clone(),
            climate_zone: landmark.climate_zone.clone(),
            hydrology_basis: landmark.kind.clone(),
            population: landmark.population,
            x: 0,
            y: 0,
        });
    }
    places
}
/// World facts that the paragraph checks compare prose against.
pub struct WorldContext {
    /// Places whose climate zone and population are checked when they are mentioned.
    pub gazetteer: Gazetteer,
    /// The world's moons; the astronomy check compares moon counts found in
    /// prose against the length of this list.
    pub moons: Vec<String>,
    /// Minerals the world has; the economy check flags extraction of metals
    /// that are not in this list.
    pub minerals: Vec<String>,
}

impl WorldContext {
    /// Bundles the gazetteer, moon list and mineral list into one context.
    pub fn new(gazetteer: Gazetteer, moons: Vec<String>, minerals: Vec<String>) -> Self {
        WorldContext { gazetteer, moons, minerals }
    }
}
/// One issue reported by a paragraph check.
#[derive(Debug, Clone, PartialEq)]
pub struct Finding {
/// Which check produced the finding. The checks in this file emit
/// "travel_time", "climate", "demographics", "astronomy" and "economy".
pub category: String,
/// "contradiction", "warning", or "info". The checks in this file use "info"
/// whenever a magic-ledger rule suppresses the finding.
pub severity: String,
/// Message rendered in the language chosen for the paragraph.
pub body: String,
/// The same message rendered in English.
pub body_en: String,
/// `kind` of the magic-ledger rule that suppressed this finding, if any.
pub suppressed_by: Option<String>,
}
/// Runs the fact checks over one paragraph and returns everything they found.
///
/// The paragraph language comes from `detect_with_confidence`; when detection
/// is not confident the checks fall back to English. The travel-time check
/// always runs. The climate, population, astronomy and economy checks run, in
/// that order, only when a [`WorldContext`] is supplied.
pub fn check_paragraph(
    text: &str,
    ledger: &MagicLedger,
    roles: &[String],
    ctx: Option<&WorldContext>,
) -> Vec<Finding> {
    let lang = match crate::world::fact_check_lang::detect_with_confidence(text) {
        (detected, true) => detected,
        (_, false) => Lang::En,
    };

    let mut findings = check_travel_time(text, ledger, roles, lang);
    let Some(world) = ctx else {
        return findings;
    };
    findings.extend(check_climate(text, &world.gazetteer, ledger, lang));
    findings.extend(check_population(text, &world.gazetteer, ledger, lang));
    findings.extend(check_astronomy(text, &world.moons, ledger, lang));
    findings.extend(check_economy(text, &world.minerals, ledger, lang));
    findings
}
/// Flags sentences that talk about extracting a metal the world does not list.
///
/// Does nothing when `minerals` is empty. Only sentences containing one of the
/// language's extraction words are inspected. Within such a sentence, each
/// metal from `lang.metals()` whose canonical name is not among the
/// ASCII-lowercased `minerals` and whose name appears as a word yields one
/// finding. Severity is "warning", or "info" when the ledger has a suppressing
/// rule for the "economy" category.
fn check_economy(text: &str, minerals: &[String], ledger: &MagicLedger, lang: Lang) -> Vec<Finding> {
    let mut findings = Vec::new();
    if minerals.is_empty() {
        return findings;
    }

    let mut available = std::collections::HashSet::new();
    for mineral in minerals {
        available.insert(mineral.to_ascii_lowercase());
    }

    for sentence in split_sentences(text) {
        if !has_extraction_context(sentence, lang) {
            continue;
        }
        let lowered = sentence.to_lowercase();
        // Guards against reporting the same canonical metal twice in one sentence.
        let mut reported = std::collections::HashSet::new();
        for (canonical, names) in lang.metals() {
            let already_handled =
                available.contains(*canonical) || reported.contains(*canonical);
            if already_handled {
                continue;
            }
            let mentioned = names.iter().any(|name| contains_word(&lowered, &name.to_lowercase()));
            if !mentioned {
                continue;
            }
            reported.insert(*canonical);

            let mineral_list = minerals.join(", ");
            let msg = Msg::Economy { metal: canonical, minerals: &mineral_list };
            let check = CheckContext { category: "economy", ..Default::default() };
            let suppressed_by = ledger.find_suppressor(&check).map(|rule| rule.kind.clone());
            let severity = match &suppressed_by {
                Some(_) => "info",
                None => "warning",
            };
            findings.push(Finding {
                category: String::from("economy"),
                severity: String::from(severity),
                body: lang.render(&msg),
                body_en: Lang::En.render(&msg),
                suppressed_by,
            });
        }
    }
    findings
}
/// Returns `true` when the lowercased sentence contains, as a word, any of the
/// language's extraction words.
fn has_extraction_context(s: &str, lang: Lang) -> bool {
    let haystack = s.to_lowercase();
    for word in lang.extraction_words().iter() {
        if contains_word(&haystack, &word.to_lowercase()) {
            return true;
        }
    }
    false
}
/// Flags sentences that state a number of moons different from the world's.
///
/// Does nothing when `moons` is empty. A sentence is reported when
/// `find_moon_count` extracts a count that differs from `moons.len()`.
/// Severity is "warning", or "info" when the ledger has a suppressing rule
/// for the "astronomy" category.
fn check_astronomy(text: &str, moons: &[String], ledger: &MagicLedger, lang: Lang) -> Vec<Finding> {
    let mut findings = Vec::new();
    let world_count = moons.len();
    if world_count == 0 {
        return findings;
    }

    for sentence in split_sentences(text) {
        let claimed = match find_moon_count(sentence, lang) {
            Some(count) if count != world_count => count,
            _ => continue,
        };

        let moon_list = moons.join(", ");
        let msg = Msg::Astronomy { claimed, world: world_count, moons: &moon_list };
        let check = CheckContext { category: "astronomy", ..Default::default() };
        let suppressed_by = ledger.find_suppressor(&check).map(|rule| rule.kind.clone());
        let severity = match &suppressed_by {
            Some(_) => "info",
            None => "warning",
        };
        findings.push(Finding {
            category: String::from("astronomy"),
            severity: String::from(severity),
            body: lang.render(&msg),
            body_en: Lang::En.render(&msg),
            suppressed_by,
        });
    }
    findings
}
/// Extracts the number of moons a sentence claims, if it states one.
///
/// Looks for the first "<quantity> <moon word>" pair, where the quantity is a
/// run of digits, one of the language's number words, or one of its "both"
/// words (which counts as 2). Returns `None` when nothing matches, when the
/// pattern fails to compile, or when the quantity cannot be turned into a
/// number.
fn find_moon_count(s: &str, lang: Lang) -> Option<usize> {
    let both = both_words(lang);

    // Number words first, then the "both" words; `alternation` orders them
    // longest-first (stable) and escapes them for use in the pattern.
    let mut quantity_words: Vec<&str> = Vec::new();
    for (word, _) in lang.numbers().iter() {
        quantity_words.push(*word);
    }
    quantity_words.extend(both.iter().copied());
    let num_alt = alternation(&quantity_words);
    let moon_alt = alternation(lang.moon_words());

    let pattern = format!(r"(?i)(\d+|{num_alt})\s+({moon_alt})");
    let re = regex::Regex::new(&pattern).ok()?;
    let caps = re.captures(s)?;
    let quantity = caps.get(1)?.as_str().to_lowercase();

    let means_two = both.iter().any(|b| b.to_lowercase() == quantity);
    if means_two {
        Some(2)
    } else {
        word_to_number(&quantity, lang).map(|n| n as usize)
    }
}
/// Words meaning "both" in the given language; the moon-count extractor treats
/// them as the number 2. The French list is empty.
fn both_words(lang: Lang) -> &'static [&'static str] {
    const EN: &[&str] = &["both"];
    const RU: &[&str] = &["оба", "обе"];
    const ES: &[&str] = &["ambos", "ambas"];
    const DE: &[&str] = &["beide"];
    const FR: &[&str] = &[];

    match lang {
        Lang::En => EN,
        Lang::Ru => RU,
        Lang::Es => ES,
        Lang::De => DE,
        Lang::Fr => FR,
    }
}
/// Builds a regex alternation (`a|b|c`) from literal words.
///
/// Words are ordered longest-first (ties keep their input order) and each one
/// is escaped with `regex::escape`. An empty input yields an empty string.
fn alternation(words: &[&str]) -> String {
    let mut ordered = words.to_vec();
    // Stable sort, descending by byte length.
    ordered.sort_by(|a, b| b.len().cmp(&a.len()));
    let escaped: Vec<String> = ordered.into_iter().map(|word| regex::escape(word)).collect();
    escaped.join("|")
}
/// Flags sentences that put hot or cold weather in a place whose climate zone
/// conflicts with it.
///
/// For each sentence with detectable weather, every mentioned place whose
/// `climate_zone` conflicts (per `climate_conflict`) yields one finding in the
/// "climate" category. The ledger is consulted under the "climate_anomaly"
/// category with the place name as region; a suppressing rule lowers the
/// severity from "warning" to "info".
fn check_climate(text: &str, gaz: &Gazetteer, ledger: &MagicLedger, lang: Lang) -> Vec<Finding> {
    let mut findings = Vec::new();
    for sentence in split_sentences(text) {
        let weather = match detect_weather(sentence, lang) {
            Some(weather) => weather,
            None => continue,
        };
        let conflicting = gaz
            .mentioned_in_lang(sentence, lang)
            .into_iter()
            .filter(|place| climate_conflict(&place.climate_zone, weather));
        for place in conflicting {
            let msg = Msg::Climate { weather, place: &place.name, zone: &place.climate_zone };
            let check = CheckContext {
                category: "climate_anomaly",
                roles: &[],
                region: Some(&place.name),
                ..Default::default()
            };
            let suppressed_by = ledger.find_suppressor(&check).map(|rule| rule.kind.clone());
            let severity = match &suppressed_by {
                Some(_) => "info",
                None => "warning",
            };
            findings.push(Finding {
                category: String::from("climate"),
                severity: String::from(severity),
                body: lang.render(&msg),
                body_en: Lang::En.render(&msg),
                suppressed_by,
            });
        }
    }
    findings
}
/// Flags sentences whose population figure is far from a place's modeled one.
///
/// A sentence is considered only when `find_population` extracts a figure and
/// exactly one mentioned place has a non-zero population. The claim is
/// accepted when claimed/modeled lies within 0.33..=3.0; otherwise a
/// "demographics" finding is produced, with severity "warning", or "info" when
/// the ledger has a suppressing rule for that category and region.
fn check_population(text: &str, gaz: &Gazetteer, ledger: &MagicLedger, lang: Lang) -> Vec<Finding> {
    let mut findings = Vec::new();
    for sentence in split_sentences(text) {
        let claimed = match find_population(sentence, lang) {
            Some(value) => value,
            None => continue,
        };

        // Attribute the figure only when the sentence names exactly one
        // populated place; otherwise it is ambiguous which place it refers to.
        let mut populated = gaz
            .mentioned_in_lang(sentence, lang)
            .into_iter()
            .filter(|place| place.population > 0);
        let place = match (populated.next(), populated.next()) {
            (Some(only), None) => only,
            _ => continue,
        };

        let modeled = place.population as f32;
        let ratio = claimed / modeled;
        if (0.33..=3.0).contains(&ratio) {
            continue;
        }

        let msg = Msg::Population {
            place: &place.name,
            claimed: claimed as u64,
            modeled: place.population,
        };
        let check = CheckContext {
            category: "demographics",
            region: Some(&place.name),
            ..Default::default()
        };
        let suppressed_by = ledger.find_suppressor(&check).map(|rule| rule.kind.clone());
        let severity = match &suppressed_by {
            Some(_) => "info",
            None => "warning",
        };
        findings.push(Finding {
            category: String::from("demographics"),
            severity: String::from(severity),
            body: lang.render(&msg),
            body_en: Lang::En.render(&msg),
            suppressed_by,
        });
    }
    findings
}
/// Splits text into sentence-like pieces at `.`, `!`, `?` and newlines.
///
/// The terminators themselves are dropped, and pieces are not trimmed. As with
/// `str::split`, consecutive terminators or a trailing terminator produce
/// empty pieces, and empty input produces a single empty piece.
///
/// A `.` with an ASCII digit directly on both sides is treated as a decimal
/// point rather than a sentence end, so figures such as `12.5 km` or
/// `2.5 million` reach the extractors intact instead of being cut into
/// `12` and `5 km`.
fn split_sentences(text: &str) -> impl Iterator<Item = &str> {
    let bytes = text.as_bytes();
    let mut pieces = Vec::new();
    let mut start = 0;
    for (i, c) in text.char_indices() {
        let is_boundary = match c {
            '!' | '?' | '\n' => true,
            '.' => {
                // `.` is a single byte, so `i - 1` and `i + 1` are the bytes
                // right next to it; a UTF-8 continuation byte is never an
                // ASCII digit, so non-ASCII neighbours are handled correctly.
                let digit_before = i > 0 && bytes[i - 1].is_ascii_digit();
                let digit_after = bytes.get(i + 1).map_or(false, |b| b.is_ascii_digit());
                !(digit_before && digit_after)
            }
            _ => false,
        };
        if is_boundary {
            pieces.push(&text[start..i]);
            start = i + c.len_utf8();
        }
    }
    pieces.push(&text[start..]);
    pieces.into_iter()
}
/// Classifies the weather a sentence describes, if any.
///
/// The sentence is lowercased and searched for the language's cold-weather
/// words first, then its hot-weather words; cold wins when both are present.
/// Matching is by substring, so a weather word embedded in a longer word also
/// counts.
fn detect_weather(s: &str, lang: Lang) -> Option<Weather> {
    let lowered = s.to_lowercase();

    let is_cold = lang.cold_weather().iter().any(|word| lowered.contains(&word.to_lowercase()));
    if is_cold {
        return Some(Weather::Cold);
    }

    let is_hot = lang.hot_weather().iter().any(|word| lowered.contains(&word.to_lowercase()));
    if is_hot {
        return Some(Weather::Hot);
    }

    None
}
/// Returns `true` when the described weather contradicts the climate zone:
/// cold weather in a warm zone, or hot weather in a cold zone.
///
/// Zone names are compared exactly; any zone not listed here never conflicts.
fn climate_conflict(zone: &str, weather: Weather) -> bool {
    match weather {
        Weather::Cold => matches!(
            zone,
            "hot_desert" | "savanna" | "tropical_rainforest" | "tropical_seasonal"
        ),
        Weather::Hot => matches!(zone, "tundra" | "ice_cap" | "taiga"),
    }
}
/// Extracts the largest population-like figure from a sentence.
///
/// Every number in the sentence is considered, optionally followed by one of
/// the language's "thousand" or "million" words, which scale it by 1_000 or
/// 1_000_000. Commas and spaces inside a number are dropped before parsing;
/// numbers that still fail to parse are skipped. Figures below 500 are
/// ignored. Returns the largest remaining figure, or `None` if there is none
/// or the pattern fails to compile.
fn find_population(s: &str, lang: Lang) -> Option<f32> {
    let thousand = alternation(lang.thousand_words());
    let million = alternation(lang.million_words());
    let pattern = format!(r"(?i)(\d[\d,. ]*\d|\d)\s*({thousand}|{million})?");
    let re = regex::Regex::new(&pattern).ok()?;

    let mut best: Option<f32> = None;
    for caps in re.captures_iter(s) {
        let digits = caps.get(1)?.as_str().replace([',', ' '], "");
        let parsed = match digits.parse::<f32>() {
            Ok(value) => value,
            Err(_) => continue,
        };

        let multiplier: f32 = match caps.get(2) {
            None => 1.0,
            Some(unit_match) => {
                let unit = unit_match.as_str().to_lowercase();
                if lang.thousand_words().iter().any(|w| w.to_lowercase() == unit) {
                    1_000.0
                } else if lang.million_words().iter().any(|w| w.to_lowercase() == unit) {
                    1_000_000.0
                } else {
                    1.0
                }
            }
        };
        let value = parsed * multiplier;

        let beats_best = match best {
            Some(current) => value > current,
            None => true,
        };
        if value >= 500.0 && beats_best {
            best = Some(value);
        }
    }
    best
}
/// Formats a population count compactly for display.
///
/// * below 10_000: the plain number, e.g. `9999`
/// * 10_000 up to 999_499: whole thousands with a `k` suffix, e.g. `45k`
/// * 999_500 and above: millions with one decimal and an `M` suffix, e.g. `1.5M`
fn fmt_pop(n: u64) -> String {
    // Counts from 999_500 upward would round to "1000k" in the thousands
    // format, so the switch to millions happens here rather than at 1_000_000.
    const MILLIONS_FROM: u64 = 999_500;

    if n >= MILLIONS_FROM {
        format!("{:.1}M", n as f64 / 1_000_000.0)
    } else if n >= 10_000 {
        format!("{:.0}k", n as f64 / 1_000.0)
    } else {
        n.to_string()
    }
}
/// Flags sentences whose stated distance and duration imply an implausible pace.
///
/// A sentence is considered when both a distance (km) and a duration (days)
/// can be extracted and both are positive. The pace in km/day is compared with
/// a 65 km/day baseline: more than 2.5 times the baseline is a
/// "contradiction", more than 1.5 times is a "warning", anything slower is
/// accepted. A ledger rule that suppresses "travel_time" for the given roles
/// lowers the severity to "info".
fn check_travel_time(text: &str, ledger: &MagicLedger, roles: &[String], lang: Lang) -> Vec<Finding> {
    const BASELINE_KM_PER_DAY: f32 = 65.0;

    let mut findings = Vec::new();
    for sentence in split_sentences(text) {
        let measured = find_distance_km(sentence, lang).zip(find_duration_days(sentence, lang));
        let Some((km, days)) = measured else {
            continue;
        };
        if days <= 0.0 || km <= 0.0 {
            continue;
        }

        let pace = km / days;
        let (unsuppressed, severe) = match pace / BASELINE_KM_PER_DAY {
            ratio if ratio > 2.5 => ("contradiction", true),
            ratio if ratio > 1.5 => ("warning", false),
            _ => continue,
        };

        let msg = Msg::Travel { km, days, pace, severe };
        let check = CheckContext { category: "travel_time", roles, ..Default::default() };
        let suppressed_by = ledger.find_suppressor(&check).map(|rule| rule.kind.clone());
        let severity = match &suppressed_by {
            Some(_) => "info",
            None => unsuppressed,
        };
        findings.push(Finding {
            category: String::from("travel_time"),
            severity: String::from(severity),
            body: lang.render(&msg),
            body_en: Lang::En.render(&msg),
            suppressed_by,
        });
    }
    findings
}
/// Publishes a finding to the output pane as a `FACT_CHECK_WARNING` message.
///
/// The finding's severity string maps to `Severity::Contradiction` or
/// `Severity::Warning`; any other value becomes `Severity::Info`. When the
/// finding was suppressed by a magic rule, the displayed text gets a note
/// naming that rule. `source`, when given, is attached as the message's
/// source paragraph.
pub fn emit_finding(f: &Finding, source: Option<uuid::Uuid>) {
    use crate::pane::output::{kinds, Lifetime, Message, Severity};

    let severity = match f.severity.as_str() {
        "contradiction" => Severity::Contradiction,
        "warning" => Severity::Warning,
        _ => Severity::Info,
    };

    let text = if let Some(rule) = f.suppressed_by.as_deref() {
        format!("{} (consistent with magic rule `{rule}`)", f.body)
    } else {
        f.body.clone()
    };

    let payload = serde_json::json!({
        "text": text,
        "body_en": f.body_en,
        "category": f.category,
        "track": "fast",
        "suppressed_by": f.suppressed_by,
    });
    let message = Message::new(
        kinds::FACT_CHECK_WARNING,
        severity,
        Lifetime::UntilActedOn,
        payload,
    );
    let message = match source {
        Some(paragraph) => message.with_source_paragraph(paragraph),
        None => message,
    };
    crate::pane::output::emit(&message);
}
/// Extracts the first distance in a sentence and converts it to kilometres.
///
/// Matches a number (optionally with one `.` or `,` fraction) followed by any
/// unit name from `lang.distance_units()`, then multiplies by the factor of
/// the unit group that name belongs to (1.0 if no group claims it). A comma is
/// always read as a decimal separator, so `1,200` parses as 1.2. Returns
/// `None` when nothing matches or the pattern fails to compile.
fn find_distance_km(s: &str, lang: Lang) -> Option<f32> {
    let groups = lang.distance_units();

    let mut unit_names: Vec<&str> = Vec::new();
    for (names, _) in groups.iter() {
        unit_names.extend(names.iter().copied());
    }
    let alt = alternation(&unit_names);

    let pattern = format!(r"(?i)(\d+(?:[.,]\d+)?)\s*({alt})");
    let re = regex::Regex::new(&pattern).ok()?;
    let caps = re.captures(s)?;

    let amount: f32 = caps.get(1)?.as_str().replace(',', ".").parse().ok()?;
    let unit = caps.get(2)?.as_str().to_lowercase();

    let mut factor = 1.0;
    for (names, km_per_unit) in groups.iter() {
        if names.iter().any(|name| name.to_lowercase() == unit) {
            factor = *km_per_unit;
            break;
        }
    }
    Some(amount * factor)
}
/// Extracts the first duration in a sentence and converts it to days.
///
/// Matches a run of digits or one of the language's number words, followed by
/// whitespace and a day or week word; weeks count as 7 days. Returns `None`
/// when nothing matches, the quantity cannot be turned into a number, or the
/// pattern fails to compile.
fn find_duration_days(s: &str, lang: Lang) -> Option<f32> {
    let mut number_words: Vec<&str> = Vec::new();
    for (word, _) in lang.numbers().iter() {
        number_words.push(*word);
    }
    let num_alt = alternation(&number_words);

    let mut unit_words: Vec<&str> = Vec::new();
    unit_words.extend(lang.day_words().iter().copied());
    unit_words.extend(lang.week_words().iter().copied());
    let unit_alt = alternation(&unit_words);

    let pattern = format!(r"(?i)(\d+|{num_alt})\s+({unit_alt})");
    let re = regex::Regex::new(&pattern).ok()?;
    let caps = re.captures(s)?;

    let quantity = word_to_number(caps.get(1)?.as_str(), lang)?;
    let unit = caps.get(2)?.as_str().to_lowercase();
    if lang.week_words().iter().any(|w| w.to_lowercase() == unit) {
        return Some(quantity * 7.0);
    }
    Some(quantity)
}
/// Turns a quantity token into a number.
///
/// Tries a direct `f32` parse first; otherwise looks the token up,
/// case-insensitively, among the language's number words. Returns `None` when
/// neither works.
fn word_to_number(w: &str, lang: Lang) -> Option<f32> {
    if let Ok(parsed) = w.parse::<f32>() {
        return Some(parsed);
    }

    let needle = w.to_lowercase();
    for (word, value) in lang.numbers().iter() {
        if word.to_lowercase() == needle {
            return Some(*value);
        }
    }
    None
}
// Unit tests for the paragraph fact checks and their per-language extractors.
#[cfg(test)]
mod tests {
use super::*;
// A default ledger; the tests using it expect no finding to be suppressed.
fn empty_ledger() -> MagicLedger {
MagicLedger::default()
}
// 612 km in three days is 204 km/day, more than 2.5x the 65 km/day baseline.
#[test]
fn flags_an_impossible_pace() {
let f = check_paragraph(
"The messenger rode 612 km in three days to reach the capital.",
&empty_ledger(),
&[],
None,
);
assert_eq!(f.len(), 1);
assert_eq!(f[0].category, "travel_time");
assert_eq!(f[0].severity, "contradiction");
assert!(f[0].suppressed_by.is_none());
}
// 120 km in three days is 40 km/day, below the baseline, so nothing is reported.
#[test]
fn passes_a_plausible_pace() {
let f = check_paragraph("They walked 120 km in three days.", &empty_ledger(), &[], None);
assert!(f.is_empty(), "got {f:?}");
}
// Distances given in miles are converted before the pace is judged.
#[test]
fn miles_are_converted() {
let f = check_paragraph("She flew 300 miles in two days.", &empty_ledger(), &[], None);
assert_eq!(f.len(), 1);
assert_eq!(f[0].severity, "contradiction");
}
// A ledger rule covering "travel_time" keeps the finding but downgrades it to
// "info" and records the rule's kind.
#[test]
fn magic_rule_suppresses_with_a_note() {
let ledger: MagicLedger = serde_hjson::from_str(
r#"{ enabled: true, rules: [ { kind: "messenger_birds", covers: ["travel_time"], applicable_to: { roles: ["any"] } } ] }"#,
)
.unwrap();
let f = check_paragraph("The messenger rode 612 km in three days.", &ledger, &[], None);
assert_eq!(f.len(), 1);
assert_eq!(f[0].severity, "info"); assert_eq!(f[0].suppressed_by.as_deref(), Some("messenger_birds"));
}
// Two-place fixture: a tropical city of 40_000 and a tundra town of 8_000.
fn gaz() -> Gazetteer {
Gazetteer::new(vec![
PlaceLink {
place_id: uuid::Uuid::nil(),
name: "Velmaril".into(),
biome: "tropical_seasonal".into(),
climate_zone: "tropical_seasonal".into(),
hydrology_basis: "river_mouth".into(),
population: 40_000,
x: 60,
y: 69,
},
PlaceLink {
place_id: uuid::Uuid::nil(),
name: "Korthun".into(),
biome: "tundra".into(),
climate_zone: "tundra".into(),
hydrology_basis: "confluence".into(),
population: 8_000,
x: 42,
y: 12,
},
])
}
// A blizzard conflicts with the tropical place but not with the tundra one.
#[test]
fn flags_snow_in_the_tropics() {
let g = WorldContext::new(gaz(), vec![], vec![]);
let f = check_paragraph("A blizzard buried Velmaril overnight.", &empty_ledger(), &[], Some(&g));
assert_eq!(f.len(), 1);
assert_eq!(f[0].category, "climate");
assert_eq!(f[0].severity, "warning");
let f2 = check_paragraph("A blizzard buried Korthun overnight.", &empty_ledger(), &[], Some(&g));
assert!(f2.is_empty(), "got {f2:?}");
}
// 2 million against a modeled 40_000 is flagged; 45,000 is within tolerance.
#[test]
fn flags_a_population_mismatch() {
let g = WorldContext::new(gaz(), vec![], vec![]);
let f = check_paragraph("Velmaril, a teeming city of 2 million souls.", &empty_ledger(), &[], Some(&g));
assert_eq!(f.len(), 1);
assert_eq!(f[0].category, "demographics");
let f2 = check_paragraph("Velmaril, a city of 45,000.", &empty_ledger(), &[], Some(&g));
assert!(f2.is_empty(), "got {f2:?}");
}
// Mining an unlisted metal is flagged; a listed metal, or an unlisted metal
// mentioned without an extraction word, is not.
#[test]
fn flags_a_resource_the_geology_lacks() {
let ctx = WorldContext::new(
Gazetteer::new(vec![]),
vec![],
vec!["copper".into(), "gold".into(), "iron".into(), "coal".into()],
);
let f = check_paragraph("The silver mines of the north ran deep.", &empty_ledger(), &[], Some(&ctx));
assert_eq!(f.len(), 1);
assert_eq!(f[0].category, "economy");
let f2 = check_paragraph("The copper mines of the north ran deep.", &empty_ledger(), &[], Some(&ctx));
assert!(f2.is_empty(), "got {f2:?}");
let f3 = check_paragraph("She wore a silver ring.", &empty_ledger(), &[], Some(&ctx));
assert!(f3.is_empty(), "got {f3:?}");
}
// A place name must match as a whole word: "Korthuns" is not "Korthun".
#[test]
fn gazetteer_matches_whole_words_only() {
let g = gaz();
assert_eq!(g.mentioned_in("the Korthuns").len(), 0);
assert_eq!(g.mentioned_in("near Korthun, north").len(), 1);
}
// End to end: minerals and landmarks declared in a world definition are
// lowercased/deduplicated and then drive the climate check.
#[test]
fn declared_geography_and_economy_feed_the_checker() {
let body = r#"{
name: "T"
seed: 1
astronomy: {
star: { luminosity_solar: 1.0 }
planet: { mass_earth: 1.0, radius_earth: 1.0, axial_tilt_deg: 23.4, day_length_hours: 24.0 }
orbit: { semi_major_axis_au: 1.0 }
calendar: { months: 12, month_length_days: 30 }
}
geology: { generated: { notable_minerals: ["iron", "Tin"] } }
economy: { resources: ["petroleum", "iron"] }
geography: {
landmarks: [
{ name: "Cairo", kind: "city", climate_zone: "hot_desert", population: 9000000 }
]
}
}"#;
let def = crate::world::types::WorldDefinition::from_hjson(body).unwrap();
let m = def.declared_minerals();
assert!(m.contains(&"iron".to_string()));
assert!(m.contains(&"tin".to_string()));
assert!(m.contains(&"petroleum".to_string()));
assert_eq!(m.iter().filter(|x| *x == "iron").count(), 1, "deduped");
let places = declared_places(&def);
assert_eq!(places.len(), 1);
assert_eq!(places[0].name, "Cairo");
assert_eq!(places[0].climate_zone, "hot_desert");
let ctx = WorldContext::new(Gazetteer::new(places), vec![], def.declared_minerals());
let f = check_paragraph("Snow fell on Cairo for three days.", &empty_ledger(), &[], Some(&ctx));
assert!(f.iter().any(|f| f.category == "climate"), "got {f:?}");
}
// Declined forms of a Russian place name match under Lang::Ru but not Lang::En.
#[test]
fn gazetteer_resolves_declined_russian_name() {
let g = Gazetteer::new(vec![PlaceLink {
place_id: uuid::Uuid::nil(),
name: "Москва".into(),
biome: "temperate_forest".into(),
climate_zone: "temperate_forest".into(),
hydrology_basis: "river".into(),
population: 50_000,
x: 10,
y: 10,
}]);
assert_eq!(g.mentioned_in_lang("дорога вела в Москве", Lang::Ru).len(), 1);
assert_eq!(g.mentioned_in_lang("к северу от Москвы", Lang::Ru).len(), 1);
assert_eq!(g.mentioned_in_lang("дорога вела в Москве", Lang::En).len(), 0);
}
// Spot checks of the distance, duration, weather and population extractors
// in Russian, Spanish, French and German.
#[test]
fn per_language_extractors() {
assert_eq!(find_distance_km("600 км", Lang::Ru), Some(600.0));
assert_eq!(find_duration_days("три дня", Lang::Ru), Some(3.0));
assert!(detect_weather("на город опустился снег", Lang::Ru).is_some());
assert_eq!(find_duration_days("tres días", Lang::Es), Some(3.0));
assert_eq!(find_duration_days("trois jours", Lang::Fr), Some(3.0));
assert!((find_distance_km("300 Meilen", Lang::De).unwrap() - 482.7).abs() < 1.0);
assert_eq!(find_duration_days("drei Tage", Lang::De), Some(3.0));
assert!((find_population("2 Millionen", Lang::De).unwrap() - 2_000_000.0).abs() < 1.0);
}
// A Russian paragraph goes through language detection and still trips the
// travel-time check (600 km in three days).
#[test]
fn russian_travel_time_flags() {
let f = check_paragraph(
"Гонец проскакал 600 км за три дня без отдыха, чтобы доставить королевский приказ.",
&empty_ledger(),
&[],
None,
);
assert_eq!(f.len(), 1, "got {f:?}");
assert_eq!(f[0].category, "travel_time");
}
// With two moons declared, "three moons" is flagged and "both moons" is not.
#[test]
fn flags_wrong_moon_count() {
let ctx = WorldContext::new(gaz(), vec!["Korthana".into(), "Eldra".into()], vec![]);
let f = check_paragraph("All three moons hung over the bay.", &empty_ledger(), &[], Some(&ctx));
assert_eq!(f.len(), 1);
assert_eq!(f[0].category, "astronomy");
assert_eq!(f[0].severity, "warning");
let f2 = check_paragraph("Both moons were full.", &empty_ledger(), &[], Some(&ctx));
assert!(f2.is_empty(), "got {f2:?}");
}
}