use std::{
collections::{BTreeMap, HashMap, HashSet},
path::Path,
};
use regex::Regex;
use serde::{Deserialize, Serialize};
use crate::{
helper::{MDD_LIST_SEPARATOR, country_code},
mdd::species::SpeciesData,
};
/// Per-country summary built from a slice of MDD species records.
///
/// Serialized with camelCase keys (see the `serde` attribute below).
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
#[serde(rename_all = "camelCase")]
pub struct CountryStats {
/// Number of entries in `country_data` after the last call to
/// `parse_country_data`.
pub total_countries: u32,
/// IDs of species whose country distribution is the literal
/// "domesticated" (compared case-insensitively).
pub domesticated: Vec<u32>,
/// IDs of species whose country distribution is the literal "NA"
/// (compared case-insensitively).
pub widespread: Vec<u32>,
/// Per-country aggregates, keyed by the code returned from
/// `country_code::get_country_code`.
pub country_data: BTreeMap<String, CountryData>,
}
impl CountryStats {
pub fn new() -> Self {
Self {
total_countries: 0,
domesticated: Vec::new(),
widespread: Vec::new(),
country_data: BTreeMap::new(),
}
}
pub fn parse_country_data(&mut self, mdd_data: &[SpeciesData]) {
let mut records: HashMap<String, CountryRecord> = HashMap::new();
for species in mdd_data {
if species.country_distribution.is_empty() {
continue;
}
if species.country_distribution.to_lowercase() == "domesticated" {
self.domesticated.push(species.id);
continue; }
if species.country_distribution.to_lowercase() == "na" {
self.widespread.push(species.id);
continue; }
if species.country_distribution.contains('|') {
self.parse_multiple_countries(&mut records, &species.country_distribution, species);
} else {
self.update_record(&species.country_distribution, &mut records, species);
}
}
self.update_data(&mut records);
self.check_missing_country_code();
}
pub fn write_to_json_file(&self, file_path: &Path) {
let json_data = self.to_json();
std::fs::write(file_path, json_data).expect("Failed to write CountryMDDStats to JSON file");
}
pub fn get_species_list_by_country(&self, country_code: &str) -> Vec<String> {
let country_data = self.country_data.get(country_code);
country_data.map_or(Vec::new(), |country_data| country_data.species_list.clone())
}
fn to_json(&self) -> String {
serde_json::to_string(self).expect("Failed to serialize CountryMDDStats")
}
fn update_data(&mut self, records: &mut HashMap<String, CountryRecord>) {
for (country_code, record) in records.iter_mut() {
let country_data = CountryData::from_record(record);
self.country_data
.insert(country_code.to_string(), country_data);
}
self.total_countries = self.country_data.len() as u32;
}
fn check_missing_country_code(&self) {
for (code, record) in &self.country_data {
if code.is_empty() {
eprintln!(
"Warning: Empty country code found in MDD data for species IDs: {:?}. \
This will be skipped.",
record.name
);
}
}
}
fn parse_multiple_countries(
&mut self,
records: &mut HashMap<String, CountryRecord>,
distribution: &str,
data: &SpeciesData,
) {
let countries = distribution.split(MDD_LIST_SEPARATOR);
for country in countries {
self.update_record(country, records, data);
}
}
fn update_record(
&mut self,
country_name: &str,
records: &mut HashMap<String, CountryRecord>,
data: &SpeciesData,
) {
let country_name = country_name.trim();
if country_name.is_empty() {
eprintln!(
"Warning: Empty country name found in MDD data for species ID: {}. \
It could be due to trailing spaces. \
This will be skipped.",
data.id
);
return;
}
let predicted = country_name.ends_with('?');
let country_name = if predicted {
country_name.replace("?", "").to_string()
} else {
country_name.to_string()
};
if !country_code::is_known_country_region(&country_name) {
eprintln!(
"Warning: '{}' does not match any known country code.",
country_name
);
}
let country_code = country_code::get_country_code(&country_name);
let record = records
.entry(country_code)
.or_insert_with(|| CountryRecord::new(country_name));
record.update(data, predicted);
}
}
/// Aggregated taxonomy counts and species list for a single country.
///
/// Serialized with camelCase keys (see the `serde` attribute below).
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
#[serde(rename_all = "camelCase")]
pub struct CountryData {
/// Country name copied from the underlying record.
pub name: String,
/// Number of distinct orders recorded for the country.
pub total_orders: u32,
/// Number of distinct families recorded for the country.
pub total_families: u32,
/// Number of distinct genera recorded for the country.
pub total_genera: u32,
/// Number of entries in the record's living-species list.
pub total_living_species: u32,
/// Number of entries in the record's extinct-species list.
pub total_extinct_species: u32,
/// Species IDs as strings: living species first, then extinct ones.
/// IDs recorded with a predicted distribution carry a trailing `?`.
pub species_list: Vec<String>,
}
impl CountryData {
    /// Builds an empty entry: blank name, all counters at zero and no species.
    pub fn new() -> Self {
        Self::default()
    }

    /// Summarises an accumulated `CountryRecord` into its serializable form.
    ///
    /// Counts are taken from the sizes of the record's collections, and the
    /// species list is the living IDs followed by the extinct IDs.
    fn from_record(record: &CountryRecord) -> Self {
        let living = &record.living_species_ids;
        let extinct = &record.extinct_species_ids;

        // Living species come first, extinct species are appended afterwards.
        let mut species_list = Vec::with_capacity(living.len() + extinct.len());
        species_list.extend(living.iter().cloned());
        species_list.extend(extinct.iter().cloned());

        Self {
            name: record.name.clone(),
            total_orders: record.orders.len() as u32,
            total_families: record.families.len() as u32,
            total_genera: record.genera.len() as u32,
            total_living_species: living.len() as u32,
            total_extinct_species: extinct.len() as u32,
            species_list,
        }
    }
}
/// Mutable accumulator for one country while species records are scanned.
struct CountryRecord {
/// Country name supplied when the record was created.
name: String,
/// Distinct order names seen for this country.
orders: HashSet<String>,
/// Distinct family names seen for this country.
families: HashSet<String>,
/// Distinct genus names seen for this country.
genera: HashSet<String>,
/// IDs of species whose `extinct` flag is not 1, in insertion order.
living_species_ids: Vec<String>,
/// IDs of species whose `extinct` flag is 1, in insertion order.
extinct_species_ids: Vec<String>,
}
impl CountryRecord {
    /// Starts an empty record for the given country name.
    fn new(country_name: String) -> Self {
        Self {
            name: country_name,
            orders: HashSet::default(),
            families: HashSet::default(),
            genera: HashSet::default(),
            living_species_ids: Vec::default(),
            extinct_species_ids: Vec::default(),
        }
    }

    /// Folds one species into the record: its ID goes into the living or
    /// extinct list, and its order, family and genus into the distinct sets.
    fn update(&mut self, data: &SpeciesData, predicted_distribution: bool) {
        let species_id = data.id.to_string();
        self.add_species(species_id, data.extinct, predicted_distribution);

        let order = data.taxon_order.to_string();
        self.add_order(order);

        let family = data.family.to_string();
        self.add_family(family);

        let genus = data.genus.to_string();
        self.add_genus(genus);
    }

    /// Appends a species ID to the extinct list when `extinct` is 1, otherwise
    /// to the living list. Predicted distributions get a `?` suffix on the ID.
    fn add_species(&mut self, species_id: String, extinct: u8, predicted_distribution: bool) {
        let mut id = species_id;
        if predicted_distribution {
            id.push('?');
        }

        let target = match extinct {
            1 => &mut self.extinct_species_ids,
            _ => &mut self.living_species_ids,
        };
        target.push(id);
    }

    /// Registers an order name; repeated names are kept once.
    fn add_order(&mut self, order: String) {
        let _ = self.orders.insert(order);
    }

    /// Registers a family name; repeated names are kept once.
    fn add_family(&mut self, family: String) {
        let _ = self.families.insert(family);
    }

    /// Registers a genus name; repeated names are kept once.
    fn add_genus(&mut self, genus: String) {
        let _ = self.genera.insert(genus);
    }
}
// Lazily compiled pattern that matches a string consisting of exactly two
// uppercase ASCII letters (presumably an ISO 3166-1 alpha-2 country code —
// confirm). It is not referenced in the visible portion of this file.
lazy_static::lazy_static! {
static ref COUNTRY_CODE_REGEX: Regex = Regex::new(r"^[A-Z]{2}$").expect("Failed to compile country code regex");
}