use std::collections::BTreeMap;
use std::path::Path;
use serde::{Deserialize, Serialize};
use crate::datasets::sodir::error::{Result, SodirError};
use crate::datasets::sodir::layout::Workdir;
/// One dataset's record inside the index: where its CSV lives and when it was
/// last fetched / checked.
///
/// Optional fields are left out of the serialized form when they are `None`
/// and fall back to `None` when absent on deserialization.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DatasetEntry {
/// Free-form kind tag; [`DatasetEntry::user_supplied`] always writes
/// `"user_supplied"`, [`DatasetEntry::fetched`] stores whatever the caller passes.
pub kind: String,
/// Layer id recorded by [`DatasetEntry::fetched`]; `None` for user-supplied entries.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub layer_id: Option<u32>,
/// Base URL recorded by [`DatasetEntry::fetched`]; `None` for user-supplied entries.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub base_url: Option<String>,
/// Path of the CSV file; both constructors build it as `csv/{stem}.csv`.
// NOTE(review): presumably relative to the workdir root — confirm against `Workdir`.
pub csv_path: String,
/// Row count supplied by the caller at construction time.
pub row_count: u64,
/// Timestamp string of the fetch; `decide_action` parses it as RFC 3339 to
/// compute the entry's age.
pub fetched_at_iso: String,
/// Timestamp string set to the same `now` value as `fetched_at_iso` by both
/// constructors.
// NOTE(review): presumably updated later when only the row count is re-checked — confirm.
pub count_checked_at_iso: String,
/// Fetch duration in seconds, rounded to two decimals by
/// [`DatasetEntry::fetched`]; `None` for user-supplied entries.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub fetch_duration_secs: Option<f64>,
}
impl DatasetEntry {
    /// Builds the entry for a dataset that was just downloaded.
    ///
    /// * `kind`, `layer_id`, `base_url` are stored as given.
    /// * `stem` is the CSV file stem; the stored path is `csv/{stem}.csv`.
    /// * `row_count` is stored as given.
    /// * `duration_secs` is rounded to two decimal places before being stored.
    /// * `now` is used for both `fetched_at_iso` and `count_checked_at_iso`.
    pub fn fetched(
        kind: &str,
        layer_id: u32,
        base_url: &str,
        stem: &str,
        row_count: u64,
        duration_secs: f64,
        now: &str,
    ) -> Self {
        // Keep only centisecond precision so the serialized index stays tidy.
        let rounded_secs = (duration_secs * 100.0).round() / 100.0;
        let stamp = now.to_owned();
        Self {
            kind: kind.to_owned(),
            layer_id: Some(layer_id),
            base_url: Some(base_url.to_owned()),
            csv_path: ["csv/", stem, ".csv"].concat(),
            row_count,
            fetched_at_iso: stamp.clone(),
            count_checked_at_iso: stamp,
            fetch_duration_secs: Some(rounded_secs),
        }
    }

    /// Builds the entry for a CSV the user dropped in by hand.
    ///
    /// The kind is fixed to `"user_supplied"` and every catalog-related
    /// optional field (`layer_id`, `base_url`, `fetch_duration_secs`) is `None`.
    /// `now` is used for both timestamp fields.
    pub fn user_supplied(stem: &str, row_count: u64, now: &str) -> Self {
        let stamp = now.to_owned();
        Self {
            kind: String::from("user_supplied"),
            layer_id: None,
            base_url: None,
            csv_path: ["csv/", stem, ".csv"].concat(),
            row_count,
            fetched_at_iso: stamp.clone(),
            count_checked_at_iso: stamp,
            fetch_duration_secs: None,
        }
    }
}
/// Top-level index document that `load` reads and `save` writes as JSON.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SodirIndex {
/// Version number of the index layout; the `Default` impl writes `1`.
pub schema_version: u32,
/// Service endpoint URL; the `Default` impl points at the factmaps DataService.
pub endpoint: String,
/// Timestamp string of the last full check, or `None` if none is recorded.
/// `sweep_due` treats `None` as "a sweep is due".
pub last_full_check_iso: Option<String>,
/// Per-dataset entries, ordered by key.
// NOTE(review): keys look like dataset stems (tests use "field") — confirm with callers.
pub datasets: BTreeMap<String, DatasetEntry>,
}
impl Default for SodirIndex {
    /// An empty index: schema version 1, the stock endpoint, no recorded full
    /// check and no datasets.
    fn default() -> Self {
        const SCHEMA_VERSION: u32 = 1;
        const ENDPOINT: &str = "https://factmaps.sodir.no/api/rest/services/DataService";

        let datasets = BTreeMap::new();
        Self {
            schema_version: SCHEMA_VERSION,
            endpoint: String::from(ENDPOINT),
            last_full_check_iso: None,
            datasets,
        }
    }
}
/// Reads the index from the workdir's index file.
///
/// If the index path is not an existing regular file, a default (empty)
/// [`SodirIndex`] is returned instead of an error.
///
/// # Errors
/// Propagates the I/O error if the file cannot be read, and returns
/// `SodirError::Decode` if its contents are not valid index JSON.
pub fn load(workdir: &Workdir) -> Result<SodirIndex> {
    let index_path = workdir.index_file();
    if index_path.is_file() {
        let raw = std::fs::read_to_string(&index_path)?;
        serde_json::from_str(&raw)
            .map_err(|err| SodirError::Decode(format!("sodir_index.json: {err}")))
    } else {
        // Nothing on disk yet: start from a blank index.
        Ok(SodirIndex::default())
    }
}
pub fn save(workdir: &Workdir, index: &SodirIndex) -> Result<()> {
let text = serde_json::to_string_pretty(index)
.map_err(|e| SodirError::Decode(format!("serialize index: {e}")))?;
std::fs::write(workdir.index_file(), text)?;
Ok(())
}
/// Returns the current UTC time formatted as an RFC 3339 string.
pub fn now_iso() -> String {
    let now = chrono::Utc::now();
    now.to_rfc3339()
}
/// Age of an RFC 3339 timestamp in (fractional) days, measured against the
/// current UTC time.
///
/// The difference is taken in whole seconds and divided by 86 400. A string
/// that does not parse as RFC 3339 yields `f64::INFINITY`, so unparseable
/// timestamps compare as older than any cooldown.
pub fn age_days_iso(iso: &str) -> f64 {
    const SECS_PER_DAY: f64 = 86_400.0;

    let Ok(stamp) = chrono::DateTime::parse_from_rfc3339(iso) else {
        return f64::INFINITY;
    };
    let stamp_utc = stamp.with_timezone(&chrono::Utc);
    let elapsed = chrono::Utc::now().signed_duration_since(stamp_utc);
    elapsed.num_seconds() as f64 / SECS_PER_DAY
}
/// Age of a file's modification time in (fractional) days.
///
/// Returns `None` when the metadata or modification time cannot be read, or
/// when the modification time lies after the current system time.
pub fn file_mtime_age_days(path: &Path) -> Option<f64> {
    const SECS_PER_DAY: f64 = 86_400.0;

    let modified = std::fs::metadata(path)
        .and_then(|meta| meta.modified())
        .ok()?;
    // `elapsed` fails when `modified` is ahead of the system clock.
    let elapsed = modified.elapsed().ok()?;
    Some(elapsed.as_secs_f64() / SECS_PER_DAY)
}
/// Cheap estimate of the number of data rows in a CSV: the number of physical
/// lines minus one for the header.
///
/// The file is streamed in fixed-size chunks and only newline bytes are
/// counted, so memory use is constant and the text encoding does not matter
/// (files containing non-UTF-8 bytes are still counted). A final line without
/// a trailing newline counts as a line. Quoted fields that contain embedded
/// newlines are counted as extra rows — this is a quick count, not a CSV parse.
///
/// Returns `0` if the file cannot be opened or read, is empty, or contains
/// only a header line.
pub fn quick_row_count(csv_path: &Path) -> u64 {
    let Ok(mut file) = std::fs::File::open(csv_path) else {
        return 0;
    };

    let mut buf = [0u8; 64 * 1024];
    let mut newlines: u64 = 0;
    let mut last_byte: Option<u8> = None;

    loop {
        match std::io::Read::read(&mut file, &mut buf) {
            Ok(0) => break,
            Ok(n) => {
                let chunk = &buf[..n];
                newlines += chunk.iter().filter(|&&b| b == b'\n').count() as u64;
                last_byte = chunk.last().copied();
            }
            Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
            // Unreadable file: report no rows rather than a partial count.
            Err(_) => return 0,
        }
    }

    let lines = match last_byte {
        None => 0,
        Some(b'\n') => newlines,
        // Last line has no terminating newline but still counts.
        Some(_) => newlines + 1,
    };
    lines.saturating_sub(1)
}
/// Whether a full sweep is due.
///
/// A sweep is due when no previous full check is recorded, or when the
/// recorded timestamp is at least `index_cooldown_days` days old (as measured
/// by `age_days_iso`).
pub fn sweep_due(last_full_check: Option<&str>, index_cooldown_days: i64) -> bool {
    let Some(iso) = last_full_check else {
        // Never checked before.
        return true;
    };
    let cooldown = index_cooldown_days as f64;
    age_days_iso(iso) >= cooldown
}
/// What `decide_action` recommends doing with a dataset.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Action {
/// The local CSV is present, plausible and within its cooldown, and no sweep is due.
Skip,
/// The local copy is still within its cooldown but a sweep is due.
// NOTE(review): presumably a lightweight remote check rather than a download — confirm with the caller.
Probe,
/// No index entry, a missing or implausibly small CSV, or an expired cooldown.
Fetch,
}
/// Picks the [`Action`] for one dataset.
///
/// Rules, first match wins:
/// 1. no index `entry` → `Fetch`
/// 2. `csv_path` is not a regular file, cannot be stat'ed, or is shorter than
///    5 bytes → `Fetch`
/// 3. the entry's `fetched_at_iso` is at least `dataset_cooldown_days` days
///    old (unparseable timestamps count as infinitely old) → `Fetch`
/// 4. `sweep_due` is set → `Probe`
/// 5. otherwise → `Skip`
pub fn decide_action(
    entry: Option<&DatasetEntry>,
    csv_path: &Path,
    sweep_due: bool,
    dataset_cooldown_days: i64,
) -> Action {
    /// Anything smaller than this cannot be a meaningful CSV.
    const MIN_PLAUSIBLE_CSV_BYTES: u64 = 5;

    let Some(known) = entry else {
        return Action::Fetch;
    };

    let csv_usable = std::fs::metadata(csv_path)
        .map(|meta| meta.is_file() && meta.len() >= MIN_PLAUSIBLE_CSV_BYTES)
        .unwrap_or(false);
    if !csv_usable {
        return Action::Fetch;
    }

    let expired = age_days_iso(&known.fetched_at_iso) >= dataset_cooldown_days as f64;
    match (expired, sweep_due) {
        (true, _) => Action::Fetch,
        (false, true) => Action::Probe,
        (false, false) => Action::Skip,
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Timestamp far older than any cooldown used in these tests.
    const LONG_AGO: &str = "2000-01-01T00:00:00+00:00";

    /// A catalog-style entry whose two timestamps are both `fetched_at`.
    fn entry(fetched_at: &str) -> DatasetEntry {
        let stamp = fetched_at.to_string();
        DatasetEntry {
            kind: "layer".to_string(),
            layer_id: Some(7100),
            base_url: Some("http://x".to_string()),
            csv_path: "csv/field.csv".to_string(),
            row_count: 87,
            fetched_at_iso: stamp.clone(),
            count_checked_at_iso: stamp,
            fetch_duration_secs: Some(1.0),
        }
    }

    #[test]
    fn index_json_roundtrips() {
        let mut original = SodirIndex::default();
        original
            .datasets
            .insert("field".to_string(), entry(&now_iso()));

        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: SodirIndex = serde_json::from_str(&encoded).unwrap();

        assert_eq!(decoded.schema_version, 1);
        assert!(decoded.datasets.contains_key("field"));
    }

    #[test]
    fn user_supplied_entry_omits_catalog_fields() {
        let supplied = DatasetEntry::user_supplied("custom", 10, &now_iso());
        let encoded = serde_json::to_string(&supplied).unwrap();

        // `None` optionals must not appear in the serialized form at all.
        for absent in ["layer_id", "base_url", "fetch_duration_secs"] {
            assert!(!encoded.contains(absent));
        }
    }

    #[test]
    fn decide_action_truth_table() {
        let dir = tempfile::tempdir().unwrap();
        let present_csv = dir.path().join("field.csv");
        std::fs::write(&present_csv, "a,b\n1,2\n").unwrap();
        let absent_csv = dir.path().join("nope.csv");
        let just_now = now_iso();

        // No index entry at all.
        assert_eq!(decide_action(None, &present_csv, false, 30), Action::Fetch);

        // Entry exists but the CSV is gone.
        assert_eq!(
            decide_action(Some(&entry(&just_now)), &absent_csv, false, 30),
            Action::Fetch
        );

        // Fresh entry, CSV present, no sweep.
        assert_eq!(
            decide_action(Some(&entry(&just_now)), &present_csv, false, 30),
            Action::Skip
        );

        // Fresh entry, CSV present, sweep due.
        assert_eq!(
            decide_action(Some(&entry(&just_now)), &present_csv, true, 30),
            Action::Probe
        );

        // Entry older than the cooldown.
        assert_eq!(
            decide_action(Some(&entry(LONG_AGO)), &present_csv, false, 30),
            Action::Fetch
        );
    }

    #[test]
    fn corrupt_tiny_csv_forces_fetch() {
        let dir = tempfile::tempdir().unwrap();
        let tiny_csv = dir.path().join("field.csv");
        std::fs::write(&tiny_csv, "\n").unwrap();

        assert_eq!(
            decide_action(Some(&entry(&now_iso())), &tiny_csv, false, 30),
            Action::Fetch
        );
    }

    #[test]
    fn sweep_due_logic() {
        assert!(sweep_due(None, 14));
        assert!(sweep_due(Some(LONG_AGO), 14));
        assert!(!sweep_due(Some(&now_iso()), 14));
    }
}