statumen 0.3.0

Statumen whole-slide image reader
Documentation
//! Parity-corpus manifest loader.

use std::path::{Path, PathBuf};

use serde::Deserialize;

/// One slide record deserialized from a parity-corpus manifest.
///
/// Fields marked `#[serde(default)]` may be omitted from the manifest and
/// then take their type's default (empty string, empty list, or `false`).
#[derive(Debug, Clone, Deserialize)]
pub struct CorpusEntry {
    /// Name of the slide. Not interpreted by the code in this file;
    /// presumably a human-readable label (TODO confirm).
    pub name: String,
    /// Short identifier: matched by `find_slide_by_alias` and the alias
    /// filter, and used as the stem of cache file names.
    pub alias: String,
    /// Optional explicit location of the slide file; empty when not given.
    /// `resolve_entry_path` tries it before any cache location.
    #[serde(default)]
    pub path: String,
    /// Format identifier (for example `"aperio"`, `"mirax"`, `"dicom"`); it
    /// selects which file extension is looked up in the cache.
    pub format: String,
    /// Codec names for the slide. Not interpreted in this file.
    pub codecs: Vec<String>,
    /// Levels that must decode, written as `"base"` (level 0) or
    /// `"level<N>"`; see `must_decode_level`.
    #[serde(default)]
    pub must_decode: Vec<String>,
    /// Where the slide comes from. Not interpreted in this file.
    pub source: String,
    /// License of the slide. Not interpreted in this file.
    pub license: String,
    /// Redistribution flag. Not interpreted in this file.
    pub redistributable: bool,
    /// Presumably the hex SHA-256 digest of the slide file (TODO confirm);
    /// not verified by the code in this file.
    #[serde(default)]
    pub sha256: String,
    /// Citation text. Not interpreted in this file.
    #[serde(default)]
    pub citation: String,
    /// NOTE(review): looks like a "reviewed for protected health
    /// information" flag; confirm. Not interpreted in this file.
    #[serde(default)]
    pub phi_reviewed: bool,
    /// Region specifications with relaxed comparison, judging by the name
    /// (TODO confirm format); not interpreted in this file.
    #[serde(default)]
    pub tolerant_regions: Vec<String>,
    /// Known failures, written as `"<pair>:level<N>"` or `"<pair>:base"`
    /// (level 0); see `expected_failure`.
    #[serde(default)]
    pub expected_failures: Vec<String>,
    /// Download URL. Its last `/`-separated segment is used as one of the
    /// cache file-name candidates.
    #[serde(default)]
    pub url: String,
}

impl CorpusEntry {
    /// Reports whether `level` is listed in `must_decode`.
    ///
    /// An item matches when it is `"base"` and `level` is 0, or when it is
    /// `"level<N>"` with `<N>` parsing to `level`. Items in any other shape
    /// never match.
    pub fn must_decode_level(&self, level: u32) -> bool {
        for spec in &self.must_decode {
            let hit = match spec.as_str() {
                "base" => level == 0,
                other => other
                    .strip_prefix("level")
                    .and_then(|digits| digits.parse::<u32>().ok())
                    .is_some_and(|parsed| parsed == level),
            };
            if hit {
                return true;
            }
        }
        false
    }

    /// Reports whether `expected_failures` lists `pair` at `level`.
    ///
    /// The accepted spellings are `"<pair>:level<level>"` and, for level 0
    /// only, `"<pair>:base"`. Comparison is exact string equality.
    pub fn expected_failure(&self, pair: &str, level: u32) -> bool {
        let by_level = format!("{pair}:level{level}");
        // The "base" spelling is only an alias for level 0.
        let by_base = (level == 0).then(|| format!("{pair}:base"));
        self.expected_failures
            .iter()
            .any(|listed| *listed == by_level || by_base.as_deref() == Some(listed.as_str()))
    }
}

/// A parsed parity-corpus manifest: the list of slide entries it declares.
#[derive(Debug, Clone, Deserialize)]
pub struct CorpusManifest {
    /// Slide entries, read from the manifest key `slide`; empty when the
    /// manifest has no such key.
    #[serde(rename = "slide", default)]
    pub slides: Vec<CorpusEntry>,
}

/// Parses manifest TOML text into a [`CorpusManifest`].
///
/// # Errors
///
/// Returns `"manifest parse: <detail>"` when the text is not valid TOML or
/// does not match the manifest structure.
pub fn parse_manifest(toml_text: &str) -> Result<CorpusManifest, String> {
    let parsed: Result<CorpusManifest, _> = toml::from_str(toml_text);
    match parsed {
        Ok(manifest) => Ok(manifest),
        Err(err) => Err(format!("manifest parse: {err}")),
    }
}

/// Loads the public manifest from [`public_manifest_path`].
///
/// After parsing, the slide list is narrowed by the alias filter taken from
/// the `STATUMEN_PARITY_ALIASES` environment variable, when that variable is
/// set and readable as Unicode.
///
/// # Errors
///
/// Returns `"read <path>: <detail>"` when the file cannot be read, or the
/// error from [`parse_manifest`] when its contents cannot be parsed.
pub fn load_public() -> Result<CorpusManifest, String> {
    let manifest_path = public_manifest_path();
    let contents = match std::fs::read_to_string(&manifest_path) {
        Ok(contents) => contents,
        Err(e) => return Err(format!("read {}: {e}", manifest_path.display())),
    };
    let mut manifest = parse_manifest(&contents)?;
    let alias_filter = std::env::var("STATUMEN_PARITY_ALIASES").ok();
    apply_alias_filter(&mut manifest, alias_filter.as_deref());
    Ok(manifest)
}

pub fn load_private() -> Result<Option<CorpusManifest>, String> {
    let path = if let Some(p) = std::env::var_os("STATUMEN_PARITY_PRIVATE_MANIFEST") {
        PathBuf::from(p)
    } else {
        private_manifest_path()
    };
    if !path.is_file() {
        return Ok(None);
    }
    let text =
        std::fs::read_to_string(&path).map_err(|e| format!("read {}: {e}", path.display()))?;
    Ok(Some(parse_manifest(&text)?))
}

/// Returns the directory used as the corpus cache.
///
/// The `STATUMEN_PARITY_CORPUS_CACHE` environment variable wins when set.
/// Otherwise the result is `<HOME>/.cache/slideviewer/parity-corpus`, with
/// `.` standing in for `HOME` when that variable is not set.
pub fn corpus_cache_dir() -> PathBuf {
    match std::env::var_os("STATUMEN_PARITY_CORPUS_CACHE") {
        Some(explicit) => PathBuf::from(explicit),
        None => {
            let mut dir = match std::env::var_os("HOME") {
                Some(home) => PathBuf::from(home),
                None => PathBuf::from("."),
            };
            dir.push(".cache");
            dir.push("slideviewer");
            dir.push("parity-corpus");
            dir
        }
    }
}

/// Resolves the on-disk location for `entry`.
///
/// Lookup order:
/// 1. `entry.path`, when non-empty and an existing file as given.
/// 2. `entry.path` taken relative to the directory two levels above
///    `CARGO_MANIFEST_DIR`, when it is a relative path and that file exists.
/// 3. The first cache candidate that resolves to an existing file.
///
/// When nothing exists, the first cache candidate is returned anyway, so the
/// result is not guaranteed to exist; callers check it themselves.
pub fn resolve_entry_path(entry: &CorpusEntry) -> PathBuf {
    if !entry.path.is_empty() {
        let explicit = PathBuf::from(&entry.path);
        if explicit.is_file() {
            return explicit;
        }
        if explicit.is_relative() {
            let repo_relative = Path::new(env!("CARGO_MANIFEST_DIR"))
                .join("..")
                .join("..")
                .join(&explicit);
            if repo_relative.is_file() {
                return repo_relative;
            }
        }
    }

    // Build the candidate list once and reuse it for both the probe and the
    // fallback, instead of recomputing the cache directory and every path.
    let candidates = cache_candidates(entry);
    if let Some(found) = candidates
        .iter()
        .find_map(|candidate| resolve_candidate(entry, candidate))
    {
        return found;
    }

    // Nothing exists yet: report where the slide would be expected.
    candidates
        .into_iter()
        .next()
        .unwrap_or_else(|| corpus_cache_dir().join(&entry.alias))
}

pub fn find_slide_by_alias(alias: &str) -> Option<PathBuf> {
    if let Ok(public) = load_public() {
        for entry in &public.slides {
            if entry.alias == alias {
                let path = resolve_entry_path(entry);
                if path.is_file() {
                    return Some(path);
                }
            }
        }
    }
    if let Ok(Some(private)) = load_private() {
        for entry in &private.slides {
            if entry.alias == alias {
                let path = resolve_entry_path(entry);
                if path.is_file() {
                    return Some(path);
                }
            }
        }
    }
    None
}

/// Lists the cache paths where `entry` may live, in priority order:
/// `<cache>/<alias>`, `<cache>/<alias>.d`, `<cache>/<alias>.<ext>` when the
/// format has a default extension, and `<cache>/<file name from url>` when
/// the URL yields one. The list always has at least two elements.
fn cache_candidates(entry: &CorpusEntry) -> Vec<PathBuf> {
    let cache_root = corpus_cache_dir();
    let alias = &entry.alias;

    let mut paths = vec![
        cache_root.join(alias),
        cache_root.join(format!("{}.d", alias)),
    ];
    // `Option` iterates over zero or one item, so these append only when present.
    paths.extend(
        format_default_extension(&entry.format)
            .map(|ext| cache_root.join(format!("{}.{}", alias, ext))),
    );
    paths.extend(url_file_name(&entry.url).map(|file_name| cache_root.join(file_name)));
    paths
}

/// Turns a cache candidate into a concrete file, if possible.
///
/// A candidate that is a file is returned as-is. A candidate that is a
/// directory is searched for a file with the format's entry-point extension
/// (`vms`, `mrxs` or `dcm`); other formats have no directory form. Anything
/// else yields `None`.
fn resolve_candidate(entry: &CorpusEntry, candidate: &Path) -> Option<PathBuf> {
    if candidate.is_file() {
        Some(candidate.to_path_buf())
    } else if candidate.is_dir() {
        let entry_point_ext = match entry.format.as_str() {
            "hamamatsu_vms" => Some("vms"),
            "mirax" => Some("mrxs"),
            "dicom" => Some("dcm"),
            _ => None,
        };
        entry_point_ext.and_then(|ext| find_file_with_extension(candidate, ext))
    } else {
        None
    }
}

/// Searches `root` depth-first for a file whose extension equals `ext`,
/// ignoring ASCII case.
///
/// Entries of each directory are visited in sorted path order, so the
/// result does not depend on the order the filesystem lists them in.
/// Returns `None` when `root` cannot be read or holds no matching file.
/// Entries that fail to read are skipped.
fn find_file_with_extension(root: &Path, ext: &str) -> Option<PathBuf> {
    let mut paths: Vec<PathBuf> = std::fs::read_dir(root)
        .ok()?
        .flatten()
        .map(|entry| entry.path())
        .collect();
    // `read_dir` order is platform and filesystem dependent; sort so that a
    // directory holding several matches resolves to the same file everywhere.
    paths.sort();

    for path in paths {
        if path.is_file() {
            let matches = path
                .extension()
                .and_then(|value| value.to_str())
                .is_some_and(|value| value.eq_ignore_ascii_case(ext));
            if matches {
                return Some(path);
            }
        } else if path.is_dir() {
            if let Some(found) = find_file_with_extension(&path, ext) {
                return Some(found);
            }
        }
    }
    None
}

/// Returns the text after the last `/` in `url`, or the whole string when
/// it has no `/`. Yields `None` when that text is empty.
fn url_file_name(url: &str) -> Option<&str> {
    let tail = match url.rsplit_once('/') {
        Some((_, after_slash)) => after_slash,
        None => url,
    };
    if tail.is_empty() {
        None
    } else {
        Some(tail)
    }
}

/// Maps a manifest format identifier to the extension used for its cached
/// file, or `None` for an unrecognized format. Matching is exact and
/// case-sensitive.
pub(crate) fn format_default_extension(format: &str) -> Option<&'static str> {
    // (format identifier, extension) pairs.
    const EXTENSIONS: &[(&str, &str)] = &[
        ("aperio", "svs"),
        ("leica", "scn"),
        ("ventana", "bif"),
        ("philips_tiff", "tif"),
        ("tiff", "tif"),
        ("ndpi", "ndpi"),
        ("hamamatsu_vms", "zip"),
        ("dicom", "dcm"),
        ("zeiss_czi", "czi"),
        ("mirax", "zip"),
    ];
    EXTENSIONS
        .iter()
        .find(|(name, _)| *name == format)
        .map(|&(_, ext)| ext)
}

pub fn public_manifest_path() -> PathBuf {
    Path::new(env!("CARGO_MANIFEST_DIR"))
        .join("tests")
        .join("fixtures")
        .join("parity_corpus.public.toml")
}

pub fn private_manifest_path() -> PathBuf {
    Path::new(env!("CARGO_MANIFEST_DIR"))
        .join("tests")
        .join("fixtures")
        .join("parity_corpus.private.toml")
}

/// Keeps only the slides whose alias is named in `raw_aliases`.
///
/// `raw_aliases` is a list separated by commas, semicolons and/or
/// whitespace. The manifest is left untouched when `raw_aliases` is `None`
/// or names no alias at all.
pub(crate) fn apply_alias_filter(manifest: &mut CorpusManifest, raw_aliases: Option<&str>) {
    let wanted: std::collections::HashSet<&str> = match raw_aliases {
        Some(raw) => raw
            .split(|ch: char| ch == ',' || ch == ';' || ch.is_whitespace())
            .filter(|token| !token.is_empty())
            .collect(),
        None => return,
    };
    // A filter made only of separators selects nothing in particular.
    if !wanted.is_empty() {
        manifest
            .slides
            .retain(|slide| wanted.contains(slide.alias.as_str()));
    }
}