use core::fmt;
use std::collections::BTreeMap;
use std::fs;
use std::path::{Path, PathBuf};
use crate::{
jpeg_to_htj2k, JpegToHtj2kError, JpegToHtj2kOptions, TranscodeValidationClassification,
};
pub const TRANSCODE_WSI_ROOT_ENV: &str = "SIGNINUM_TRANSCODE_WSI_ROOT";
pub const REQUIRE_TRANSCODE_WSI_ROOT_ENV: &str = "SIGNINUM_REQUIRE_TRANSCODE_WSI_ROOT";
pub const TRANSCODE_WSI_TILE_LIMIT_ENV: &str = "SIGNINUM_TRANSCODE_WSI_TILE_LIMIT";
pub const TRANSCODE_WSI_MAX_PAYLOAD_BYTES_ENV: &str = "SIGNINUM_TRANSCODE_WSI_MAX_PAYLOAD_BYTES";
const DEFAULT_EXTERNAL_TILE_LIMIT: usize = 8;
const DEFAULT_MAX_EXTERNAL_PAYLOAD_BYTES: u64 = 67_108_864;
#[derive(Debug, Clone, Copy)]
pub struct CorpusFixture<'a> {
pub name: &'a str,
pub bytes: &'a [u8],
}
#[derive(Debug, Clone)]
pub struct OwnedCorpusFixture {
pub name: String,
pub bytes: Vec<u8>,
}
impl OwnedCorpusFixture {
#[must_use]
pub fn as_fixture(&self) -> CorpusFixture<'_> {
CorpusFixture {
name: &self.name,
bytes: &self.bytes,
}
}
}
#[derive(Debug, Clone)]
pub struct CorpusValidationOptions {
pub transcode_options: JpegToHtj2kOptions,
pub external_wsi_roots: Vec<PathBuf>,
pub require_external_wsi: bool,
pub external_tile_limit: usize,
pub max_external_payload_bytes: u64,
}
impl Default for CorpusValidationOptions {
fn default() -> Self {
Self {
transcode_options: JpegToHtj2kOptions::default(),
external_wsi_roots: Vec::new(),
require_external_wsi: false,
external_tile_limit: DEFAULT_EXTERNAL_TILE_LIMIT,
max_external_payload_bytes: DEFAULT_MAX_EXTERNAL_PAYLOAD_BYTES,
}
}
}
impl CorpusValidationOptions {
#[must_use]
pub fn from_env() -> Self {
let mut options = Self::default();
if let Some(roots) = std::env::var_os(TRANSCODE_WSI_ROOT_ENV) {
options.external_wsi_roots = std::env::split_paths(&roots).collect();
}
options.require_external_wsi = std::env::var_os(REQUIRE_TRANSCODE_WSI_ROOT_ENV).is_some();
if let Ok(limit) = std::env::var(TRANSCODE_WSI_TILE_LIMIT_ENV) {
if let Ok(limit) = limit.parse() {
options.external_tile_limit = limit;
}
}
if let Ok(max_bytes) = std::env::var(TRANSCODE_WSI_MAX_PAYLOAD_BYTES_ENV) {
if let Ok(max_bytes) = max_bytes.parse() {
options.max_external_payload_bytes = max_bytes;
}
}
options
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CorpusValidationReport {
pub fixture_count: usize,
pub sample_count: usize,
pub exact_match_count: usize,
pub max_abs_error: i64,
pub classification: TranscodeValidationClassification,
pub histogram_buckets: BTreeMap<i64, usize>,
pub fixtures: Vec<CorpusFixtureReport>,
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CorpusFixtureReport {
pub name: String,
pub dimensions: (u32, u32),
pub component_count: usize,
pub sample_count: usize,
pub exact_match_count: usize,
pub max_abs_error: i64,
pub classification: TranscodeValidationClassification,
}
#[derive(Debug)]
pub enum CorpusValidationError {
EmptyCorpus,
MissingRequiredExternalCorpus(&'static str),
Io {
path: PathBuf,
source: std::io::Error,
},
Transcode {
name: String,
source: JpegToHtj2kError,
},
MissingMetrics {
name: String,
},
}
impl fmt::Display for CorpusValidationError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::EmptyCorpus => write!(f, "corpus validation requires at least one fixture"),
Self::MissingRequiredExternalCorpus(reason) => {
write!(f, "required external corpus is unavailable: {reason}")
}
Self::Io { path, source } => {
write!(
f,
"failed to read external fixture {}: {source}",
path.display()
)
}
Self::Transcode { name, source } => {
write!(f, "failed to transcode fixture {name}: {source}")
}
Self::MissingMetrics { name } => {
write!(f, "fixture {name} did not report validation metrics")
}
}
}
}
impl std::error::Error for CorpusValidationError {
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
match self {
Self::Io { source, .. } => Some(source),
Self::Transcode { source, .. } => Some(source),
Self::EmptyCorpus
| Self::MissingRequiredExternalCorpus(_)
| Self::MissingMetrics { .. } => None,
}
}
}
pub fn validate_transcode_corpus(
fixtures: &[CorpusFixture<'_>],
options: &CorpusValidationOptions,
) -> Result<CorpusValidationReport, CorpusValidationError> {
if fixtures.is_empty() {
return Err(CorpusValidationError::EmptyCorpus);
}
let mut report = CorpusValidationReport {
fixture_count: 0,
sample_count: 0,
exact_match_count: 0,
max_abs_error: 0,
classification: TranscodeValidationClassification::Exact,
histogram_buckets: BTreeMap::new(),
fixtures: Vec::with_capacity(fixtures.len()),
};
for fixture in fixtures.iter().copied() {
let validated = validate_fixture(fixture, options)?;
report.fixture_count += 1;
report.sample_count += validated.report.sample_count;
report.exact_match_count += validated.report.exact_match_count;
report.max_abs_error = report.max_abs_error.max(validated.report.max_abs_error);
for (error, count) in validated.histogram_buckets {
*report.histogram_buckets.entry(error).or_insert(0) += count;
}
report.fixtures.push(validated.report);
}
report.classification = classify_corpus_report(&report);
Ok(report)
}
struct ValidatedFixture {
report: CorpusFixtureReport,
histogram_buckets: BTreeMap<i64, usize>,
}
fn validate_fixture(
fixture: CorpusFixture<'_>,
options: &CorpusValidationOptions,
) -> Result<ValidatedFixture, CorpusValidationError> {
let mut transcode_options = options.transcode_options.clone();
transcode_options.validate_against_integer_reference = true;
let encoded = jpeg_to_htj2k(fixture.bytes, &transcode_options).map_err(|source| {
CorpusValidationError::Transcode {
name: fixture.name.to_string(),
source,
}
})?;
let metrics = encoded
.report
.integer_reference_metrics
.as_ref()
.ok_or_else(|| CorpusValidationError::MissingMetrics {
name: fixture.name.to_string(),
})?;
Ok(ValidatedFixture {
report: CorpusFixtureReport {
name: fixture.name.to_string(),
dimensions: (encoded.report.width, encoded.report.height),
component_count: encoded.report.component_count,
sample_count: metrics.total,
exact_match_count: metrics.exact_matches,
max_abs_error: metrics.max_abs_error,
classification: TranscodeValidationClassification::classify_metrics(metrics),
},
histogram_buckets: metrics.absolute_error_histogram.clone(),
})
}
fn classify_corpus_report(report: &CorpusValidationReport) -> TranscodeValidationClassification {
if report.sample_count == 0 {
return TranscodeValidationClassification::Exact;
}
if report.exact_match_count == report.sample_count && report.max_abs_error == 0 {
TranscodeValidationClassification::Exact
} else {
let exact_match_rate = report.exact_match_count as f64 / report.sample_count as f64;
if report.max_abs_error <= 1 && exact_match_rate >= 0.999 {
TranscodeValidationClassification::OneLsbBounded
} else {
TranscodeValidationClassification::OutsideThreshold
}
}
}
pub fn load_external_wsi_fixtures(
options: &CorpusValidationOptions,
) -> Result<Vec<OwnedCorpusFixture>, CorpusValidationError> {
if options.external_wsi_roots.is_empty() {
if options.require_external_wsi {
return Err(CorpusValidationError::MissingRequiredExternalCorpus(
"no roots configured",
));
}
return Ok(Vec::new());
}
let mut paths = Vec::new();
for root in &options.external_wsi_roots {
collect_jpegs(root, &mut paths);
}
paths.sort();
if paths.is_empty() && options.require_external_wsi {
return Err(CorpusValidationError::MissingRequiredExternalCorpus(
"configured roots contained no JPEG files",
));
}
let limit = if options.external_tile_limit == 0 {
usize::MAX
} else {
options.external_tile_limit
};
let mut fixtures = Vec::new();
for path in paths.into_iter().take(limit) {
let metadata = fs::metadata(&path).map_err(|source| CorpusValidationError::Io {
path: path.clone(),
source,
})?;
if metadata.len() > options.max_external_payload_bytes {
continue;
}
let bytes = fs::read(&path).map_err(|source| CorpusValidationError::Io {
path: path.clone(),
source,
})?;
fixtures.push(OwnedCorpusFixture {
name: path.display().to_string(),
bytes,
});
}
Ok(fixtures)
}
fn collect_jpegs(root: &Path, out: &mut Vec<PathBuf>) {
if root.is_file() {
if is_jpeg_path(root) {
out.push(root.to_path_buf());
}
return;
}
let Ok(entries) = fs::read_dir(root) else {
return;
};
for entry in entries.flatten() {
let path = entry.path();
if path.is_dir() {
collect_jpegs(&path, out);
} else if is_jpeg_path(&path) {
out.push(path);
}
}
}
fn is_jpeg_path(path: &Path) -> bool {
path.extension()
.and_then(|ext| ext.to_str())
.is_some_and(|ext| matches!(ext.to_ascii_lowercase().as_str(), "jpg" | "jpeg"))
}