Skip to main content

codec_eval/import/
mod.rs

//! CSV import for third-party encoder results.
//!
//! This module allows importing benchmark results from external sources,
//! enabling cross-codec comparisons without re-running encoders.
//!
//! ## Supported Formats
//!
//! The importer is flexible and can handle various CSV schemas. Only two
//! columns are required; the import fails if either cannot be located:
//! - Image identifier
//! - Codec name
//!
//! Every other column is optional and is imported when present:
//! - Quality setting, file size, bits per pixel
//! - Quality metrics (SSIMULACRA2, DSSIM, PSNR, Butteraugli)
//! - Codec version and encode time
//!
//! Column names are matched case-insensitively, and a set of common aliases
//! is tried for any column that is not named explicitly in the schema.
//!
//! ## Example
//!
//! ```rust,ignore
//! use codec_eval::import::{CsvImporter, CsvSchema};
//!
//! let schema = CsvSchema::builder()
//!     .image_column("filename")
//!     .codec_column("encoder")
//!     .quality_column("q")
//!     .size_column("bytes")
//!     .dssim_column("dssim")
//!     .build();
//!
//! let results = CsvImporter::new(schema).import("results.csv")?;
//! ```
30
31use std::collections::HashMap;
32use std::path::Path;
33
34use serde::{Deserialize, Serialize};
35
36use crate::error::{Error, Result};
37
38/// An imported result from an external encoder benchmark.
39#[derive(Debug, Clone, Serialize, Deserialize)]
40pub struct ExternalResult {
41    /// Image name or identifier.
42    pub image_name: String,
43
44    /// Codec identifier.
45    pub codec: String,
46
47    /// Codec version (if available).
48    pub codec_version: Option<String>,
49
50    /// Quality setting used.
51    pub quality_setting: Option<f64>,
52
53    /// Encoded file size in bytes.
54    pub file_size: Option<usize>,
55
56    /// Bits per pixel.
57    pub bits_per_pixel: Option<f64>,
58
59    /// SSIMULACRA2 score (if available).
60    pub ssimulacra2: Option<f64>,
61
62    /// DSSIM value (if available).
63    pub dssim: Option<f64>,
64
65    /// PSNR value (if available).
66    pub psnr: Option<f64>,
67
68    /// Butteraugli distance (if available).
69    pub butteraugli: Option<f64>,
70
71    /// Encoding time in milliseconds (if available).
72    pub encode_time_ms: Option<f64>,
73
74    /// Additional fields.
75    #[serde(default)]
76    pub extra: HashMap<String, String>,
77}
78
/// Mapping from result fields to CSV column names.
///
/// Each entry is the header name to look for. A `None` entry means no name
/// was configured for that field; the default schema has every entry `None`.
#[derive(Debug, Clone, Default)]
pub struct CsvSchema {
    /// Header of the column holding the image identifier.
    pub image_column: Option<String>,
    /// Header of the column holding the codec name.
    pub codec_column: Option<String>,
    /// Header of the column holding the codec version.
    pub codec_version_column: Option<String>,
    /// Header of the column holding the quality setting.
    pub quality_column: Option<String>,
    /// Header of the column holding the file size.
    pub size_column: Option<String>,
    /// Header of the column holding bits per pixel.
    pub bpp_column: Option<String>,
    /// Header of the column holding the SSIMULACRA2 score.
    pub ssimulacra2_column: Option<String>,
    /// Header of the column holding the DSSIM value.
    pub dssim_column: Option<String>,
    /// Header of the column holding the PSNR value.
    pub psnr_column: Option<String>,
    /// Header of the column holding the Butteraugli distance.
    pub butteraugli_column: Option<String>,
    /// Header of the column holding the encode time (ms).
    pub encode_time_column: Option<String>,
}
105
106impl CsvSchema {
107    /// Create a schema builder.
108    #[must_use]
109    pub fn builder() -> CsvSchemaBuilder {
110        CsvSchemaBuilder::default()
111    }
112
113    /// Create a schema that auto-detects columns from common names.
114    #[must_use]
115    pub fn auto_detect() -> Self {
116        Self::default()
117    }
118
119    /// Try to find a column index by name (case-insensitive, with aliases).
120    fn find_column(
121        &self,
122        headers: &[&str],
123        primary: Option<&str>,
124        aliases: &[&str],
125    ) -> Option<usize> {
126        // First try the configured column name
127        if let Some(name) = primary {
128            if let Some(idx) = find_header_index(headers, name) {
129                return Some(idx);
130            }
131        }
132
133        // Then try aliases
134        for alias in aliases {
135            if let Some(idx) = find_header_index(headers, alias) {
136                return Some(idx);
137            }
138        }
139
140        None
141    }
142}
143
144/// Builder for CSV schema.
145#[derive(Debug, Default)]
146pub struct CsvSchemaBuilder {
147    schema: CsvSchema,
148}
149
150impl CsvSchemaBuilder {
151    /// Set the image column name.
152    #[must_use]
153    pub fn image_column(mut self, name: impl Into<String>) -> Self {
154        self.schema.image_column = Some(name.into());
155        self
156    }
157
158    /// Set the codec column name.
159    #[must_use]
160    pub fn codec_column(mut self, name: impl Into<String>) -> Self {
161        self.schema.codec_column = Some(name.into());
162        self
163    }
164
165    /// Set the codec version column name.
166    #[must_use]
167    pub fn codec_version_column(mut self, name: impl Into<String>) -> Self {
168        self.schema.codec_version_column = Some(name.into());
169        self
170    }
171
172    /// Set the quality column name.
173    #[must_use]
174    pub fn quality_column(mut self, name: impl Into<String>) -> Self {
175        self.schema.quality_column = Some(name.into());
176        self
177    }
178
179    /// Set the file size column name.
180    #[must_use]
181    pub fn size_column(mut self, name: impl Into<String>) -> Self {
182        self.schema.size_column = Some(name.into());
183        self
184    }
185
186    /// Set the bits per pixel column name.
187    #[must_use]
188    pub fn bpp_column(mut self, name: impl Into<String>) -> Self {
189        self.schema.bpp_column = Some(name.into());
190        self
191    }
192
193    /// Set the SSIMULACRA2 column name.
194    #[must_use]
195    pub fn ssimulacra2_column(mut self, name: impl Into<String>) -> Self {
196        self.schema.ssimulacra2_column = Some(name.into());
197        self
198    }
199
200    /// Set the DSSIM column name.
201    #[must_use]
202    pub fn dssim_column(mut self, name: impl Into<String>) -> Self {
203        self.schema.dssim_column = Some(name.into());
204        self
205    }
206
207    /// Set the PSNR column name.
208    #[must_use]
209    pub fn psnr_column(mut self, name: impl Into<String>) -> Self {
210        self.schema.psnr_column = Some(name.into());
211        self
212    }
213
214    /// Set the Butteraugli column name.
215    #[must_use]
216    pub fn butteraugli_column(mut self, name: impl Into<String>) -> Self {
217        self.schema.butteraugli_column = Some(name.into());
218        self
219    }
220
221    /// Set the encode time column name.
222    #[must_use]
223    pub fn encode_time_column(mut self, name: impl Into<String>) -> Self {
224        self.schema.encode_time_column = Some(name.into());
225        self
226    }
227
228    /// Build the schema.
229    #[must_use]
230    pub fn build(self) -> CsvSchema {
231        self.schema
232    }
233}
234
235/// CSV importer for external results.
236pub struct CsvImporter {
237    schema: CsvSchema,
238}
239
240impl CsvImporter {
241    /// Create a new importer with the given schema.
242    #[must_use]
243    pub fn new(schema: CsvSchema) -> Self {
244        Self { schema }
245    }
246
247    /// Create an importer that auto-detects columns.
248    #[must_use]
249    pub fn auto_detect() -> Self {
250        Self::new(CsvSchema::auto_detect())
251    }
252
253    /// Import results from a CSV file.
254    pub fn import(&self, path: impl AsRef<Path>) -> Result<Vec<ExternalResult>> {
255        let path = path.as_ref();
256        let mut reader = csv::Reader::from_path(path)?;
257
258        let headers: Vec<String> = reader.headers()?.iter().map(String::from).collect();
259        let header_refs: Vec<&str> = headers.iter().map(String::as_str).collect();
260
261        // Find column indices
262        let image_idx = self.schema.find_column(
263            &header_refs,
264            self.schema.image_column.as_deref(),
265            &["image", "filename", "file", "name", "source", "input"],
266        );
267
268        let codec_idx = self.schema.find_column(
269            &header_refs,
270            self.schema.codec_column.as_deref(),
271            &["codec", "encoder", "format", "method"],
272        );
273
274        let version_idx = self.schema.find_column(
275            &header_refs,
276            self.schema.codec_version_column.as_deref(),
277            &["version", "codec_version", "encoder_version"],
278        );
279
280        let quality_idx = self.schema.find_column(
281            &header_refs,
282            self.schema.quality_column.as_deref(),
283            &["quality", "q", "qp", "crf", "effort"],
284        );
285
286        let size_idx = self.schema.find_column(
287            &header_refs,
288            self.schema.size_column.as_deref(),
289            &["size", "file_size", "bytes", "filesize"],
290        );
291
292        let bpp_idx = self.schema.find_column(
293            &header_refs,
294            self.schema.bpp_column.as_deref(),
295            &["bpp", "bits_per_pixel", "bitrate"],
296        );
297
298        let ssimulacra2_idx = self.schema.find_column(
299            &header_refs,
300            self.schema.ssimulacra2_column.as_deref(),
301            &["ssimulacra2", "ssim2", "ssimulacra_2"],
302        );
303
304        let dssim_idx = self.schema.find_column(
305            &header_refs,
306            self.schema.dssim_column.as_deref(),
307            &["dssim", "ssim", "ms_ssim", "ms-ssim"],
308        );
309
310        let psnr_idx = self.schema.find_column(
311            &header_refs,
312            self.schema.psnr_column.as_deref(),
313            &["psnr", "psnr_db", "psnr-hvs"],
314        );
315
316        let butteraugli_idx = self.schema.find_column(
317            &header_refs,
318            self.schema.butteraugli_column.as_deref(),
319            &["butteraugli", "butter", "ba"],
320        );
321
322        let encode_time_idx = self.schema.find_column(
323            &header_refs,
324            self.schema.encode_time_column.as_deref(),
325            &["encode_time", "encode_ms", "time_ms", "encoding_time"],
326        );
327
328        // Check we have at least image and codec columns
329        let image_idx = image_idx.ok_or_else(|| Error::CsvImport {
330            line: 0,
331            reason: "Could not find image/filename column".to_string(),
332        })?;
333
334        let codec_idx = codec_idx.ok_or_else(|| Error::CsvImport {
335            line: 0,
336            reason: "Could not find codec/encoder column".to_string(),
337        })?;
338
339        let mut results = Vec::new();
340
341        for (line_num, record) in reader.records().enumerate() {
342            let record = record.map_err(|e| Error::CsvImport {
343                line: line_num + 2, // +2 for 1-based and header
344                reason: e.to_string(),
345            })?;
346
347            let image_name = record.get(image_idx).unwrap_or("").to_string();
348            let codec = record.get(codec_idx).unwrap_or("").to_string();
349
350            if image_name.is_empty() || codec.is_empty() {
351                continue;
352            }
353
354            let result = ExternalResult {
355                image_name,
356                codec,
357                codec_version: version_idx.and_then(|i| record.get(i)).map(String::from),
358                quality_setting: quality_idx
359                    .and_then(|i| record.get(i))
360                    .and_then(|s| s.parse().ok()),
361                file_size: size_idx
362                    .and_then(|i| record.get(i))
363                    .and_then(|s| s.parse().ok()),
364                bits_per_pixel: bpp_idx
365                    .and_then(|i| record.get(i))
366                    .and_then(|s| s.parse().ok()),
367                ssimulacra2: ssimulacra2_idx
368                    .and_then(|i| record.get(i))
369                    .and_then(|s| s.parse().ok()),
370                dssim: dssim_idx
371                    .and_then(|i| record.get(i))
372                    .and_then(|s| s.parse().ok()),
373                psnr: psnr_idx
374                    .and_then(|i| record.get(i))
375                    .and_then(|s| s.parse().ok()),
376                butteraugli: butteraugli_idx
377                    .and_then(|i| record.get(i))
378                    .and_then(|s| s.parse().ok()),
379                encode_time_ms: encode_time_idx
380                    .and_then(|i| record.get(i))
381                    .and_then(|s| s.parse().ok()),
382                extra: HashMap::new(),
383            };
384
385            results.push(result);
386        }
387
388        Ok(results)
389    }
390}
391
/// Find a header index by name.
///
/// The comparison is case-insensitive and ignores whitespace surrounding
/// either the header or the requested name, so a header written as
/// `" Codec "` still matches `"codec"`.
///
/// Returns the index of the first matching header, or `None` if no header
/// matches.
fn find_header_index(headers: &[&str], name: &str) -> Option<usize> {
    // Normalize the needle once rather than on every comparison.
    let wanted = name.trim().to_lowercase();
    headers
        .iter()
        .position(|header| header.trim().to_lowercase() == wanted)
}
397
#[cfg(test)]
mod tests {
    use super::*;

    /// Column names set through the builder end up on the built schema.
    #[test]
    fn test_schema_builder() {
        let built = CsvSchema::builder()
            .image_column("img")
            .codec_column("enc")
            .quality_column("q")
            .build();

        assert_eq!(built.image_column.as_deref(), Some("img"));
        assert_eq!(built.codec_column.as_deref(), Some("enc"));
        assert_eq!(built.quality_column.as_deref(), Some("q"));
    }

    /// Header lookup ignores case and reports `None` for unknown names.
    #[test]
    fn test_find_header_index() {
        let headers = ["Image", "Codec", "Quality", "DSSIM"];
        let cases: [(&str, Option<usize>); 3] =
            [("image", Some(0)), ("QUALITY", Some(2)), ("unknown", None)];

        for (name, expected) in cases.iter().copied() {
            assert_eq!(find_header_index(&headers, name), expected);
        }
    }
}