1use std::collections::HashMap;
32use std::path::Path;
33
34use serde::{Deserialize, Serialize};
35
36use crate::error::{Error, Result};
37
/// One externally produced benchmark measurement for a single image/codec pair.
///
/// `image_name` and `codec` are always present; every other measurement is
/// optional and is `None` when no value is available for it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExternalResult {
    /// Name of the image the measurement belongs to.
    pub image_name: String,

    /// Name of the codec (encoder) that produced the result.
    pub codec: String,

    /// Version of the codec, if known.
    pub codec_version: Option<String>,

    /// Encoder quality setting used for this result, if known.
    pub quality_setting: Option<f64>,

    /// Size of the encoded file.
    ///
    /// Presumably in bytes (the CSV importer accepts a `bytes` column for
    /// it) — confirm with the data producer.
    pub file_size: Option<usize>,

    /// Bits per pixel of the encoded image.
    pub bits_per_pixel: Option<f64>,

    /// SSIMULACRA2 score.
    pub ssimulacra2: Option<f64>,

    /// DSSIM score.
    ///
    /// NOTE(review): the CSV importer also fills this field from columns
    /// named `ssim`, `ms_ssim` or `ms-ssim` — confirm those values are on the
    /// same scale as DSSIM before comparing them.
    pub dssim: Option<f64>,

    /// PSNR score.
    pub psnr: Option<f64>,

    /// Butteraugli score.
    pub butteraugli: Option<f64>,

    /// Encoding time; milliseconds according to the field name — confirm
    /// with the data producer.
    pub encode_time_ms: Option<f64>,

    /// Additional string key/value pairs attached to the result.
    ///
    /// Deserializes to an empty map when the field is absent.
    #[serde(default)]
    pub extra: HashMap<String, String>,
}
78
79#[derive(Debug, Clone, Default)]
81pub struct CsvSchema {
82 pub image_column: Option<String>,
84 pub codec_column: Option<String>,
86 pub codec_version_column: Option<String>,
88 pub quality_column: Option<String>,
90 pub size_column: Option<String>,
92 pub bpp_column: Option<String>,
94 pub ssimulacra2_column: Option<String>,
96 pub dssim_column: Option<String>,
98 pub psnr_column: Option<String>,
100 pub butteraugli_column: Option<String>,
102 pub encode_time_column: Option<String>,
104}
105
106impl CsvSchema {
107 #[must_use]
109 pub fn builder() -> CsvSchemaBuilder {
110 CsvSchemaBuilder::default()
111 }
112
113 #[must_use]
115 pub fn auto_detect() -> Self {
116 Self::default()
117 }
118
119 fn find_column(
121 &self,
122 headers: &[&str],
123 primary: Option<&str>,
124 aliases: &[&str],
125 ) -> Option<usize> {
126 if let Some(name) = primary {
128 if let Some(idx) = find_header_index(headers, name) {
129 return Some(idx);
130 }
131 }
132
133 for alias in aliases {
135 if let Some(idx) = find_header_index(headers, alias) {
136 return Some(idx);
137 }
138 }
139
140 None
141 }
142}
143
144#[derive(Debug, Default)]
146pub struct CsvSchemaBuilder {
147 schema: CsvSchema,
148}
149
150impl CsvSchemaBuilder {
151 #[must_use]
153 pub fn image_column(mut self, name: impl Into<String>) -> Self {
154 self.schema.image_column = Some(name.into());
155 self
156 }
157
158 #[must_use]
160 pub fn codec_column(mut self, name: impl Into<String>) -> Self {
161 self.schema.codec_column = Some(name.into());
162 self
163 }
164
165 #[must_use]
167 pub fn codec_version_column(mut self, name: impl Into<String>) -> Self {
168 self.schema.codec_version_column = Some(name.into());
169 self
170 }
171
172 #[must_use]
174 pub fn quality_column(mut self, name: impl Into<String>) -> Self {
175 self.schema.quality_column = Some(name.into());
176 self
177 }
178
179 #[must_use]
181 pub fn size_column(mut self, name: impl Into<String>) -> Self {
182 self.schema.size_column = Some(name.into());
183 self
184 }
185
186 #[must_use]
188 pub fn bpp_column(mut self, name: impl Into<String>) -> Self {
189 self.schema.bpp_column = Some(name.into());
190 self
191 }
192
193 #[must_use]
195 pub fn ssimulacra2_column(mut self, name: impl Into<String>) -> Self {
196 self.schema.ssimulacra2_column = Some(name.into());
197 self
198 }
199
200 #[must_use]
202 pub fn dssim_column(mut self, name: impl Into<String>) -> Self {
203 self.schema.dssim_column = Some(name.into());
204 self
205 }
206
207 #[must_use]
209 pub fn psnr_column(mut self, name: impl Into<String>) -> Self {
210 self.schema.psnr_column = Some(name.into());
211 self
212 }
213
214 #[must_use]
216 pub fn butteraugli_column(mut self, name: impl Into<String>) -> Self {
217 self.schema.butteraugli_column = Some(name.into());
218 self
219 }
220
221 #[must_use]
223 pub fn encode_time_column(mut self, name: impl Into<String>) -> Self {
224 self.schema.encode_time_column = Some(name.into());
225 self
226 }
227
228 #[must_use]
230 pub fn build(self) -> CsvSchema {
231 self.schema
232 }
233}
234
235pub struct CsvImporter {
237 schema: CsvSchema,
238}
239
240impl CsvImporter {
241 #[must_use]
243 pub fn new(schema: CsvSchema) -> Self {
244 Self { schema }
245 }
246
247 #[must_use]
249 pub fn auto_detect() -> Self {
250 Self::new(CsvSchema::auto_detect())
251 }
252
253 pub fn import(&self, path: impl AsRef<Path>) -> Result<Vec<ExternalResult>> {
255 let path = path.as_ref();
256 let mut reader = csv::Reader::from_path(path)?;
257
258 let headers: Vec<String> = reader.headers()?.iter().map(String::from).collect();
259 let header_refs: Vec<&str> = headers.iter().map(String::as_str).collect();
260
261 let image_idx = self.schema.find_column(
263 &header_refs,
264 self.schema.image_column.as_deref(),
265 &["image", "filename", "file", "name", "source", "input"],
266 );
267
268 let codec_idx = self.schema.find_column(
269 &header_refs,
270 self.schema.codec_column.as_deref(),
271 &["codec", "encoder", "format", "method"],
272 );
273
274 let version_idx = self.schema.find_column(
275 &header_refs,
276 self.schema.codec_version_column.as_deref(),
277 &["version", "codec_version", "encoder_version"],
278 );
279
280 let quality_idx = self.schema.find_column(
281 &header_refs,
282 self.schema.quality_column.as_deref(),
283 &["quality", "q", "qp", "crf", "effort"],
284 );
285
286 let size_idx = self.schema.find_column(
287 &header_refs,
288 self.schema.size_column.as_deref(),
289 &["size", "file_size", "bytes", "filesize"],
290 );
291
292 let bpp_idx = self.schema.find_column(
293 &header_refs,
294 self.schema.bpp_column.as_deref(),
295 &["bpp", "bits_per_pixel", "bitrate"],
296 );
297
298 let ssimulacra2_idx = self.schema.find_column(
299 &header_refs,
300 self.schema.ssimulacra2_column.as_deref(),
301 &["ssimulacra2", "ssim2", "ssimulacra_2"],
302 );
303
304 let dssim_idx = self.schema.find_column(
305 &header_refs,
306 self.schema.dssim_column.as_deref(),
307 &["dssim", "ssim", "ms_ssim", "ms-ssim"],
308 );
309
310 let psnr_idx = self.schema.find_column(
311 &header_refs,
312 self.schema.psnr_column.as_deref(),
313 &["psnr", "psnr_db", "psnr-hvs"],
314 );
315
316 let butteraugli_idx = self.schema.find_column(
317 &header_refs,
318 self.schema.butteraugli_column.as_deref(),
319 &["butteraugli", "butter", "ba"],
320 );
321
322 let encode_time_idx = self.schema.find_column(
323 &header_refs,
324 self.schema.encode_time_column.as_deref(),
325 &["encode_time", "encode_ms", "time_ms", "encoding_time"],
326 );
327
328 let image_idx = image_idx.ok_or_else(|| Error::CsvImport {
330 line: 0,
331 reason: "Could not find image/filename column".to_string(),
332 })?;
333
334 let codec_idx = codec_idx.ok_or_else(|| Error::CsvImport {
335 line: 0,
336 reason: "Could not find codec/encoder column".to_string(),
337 })?;
338
339 let mut results = Vec::new();
340
341 for (line_num, record) in reader.records().enumerate() {
342 let record = record.map_err(|e| Error::CsvImport {
343 line: line_num + 2, reason: e.to_string(),
345 })?;
346
347 let image_name = record.get(image_idx).unwrap_or("").to_string();
348 let codec = record.get(codec_idx).unwrap_or("").to_string();
349
350 if image_name.is_empty() || codec.is_empty() {
351 continue;
352 }
353
354 let result = ExternalResult {
355 image_name,
356 codec,
357 codec_version: version_idx.and_then(|i| record.get(i)).map(String::from),
358 quality_setting: quality_idx
359 .and_then(|i| record.get(i))
360 .and_then(|s| s.parse().ok()),
361 file_size: size_idx
362 .and_then(|i| record.get(i))
363 .and_then(|s| s.parse().ok()),
364 bits_per_pixel: bpp_idx
365 .and_then(|i| record.get(i))
366 .and_then(|s| s.parse().ok()),
367 ssimulacra2: ssimulacra2_idx
368 .and_then(|i| record.get(i))
369 .and_then(|s| s.parse().ok()),
370 dssim: dssim_idx
371 .and_then(|i| record.get(i))
372 .and_then(|s| s.parse().ok()),
373 psnr: psnr_idx
374 .and_then(|i| record.get(i))
375 .and_then(|s| s.parse().ok()),
376 butteraugli: butteraugli_idx
377 .and_then(|i| record.get(i))
378 .and_then(|s| s.parse().ok()),
379 encode_time_ms: encode_time_idx
380 .and_then(|i| record.get(i))
381 .and_then(|s| s.parse().ok()),
382 extra: HashMap::new(),
383 };
384
385 results.push(result);
386 }
387
388 Ok(results)
389 }
390}
391
/// Returns the index of the first entry in `headers` that equals `name`.
///
/// The comparison ignores letter case and any whitespace surrounding either
/// the header or `name`, so a header written as `" Codec"` (for example from
/// a CSV with a space after each comma) still matches `"codec"`. Returns
/// `None` when no header matches.
fn find_header_index(headers: &[&str], name: &str) -> Option<usize> {
    // Normalize the needle once instead of once per header.
    let wanted = name.trim().to_lowercase();
    headers
        .iter()
        .position(|header| header.trim().to_lowercase() == wanted)
}
397
#[cfg(test)]
mod tests {
    use super::*;

    /// Columns set through the builder show up in the built schema.
    #[test]
    fn test_schema_builder() {
        let built = CsvSchema::builder()
            .image_column("img")
            .codec_column("enc")
            .quality_column("q")
            .build();

        let configured = [
            (&built.image_column, "img"),
            (&built.codec_column, "enc"),
            (&built.quality_column, "q"),
        ];
        for &(actual, expected) in &configured {
            assert_eq!(*actual, Some(expected.to_string()));
        }
    }

    /// Header lookup is case-insensitive and reports misses as `None`.
    #[test]
    fn test_find_header_index() {
        let headers = ["Image", "Codec", "Quality", "DSSIM"];
        let cases = [("image", Some(0)), ("QUALITY", Some(2)), ("unknown", None)];

        for &(name, expected) in &cases {
            assert_eq!(find_header_index(&headers, name), expected);
        }
    }
}