Skip to main content

casc_lib/extract/
metadata.rs

1//! Extraction metadata recording and statistics.
2//!
3//! During a bulk extraction, the `MetadataWriter` records per-file results to
4//! both JSONL and CSV index files under a `.casc-meta/` directory. When
5//! extraction finishes, a `summary.json` with aggregate `ExtractionStats`
6//! is written alongside the index files.
7
8use std::fs::{self, File};
9use std::io::{BufWriter, Write};
10use std::path::{Path, PathBuf};
11use std::sync::Mutex;
12use std::time::Instant;
13
14use serde::Serialize;
15
16use crate::error::{CascError, Result};
17
18/// A single file extraction result.
19#[derive(Debug, Clone, Serialize)]
20pub struct MetadataEntry {
21    /// FileDataID of the extracted file.
22    pub fdid: u32,
23    /// Resolved file path from the listfile, or `"unknown/<fdid>.dat"`.
24    pub path: String,
25    /// Size of the extracted file in bytes (0 on error/skip).
26    pub size: u64,
27    /// Hex-encoded content key (CKey) for this file.
28    pub ckey: String,
29    /// Raw locale flags bitmask from the root entry.
30    pub locale_flags: u32,
31    /// Raw content flags bitmask from the root entry.
32    pub content_flags: u32,
33    /// Extraction status: `"ok"`, `"error:<reason>"`, or `"skipped:<reason>"`.
34    pub status: String,
35}
36
37/// Accumulated extraction statistics.
38#[derive(Debug, Clone, Serialize)]
39pub struct ExtractionStats {
40    /// Total number of files processed (success + errors + skipped).
41    pub total: u64,
42    /// Number of files successfully extracted.
43    pub success: u64,
44    /// Number of files that failed extraction.
45    pub errors: u64,
46    /// Number of files skipped (e.g. encrypted files when `skip_encrypted` is set).
47    pub skipped: u64,
48    /// Total bytes written to disk across all successful extractions.
49    pub bytes_written: u64,
50}
51
52impl ExtractionStats {
53    /// Create a zeroed stats instance.
54    pub fn new() -> Self {
55        Self {
56            total: 0,
57            success: 0,
58            errors: 0,
59            skipped: 0,
60            bytes_written: 0,
61        }
62    }
63}
64
65impl Default for ExtractionStats {
66    fn default() -> Self {
67        Self::new()
68    }
69}
70
71/// Summary written at the end of extraction.
72#[derive(Debug, Serialize)]
73pub struct ExtractionSummary {
74    /// Build name from the build config.
75    pub build: String,
76    /// Product identifier (e.g. `"wow"`).
77    pub product: String,
78    /// ISO 8601 UTC timestamp of when extraction completed.
79    pub extracted_at: String,
80    /// Wall-clock duration of the extraction in seconds.
81    pub duration_secs: f64,
82    /// Aggregate extraction statistics.
83    pub stats: ExtractionStats,
84}
85
86/// Thread-safe metadata writer that records extraction results to JSONL and CSV
87/// files, and tracks statistics.
88pub struct MetadataWriter {
89    jsonl_writer: Mutex<BufWriter<File>>,
90    csv_writer: Mutex<csv::Writer<File>>,
91    stats: Mutex<ExtractionStats>,
92    meta_dir: PathBuf,
93    build_name: String,
94    product: String,
95    start_time: Instant,
96}
97
98impl MetadataWriter {
99    /// Create a new writer, creating the `.casc-meta/` directory and opening
100    /// index files. Writes the CSV header row.
101    pub fn new(output_dir: &Path, build_name: &str, product: &str) -> Result<Self> {
102        let meta_dir = output_dir.join(".casc-meta");
103        fs::create_dir_all(&meta_dir)?;
104
105        let jsonl_file = File::create(meta_dir.join("index.jsonl"))?;
106        let csv_file = File::create(meta_dir.join("index.csv"))?;
107
108        let jsonl_writer = BufWriter::new(jsonl_file);
109        let mut csv_writer = csv::Writer::from_writer(csv_file);
110
111        // Write CSV header
112        csv_writer
113            .write_record([
114                "fdid",
115                "path",
116                "size",
117                "ckey",
118                "locale_flags",
119                "content_flags",
120                "status",
121            ])
122            .map_err(|e| CascError::Io(std::io::Error::other(e)))?;
123
124        Ok(Self {
125            jsonl_writer: Mutex::new(jsonl_writer),
126            csv_writer: Mutex::new(csv_writer),
127            stats: Mutex::new(ExtractionStats::new()),
128            meta_dir,
129            build_name: build_name.to_owned(),
130            product: product.to_owned(),
131            start_time: Instant::now(),
132        })
133    }
134
135    /// Record a single extraction result. Thread-safe.
136    pub fn record(&self, entry: &MetadataEntry) -> Result<()> {
137        // Write JSONL line
138        {
139            let json_line = serde_json::to_string(entry)
140                .map_err(|e| CascError::Io(std::io::Error::other(e)))?;
141            let mut writer = self.jsonl_writer.lock().unwrap();
142            writeln!(writer, "{json_line}")?;
143        }
144
145        // Write CSV row
146        {
147            let mut writer = self.csv_writer.lock().unwrap();
148            writer
149                .write_record(&[
150                    entry.fdid.to_string(),
151                    entry.path.clone(),
152                    entry.size.to_string(),
153                    entry.ckey.clone(),
154                    entry.locale_flags.to_string(),
155                    entry.content_flags.to_string(),
156                    entry.status.clone(),
157                ])
158                .map_err(|e| CascError::Io(std::io::Error::other(e)))?;
159        }
160
161        // Update stats
162        {
163            let mut stats = self.stats.lock().unwrap();
164            stats.total += 1;
165            if entry.status == "ok" {
166                stats.success += 1;
167                stats.bytes_written += entry.size;
168            } else if entry.status.starts_with("error") {
169                stats.errors += 1;
170            } else if entry.status.starts_with("skipped") {
171                stats.skipped += 1;
172            }
173        }
174
175        Ok(())
176    }
177
178    /// Get a snapshot of the current stats.
179    pub fn stats(&self) -> ExtractionStats {
180        self.stats.lock().unwrap().clone()
181    }
182
183    /// Finalize: flush files and write `summary.json`.
184    pub fn finish(self) -> Result<ExtractionStats> {
185        let duration = self.start_time.elapsed();
186
187        // Flush JSONL writer
188        {
189            let mut writer = self.jsonl_writer.lock().unwrap();
190            writer.flush()?;
191        }
192
193        // Flush CSV writer
194        {
195            let mut writer = self.csv_writer.lock().unwrap();
196            writer
197                .flush()
198                .map_err(|e| CascError::Io(std::io::Error::other(e)))?;
199        }
200
201        let stats = self.stats.lock().unwrap().clone();
202
203        // Write summary.json
204        let summary = ExtractionSummary {
205            build: self.build_name.clone(),
206            product: self.product.clone(),
207            extracted_at: now_iso8601(),
208            duration_secs: duration.as_secs_f64(),
209            stats: stats.clone(),
210        };
211
212        let summary_path = self.meta_dir.join("summary.json");
213        let summary_file = File::create(summary_path)?;
214        serde_json::to_writer_pretty(BufWriter::new(summary_file), &summary)
215            .map_err(|e| CascError::Io(std::io::Error::other(e)))?;
216
217        Ok(stats)
218    }
219}
220
221/// Produce an ISO 8601 timestamp without pulling in the `chrono` crate.
222fn now_iso8601() -> String {
223    // Use SystemTime to produce a basic UTC timestamp.
224    use std::time::SystemTime;
225    let dur = SystemTime::now()
226        .duration_since(SystemTime::UNIX_EPOCH)
227        .unwrap_or_default();
228    let secs = dur.as_secs();
229
230    // Break epoch seconds into date/time components (UTC).
231    let days = secs / 86400;
232    let time_of_day = secs % 86400;
233    let hours = time_of_day / 3600;
234    let minutes = (time_of_day % 3600) / 60;
235    let seconds = time_of_day % 60;
236
237    // Convert days since epoch to y/m/d using a civil calendar algorithm.
238    let (year, month, day) = days_to_ymd(days as i64);
239
240    format!("{year:04}-{month:02}-{day:02}T{hours:02}:{minutes:02}:{seconds:02}Z")
241}
242
/// Translate a day count relative to 1970-01-01 into a `(year, month, day)`
/// triple, following Howard Hinnant's `civil_from_days` algorithm.
fn days_to_ymd(days: i64) -> (i64, u32, u32) {
    // Length of one 400-year cycle, in days.
    const DAYS_PER_ERA: i64 = 146_097;
    // Offset that moves the origin from 1970-01-01 to 0000-03-01.
    const EPOCH_SHIFT: i64 = 719_468;

    let shifted = days + EPOCH_SHIFT;

    // Floor division: nudge negative values down before the truncating divide.
    let era_numerator = if shifted < 0 {
        shifted - (DAYS_PER_ERA - 1)
    } else {
        shifted
    };
    let era = era_numerator / DAYS_PER_ERA;

    let day_of_era = (shifted - era * DAYS_PER_ERA) as u32;
    let year_of_era =
        (day_of_era - day_of_era / 1460 + day_of_era / 36_524 - day_of_era / 146_096) / 365;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);

    // Months are counted from March here (0 = March, 11 = February).
    let month_index = (5 * day_of_year + 2) / 153;
    let day = day_of_year - (153 * month_index + 2) / 5 + 1;
    let month = if month_index < 10 {
        month_index + 3
    } else {
        month_index - 9
    };

    // January and February belong to the next civil year in the March-based count.
    let march_based_year = year_of_era as i64 + era * 400;
    let year = if month <= 2 {
        march_based_year + 1
    } else {
        march_based_year
    };

    (year, month, day)
}
258
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Return a fresh, empty scratch directory dedicated to the named test.
    fn temp_dir(name: &str) -> PathBuf {
        let scratch = std::env::temp_dir().join("casc_metadata_test").join(name);
        if scratch.exists() {
            // Best effort: leftovers from an earlier run are removed if possible.
            let _ = fs::remove_dir_all(&scratch);
        }
        fs::create_dir_all(&scratch).unwrap();
        scratch
    }

    /// Build a 1024-byte entry with the given FileDataID and status.
    fn sample_entry(fdid: u32, status: &str) -> MetadataEntry {
        let path = format!("test/file_{fdid}.dat");
        MetadataEntry {
            fdid,
            path,
            size: 1024,
            ckey: String::from("abcdef1234567890abcdef1234567890"),
            locale_flags: 0x2,
            content_flags: 0x0,
            status: status.to_owned(),
        }
    }

    #[test]
    fn creates_meta_directory() {
        let root = temp_dir("creates_meta_dir");
        let writer = MetadataWriter::new(&root, "test-build", "wow").unwrap();

        for expected in [
            ".casc-meta",
            ".casc-meta/index.jsonl",
            ".casc-meta/index.csv",
        ] {
            assert!(root.join(expected).exists());
        }

        drop(writer);
        let _ = fs::remove_dir_all(&root);
    }

    #[test]
    fn record_writes_jsonl_line() {
        let root = temp_dir("jsonl_line");
        let writer = MetadataWriter::new(&root, "test-build", "wow").unwrap();
        writer.record(&sample_entry(100, "ok")).unwrap();
        writer.finish().unwrap();

        let jsonl = fs::read_to_string(root.join(".casc-meta/index.jsonl")).unwrap();
        let mut rows = jsonl.lines();
        let first = rows.next().expect("one JSONL line");
        assert!(rows.next().is_none());

        let value: serde_json::Value = serde_json::from_str(first).unwrap();
        assert_eq!(value["fdid"], 100);
        assert_eq!(value["status"], "ok");
        let _ = fs::remove_dir_all(&root);
    }

    #[test]
    fn record_writes_csv_row() {
        let root = temp_dir("csv_row");
        let writer = MetadataWriter::new(&root, "test-build", "wow").unwrap();
        writer.record(&sample_entry(200, "ok")).unwrap();
        writer.finish().unwrap();

        let csv_text = fs::read_to_string(root.join(".casc-meta/index.csv")).unwrap();
        let rows: Vec<&str> = csv_text.lines().collect();
        // Header row followed by exactly one data row.
        assert_eq!(rows.len(), 2);
        let (header, data) = (rows[0], rows[1]);
        assert!(header.starts_with("fdid,"));
        assert!(data.starts_with("200,"));
        let _ = fs::remove_dir_all(&root);
    }

    #[test]
    fn stats_tracks_success_and_errors() {
        let root = temp_dir("stats_tracking");
        let writer = MetadataWriter::new(&root, "test-build", "wow").unwrap();

        let inputs = [
            (1, "ok"),
            (2, "ok"),
            (3, "error:corrupt BLTE"),
            (4, "skipped:encrypted"),
        ];
        for (fdid, status) in inputs {
            writer.record(&sample_entry(fdid, status)).unwrap();
        }

        let snapshot = writer.stats();
        assert_eq!(snapshot.total, 4);
        assert_eq!(snapshot.success, 2);
        assert_eq!(snapshot.errors, 1);
        assert_eq!(snapshot.skipped, 1);
        // Only the two successful entries contribute their 1024 bytes.
        assert_eq!(snapshot.bytes_written, 2048);

        writer.finish().unwrap();
        let _ = fs::remove_dir_all(&root);
    }

    #[test]
    fn finish_writes_summary_json() {
        let root = temp_dir("summary_json");
        let writer = MetadataWriter::new(&root, "my-build-123", "wow").unwrap();
        writer.record(&sample_entry(1, "ok")).unwrap();
        let final_stats = writer.finish().unwrap();

        assert_eq!(final_stats.success, 1);

        let summary_path = root.join(".casc-meta/summary.json");
        assert!(summary_path.exists());

        let raw = fs::read_to_string(&summary_path).unwrap();
        let summary: serde_json::Value = serde_json::from_str(&raw).unwrap();
        assert_eq!(summary["build"], "my-build-123");
        assert_eq!(summary["product"], "wow");

        let extracted_at = summary["extracted_at"].as_str().unwrap();
        assert!(!extracted_at.is_empty());
        let duration_secs = summary["duration_secs"].as_f64().unwrap();
        assert!(duration_secs >= 0.0);

        assert_eq!(summary["stats"]["success"], 1);
        let _ = fs::remove_dir_all(&root);
    }

    #[test]
    fn multiple_records_accumulate() {
        let root = temp_dir("accumulate");
        let writer = MetadataWriter::new(&root, "build", "wow").unwrap();
        (0..50).for_each(|fdid| writer.record(&sample_entry(fdid, "ok")).unwrap());

        let final_stats = writer.finish().unwrap();
        assert_eq!(final_stats.total, 50);
        assert_eq!(final_stats.success, 50);

        let jsonl = fs::read_to_string(root.join(".casc-meta/index.jsonl")).unwrap();
        assert_eq!(jsonl.lines().count(), 50);
        let _ = fs::remove_dir_all(&root);
    }

    #[test]
    fn entry_serialization_round_trip() {
        let encoded = serde_json::to_string(&sample_entry(42, "ok")).unwrap();
        let decoded: serde_json::Value = serde_json::from_str(&encoded).unwrap();
        assert_eq!(decoded["fdid"], 42);
        assert_eq!(decoded["path"], "test/file_42.dat");
    }
}