Skip to main content

revelo_core/
file_level.rs

//! File-level field derivation shared by every front-end (CLI, diff
//! harness, future C ABI shim).
//!
//! Some `General`-stream fields are not derivable from the media bytes
//! alone — they come from the path / fstat / cross-stream arithmetic.
//! On the C++ side these are filled by `MediaInfo_Internal` (the wrapper
//! that drives the parser). Previously this logic lived only in the diff
//! harness, so the CLI emitted incomplete output (no FileSize,
//! OverallBitRate, Duration, FileExtension, dates, or StreamSize
//! overhead). Centralising it here keeps every front-end consistent.
11
12use crate::{FileAnalyze, StreamKind};
13
/// Inputs the engine can't read from the media stream itself. The caller
/// (which has filesystem access) supplies them.
///
/// Every field is plain `Copy` data, so the struct can be copied,
/// compared and debug-printed freely.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct FileLevelInfo<'a> {
    /// Total file size in bytes (from fstat).
    pub file_size: u64,
    /// File extension without the leading dot, e.g. "mp4". It may be
    /// lowercased by the caller or passed as found on disk.
    pub extension: Option<&'a str>,
    /// File modification time as a Unix timestamp (seconds), if known.
    pub modified_unix_secs: Option<i64>,
    /// Local timezone offset in seconds east of UTC, for the
    /// `_Local` date variant (e.g. +36000 for AEST).
    pub local_offset_secs: i64,
}
27
28/// Fill the derived `General`-stream fields. Mirrors what the C++
29/// `MediaInfo_Internal` wrapper does after a parser finishes.
30pub fn fill_file_level_fields(fa: &mut FileAnalyze, info: &FileLevelInfo<'_>) {
31    let file_size = info.file_size;
32    fa.set_field(StreamKind::General, 0, "FileSize", file_size.to_string());
33
34    if let Some(ext) = info.extension {
35        fa.set_field(StreamKind::General, 0, "FileExtension", ext.to_owned());
36    }
37
38    let audio_stream_size: Option<u64> =
39        fa.retrieve(StreamKind::Audio, 0, "StreamSize").and_then(|z| z.as_str().parse().ok());
40
41    // Prefer a parser-filled General.Duration (always integer ms).
42    // Fall back to Audio.Duration for parsers that only emit there in
43    // int-ms form. A float Audio.Duration (e.g. MKV "1.500000000")
44    // parses as None here — fine, since those parsers also fill
45    // General.Duration.
46    let duration_ms: Option<u64> = fa
47        .retrieve(StreamKind::General, 0, "Duration")
48        .and_then(|z| z.as_str().parse().ok())
49        .or_else(|| {
50            fa.retrieve(StreamKind::Audio, 0, "Duration").and_then(|z| z.as_str().parse().ok())
51        });
52
53    if let Some(ms) = duration_ms {
54        fa.set_field(StreamKind::General, 0, "Duration", ms.to_string());
55    }
56
57    // OverallBitRate = FileSize × 8 × 1000 / Duration_ms, rounded to
58    // nearest. OverallBitRate_Mode:
59    //   * audio-only file: mirror Audio.BitRate_Mode
60    //   * video present: "VBR" only when Video is VFR (authored MP4s
61    //     with VBR AAC inside CFR video omit the field, matching oracle)
62    if let Some(ms) = duration_ms
63        && ms > 0
64    {
65        let overall = ((file_size as f64) * 8.0 * 1000.0 / (ms as f64)).round() as u64;
66        let has_video = fa.stream_count(StreamKind::Video) > 0;
67        let overall_mode = if !has_video {
68            fa.retrieve(StreamKind::Audio, 0, "BitRate_Mode").map(|z| z.as_str().to_owned())
69        } else {
70            let video_fr_mode =
71                fa.retrieve(StreamKind::Video, 0, "FrameRate_Mode").map(|z| z.as_str().to_owned());
72            if video_fr_mode.as_deref() == Some("VFR") { Some("VBR".to_owned()) } else { None }
73        };
74        if let Some(mode) = overall_mode {
75            fa.set_field(StreamKind::General, 0, "OverallBitRate_Mode", mode);
76        }
77        fa.set_field(StreamKind::General, 0, "OverallBitRate", overall.to_string());
78    }
79
80    // Propagate the primary video stream's frame rate + total frame
81    // count up to the General stream — MediaInfo surfaces them at the
82    // container level (e.g. big_buck_bunny General FrameRate=24.000,
83    // FrameCount=1440). Only when a video track exists and the parser
84    // hasn't already set them on General.
85    if fa.stream_count(StreamKind::Video) > 0 {
86        if fa.retrieve(StreamKind::General, 0, "FrameRate").is_none()
87            && let Some(fr) =
88                fa.retrieve(StreamKind::Video, 0, "FrameRate").map(|z| z.as_str().to_owned())
89        {
90            fa.set_field(StreamKind::General, 0, "FrameRate", fr);
91        }
92        if fa.retrieve(StreamKind::General, 0, "FrameCount").is_none()
93            && let Some(fc) =
94                fa.retrieve(StreamKind::Video, 0, "FrameCount").map(|z| z.as_str().to_owned())
95        {
96            fa.set_field(StreamKind::General, 0, "FrameCount", fc);
97        }
98    }
99
100    // General StreamSize = container overhead = FileSize − elementary
101    // stream sizes. Skipped when elementary ≥ file_size (e.g. Ogg/Vorbis
102    // reports a bitrate-derived StreamSize larger than the file).
103    let video_stream_size: u64 = fa
104        .retrieve(StreamKind::Video, 0, "StreamSize")
105        .and_then(|z| z.as_str().parse().ok())
106        .unwrap_or(0);
107    if let Some(audio_size) = audio_stream_size {
108        let elementary = audio_size + video_stream_size;
109        if elementary < file_size {
110            fa.set_field(
111                StreamKind::General,
112                0,
113                "StreamSize",
114                (file_size - elementary).to_string(),
115            );
116        }
117    } else if video_stream_size > 0 && video_stream_size < file_size {
118        fa.set_field(
119            StreamKind::General,
120            0,
121            "StreamSize",
122            (file_size - video_stream_size).to_string(),
123        );
124    }
125
126    if let Some(unix_secs) = info.modified_unix_secs {
127        fa.set_field(StreamKind::General, 0, "File_Modified_Date", format_utc(unix_secs));
128        fa.set_field(
129            StreamKind::General,
130            0,
131            "File_Modified_Date_Local",
132            format_local(unix_secs, info.local_offset_secs),
133        );
134    }
135}
136
137fn format_utc(unix_secs: i64) -> String {
138    let (y, m, d, hh, mm, ss) = civil_from_unix(unix_secs);
139    format!("{y:04}-{m:02}-{d:02} {hh:02}:{mm:02}:{ss:02} UTC")
140}
141
142fn format_local(unix_secs: i64, local_offset_secs: i64) -> String {
143    let (y, m, d, hh, mm, ss) = civil_from_unix(unix_secs + local_offset_secs);
144    format!("{y:04}-{m:02}-{d:02} {hh:02}:{mm:02}:{ss:02}")
145}
146
/// Unix timestamp → (year, month, day, hour, min, sec) in the proleptic
/// Gregorian calendar, via Howard Hinnant's `civil_from_days` algorithm.
///
/// Negative (pre-1970) timestamps are supported. For timestamps so far
/// from the epoch that the year does not fit in `i32`, the year saturates
/// at `i32::MIN` / `i32::MAX` instead of wrapping.
fn civil_from_unix(unix_secs: i64) -> (i32, u8, u8, u8, u8, u8) {
    const SECS_PER_DAY: i64 = 86_400;
    /// Days in one 400-year Gregorian cycle (an "era").
    const DAYS_PER_ERA: i64 = 146_097;
    /// Days from 0000-03-01 (the algorithm's origin) to 1970-01-01.
    const EPOCH_SHIFT_DAYS: i64 = 719_468;

    // Euclidean division keeps the time of day in 0..86_400 even for
    // negative timestamps, so the narrowing casts below are lossless.
    let days = unix_secs.div_euclid(SECS_PER_DAY);
    let rem = unix_secs.rem_euclid(SECS_PER_DAY);
    let hh = (rem / 3600) as u8;
    let mm = ((rem % 3600) / 60) as u8;
    let ss = (rem % 60) as u8;

    let z = days + EPOCH_SHIFT_DAYS;
    let era = z.div_euclid(DAYS_PER_ERA);
    let doe = z.rem_euclid(DAYS_PER_ERA) as u32; // day of era, 0..=146_096
    let yoe = (doe - doe / 1460 + doe / 36_524 - doe / 146_096) / 365; // 0..=399
    let doy = doe - (365 * yoe + yoe / 4 - yoe / 100); // 0..=365, counted from 1 March
    let mp = (5 * doy + 2) / 153; // 0 = March … 11 = February
    let d = (doy - (153 * mp + 2) / 5 + 1) as u8;
    let m = (if mp < 10 { mp + 3 } else { mp - 9 }) as u8;

    // The algorithm's years start on 1 March, so January and February
    // belong to the following civil year.
    let y = i64::from(yoe) + era * 400 + i64::from(m <= 2);
    // An i64 second count spans roughly ±2.9e11 years; clamp rather than
    // let the narrowing cast wrap into a nonsensical year.
    let year = y.clamp(i64::from(i32::MIN), i64::from(i32::MAX)) as i32;
    (year, m, d, hh, mm, ss)
}
168
#[cfg(test)]
mod tests {
    use super::*;

    /// Neutral inputs: a 1-byte file with no extension and no mtime.
    /// Each test overrides only the fields it exercises.
    const BARE_INFO: FileLevelInfo<'static> = FileLevelInfo {
        file_size: 1,
        extension: None,
        modified_unix_secs: None,
        local_offset_secs: 0,
    };

    /// Reads a field of `General` stream 0 as an owned string.
    fn general_field(fa: &FileAnalyze, field: &str) -> Option<String> {
        fa.retrieve(StreamKind::General, 0, field).map(|z| z.as_str().to_owned())
    }

    #[test]
    fn fills_filesize_and_extension() {
        let mut fa = FileAnalyze::new(b"");
        fa.stream_prepare(StreamKind::General);
        let info = FileLevelInfo { file_size: 12548, extension: Some("jpg"), ..BARE_INFO };

        fill_file_level_fields(&mut fa, &info);

        assert_eq!(general_field(&fa, "FileSize").as_deref(), Some("12548"));
        assert_eq!(general_field(&fa, "FileExtension").as_deref(), Some("jpg"));
    }

    #[test]
    fn propagates_video_framerate_and_framecount_to_general() {
        let mut fa = FileAnalyze::new(b"");
        fa.stream_prepare(StreamKind::General);
        fa.set_field(StreamKind::Video, 0, "FrameRate", "24.000");
        fa.set_field(StreamKind::Video, 0, "FrameCount", "1440");

        fill_file_level_fields(&mut fa, &BARE_INFO);

        assert_eq!(general_field(&fa, "FrameRate").as_deref(), Some("24.000"));
        assert_eq!(general_field(&fa, "FrameCount").as_deref(), Some("1440"));
    }

    #[test]
    fn does_not_propagate_framerate_without_video() {
        let mut fa = FileAnalyze::new(b"");
        fa.stream_prepare(StreamKind::General);
        fa.set_field(StreamKind::Audio, 0, "Format", "AAC");

        fill_file_level_fields(&mut fa, &BARE_INFO);

        assert!(general_field(&fa, "FrameRate").is_none());
    }

    #[test]
    fn computes_overall_bitrate_from_duration() {
        let mut fa = FileAnalyze::new(b"");
        fa.stream_prepare(StreamKind::General);
        fa.set_field(StreamKind::General, 0, "Duration", "1000");
        // 125000 bytes over 1 s = 1_000_000 bps
        let info = FileLevelInfo { file_size: 125_000, ..BARE_INFO };

        fill_file_level_fields(&mut fa, &info);

        assert_eq!(general_field(&fa, "OverallBitRate").as_deref(), Some("1000000"));
    }

    #[test]
    fn formats_modified_date_utc_and_local() {
        let mut fa = FileAnalyze::new(b"");
        fa.stream_prepare(StreamKind::General);
        let info = FileLevelInfo {
            // 2021-01-01 00:00:00 UTC = 1609459200
            modified_unix_secs: Some(1_609_459_200),
            local_offset_secs: 3600, // +01:00
            ..BARE_INFO
        };

        fill_file_level_fields(&mut fa, &info);

        assert_eq!(
            general_field(&fa, "File_Modified_Date").as_deref(),
            Some("2021-01-01 00:00:00 UTC")
        );
        assert_eq!(
            general_field(&fa, "File_Modified_Date_Local").as_deref(),
            Some("2021-01-01 01:00:00")
        );
    }
}