ff-decode 0.14.2

Video and audio decoding - the Rust way
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
use super::{
    AvFrameGuard, DecodeError, Duration, KEYFRAME_SEEK_TOLERANCE_SECS, VideoDecoderInner,
    VideoFrame,
};

impl VideoDecoderInner {
    /// Returns the current playback position.
    pub(crate) fn position(&self) -> Duration {
        self.position
    }

    /// Returns whether end of file has been reached.
    pub(crate) fn is_eof(&self) -> bool {
        self.eof
    }

    /// Returns whether the source is a live or streaming input.
    ///
    /// Live sources have the `AVFMT_TS_DISCONT` flag set on their `AVInputFormat`.
    /// Seeking is not meaningful on live sources.
    pub(crate) fn is_live(&self) -> bool {
        self.is_live
    }

    /// Converts a `Duration` to a presentation timestamp (PTS) in stream time_base units.
    ///
    /// # Arguments
    ///
    /// * `duration` - The duration to convert.
    ///
    /// # Returns
    ///
    /// The timestamp in stream time_base units.
    ///
    /// # Note
    ///
    /// av_seek_frame expects timestamps in stream time_base units when using a specific stream_index.
    fn duration_to_pts(&self, duration: Duration) -> i64 {
        // Convert duration to stream time_base units for seeking
        // SAFETY:
        // - format_ctx is valid: owned by VideoDecoderInner, initialized in constructor via avformat_open_input
        // - stream_index is valid: validated during decoder creation (find_stream_info + codec opening)
        // - streams array access is valid: guaranteed by FFmpeg after successful avformat_open_input
        let time_base = unsafe {
            let stream = (*self.format_ctx).streams.add(self.stream_index as usize);
            (*(*stream)).time_base
        };

        // Convert: duration (seconds) * (time_base.den / time_base.num) = PTS
        let time_base_f64 = time_base.den as f64 / time_base.num as f64;
        (duration.as_secs_f64() * time_base_f64) as i64
    }

    /// Converts a presentation timestamp (PTS) to a `Duration`.
    ///
    /// # Arguments
    ///
    /// * `pts` - The presentation timestamp in stream time base units.
    ///
    /// # Returns
    ///
    /// The duration corresponding to the PTS.
    ///
    /// # Safety
    ///
    /// Caller must ensure that `format_ctx` and `stream_index` are valid.
    /// Seeks to a specified position in the video stream.
    ///
    /// This method performs efficient seeking without reopening the file.
    /// It uses `av_seek_frame` internally and flushes the decoder buffers.
    ///
    /// # Performance Characteristics
    ///
    /// - **Keyframe seek**: 5-10ms for typical GOP sizes (1-2 seconds)
    /// - **Exact seek**: Proportional to distance from nearest keyframe
    /// - **Large GOP videos**: May require sequential decoding from distant keyframe
    ///
    /// For videos with sparse keyframes (GOP > 2 seconds), the method will
    /// decode frames sequentially from the nearest keyframe to reach the target.
    /// This ensures correct frame data but may take longer (10-50ms for very large GOPs).
    ///
    /// # Arguments
    ///
    /// * `position` - Target position to seek to.
    /// * `mode` - Seek mode (Keyframe, Exact, or Backward).
    ///
    /// # Errors
    ///
    /// Returns [`DecodeError::SeekFailed`] if the seek operation fails.
    pub(crate) fn seek(
        &mut self,
        position: Duration,
        mode: crate::SeekMode,
    ) -> Result<(), DecodeError> {
        use crate::SeekMode;

        let timestamp = self.duration_to_pts(position);

        // All seek modes use BACKWARD flag to find the nearest keyframe at or before target.
        // The difference between modes is in the post-seek processing below.
        let flags = ff_sys::avformat::seek_flags::BACKWARD;

        // 1. Clear any pending packet and frame to avoid reading stale data after seek
        // SAFETY:
        // - packet is valid: allocated in constructor, owned by VideoDecoderInner
        // - frame is valid: allocated in constructor, owned by VideoDecoderInner
        unsafe {
            ff_sys::av_packet_unref(self.packet);
            ff_sys::av_frame_unref(self.frame);
        }

        // 2. Seek in the format context (file is NOT reopened)
        // Use av_seek_frame with the stream index and timestamp in stream time_base units
        // SAFETY:
        // - format_ctx is valid: owned by VideoDecoderInner, initialized via avformat_open_input
        // - stream_index is valid: validated during decoder creation
        // - timestamp is valid: converted from Duration using stream's time_base
        unsafe {
            ff_sys::avformat::seek_frame(
                self.format_ctx,
                self.stream_index as i32,
                timestamp,
                flags,
            )
            .map_err(|e| DecodeError::SeekFailed {
                target: position,
                reason: ff_sys::av_error_string(e),
            })?;
        }

        // 3. Flush decoder buffers to clear any cached frames
        // SAFETY: codec_ctx is valid: owned by VideoDecoderInner, initialized via avcodec_open2
        unsafe {
            ff_sys::avcodec::flush_buffers(self.codec_ctx);
        }

        // 4. Drain any remaining frames from the decoder after flush
        // This ensures no stale frames are returned after the seek
        // SAFETY:
        // - codec_ctx is valid: owned by VideoDecoderInner, initialized via avcodec_open2
        // - frame is valid: allocated in constructor, owned by VideoDecoderInner
        unsafe {
            loop {
                let ret = ff_sys::avcodec_receive_frame(self.codec_ctx, self.frame);
                if ret == ff_sys::error_codes::EAGAIN || ret == ff_sys::error_codes::EOF {
                    // No more frames in the decoder buffer
                    break;
                } else if ret == 0 {
                    // Got a frame, unref it and continue draining
                    ff_sys::av_frame_unref(self.frame);
                } else {
                    // Other error, break out
                    break;
                }
            }
        }

        // 5. Reset internal state
        self.eof = false;
        // Note: We don't update self.position here because it will be updated
        // when the next frame is decoded. This ensures position reflects actual decoded position.

        // 6. Skip forward to the target position
        //
        // Context: av_seek_frame with BACKWARD flag seeks to the nearest keyframe *at or before*
        // the target timestamp. For videos with sparse keyframes (large GOP size), this may
        // land far from the target (e.g., at the first keyframe for GOP=entire video).
        //
        // Solution: Decode frames sequentially from the keyframe until reaching the target.
        // This is necessary because H.264/H.265 P-frames and B-frames depend on previous
        // frames for reconstruction, so we must decode all intermediate frames.
        //
        // Performance Impact:
        // - Typical GOP (1-2s): 30-60 frames to skip, ~5-10ms overhead
        // - Large GOP (5-10s): 150-300 frames to skip, ~20-50ms overhead
        // - Worst case (single keyframe): May decode entire video, ~100ms-1s
        if mode == SeekMode::Exact {
            // For exact mode, decode until we reach or pass the exact target
            self.skip_to_exact(position)?;
        } else {
            // For keyframe/backward modes, decode until we're reasonably close to the target
            // Rationale: Balances accuracy with performance for common use cases
            let tolerance = Duration::from_secs(KEYFRAME_SEEK_TOLERANCE_SECS);
            let min_position = position.saturating_sub(tolerance);

            while let Some(frame) = self.decode_one()? {
                let frame_time = frame.timestamp().as_duration();
                if frame_time >= min_position {
                    // We're close enough to the target
                    break;
                }
                // Continue decoding to get closer (frames are automatically dropped)
            }
        }

        Ok(())
    }

    /// Skips frames until reaching the exact target position.
    ///
    /// This is used by [`Self::seek`] when `SeekMode::Exact` is specified.
    /// It decodes and discards frames from the nearest keyframe until
    /// reaching the target position.
    ///
    /// # Performance
    ///
    /// Time complexity is O(n) where n is the number of frames between the
    /// keyframe and target. For a 30fps video with 2-second GOP:
    /// - Worst case: ~60 frames to decode, ~10-20ms
    /// - Average case: ~30 frames to decode, ~5-10ms
    ///
    /// # Arguments
    ///
    /// * `target` - The exact target position.
    ///
    /// # Errors
    ///
    /// Returns [`DecodeError::SeekFailed`] if EOF is reached before the target position.
    fn skip_to_exact(&mut self, target: Duration) -> Result<(), DecodeError> {
        loop {
            match self.decode_one()? {
                Some(frame) => {
                    let frame_time = frame.timestamp().as_duration();
                    if frame_time >= target {
                        // Reached or passed the target frame
                        // Position will be updated by decode_one() which was just called
                        break;
                    }
                    // Continue decoding (frame is automatically dropped)
                }
                None => {
                    // Reached EOF before finding target frame
                    return Err(DecodeError::SeekFailed {
                        target,
                        reason: "Reached end of stream before target position".to_string(),
                    });
                }
            }
        }
        Ok(())
    }

    /// Flushes the decoder's internal buffers.
    ///
    /// This clears any cached frames and resets the decoder state.
    /// The decoder is ready to receive new packets after flushing.
    pub(crate) fn flush(&mut self) {
        // SAFETY: codec_ctx is valid and owned by this instance
        unsafe {
            ff_sys::avcodec::flush_buffers(self.codec_ctx);
        }
        self.eof = false;
    }

    /// Scales a video frame to the specified dimensions while preserving aspect ratio.
    ///
    /// This method uses SwScale to resize frames efficiently using a "fit-within"
    /// strategy that preserves the original aspect ratio.
    ///
    /// # Aspect Ratio Preservation
    ///
    /// The frame is scaled to fit within `(target_width, target_height)` while
    /// maintaining its original aspect ratio. The output dimensions will be at most
    /// the target size, with at least one dimension matching the target. No letterboxing
    /// or pillarboxing is applied - the frame is simply scaled down to fit.
    ///
    /// # Arguments
    ///
    /// * `frame` - The source frame to scale.
    /// * `target_width` - Desired width in pixels.
    /// * `target_height` - Desired height in pixels.
    ///
    /// # Returns
    ///
    /// A new `VideoFrame` scaled to fit within the target dimensions.
    ///
    /// # Errors
    ///
    /// Returns [`DecodeError`] if SwScale context creation or scaling fails.
    ///
    /// # Performance
    ///
    /// - Caches SwScale context for repeated calls with same dimensions
    /// - Context creation: ~0.1-0.5ms (only on first call or dimension change)
    /// - Typical scaling time: 1-3ms for 1080p → 320x180
    /// - Uses bilinear interpolation for quality/performance balance
    ///
    /// # Cache Behavior
    ///
    /// The SwScale context is cached based on source/target dimensions and format.
    /// When generating multiple thumbnails with the same size (e.g., via `thumbnails()`),
    /// the context is reused, eliminating the ~0.1-0.5ms creation overhead per thumbnail.
    pub(crate) fn scale_frame(
        &mut self,
        frame: &VideoFrame,
        target_width: u32,
        target_height: u32,
    ) -> Result<VideoFrame, DecodeError> {
        let src_width = frame.width();
        let src_height = frame.height();
        let src_format = frame.format();

        // Calculate scaled dimensions to preserve aspect ratio (fit within target)
        let src_aspect = src_width as f64 / src_height as f64;
        let target_aspect = target_width as f64 / target_height as f64;

        let (scaled_width, scaled_height) = if src_aspect > target_aspect {
            // Source is wider - fit to width
            let height = (target_width as f64 / src_aspect).round() as u32;
            (target_width, height)
        } else {
            // Source is taller or equal - fit to height
            let width = (target_height as f64 * src_aspect).round() as u32;
            (width, target_height)
        };

        // Convert pixel format to FFmpeg format
        let av_format = Self::pixel_format_to_av(src_format);

        // Cache key: (src_width, src_height, scaled_width, scaled_height, format)
        let cache_key = (
            src_width,
            src_height,
            scaled_width,
            scaled_height,
            av_format,
        );

        // SAFETY: We're creating temporary FFmpeg objects for scaling
        unsafe {
            // Check if we can reuse the cached SwScale context
            let (sws_ctx, is_cached) = if let (Some(cached_ctx), Some(cached_key)) =
                (self.thumbnail_sws_ctx, self.thumbnail_cache_key)
            {
                if cached_key == cache_key {
                    // Cache hit - reuse existing context
                    (cached_ctx, true)
                } else {
                    // Cache miss - free old context and create new one
                    ff_sys::swscale::free_context(cached_ctx);
                    // Clear cache immediately to prevent dangling pointer
                    self.thumbnail_sws_ctx = None;
                    self.thumbnail_cache_key = None;

                    let new_ctx = ff_sys::swscale::get_context(
                        src_width as i32,
                        src_height as i32,
                        av_format,
                        scaled_width as i32,
                        scaled_height as i32,
                        av_format,
                        ff_sys::swscale::scale_flags::BILINEAR,
                    )
                    .map_err(|e| DecodeError::Ffmpeg {
                        code: 0,
                        message: format!("Failed to create scaling context: {e}"),
                    })?;

                    // Don't cache yet - will cache after successful scaling
                    (new_ctx, false)
                }
            } else {
                // No cache - create new context
                let new_ctx = ff_sys::swscale::get_context(
                    src_width as i32,
                    src_height as i32,
                    av_format,
                    scaled_width as i32,
                    scaled_height as i32,
                    av_format,
                    ff_sys::swscale::scale_flags::BILINEAR,
                )
                .map_err(|e| DecodeError::Ffmpeg {
                    code: 0,
                    message: format!("Failed to create scaling context: {e}"),
                })?;

                // Don't cache yet - will cache after successful scaling
                (new_ctx, false)
            };

            // Set up source frame with VideoFrame data
            let src_frame_guard = AvFrameGuard::new()?;
            let src_frame = src_frame_guard.as_ptr();

            (*src_frame).width = src_width as i32;
            (*src_frame).height = src_height as i32;
            (*src_frame).format = av_format;

            // Set up source frame data pointers directly from VideoFrame (no copy)
            let planes = frame.planes();
            let strides = frame.strides();

            for (i, plane_data) in planes.iter().enumerate() {
                if i >= ff_sys::AV_NUM_DATA_POINTERS as usize {
                    break;
                }
                (*src_frame).data[i] = plane_data.as_ref().as_ptr().cast_mut();
                (*src_frame).linesize[i] = strides[i] as i32;
            }

            // Allocate destination frame
            let dst_frame_guard = AvFrameGuard::new()?;
            let dst_frame = dst_frame_guard.as_ptr();

            (*dst_frame).width = scaled_width as i32;
            (*dst_frame).height = scaled_height as i32;
            (*dst_frame).format = av_format;

            // Allocate buffer for destination frame
            let buffer_ret = ff_sys::av_frame_get_buffer(dst_frame, 0);
            if buffer_ret < 0 {
                // Clean up context if not cached
                if !is_cached {
                    ff_sys::swscale::free_context(sws_ctx);
                }
                return Err(DecodeError::Ffmpeg {
                    code: buffer_ret,
                    message: format!(
                        "Failed to allocate destination frame buffer: {}",
                        ff_sys::av_error_string(buffer_ret)
                    ),
                });
            }

            // Perform scaling
            let scale_result = ff_sys::swscale::scale(
                sws_ctx,
                (*src_frame).data.as_ptr() as *const *const u8,
                (*src_frame).linesize.as_ptr(),
                0,
                src_height as i32,
                (*dst_frame).data.as_ptr() as *const *mut u8,
                (*dst_frame).linesize.as_ptr(),
            );

            if let Err(e) = scale_result {
                // Clean up context if not cached
                if !is_cached {
                    ff_sys::swscale::free_context(sws_ctx);
                }
                return Err(DecodeError::Ffmpeg {
                    code: 0,
                    message: format!("Failed to scale frame: {e}"),
                });
            }

            // Scaling successful - cache the context if it's new
            if !is_cached {
                self.thumbnail_sws_ctx = Some(sws_ctx);
                self.thumbnail_cache_key = Some(cache_key);
            }

            // Copy timestamp
            (*dst_frame).pts = frame.timestamp().pts();

            // Convert destination frame to VideoFrame
            let video_frame = self.av_frame_to_video_frame(dst_frame)?;

            Ok(video_frame)
        }
    }

    // ── Reconnect helpers ─────────────────────────────────────────────────────

    /// Attempts to reconnect to the stream URL using exponential backoff.
    ///
    /// Called from `decode_one()` when `StreamInterrupted` is received and
    /// `NetworkOptions::reconnect_on_error` is `true`. After all attempts fail,
    /// returns a `StreamInterrupted` error.
    pub(super) fn attempt_reconnect(&mut self) -> Result<(), DecodeError> {
        let url = match self.url.as_deref() {
            Some(u) => u.to_owned(),
            None => return Ok(()), // file-path source: no reconnect
        };
        let max = self.network_opts.max_reconnect_attempts;

        for attempt in 1..=max {
            let backoff_ms = 100u64 * (1u64 << (attempt - 1).min(10));
            log::warn!(
                "reconnecting attempt={attempt} url={} backoff_ms={backoff_ms}",
                crate::network::sanitize_url(&url)
            );
            std::thread::sleep(Duration::from_millis(backoff_ms));
            match self.reopen(&url) {
                Ok(()) => {
                    self.reconnect_count += 1;
                    log::info!(
                        "reconnected attempt={attempt} url={} total_reconnects={}",
                        crate::network::sanitize_url(&url),
                        self.reconnect_count
                    );
                    return Ok(());
                }
                Err(e) => log::warn!("reconnect attempt={attempt} failed err={e}"),
            }
        }

        Err(DecodeError::StreamInterrupted {
            code: 0,
            endpoint: crate::network::sanitize_url(&url),
            message: format!("stream did not recover after {max} attempts"),
        })
    }

    /// Closes the current `AVFormatContext`, re-opens the URL, re-reads stream info,
    /// re-finds the video stream, and flushes the codec.
    fn reopen(&mut self, url: &str) -> Result<(), DecodeError> {
        // Close the current format context. `avformat_close_input` sets the pointer
        // to null — this matches the null check in Drop so no double-free occurs.
        // SAFETY: self.format_ctx is valid and owned exclusively by self.
        unsafe {
            ff_sys::avformat::close_input(std::ptr::addr_of_mut!(self.format_ctx));
        }

        // Re-open the URL with the stored network timeouts.
        // SAFETY: url is a valid UTF-8 network URL string.
        self.format_ctx = unsafe {
            ff_sys::avformat::open_input_url(
                url,
                self.network_opts.connect_timeout,
                self.network_opts.read_timeout,
            )
            .map_err(|e| crate::network::map_network_error(e, crate::network::sanitize_url(url)))?
        };

        // Re-read stream information.
        // SAFETY: self.format_ctx is valid and freshly opened.
        unsafe {
            ff_sys::avformat::find_stream_info(self.format_ctx).map_err(|e| {
                DecodeError::Ffmpeg {
                    code: e,
                    message: format!(
                        "reconnect find_stream_info failed: {}",
                        ff_sys::av_error_string(e)
                    ),
                }
            })?;
        }

        // Re-find the video stream (index may differ in theory after reconnect).
        // SAFETY: self.format_ctx is valid.
        let (stream_index, _) = unsafe { Self::find_video_stream(self.format_ctx) }
            .ok_or_else(|| DecodeError::NoVideoStream { path: url.into() })?;
        self.stream_index = stream_index as i32;

        // Flush codec buffers to discard stale decoded state from before the drop.
        // SAFETY: self.codec_ctx is valid and has not been freed.
        unsafe {
            ff_sys::avcodec::flush_buffers(self.codec_ctx);
        }

        self.eof = false;
        Ok(())
    }
}