// foxglove 0.25.0

use std::sync::Arc;
use std::time::Duration;

use arc_swap::ArcSwapOption;
use bytes::Bytes;
use libwebrtc::prelude::*;
use libwebrtc::video_frame::FrameMetadata;
use libwebrtc::video_source::native::NativeVideoSource;
use tokio::sync::watch;
use tracing::{debug, error, warn};

use crate::RawChannel;
use crate::img2yuv::{ImageEncoding, ImageMessage, Yuv420Buffer};
use crate::throttler::Throttler;

/// Minimum width and height (in pixels) that we will hand to the libwebrtc video encoder.
///
/// H.264, VP8, and VP9 all encode in 16×16 macroblocks. Some encoder backends — notably
/// `libwebrtc`'s VAAPI H.264 encoder on Linux — do not validate this and can write out of
/// bounds when given sub-macroblock frames, which manifests as heap corruption later in
/// the process. Other backends (e.g. OpenH264) reject the frame but continue running.
/// Enforcing this minimum on our side guarantees safe behavior regardless of the codec
/// backend selected by libwebrtc.
///
/// The limit is enforced by `validate_frame_dimensions`, which compares it against the
/// even-aligned dimensions: a 17×17 image passes (as 16×16) while a 15×17 image does not.
const MIN_VIDEO_DIMENSION: u32 = 16;

/// Interval between throttled warnings for repeatedly-too-small frames on a single track.
///
/// Passed to `Throttler::new` by each `VideoPublisher` background task, so every publisher
/// throttles its own warnings independently.
const TOO_SMALL_WARN_INTERVAL: Duration = Duration::from_secs(30);

/// The input schema type for a video-capable channel.
///
/// Each variant identifies which message format decoder to use for extracting image data.
/// The ROS variants only exist when the matching `img2yuv-ros1` / `img2yuv-ros2` cargo
/// feature is enabled.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
// Every variant ends in `Image`, and with both ROS features disabled the two remaining
// variants also share the `Foxglove` prefix, which trips `clippy::enum_variant_names`.
// The names deliberately mirror the schema names, so the lint is silenced.
#[allow(clippy::enum_variant_names)]
pub enum VideoInputSchema {
    /// `foxglove.CompressedImage` with protobuf encoding.
    FoxgloveCompressedImage,
    /// `foxglove.RawImage` with protobuf encoding.
    FoxgloveRawImage,
    /// ROS 1 `sensor_msgs/CompressedImage` with ros1 encoding.
    #[cfg(feature = "img2yuv-ros1")]
    Ros1CompressedImage,
    /// ROS 1 `sensor_msgs/Image` with ros1 encoding.
    #[cfg(feature = "img2yuv-ros1")]
    Ros1Image,
    /// ROS 2 `sensor_msgs/msg/CompressedImage` with cdr encoding.
    #[cfg(feature = "img2yuv-ros2")]
    Ros2CompressedImage,
    /// ROS 2 `sensor_msgs/msg/Image` with cdr encoding.
    #[cfg(feature = "img2yuv-ros2")]
    Ros2Image,
}

/// Map a channel's `(message encoding, schema name)` pair to the image schema it carries.
///
/// Returns `Some(VideoInputSchema)` when the pair names an image type we can transcode to
/// video, and `None` for everything else. Both strings are compared exactly. The ROS 1 and
/// ROS 2 pairs are only recognized when the `img2yuv-ros1` / `img2yuv-ros2` feature is
/// enabled.
fn detect_video_schema(encoding: &str, schema_name: &str) -> Option<VideoInputSchema> {
    let detected = match (encoding, schema_name) {
        ("protobuf", "foxglove.CompressedImage") => VideoInputSchema::FoxgloveCompressedImage,
        ("protobuf", "foxglove.RawImage") => VideoInputSchema::FoxgloveRawImage,
        #[cfg(feature = "img2yuv-ros1")]
        ("ros1", "sensor_msgs/CompressedImage") => VideoInputSchema::Ros1CompressedImage,
        #[cfg(feature = "img2yuv-ros1")]
        ("ros1", "sensor_msgs/Image") => VideoInputSchema::Ros1Image,
        #[cfg(feature = "img2yuv-ros2")]
        ("cdr", "sensor_msgs/msg/CompressedImage") => VideoInputSchema::Ros2CompressedImage,
        #[cfg(feature = "img2yuv-ros2")]
        ("cdr", "sensor_msgs/msg/Image") => VideoInputSchema::Ros2Image,
        // Anything else is not an image stream we know how to transcode.
        _ => return None,
    };
    Some(detected)
}

/// Detect the video input schema carried by a [`RawChannel`].
///
/// Looks at the channel's message encoding and schema name. A channel without a schema is
/// treated as having an empty schema name, which matches none of the supported pairs, so
/// the result is `None`.
pub fn get_video_input_schema(channel: &RawChannel) -> Option<VideoInputSchema> {
    let schema_name = match channel.schema() {
        Some(schema) => schema.name.as_str(),
        None => "",
    };
    let encoding = channel.message_encoding();
    detect_video_schema(encoding, schema_name)
}

/// Metadata extracted from image messages on a video channel.
///
/// Used to populate `foxglove.videoSourceEncoding` and `foxglove.videoFrameId` channel metadata,
/// which the app uses to reconstruct the original image format from the video track.
///
/// Equality is used by the publisher's background task to detect when the metadata of
/// the incoming stream has changed between frames.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct VideoMetadata {
    /// The image encoding (pixel format or compression codec).
    pub(crate) encoding: ImageEncoding,
    /// The coordinate frame ID of the image source (e.g. `"camera_optical_frame"`).
    pub(crate) frame_id: String,
}

/// Adapter that lets a libwebrtc [`I420Buffer`] serve as a [`Yuv420Buffer`].
///
/// A newtype is needed because neither the trait nor the buffer type is defined in this
/// module. Every method forwards straight to the wrapped buffer.
struct I420Yuv420(I420Buffer);

impl Yuv420Buffer for I420Yuv420 {
    /// Returns the wrapped buffer's `(width, height)`.
    fn dimensions(&self) -> (u32, u32) {
        let Self(inner) = self;
        (inner.width(), inner.height())
    }

    /// Returns the three plane slices exactly as reported by `I420Buffer::data`.
    fn yuv(&self) -> (&[u8], &[u8], &[u8]) {
        let Self(inner) = self;
        inner.data()
    }

    /// Returns the three mutable plane slices exactly as reported by `I420Buffer::data_mut`.
    fn yuv_mut(&mut self) -> (&mut [u8], &mut [u8], &mut [u8]) {
        let Self(inner) = self;
        inner.data_mut()
    }

    /// Returns the three plane strides exactly as reported by `I420Buffer::strides`.
    fn yuv_strides(&self) -> (u32, u32, u32) {
        let Self(inner) = self;
        inner.strides()
    }
}

/// Error during video encoding.
///
/// Produced by the decode/transcode helpers in this module. The publisher's background
/// task logs these errors and drops the offending frame; they are not propagated further.
#[derive(Debug, thiserror::Error)]
enum VideoEncodeError {
    /// The raw message bytes could not be decoded, or the decoded message could not be
    /// turned into an image message. Carries the underlying error rendered as a string.
    #[error("failed to decode image message: {0}")]
    Decode(String),
    /// Probing the image dimensions or converting the pixels to YUV 4:2:0 failed. Also
    /// used (with `Error::ZeroSized`) for frames that are empty after even-alignment.
    #[error("failed to convert image to YUV420: {0}")]
    YuvConversion(#[from] crate::img2yuv::Error),
    /// The frame is smaller than [`MIN_VIDEO_DIMENSION`] on at least one axis. `width` and
    /// `height` are the original, pre-alignment dimensions of the image.
    #[error(
        "frame {width}x{height} is below the minimum encoder size {MIN_VIDEO_DIMENSION}x{MIN_VIDEO_DIMENSION}"
    )]
    TooSmall { width: u32, height: u32 },
}

/// Publishes video frames to a LiveKit video track.
///
/// Owns a bounded channel and a background processing task. Dropping the publisher
/// closes the channel, which terminates the task.
pub(crate) struct VideoPublisher {
    /// Producer side of the frame queue; each item is `(message bytes, log time in ns)`.
    tx: flume::Sender<(Bytes, u64)>,
    /// Consumer-side handle kept by the publisher so `send` can evict the oldest queued
    /// frame when the queue is full. The background task reads from its own clone.
    rx: flume::Receiver<(Bytes, u64)>,
    // Never read after construction: the background task works on its own clone.
    // NOTE(review): presumably retained so the source lives as long as the publisher — confirm.
    #[allow(dead_code)]
    video_source: NativeVideoSource,
    /// The latest video metadata observed by the background transcoding task.
    metadata: Arc<ArcSwapOption<VideoMetadata>>,
}

impl VideoPublisher {
    /// The bounded channel capacity for frame back-pressure.
    const CHANNEL_CAPACITY: usize = 2;

    /// Creates a new video publisher and spawns the background processing task.
    ///
    /// When the background task observes a change in video metadata (encoding or frame_id),
    /// it updates `metadata` and signals via `video_metadata_tx` so the session's sender loop
    /// can re-advertise the channel.
    pub fn new(
        video_source: NativeVideoSource,
        input_schema: VideoInputSchema,
        video_metadata_tx: watch::Sender<()>,
    ) -> Self {
        let (tx, rx) = flume::bounded::<(Bytes, u64)>(Self::CHANNEL_CAPACITY);
        let metadata: Arc<ArcSwapOption<VideoMetadata>> = Arc::new(ArcSwapOption::empty());
        let source = video_source.clone();
        let consumer_rx = rx.clone();
        let task_metadata = metadata.clone();
        tokio::spawn(async move {
            let mut last_metadata: Option<VideoMetadata> = None;
            // Throttles `TooSmall` warnings so a stream of undersized frames doesn't
            // flood the log; other encode errors stay at debug level and are unthrottled.
            let mut too_small_throttler = Throttler::new(TOO_SMALL_WARN_INTERVAL);
            while let Ok((data, log_time_ns)) = consumer_rx.recv_async().await {
                let source = source.clone();
                let result = tokio::task::spawn_blocking(move || {
                    transcode_and_publish(input_schema, &source, &data, log_time_ns)
                })
                .await;
                match result {
                    Ok(Ok(new_metadata)) => {
                        if last_metadata.as_ref() != Some(&new_metadata) {
                            last_metadata = Some(new_metadata.clone());
                            task_metadata.store(Some(Arc::new(new_metadata)));
                            video_metadata_tx.send_modify(|_| {});
                        }
                    }
                    Ok(Err(VideoEncodeError::TooSmall { width, height })) => {
                        if too_small_throttler.try_acquire() {
                            warn!(
                                "video frame {width}x{height} is below the minimum encoder size {MIN_VIDEO_DIMENSION}x{MIN_VIDEO_DIMENSION}; dropping frame"
                            );
                        }
                    }
                    Ok(Err(e)) => {
                        debug!("video encode error: {e}");
                    }
                    Err(e) => {
                        error!("video encode task panicked: {e}");
                    }
                }
            }
        });
        Self {
            tx,
            rx,
            video_source,
            metadata,
        }
    }

    /// Returns the latest video metadata observed by this publisher, if any.
    pub fn metadata(&self) -> arc_swap::Guard<Option<Arc<VideoMetadata>>> {
        self.metadata.load()
    }

    /// Send a frame for encoding. Non-blocking: if the channel is full, the oldest frame
    /// is dropped to make room (head-drop for minimal latency on live video).
    ///
    /// `log_time_ns` is the message log time in nanoseconds since epoch, forwarded to the
    /// video encoder as frame timestamp.
    pub fn send(&self, data: Bytes, log_time_ns: u64) {
        let msg = (data, log_time_ns);
        match self.tx.try_send(msg) {
            Ok(()) => {}
            Err(flume::TrySendError::Full(msg)) => {
                let _ = self.rx.try_recv();
                let _ = self.tx.try_send(msg);
            }
            Err(flume::TrySendError::Disconnected(_)) => {
                warn!("video publisher channel closed");
            }
        }
    }
}

/// Decode one image message, convert it to a video frame, and hand it to the video source.
///
/// The steps are: decode `data` according to `input_schema`, build a YUV 4:2:0
/// [`VideoFrame`] plus its [`VideoMetadata`] (encoding, frame_id), and capture the frame
/// on `video_source`. `log_time_ns` is the fallback timestamp for messages that carry none.
///
/// Returns the extracted metadata on success. If decoding or frame construction fails,
/// the error is returned and nothing is published.
fn transcode_and_publish(
    input_schema: VideoInputSchema,
    video_source: &NativeVideoSource,
    data: &[u8],
    log_time_ns: u64,
) -> Result<VideoMetadata, VideoEncodeError> {
    decode_image_message(input_schema, data)
        .and_then(|image_msg| build_video_frame(image_msg, log_time_ns))
        .map(|(frame, metadata)| {
            video_source.capture_frame(&frame);
            metadata
        })
}

/// Build the [`VideoFrame`] to hand to libwebrtc, plus the [`VideoMetadata`] used to
/// annotate the LiveKit channel advertisement.
///
/// Split out from [`transcode_and_publish`] so it can be exercised by unit tests
/// without standing up a [`NativeVideoSource`].
fn build_video_frame(
    image_msg: ImageMessage<'_>,
    log_time_ns: u64,
) -> Result<(VideoFrame<I420Buffer>, VideoMetadata), VideoEncodeError> {
    let metadata = VideoMetadata {
        encoding: image_msg.image.encoding(),
        frame_id: image_msg.frame_id.clone(),
    };

    let (width, height) = image_msg
        .image
        .probe_dimensions()
        .map_err(VideoEncodeError::YuvConversion)?;

    let (width, height) = validate_frame_dimensions(width, height)?;

    // Transcode to YUV 4:2:0.
    let mut buffer = I420Yuv420(I420Buffer::new(width, height));
    image_msg
        .image
        .to_yuv420(&mut buffer)
        .map_err(VideoEncodeError::YuvConversion)?;

    // Use the image message timestamp, if it had one, otherwise log_time.
    let timestamp_ns = match image_msg.timestamp {
        Some(ts) => ts.total_nanos(),
        None => log_time_ns,
    };

    // `timestamp_us` is used by the encoder/RTP pipeline and is *not* preserved
    // end-to-end. The original capture timestamp (in nanoseconds since epoch) is
    // carried in-band via the packet-trailer `user_timestamp` field, which the
    // receiving app can recover on every decoded frame. The track must be
    // published with `TrackPublishOptions::packet_trailer_features.user_timestamp
    // = true` for this to actually traverse the wire.
    //
    // Note: FrameMetadata::user_timestamp is documented as microseconds
    // but we store nanoseconds here. This works because this is both serialized
    // and deserialized as a 64-bit integer (PacketTrailerMetadata.userTimestamp is a bigint in JavaScript)
    // and the foxglove app expects a nanoseconds timestamp.
    //
    // The VideoFrame and FrameMetadata take separate paths through libwebrtc's send pipeline.
    // In order to reunite them later, libwebrtc stores FrameMetadata in a lookup table keyed by timestamp_us / 1000.
    // If multiple frames are received within the same millisecond, there will be a collision in this lookup table.
    // We're not expecting frame rates in the kilohertz range, so this should not be a problem in practice.
    let frame = VideoFrame {
        rotation: VideoRotation::VideoRotation0,
        timestamp_us: (timestamp_ns / 1000) as i64,
        frame_metadata: Some(FrameMetadata {
            user_timestamp: Some(timestamp_ns),
            frame_id: None,
        }),
        buffer: buffer.0,
    };
    Ok((frame, metadata))
}

/// Check that a frame is large enough to encode and normalize its size for YUV 4:2:0.
///
/// Each dimension is rounded down to the nearest even value (chroma is subsampled 2×2).
/// The rounded values are then checked, in this order:
///
/// * if either is `0`, the result is `YuvConversion(Error::ZeroSized)`;
/// * if either is below [`MIN_VIDEO_DIMENSION`], the result is `TooSmall`, which reports
///   the *original* `width` and `height`;
/// * otherwise the even-aligned `(width, height)` is returned.
fn validate_frame_dimensions(width: u32, height: u32) -> Result<(u32, u32), VideoEncodeError> {
    // Dropping the remainder mod 2 rounds an odd value down to the even value below it.
    let aligned = (width - width % 2, height - height % 2);

    // Only the smaller axis matters for both checks.
    match aligned.0.min(aligned.1) {
        0 => Err(VideoEncodeError::YuvConversion(
            crate::img2yuv::Error::ZeroSized,
        )),
        smallest if smallest < MIN_VIDEO_DIMENSION => {
            Err(VideoEncodeError::TooSmall { width, height })
        }
        _ => Ok(aligned),
    }
}

/// Decode raw message bytes into an [`ImageMessage`] using the decoder for `input_schema`.
///
/// Decoding happens in two steps for every schema: parse `data` into the schema's message
/// type, then convert that message into an [`ImageMessage`]. A failure in either step is
/// reported as [`VideoEncodeError::Decode`] carrying the underlying error's text.
fn decode_image_message<'a>(
    input_schema: VideoInputSchema,
    data: &'a [u8],
) -> Result<ImageMessage<'a>, VideoEncodeError> {
    /// Wrap any decode-stage error as `VideoEncodeError::Decode`, keeping only its text.
    fn decode_err<E: ToString>(e: E) -> VideoEncodeError {
        VideoEncodeError::Decode(e.to_string())
    }

    match input_schema {
        VideoInputSchema::FoxgloveCompressedImage => {
            <crate::messages::CompressedImage as crate::Decode>::decode(data)
                .map_err(decode_err)
                .and_then(|msg| ImageMessage::try_from(msg).map_err(decode_err))
        }
        VideoInputSchema::FoxgloveRawImage => {
            <crate::messages::RawImage as crate::Decode>::decode(data)
                .map_err(decode_err)
                .and_then(|msg| ImageMessage::try_from(msg).map_err(decode_err))
        }
        #[cfg(feature = "img2yuv-ros1")]
        VideoInputSchema::Ros1CompressedImage => {
            crate::img2yuv::ros1::Ros1CompressedImage::decode(data)
                .map_err(decode_err)
                .and_then(|msg| ImageMessage::try_from(msg).map_err(decode_err))
        }
        #[cfg(feature = "img2yuv-ros1")]
        VideoInputSchema::Ros1Image => crate::img2yuv::ros1::Ros1Image::decode(data)
            .map_err(decode_err)
            .and_then(|msg| ImageMessage::try_from(msg).map_err(decode_err)),
        #[cfg(feature = "img2yuv-ros2")]
        VideoInputSchema::Ros2CompressedImage => {
            crate::img2yuv::ros2::Ros2CompressedImage::decode(data)
                .map_err(decode_err)
                .and_then(|msg| ImageMessage::try_from(msg).map_err(decode_err))
        }
        #[cfg(feature = "img2yuv-ros2")]
        VideoInputSchema::Ros2Image => crate::img2yuv::ros2::Ros2Image::decode(data)
            .map_err(decode_err)
            .and_then(|msg| ImageMessage::try_from(msg).map_err(decode_err)),
    }
}

// Unit tests for schema detection, frame-dimension validation, and timestamp/metadata
// propagation in `build_video_frame`. None of them need a `NativeVideoSource`.
#[cfg(test)]
mod tests {
    use std::borrow::Cow;

    use super::*;
    use crate::img2yuv::{Image, ImageMessage, RawImage, RawImageEncoding};
    use crate::messages::Timestamp;

    /// Build an `ImageMessage` carrying a minimum-size rgb8 image, optionally with a
    /// timestamp. The pixel contents are not relevant to these tests; we only care about
    /// timestamp/metadata propagation.
    fn make_image_message(timestamp: Option<Timestamp>) -> ImageMessage<'static> {
        // 16×16 is exactly `MIN_VIDEO_DIMENSION`, the smallest frame that is accepted.
        let width: u32 = 16;
        let height: u32 = 16;
        // Tightly packed rows: three bytes per rgb8 pixel.
        let stride = width * 3;
        let data = vec![128u8; (stride * height) as usize];
        ImageMessage {
            timestamp,
            frame_id: "camera_optical_frame".to_string(),
            image: Image::Raw(RawImage {
                encoding: RawImageEncoding::Rgb8,
                width,
                height,
                stride,
                data: Cow::Owned(data),
            }),
        }
    }

    /// A timestamp on the image message takes precedence over the log time, for both
    /// `user_timestamp` and `timestamp_us`, and the metadata mirrors the message.
    #[test]
    fn build_video_frame_propagates_image_timestamp_as_user_timestamp() {
        // Pick a non-trivial timestamp so we'd notice if the wrong fallback is used.
        let ts = Timestamp::new(1_700_000_000, 123_456_789);
        let expected_ns = ts.total_nanos();
        let log_time_ns = 42; // intentionally different from `expected_ns`
        let (frame, metadata) =
            build_video_frame(make_image_message(Some(ts)), log_time_ns).expect("build frame");

        let meta = frame.frame_metadata.expect("frame_metadata must be set");
        assert_eq!(
            meta.user_timestamp,
            Some(expected_ns),
            "user_timestamp should be the original RawImage timestamp, not log_time"
        );
        assert_eq!(
            meta.frame_id, None,
            "frame_id is not used for this end-to-end timestamp path"
        );
        // timestamp_us continues to be derived from the same source.
        assert_eq!(frame.timestamp_us, (expected_ns / 1000) as i64);
        assert_eq!(metadata.frame_id, "camera_optical_frame");
        assert_eq!(
            metadata.encoding,
            ImageEncoding::Raw(RawImageEncoding::Rgb8)
        );
    }

    /// Without a timestamp on the image message, the log time is used for both
    /// `user_timestamp` and `timestamp_us`.
    #[test]
    fn build_video_frame_falls_back_to_log_time_when_image_has_no_timestamp() {
        let log_time_ns = 9_876_543_210u64;
        let (frame, _metadata) =
            build_video_frame(make_image_message(None), log_time_ns).expect("build frame");

        let meta = frame.frame_metadata.expect("frame_metadata must be set");
        assert_eq!(
            meta.user_timestamp,
            Some(log_time_ns),
            "without an image timestamp, fall back to the message log time"
        );
        assert_eq!(frame.timestamp_us, (log_time_ns / 1000) as i64);
    }

    // --- Schema detection: one test per supported (encoding, schema name) pair. ---

    #[test]
    fn test_foxglove_compressed_image() {
        assert_eq!(
            detect_video_schema("protobuf", "foxglove.CompressedImage"),
            Some(VideoInputSchema::FoxgloveCompressedImage)
        );
    }

    #[test]
    fn test_foxglove_raw_image() {
        assert_eq!(
            detect_video_schema("protobuf", "foxglove.RawImage"),
            Some(VideoInputSchema::FoxgloveRawImage)
        );
    }

    #[cfg(feature = "img2yuv-ros1")]
    #[test]
    fn test_ros1_compressed_image() {
        assert_eq!(
            detect_video_schema("ros1", "sensor_msgs/CompressedImage"),
            Some(VideoInputSchema::Ros1CompressedImage)
        );
    }

    #[cfg(feature = "img2yuv-ros1")]
    #[test]
    fn test_ros1_image() {
        assert_eq!(
            detect_video_schema("ros1", "sensor_msgs/Image"),
            Some(VideoInputSchema::Ros1Image)
        );
    }

    #[cfg(feature = "img2yuv-ros2")]
    #[test]
    fn test_ros2_compressed_image() {
        assert_eq!(
            detect_video_schema("cdr", "sensor_msgs/msg/CompressedImage"),
            Some(VideoInputSchema::Ros2CompressedImage)
        );
    }

    #[cfg(feature = "img2yuv-ros2")]
    #[test]
    fn test_ros2_image() {
        assert_eq!(
            detect_video_schema("cdr", "sensor_msgs/msg/Image"),
            Some(VideoInputSchema::Ros2Image)
        );
    }

    /// Unsupported encodings and non-image schemas are not video-capable.
    #[test]
    fn test_unknown_schema() {
        assert_eq!(detect_video_schema("json", "SomeCustomType"), None);
        assert_eq!(detect_video_schema("protobuf", "foxglove.Pose"), None);
    }

    // --- Frame dimension validation. ---

    /// Frames below the minimum on either axis are rejected, and the error reports the
    /// original (pre-alignment) dimensions.
    #[test]
    fn validate_frame_dimensions_rejects_too_small() {
        // Both axes below minimum
        let err = validate_frame_dimensions(15, 15).unwrap_err();
        assert!(matches!(
            err,
            VideoEncodeError::TooSmall {
                width: 15,
                height: 15
            }
        ));

        // Width below minimum, height at minimum
        let err = validate_frame_dimensions(15, 16).unwrap_err();
        assert!(matches!(
            err,
            VideoEncodeError::TooSmall {
                width: 15,
                height: 16
            }
        ));

        // Height below minimum, width at minimum
        let err = validate_frame_dimensions(16, 15).unwrap_err();
        assert!(matches!(
            err,
            VideoEncodeError::TooSmall {
                width: 16,
                height: 15
            }
        ));

        // Odd dimension that rounds down below minimum (17 & !1 == 16, but 15 & !1 == 14)
        let err = validate_frame_dimensions(15, 17).unwrap_err();
        assert!(matches!(
            err,
            VideoEncodeError::TooSmall {
                width: 15,
                height: 17
            }
        ));
    }

    /// Frames at or above the minimum are accepted and returned even-aligned.
    #[test]
    fn validate_frame_dimensions_accepts_at_minimum() {
        // Exactly at the minimum boundary
        assert_eq!(validate_frame_dimensions(16, 16).unwrap(), (16, 16));

        // Just above minimum (odd values get even-aligned down)
        assert_eq!(validate_frame_dimensions(17, 17).unwrap(), (16, 16));

        // Larger values
        assert_eq!(validate_frame_dimensions(1920, 1080).unwrap(), (1920, 1080));
    }

    /// A dimension that is zero after even-alignment yields `ZeroSized`, not `TooSmall`.
    #[test]
    fn validate_frame_dimensions_rejects_zero_after_alignment() {
        // A 1×1 image rounds to 0×0
        let err = validate_frame_dimensions(1, 1).unwrap_err();
        assert!(matches!(
            err,
            VideoEncodeError::YuvConversion(crate::img2yuv::Error::ZeroSized)
        ));

        // Width 0
        let err = validate_frame_dimensions(0, 16).unwrap_err();
        assert!(matches!(
            err,
            VideoEncodeError::YuvConversion(crate::img2yuv::Error::ZeroSized)
        ));

        // Height 0
        let err = validate_frame_dimensions(16, 0).unwrap_err();
        assert!(matches!(
            err,
            VideoEncodeError::YuvConversion(crate::img2yuv::Error::ZeroSized)
        ));
    }
}