re_types/
image.rs

1//! Image-related utilities.
2
3use arrow::buffer::ScalarBuffer;
4use re_types_core::{Archetype as _, ArchetypeName};
5use smallvec::{SmallVec, smallvec};
6
7use crate::{
8    archetypes,
9    datatypes::{Blob, ChannelDatatype, TensorBuffer, TensorData},
10};
11
12#[cfg(feature = "image")]
13use crate::datatypes::ImageFormat;
14
15// ----------------------------------------------------------------------------
16
/// The kind of image data, either color, segmentation, or depth image.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum ImageKind {
    /// A normal grayscale or color image ([`archetypes::Image`]).
    Color,

    /// A depth map ([`archetypes::DepthImage`]).
    Depth,

    /// A segmentation image ([`archetypes::SegmentationImage`]).
    ///
    /// The data is a [`crate::components::ClassId`] which should be
    /// looked up using the appropriate [`crate::components::AnnotationContext`].
    Segmentation,
}
32
33impl ImageKind {
34    /// Determine the kind of image from an image archetype name.
35    pub fn from_archetype_name(archetype_name: Option<ArchetypeName>) -> Self {
36        if archetype_name == Some(archetypes::SegmentationImage::name()) {
37            Self::Segmentation
38        } else if archetype_name == Some(archetypes::DepthImage::name()) {
39            Self::Depth
40        } else {
41            // TODO(#9046): Note that currently all encoded images are treated as color images.
42            Self::Color
43        }
44    }
45}
46
impl re_byte_size::SizeBytes for ImageKind {
    /// A fieldless `Copy` enum owns no heap memory.
    fn heap_size_bytes(&self) -> u64 {
        0
    }

    /// Plain-old-data: all state lives inline in the discriminant.
    fn is_pod() -> bool {
        true
    }
}
56
57// ----------------------------------------------------------------------------
58
/// Errors when converting images from the [`image`] crate to an [`archetypes::Image`].
//
// Derives `Clone` so the error can be cheaply duplicated and stored.
#[cfg(feature = "image")]
#[derive(thiserror::Error, Clone, Debug)]
pub enum ImageConversionError {
    /// Unknown color type from the image crate.
    ///
    /// This should only happen if you are using a newer `image` crate than the one Rerun was built for,
    /// because `image` can add new color types without it being a breaking change,
    /// so we cannot exhaustively match on all color types.
    #[error(
        "Unsupported color type: {0:?}. We support 8-bit, 16-bit, and f32 images, and RGB, RGBA, Luminance, and Luminance-Alpha."
    )]
    UnsupportedImageColorType(image::ColorType),
}
73
/// Errors when loading image files.
//
// Non-`Clone` source errors (`image::ImageError`, `tiff::TiffError`, `std::io::Error`)
// are wrapped in `Arc` so this enum can still derive `Clone`.
#[cfg(feature = "image")]
#[derive(thiserror::Error, Clone, Debug)]
pub enum ImageLoadError {
    /// e.g. failed to decode a JPEG file.
    #[error(transparent)]
    Image(std::sync::Arc<image::ImageError>),

    /// e.g. failed to decode tiff image.
    #[error(transparent)]
    Tiff(std::sync::Arc<tiff::TiffError>),

    /// e.g. failed to find a file on disk.
    #[error("Failed to load file: {0}")]
    ReadError(std::sync::Arc<std::io::Error>),

    /// Failure to convert the loaded image to a [`archetypes::Image`].
    #[error(transparent)]
    ImageConversionError(#[from] ImageConversionError),

    /// The encountered MIME type is not supported for decoding images.
    #[error("MIME type '{0}' is not supported for images")]
    UnsupportedMimeType(String),

    /// Failed to read the MIME type from inspecting the image data blob.
    #[error("Could not detect MIME type from the image contents")]
    UnrecognizedMimeType,
}
102
// The `From` impls below wrap the source error in an `Arc`, keeping
// `ImageLoadError` cloneable while still supporting the `?` operator.

#[cfg(feature = "image")]
impl From<image::ImageError> for ImageLoadError {
    #[inline]
    fn from(err: image::ImageError) -> Self {
        Self::Image(std::sync::Arc::new(err))
    }
}

#[cfg(feature = "image")]
impl From<tiff::TiffError> for ImageLoadError {
    #[inline]
    fn from(err: tiff::TiffError) -> Self {
        Self::Tiff(std::sync::Arc::new(err))
    }
}

#[cfg(feature = "image")]
impl From<std::io::Error> for ImageLoadError {
    #[inline]
    fn from(err: std::io::Error) -> Self {
        Self::ReadError(std::sync::Arc::new(err))
    }
}
126
127// ----------------------------------------------------------------------------
128
/// Error returned when trying to interpret a tensor as an image.
//
// Generic over the source type `T` so the conversion error of any
// `TryInto<TensorData>` source can be reported verbatim.
#[derive(thiserror::Error, Clone, Debug)]
pub enum ImageConstructionError<T: TryInto<TensorData>>
where
    T::Error: std::error::Error,
{
    /// Could not convert source to [`TensorData`].
    #[error("Could not convert source to TensorData: {0}")]
    TensorDataConversion(T::Error),

    /// The tensor did not have the right shape for an image (e.g. had too many dimensions).
    #[error("Could not create Image from TensorData with shape {0:?}")]
    BadImageShape(ScalarBuffer<u64>),

    /// Happens if you try to cast `NV12` or `YUY2` to a depth image or segmentation image.
    #[error(
        "Chroma downsampling is not supported for this image type (e.g. DepthImage or SegmentationImage)"
    )]
    ChromaDownsamplingNotSupported,
}
149
150/// Converts it to what is useful for the image API.
151pub fn blob_and_datatype_from_tensor(tensor_buffer: TensorBuffer) -> (Blob, ChannelDatatype) {
152    match tensor_buffer {
153        TensorBuffer::U8(buffer) => (Blob(buffer), ChannelDatatype::U8),
154        TensorBuffer::U16(buffer) => (Blob(cast_to_u8(&buffer)), ChannelDatatype::U16),
155        TensorBuffer::U32(buffer) => (Blob(cast_to_u8(&buffer)), ChannelDatatype::U32),
156        TensorBuffer::U64(buffer) => (Blob(cast_to_u8(&buffer)), ChannelDatatype::U64),
157        TensorBuffer::I8(buffer) => (Blob(cast_to_u8(&buffer)), ChannelDatatype::I8),
158        TensorBuffer::I16(buffer) => (Blob(cast_to_u8(&buffer)), ChannelDatatype::I16),
159        TensorBuffer::I32(buffer) => (Blob(cast_to_u8(&buffer)), ChannelDatatype::I32),
160        TensorBuffer::I64(buffer) => (Blob(cast_to_u8(&buffer)), ChannelDatatype::I64),
161        TensorBuffer::F16(buffer) => (Blob(cast_to_u8(&buffer)), ChannelDatatype::F16),
162        TensorBuffer::F32(buffer) => (Blob(cast_to_u8(&buffer)), ChannelDatatype::F32),
163        TensorBuffer::F64(buffer) => (Blob(cast_to_u8(&buffer)), ChannelDatatype::F64),
164    }
165}
166
167/// Reinterpret POD (plain-old-data) types to `u8`.
168#[inline]
169pub fn cast_to_u8<T: arrow::datatypes::ArrowNativeType>(
170    buffer: &arrow::buffer::ScalarBuffer<T>,
171) -> ScalarBuffer<u8> {
172    arrow::buffer::ScalarBuffer::new(buffer.inner().clone(), 0, buffer.inner().len())
173}
174
175// ----------------------------------------------------------------------------
176
/// Types that implement this can be used as image channel types.
///
/// Maps a Rust element type to the corresponding [`ChannelDatatype`].
///
/// Implemented for `u8, u16, u32, u64, i8, i16, i32, i64, f16, f32, f64`.
pub trait ImageChannelType: bytemuck::Pod {
    /// The [`ChannelDatatype`] for this type.
    const CHANNEL_TYPE: ChannelDatatype;
}
184
// One impl per supported channel type; each simply names its `ChannelDatatype`.

impl ImageChannelType for u8 {
    const CHANNEL_TYPE: ChannelDatatype = ChannelDatatype::U8;
}

impl ImageChannelType for u16 {
    const CHANNEL_TYPE: ChannelDatatype = ChannelDatatype::U16;
}

impl ImageChannelType for u32 {
    const CHANNEL_TYPE: ChannelDatatype = ChannelDatatype::U32;
}

impl ImageChannelType for u64 {
    const CHANNEL_TYPE: ChannelDatatype = ChannelDatatype::U64;
}

impl ImageChannelType for i8 {
    const CHANNEL_TYPE: ChannelDatatype = ChannelDatatype::I8;
}

impl ImageChannelType for i16 {
    const CHANNEL_TYPE: ChannelDatatype = ChannelDatatype::I16;
}

impl ImageChannelType for i32 {
    const CHANNEL_TYPE: ChannelDatatype = ChannelDatatype::I32;
}

impl ImageChannelType for i64 {
    const CHANNEL_TYPE: ChannelDatatype = ChannelDatatype::I64;
}

impl ImageChannelType for half::f16 {
    const CHANNEL_TYPE: ChannelDatatype = ChannelDatatype::F16;
}

impl ImageChannelType for f32 {
    const CHANNEL_TYPE: ChannelDatatype = ChannelDatatype::F32;
}

impl ImageChannelType for f64 {
    const CHANNEL_TYPE: ChannelDatatype = ChannelDatatype::F64;
}
228
229// ----------------------------------------------------------------------------
230
231/// Returns the indices of an appropriate set of dimensions.
232///
233/// Ignores leading and trailing 1-sized dimensions.
234///
235/// For instance: `[1, 480, 640, 3, 1]` would return `[1, 2, 3]`,
236/// the indices of the `[480, 640, 3]` dimensions.
237pub fn find_non_empty_dim_indices(shape: &[u64]) -> SmallVec<[usize; 4]> {
238    match shape.len() {
239        0 => return smallvec![],
240        1 => return smallvec![0],
241        2 => return smallvec![0, 1],
242        _ => {}
243    }
244
245    // Find a range of non-unit dimensions.
246    // [1, 1, 1, 480, 640, 3, 1, 1, 1]
247    //           ^---------^   goal range
248
249    let mut non_unit_indices = shape
250        .iter()
251        .enumerate()
252        .filter_map(|(ind, &dim)| if dim != 1 { Some(ind) } else { None });
253
254    // 0 is always a valid index.
255    let mut min = non_unit_indices.next().unwrap_or(0);
256    let mut max = non_unit_indices.next_back().unwrap_or(min);
257
258    // Note, these are inclusive ranges.
259
260    // First, empty inner dimensions are more likely to be intentional than empty outer dimensions.
261    // Grow to a min-size of 2.
262    // (1x1x3x1) -> 3x1 mono rather than 1x1x3 RGB
263    while max == min && max + 1 < shape.len() {
264        max += 1;
265    }
266
267    // Next, consider empty outer dimensions if we still need them.
268    // Grow up to 3 if the inner dimension is already 3 or 4 (Color Images)
269    // Otherwise, only grow up to 2.
270    // (1x1x3) -> 1x1x3 rgb rather than 1x3 mono
271    let target_len = match shape[max] {
272        3 | 4 => 3,
273        _ => 2,
274    };
275
276    while max - min + 1 < target_len && 0 < min {
277        min -= 1;
278    }
279
280    (min..=max).collect()
281}
282
#[test]
fn test_find_non_empty_dim_indices() {
    // (input shape, expected kept indices)
    let cases: &[(&[u64], &[usize])] = &[
        (&[], &[]),
        (&[0], &[0]),
        (&[1], &[0]),
        (&[100], &[0]),
        (&[480, 640], &[0, 1]),
        (&[480, 640, 1], &[0, 1]),
        (&[480, 640, 1, 1], &[0, 1]),
        (&[480, 640, 3], &[0, 1, 2]),
        (&[1, 480, 640], &[1, 2]),
        (&[1, 480, 640, 3, 1], &[1, 2, 3]),
        (&[1, 3, 480, 640, 1], &[1, 2, 3]),
        (&[1, 1, 480, 640], &[2, 3]),
        (&[1, 1, 480, 640, 1, 1], &[2, 3]),
        (&[1, 1, 3], &[0, 1, 2]),
        (&[1, 1, 3, 1], &[2, 3]),
    ];

    for &(shape, expected) in cases {
        let got = find_non_empty_dim_indices(shape);
        assert_eq!(
            got.as_slice(),
            expected,
            "Input: {shape:?}, got {got:?}, expected {expected:?}"
        );
    }
}
311
312// ----------------------------------------------------------------------------
313
// TODO(andreas): Expose this in the API?
/// Yuv matrix coefficients that determine how a YUV image is meant to be converted to RGB.
///
/// A rigorous definition of the yuv conversion matrix would additionally require to define
/// the transfer characteristics & color primaries of the resulting RGB space.
///
/// However, at this point we generally assume that no further processing is needed after the transform.
/// This is acceptable for most non-HDR content because of the following properties of `Bt709`/`Bt601`/ sRGB:
/// * Bt709 & sRGB primaries are practically identical
/// * Bt601 PAL & Bt709 color primaries are the same (with some slight differences for Bt709 NTSC)
/// * Bt709 & sRGB transfer function are almost identical (and the difference is widely ignored)
///
/// (sources: <https://en.wikipedia.org/wiki/Rec._709>, <https://en.wikipedia.org/wiki/Rec._601>)
/// …which means for the moment we pretty much only care about the (actually quite) different YUV conversion matrices!
#[derive(Clone, Copy, Debug)]
pub enum YuvMatrixCoefficients {
    /// BT.601 (aka. SDTV, aka. Rec.601)
    ///
    /// Wiki: <https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion/>
    Bt601,

    /// BT.709 (aka. HDTV, aka. Rec.709)
    ///
    /// Wiki: <https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.709_conversion/>
    ///
    /// These are the same primaries we usually assume and use for all of Rerun's rendering
    /// since they are the same primaries used by sRGB.
    /// <https://en.wikipedia.org/wiki/Rec._709#Relationship_to_sRGB/>
    /// The OETF/EOTF function (<https://en.wikipedia.org/wiki/Transfer_functions_in_imaging>) is different,
    /// but for all other purposes they are the same.
    /// (The only reason for us to convert to optical units ("linear" instead of "gamma") is for
    /// lighting computation & tonemapping where we typically start out with sRGB anyways!)
    Bt709,
    //
    // Not yet supported. These vary a lot more from the other two!
    //
    // /// BT.2020 (aka. PQ, aka. Rec.2020)
    // ///
    // /// Wiki: <https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.2020_conversion/>
    // BT2020_ConstantLuminance,
    // BT2020_NonConstantLuminance,
}

/// Returns sRGB from YUV color.
///
/// This conversion mirrors the function of the same name in `yuv_converter.wgsl`
///
/// Specifying the color standard should be exposed in the future [#3541](https://github.com/rerun-io/rerun/pull/3541)
pub fn rgb_from_yuv(
    y: u8,
    u: u8,
    v: u8,
    limited_range: bool,
    coefficients: YuvMatrixCoefficients,
) -> [u8; 3] {
    // Normalize: y to [0, 1], u/v to roughly [-0.5, 0.5].
    let (y, u, v) = if limited_range {
        // Via https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion:
        // "The resultant signals range from 16 to 235 for Y′ (Cb and Cr range from 16 to 240);
        // the values from 0 to 15 are called footroom, while the values from 236 to 255 are called headroom."
        (
            (y as f32 - 16.0) / 219.0,
            (u as f32 - 128.0) / 224.0,
            (v as f32 - 128.0) / 224.0,
        )
    } else {
        (
            y as f32 / 255.0,
            (u as f32 - 128.0) / 255.0,
            (v as f32 - 128.0) / 255.0,
        )
    };

    let (r, g, b) = match coefficients {
        // BT.601 (aka. SDTV, aka. Rec.601). wiki: https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion
        YuvMatrixCoefficients::Bt601 => (
            y + 1.402 * v,
            y - 0.344 * u - 0.714 * v,
            y + 1.772 * u,
        ),

        // BT.709 (aka. HDTV, aka. Rec.709). wiki: https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.709_conversion
        YuvMatrixCoefficients::Bt709 => (
            y + 1.575 * v,
            y - 0.187 * u - 0.468 * v,
            y + 1.856 * u,
        ),
    };

    // `as u8` saturates float casts, so out-of-gamut values clamp to [0, 255].
    [(255.0 * r) as u8, (255.0 * g) as u8, (255.0 * b) as u8]
}
407
408// ----------------------------------------------------------------------------
409
/// Decode a TIFF byte slice into a [`Blob`] and an [`ImageFormat`].
#[cfg(feature = "image")]
pub fn blob_and_format_from_tiff(bytes: &[u8]) -> Result<(Blob, ImageFormat), ImageLoadError> {
    use tiff::decoder::{Decoder, DecodingResult};

    // `?` converts `tiff::TiffError` into `ImageLoadError` via `From`.
    let mut decoder = Decoder::new(std::io::Cursor::new(bytes))?;
    let decoded = decoder.read_image()?;

    // View the decoded pixels as raw bytes and record their element type.
    let (pixel_bytes, channel_datatype): (&[u8], ChannelDatatype) = match &decoded {
        DecodingResult::U8(data) => (bytemuck::cast_slice(data), ChannelDatatype::U8),
        DecodingResult::U16(data) => (bytemuck::cast_slice(data), ChannelDatatype::U16),
        DecodingResult::U32(data) => (bytemuck::cast_slice(data), ChannelDatatype::U32),
        DecodingResult::U64(data) => (bytemuck::cast_slice(data), ChannelDatatype::U64),
        DecodingResult::I8(data) => (bytemuck::cast_slice(data), ChannelDatatype::I8),
        DecodingResult::I16(data) => (bytemuck::cast_slice(data), ChannelDatatype::I16),
        DecodingResult::I32(data) => (bytemuck::cast_slice(data), ChannelDatatype::I32),
        DecodingResult::I64(data) => (bytemuck::cast_slice(data), ChannelDatatype::I64),
        DecodingResult::F32(data) => (bytemuck::cast_slice(data), ChannelDatatype::F32),
        DecodingResult::F64(data) => (bytemuck::cast_slice(data), ChannelDatatype::F64),
    };

    let (width, height) = decoder.dimensions()?;

    Ok((
        Blob::from(pixel_bytes),
        ImageFormat {
            width,
            height,
            channel_datatype: Some(channel_datatype),
            // Raw channel data, not a packed pixel format or explicit color model.
            pixel_format: None,
            color_model: None,
        },
    ))
}