cog3pio 0.1.0 - Docs.rs

use std::ffi::c_void;
use std::sync::Arc;

use bytes::Bytes;
use cudarc::driver::{CudaSlice, CudaStream, CudaView};
use dlpark::SafeManagedTensorVersioned;
use dlpark::ffi::{DataType, DataTypeCode};
use dlpark::traits::{InferDataType, TensorView};
use exn::{OptionExt, ResultExt};
use geo::AffineTransform;
use ndarray::{Array, Array1};
use nvtiff_sys::result::{NvTiffError, NvTiffStatusError};
use nvtiff_sys::{
    NvTiffResultCheck, nvtiffDecodeCheckSupported, nvtiffDecodeImage, nvtiffDecodeParams,
    nvtiffDecodeParamsCreate, nvtiffDecoder, nvtiffDecoderCreateSimple, nvtiffImageInfo,
    nvtiffSampleFormat, nvtiffStatus, nvtiffStream, nvtiffStreamCreate, nvtiffStreamGetImageInfo,
    nvtiffStreamGetNumImages, nvtiffStreamGetTagValue, nvtiffStreamParse, nvtiffTag,
};

use crate::traits::Transform;

/// Result alias used by this module: an [`exn::Result`] whose error type is
/// [`NvTiffError`].
type NvTiffResult<T> = exn::Result<T, NvTiffError>;

/// Cloud-optimized GeoTIFF reader using [`nvTIFF`](https://developer.nvidia.com/nvtiff)
///
/// # Examples
///
/// ## DLPack
///
/// Retrieve the bytes of a GeoTIFF file via the [`object_store`] crate, and set up a
/// CUDA stream via the [`cudarc`] crate. Pass the bytes into the
/// [`CudaCogReader::new`](crate::io::nvtiff::CudaCogReader::new) method to instantiate
/// a [`CudaCogReader`] struct, then pass the CUDA stream to
/// [`.dlpack()`](crate::io::nvtiff::CudaCogReader::dlpack) to get a
/// [`dlpark::SafeManagedTensorVersioned`] output.
///
/// ```rust
/// use std::sync::Arc;
///
/// use bytes::Bytes;
/// use cog3pio::io::nvtiff::CudaCogReader;
/// use cudarc::driver::{CudaContext, CudaStream};
/// use dlpark::SafeManagedTensorVersioned;
/// use dlpark::ffi::DataType;
/// use dlpark::prelude::TensorView;
/// use object_store::path::Path;
/// use object_store::{GetResult, ObjectStore, parse_url};
/// use tokio;
/// use url::Url;
///
/// #[tokio::main]
/// async fn main() {
///     let cog_url: &str =
///         "https://github.com/cogeotiff/rio-tiler/raw/7.9.0/tests/fixtures/cog_nodata_float_nan.tif";
///     let tif_url: Url = Url::parse(cog_url).unwrap();
///     let (store, location): (Box<dyn ObjectStore>, Path) = parse_url(&tif_url).unwrap();
///
///     let result: GetResult = store.get(&location).await.unwrap();
///     let bytes: Bytes = result.bytes().await.unwrap();
///
///     let ctx: Arc<CudaContext> = cudarc::driver::CudaContext::new(0).unwrap(); // Set on GPU:0
///     let cuda_stream: Arc<CudaStream> = ctx.per_thread_stream();
///
///     // Read GeoTIFF into a dlpark::versioned::SafeManagedTensorVersioned
///     let cog = CudaCogReader::new(&bytes).unwrap();
///     let tensor: SafeManagedTensorVersioned = cog.dlpack(&cuda_stream).unwrap();
///     assert_eq!(tensor.shape(), [7088886]); // [1, 2667, 2658]
///     assert_eq!(tensor.data_type(), &DataType::F32);
/// }
/// ```
///
/// Note that the DLPack output is a flattened 1D array in row-major order (i.e.
/// rows-first, columns-next). Common dtypes such as uint (u8/u16/u32/u64), int
/// (i8/i16/i32/i64) and float (f32/f64) should be mostly supported. Other dtypes such
/// as f16, complex32, etc and certain compression schemes are not supported yet.
pub struct CudaCogReader {
    /// nvTIFF stream handle, created and parsed in [`CudaCogReader::new`].
    // NOTE(review): no `Drop` impl is visible in this file, so this handle is presumably
    // never released — confirm whether cleanup happens elsewhere.
    tiff_stream: *mut nvtiffStream,
    /// Metadata of the first image (index 0) in the TIFF, as reported by nvTIFF.
    image_info: nvtiffImageInfo,
}

impl CudaCogReader {
    /// Create a new Cloud-optimized GeoTIFF decoder that decodes from a CUDA stream
    /// buffer
    ///
    /// # Errors
    /// Will return [`nvtiff_sys::result::NvTiffError::StatusError`] if nvTIFF failed to
    /// parse the TIFF data or metadata from the byte stream buffer.
    pub fn new(byte_stream: &Bytes) -> NvTiffResult<Self> {
        // Step 0: Init TIFF stream on host (CPU)
        let mut host_stream = std::mem::MaybeUninit::uninit();
        let mut tiff_stream: *mut nvtiffStream = host_stream.as_mut_ptr();

        let status_cpustream: nvtiffStatus::Type =
            unsafe { nvtiffStreamCreate(&raw mut tiff_stream) };
        // dbg!(status_cpustream);
        status_cpustream.result()?;

        // Step 1: Parse the TIFF data from byte stream buffer
        let status_parse: u32 =
            unsafe { nvtiffStreamParse(byte_stream.as_ptr(), byte_stream.len(), tiff_stream) };
        // dbg!(status_parse);
        status_parse.result()?;

        // Step 2a: Extract file-level metadata information from the TIFF stream
        let mut num_images: u32 = 0;
        let status_numimages: u32 =
            unsafe { nvtiffStreamGetNumImages(tiff_stream, &raw mut num_images) };
        // dbg!(status_numimages);
        status_numimages.result()?;

        // Step 2b: Extract image-level metadata information from the TIFF stream
        let mut image_info = nvtiffImageInfo::default();
        let status_imageinfo: u32 = unsafe {
            nvtiffStreamGetImageInfo(
                tiff_stream,
                0, // only decode first image in TIFF for now.
                &raw mut image_info,
            )
        };
        // dbg!(status_imageinfo);
        // dbg!(image_info);
        status_imageinfo.result()?;

        Ok(Self {
            tiff_stream,
            image_info,
        })
    }

    /// Decode GeoTIFF image to a [`dlpark::SafeManagedTensorVersioned`]
    ///
    /// # Errors
    ///
    /// Will raise [`nvtiff_sys::result::NvTiffError::StatusError`] if decoding failed
    /// due to e.g. TIFF stream not being supported by nvTIFF, missing
    /// nvCOMP/nvJPEG/nvJPEG2K libraries, etc.
    ///
    /// # Panics
    /// Will panic if [`CudaStream::alloc_zeros`] failed to allocate bytes on CUDA
    /// device memory, usually due to
    /// [`cudarc::driver::sys::cudaError_enum::CUDA_ERROR_OUT_OF_MEMORY`]
    pub fn dlpack(&self, stream: &Arc<CudaStream>) -> NvTiffResult<SafeManagedTensorVersioned> {
        // Step 1a: Init CUDA stream on device (GPU)
        let cuda_stream: *mut nvtiff_sys::CUstream_st = stream.cu_stream().cast::<_>();

        // Step 1b: Init decoder handle
        let mut decoder_handle = std::mem::MaybeUninit::zeroed();
        let mut nvtiff_decoder: *mut nvtiffDecoder = decoder_handle.as_mut_ptr();

        let status_decoder: u32 =
            unsafe { nvtiffDecoderCreateSimple(&raw mut nvtiff_decoder, cuda_stream) };
        // dbg!(status_decoder);
        status_decoder.result()?;

        // Step 2a: Determine dtype from sample_format and bits_per_pixel
        // Assume that all samples/bands have the same dtype
        let sample_format: u32 = self.image_info.sample_format[0];
        let dtype_code: DataTypeCode = match sample_format {
            nvtiffSampleFormat::NVTIFF_SAMPLEFORMAT_UINT => DataTypeCode::UInt,
            nvtiffSampleFormat::NVTIFF_SAMPLEFORMAT_INT => DataTypeCode::Int,
            nvtiffSampleFormat::NVTIFF_SAMPLEFORMAT_IEEEFP => DataTypeCode::Float,
            _ => unimplemented!(
                "non uint/int/float dtypes (e.g. complex int/float) not implemented yet"
            ),
        };
        let bits: u16 = self.image_info.bits_per_pixel / self.image_info.samples_per_pixel;
        let dtype: DataType = DataType {
            code: dtype_code,
            bits: u8::try_from(bits)
                .or_raise(|| NvTiffError::StatusError(NvTiffStatusError::TiffNotSupported))?,
            lanes: 1,
        };
        let bytes_per_pixel: usize = self.image_info.bits_per_pixel as usize / 8;

        // Step 2b: Allocate memory on device
        let num_bytes: usize = self.image_info.image_width as usize // Width
            * self.image_info.image_height as usize // Height
            * bytes_per_pixel; // Bytes per pixel (e.g. 4 bytes for f32)
        // dbg!(num_bytes);
        let cuslice: CudaSlice<u8> = stream.alloc_zeros::<u8>(num_bytes).unwrap_or_else(|err| {
            panic!("Failed to allocate {num_bytes} bytes on CUDA device: {err}")
        });

        // Step 3a: Create instance of decode parameters handle
        let mut params = std::mem::MaybeUninit::zeroed();
        let mut decode_params: *mut nvtiffDecodeParams = params.as_mut_ptr();
        let status_param: u32 = unsafe { nvtiffDecodeParamsCreate(&raw mut decode_params) };
        // dbg!(status_param);
        status_param.result()?;

        // Step 3b: Check if image is supported first
        let status_check: u32 = unsafe {
            nvtiffDecodeCheckSupported(
                self.tiff_stream, // TODO keep lifetime on this?
                nvtiff_decoder,
                decode_params,
                0, // image_id
            )
        };
        // dbg!(status_check); // 4: NVTIFF_STATUS_TIFF_NOT_SUPPORTED; 2: NVTIFF_STATUS_INVALID_PARAMETER
        status_check.result()?;

        // Step 3c: Prepare DLPack tensor container
        // Transmute from u8 to actual dtype before putting into DLPack tensor
        let len_elem: usize = num_bytes / (dtype.bits as usize / 8);
        let tensor: SafeManagedTensorVersioned = match dtype {
            DataType::U8 => SafeManagedTensorVersioned::new(cuslice)
                .or_raise(|| NvTiffError::StatusError(NvTiffStatusError::AllocatorFailure))?,
            DataType::U16 => cudaslice_to_tensor::<u16>(cuslice, len_elem)?,
            DataType::U32 => cudaslice_to_tensor::<u32>(cuslice, len_elem)?,
            DataType::U64 => cudaslice_to_tensor::<u64>(cuslice, len_elem)?,
            DataType::I8 => cudaslice_to_tensor::<i8>(cuslice, len_elem)?,
            DataType::I16 => cudaslice_to_tensor::<i16>(cuslice, len_elem)?,
            DataType::I32 => cudaslice_to_tensor::<i32>(cuslice, len_elem)?,
            DataType::I64 => cudaslice_to_tensor::<i64>(cuslice, len_elem)?,
            DataType::F32 => cudaslice_to_tensor::<f32>(cuslice, len_elem)?,
            DataType::F64 => cudaslice_to_tensor::<f64>(cuslice, len_elem)?,
            dtype => {
                unimplemented!("Converting {dtype:?} into DLPack not supported yet.")
            }
        };

        // Step 3c: Do the TIFF decoding to allocated device memory
        let status_decode: u32 = unsafe {
            nvtiffDecodeImage(
                self.tiff_stream,
                nvtiff_decoder,
                decode_params,
                0, // image_id
                tensor.data_ptr(),
                cuda_stream,
            )
        };
        // dbg!(status_decode); // 4: NVTIFF_STATUS_TIFF_NOT_SUPPORTED; 8: NVTIFF_STATUS_INTERNAL_ERROR
        status_decode.result()?;

        // dbg!(self.tiff_stream); // TODO need this to avoid panic on status_check/status_decode?

        Ok(tensor)
    }
}

impl Transform for &CudaCogReader {
    type Err = NvTiffError;
    /// Affine transformation for 2D matrix extracted from TIFF tag metadata, used to
    /// transform image pixel (row, col) coordinates to and from geographic/projected
    /// (x, y) coordinates.
    ///
    /// ```text
    /// | x' |   | a b c | | x |
    /// | y' | = | d e f | | y |
    /// | 1  |   | 0 0 1 | | 1 |
    /// ```
    ///
    /// where (`x'` and `y'`) are world coordinates, and (`x`, `y`) are the pixel's
    /// image coordinates. Letters a to f represent:
    ///
    /// - `a` - width of a pixel (x-resolution)
    /// - `b` - row rotation (typically zero)
    /// - `c` - x-coordinate of the *center* of the upper-left pixel (x-origin)
    /// - `d` - column rotation (typically zero)
    /// - `e` - height of a pixel (y-resolution, typically negative)
    /// - `f` - y-coordinate of the *center* of the upper-left pixel (y-origin)
    ///
    /// References:
    /// - <https://docs.ogc.org/is/19-008r4/19-008r4.html#_coordinate_transformations>
    ///
    /// # Errors
    ///
    /// Will return [`NvTiffError::StatusError`] if the Affine transformation matrix
    /// cannot be created from the underlying TIFF tag metadata, due to invalid or
    /// unimplemented parsing of [`nvtiffTag::NVTIFF_TAG_MODEL_PIXEL_SCALE`],
    /// [`nvtiffTag::NVTIFF_TAG_MODEL_TIE_POINT`] or
    /// [`nvtiffTag::NVTIFF_TAG_MODEL_TRANSFORMATION`].
    fn transform(self) -> NvTiffResult<AffineTransform<f64>> {
        // Get x and y axis rotation (not yet implemented)
        let transformation = &mut [f64::NAN; 16];
        let status_transformationinfo: u32 = unsafe {
            nvtiffStreamGetTagValue(
                self.tiff_stream,
                0, // image_id
                nvtiffTag::NVTIFF_TAG_MODEL_TRANSFORMATION,
                transformation.as_mut_ptr().cast::<c_void>(),
                16,
            )
        };
        // dbg!(status_transformationinfo);
        let (x_rotation, y_rotation): (f64, f64) = match status_transformationinfo.result() {
            Ok(()) => {
                unimplemented!("ModelTransformationTag and/or non-zero rotation not supported yet")
            }
            Err(_) => (0.0, 0.0),
        };

        // Get pixel size in x and y direction
        let pixel_scale = &mut [f64::NAN; 3];
        let status_pixelscaleinfo: u32 = unsafe {
            nvtiffStreamGetTagValue(
                self.tiff_stream,
                0, // image_id
                nvtiffTag::NVTIFF_TAG_MODEL_PIXEL_SCALE,
                pixel_scale.as_mut_ptr().cast::<c_void>(),
                3,
            )
        };
        // dbg!(status_pixelscaleinfo);
        status_pixelscaleinfo.result()?;
        let [x_scale, y_scale, _z_scale] = *pixel_scale;

        // Get x and y coordinates of upper left pixel
        let tie_points = &mut [f64::NAN; 6];
        let status_tiepointinfo: u32 = unsafe {
            nvtiffStreamGetTagValue(
                self.tiff_stream,
                0, // image_id
                nvtiffTag::NVTIFF_TAG_MODEL_TIE_POINT,
                tie_points.as_mut_ptr().cast::<c_void>(),
                6,
            )
        };
        // dbg!(status_tiepointinfo);
        status_tiepointinfo.result()?;
        let [_i, _j, _k, x_origin, y_origin, _z_origin] = *tie_points;

        // Create affine transformation matrix
        let transform = AffineTransform::new(
            x_scale, x_rotation, x_origin, y_rotation, -y_scale, y_origin,
        );

        Ok(transform)
    }

    /// Get list of x and y coordinates
    ///
    /// Returns one x-coordinate per image column and one y-coordinate per image row,
    /// each located at the center of the pixel.
    ///
    /// Determined based on an [`AffineTransform`] matrix built from
    /// [`nvtiffTag::NVTIFF_TAG_MODEL_PIXEL_SCALE`] and
    /// [`nvtiffTag::NVTIFF_TAG_MODEL_TIE_POINT`]. Note that non-zero
    /// rotation (set by [`nvtiffTag::NVTIFF_TAG_MODEL_TRANSFORMATION`]) is currently
    /// unsupported.
    ///
    /// # Errors
    ///
    /// Will return [`NvTiffStatusError::TagNotFound`] if the TIFF file is
    /// missing tags required to build an Affine transformation matrix.
    fn xy_coords(self) -> NvTiffResult<(Array1<f64>, Array1<f64>)> {
        let transform: AffineTransform = self.transform()?;

        // Get spatial resolution in x and y dimensions
        let x_res: f64 = transform.a();
        let y_res: f64 = transform.e();

        // Get xy coordinate of the center of the top left pixel
        let x_origin: f64 = transform.xoff() + x_res / 2.0;
        let y_origin: f64 = transform.yoff() + y_res / 2.0;

        // Get number of pixels along the x and y dimensions
        let x_pixels: u32 = self.image_info.image_width;
        let y_pixels: u32 = self.image_info.image_height;

        // Get array of x-coordinates and y-coordinates. The integer pixel count drives
        // the iteration, so the array lengths always equal the image width/height; a
        // length derived from `(end - start) / step` can be off by one after float
        // rounding.
        let x_coords: Array1<f64> =
            Array::from_iter((0..x_pixels).map(|col| x_origin + x_res * f64::from(col)));
        let y_coords: Array1<f64> =
            Array::from_iter((0..y_pixels).map(|row| y_origin + y_res * f64::from(row)));

        Ok((x_coords, y_coords))
    }
}

/// Reinterpret a byte buffer on the CUDA device as `len_elem` elements of type `T`, and
/// wrap the resulting view in a DLPack tensor.
///
/// Raises `BadTiff` when the buffer cannot be viewed as `len_elem` elements of `T`, and
/// `AllocatorFailure` when the DLPack tensor cannot be built from the view. On success
/// the original `CudaSlice<u8>` is leaked rather than dropped.
fn cudaslice_to_tensor<T: InferDataType>(
    cuslice: CudaSlice<u8>,
    len_elem: usize,
) -> NvTiffResult<SafeManagedTensorVersioned> {
    // SAFETY: NOTE(review) — relies on the caller passing a `len_elem` that matches the
    // byte length of `cuslice` for element type `T`; `transmute` yields `None` otherwise.
    let typed_view: Option<CudaView<'_, T>> = unsafe { cuslice.transmute::<T>(len_elem) };
    let typed_view: CudaView<'_, T> =
        typed_view.ok_or_raise(|| NvTiffError::StatusError(NvTiffStatusError::BadTiff))?;

    let dlpack_tensor: SafeManagedTensorVersioned = SafeManagedTensorVersioned::new(typed_view)
        .or_raise(|| NvTiffError::StatusError(NvTiffStatusError::AllocatorFailure))?;

    // Only reached once the tensor exists: give up ownership of the buffer without
    // running its destructor
    cuslice.leak();

    Ok(dlpack_tensor)
}

#[cfg(test)]
mod tests {

    use std::sync::Arc;

    use cudarc::driver::{CudaContext, CudaSlice, CudaStream};
    use dlpark::SafeManagedTensorVersioned;
    use dlpark::ffi::DataType;
    use dlpark::prelude::TensorView;
    use geo::AffineTransform;
    use ndarray::Array;
    use object_store::parse_url;
    use rstest::rstest;
    use url::Url;

    use crate::io::nvtiff::CudaCogReader;
    use crate::traits::Transform;

    /// Decode a tiny float32 GeoTIFF on the GPU, then copy the pixels back to the host
    /// and compare them against the known values.
    #[tokio::test]
    async fn cudacogreader_dlpack() {
        let cog_url: &str =
            "https://github.com/rasterio/rasterio/raw/refs/tags/1.4.3/tests/data/float32.tif";
        let tif_url = Url::parse(cog_url).unwrap();
        let (store, location) = parse_url(&tif_url).unwrap();

        let result = store.get(&location).await.unwrap();
        let bytes = result.bytes().await.unwrap();

        // Step 1: Init CUDA stream on device (GPU)
        let ctx: Arc<CudaContext> = cudarc::driver::CudaContext::new(0).unwrap(); // Set on GPU:0
        let cuda_stream: Arc<CudaStream> = ctx.per_thread_stream();

        // Step 2: Do the TIFF decoding
        let cog: CudaCogReader = CudaCogReader::new(&bytes).unwrap();
        let tensor: SafeManagedTensorVersioned = cog.dlpack(&cuda_stream).unwrap();

        assert_eq!(tensor.data_type(), &DataType::F32);
        // TODO should be 3D tensor of shape [1, 2, 3]
        assert_eq!(tensor.shape(), [6]);

        // Step 3: Transfer decoded bytes from device to host, and check results
        let mut image_out_h: Vec<f32> = vec![0.0; tensor.num_elements()];
        // SAFETY: pointer and element count both come from the tensor decoded above.
        // NOTE(review): the upgraded slice presumably takes ownership of the device
        // pointer — confirm this does not conflict with `tensor` on drop.
        let cuslice: CudaSlice<f32> = unsafe {
            cuda_stream.upgrade_device_ptr(tensor.data_ptr() as u64, tensor.num_elements())
        };
        // Copy straight from the slice; cloning it first is not needed for the copy
        cuda_stream
            .memcpy_dtoh(&cuslice, &mut image_out_h)
            .unwrap();
        assert_eq!(image_out_h, vec![1.41, 1.23, 0.78, 0.32, -0.23, -1.88]);
    }

    /// Decode 20x20 GDAL test rasters of several integer dtypes, and check that the
    /// tensor dtype and flattened shape match.
    #[rstest]
    #[case::u8("byte.tif", DataType::U8)]
    #[case::u16("uint16.tif", DataType::U16)]
    #[case::u32("uint32.tif", DataType::U32)]
    // #[case::u64("uint64.tif", DataType::U64)] // TiffNotSupported
    #[case::i16("int16.tif", DataType::I16)]
    #[case::i32("int32.tif", DataType::I32)]
    // #[case::i64("int64.tif", DataType::I64)] // TiffNotSupported
    #[tokio::test]
    async fn cudacogreader_dlpack_uint_int_dtypes(#[case] filename: &str, #[case] dtype: DataType) {
        // Fetch the raster bytes over HTTP
        let remote: String = format!(
            "https://github.com/OSGeo/gdal/raw/v3.12.0beta1/autotest/gcore/data/{filename}",
        );
        let parsed_url = Url::parse(&remote).unwrap();
        let (object_store, object_path) = parse_url(&parsed_url).unwrap();
        let tiff_bytes = object_store
            .get(&object_path)
            .await
            .unwrap()
            .bytes()
            .await
            .unwrap();

        // Set up the CUDA stream on GPU:0
        let gpu: Arc<CudaContext> = cudarc::driver::CudaContext::new(0).unwrap();
        let gpu_stream: Arc<CudaStream> = gpu.per_thread_stream();

        // Decode on the GPU
        let reader: CudaCogReader = CudaCogReader::new(&tiff_bytes).unwrap();
        let decoded: SafeManagedTensorVersioned = reader.dlpack(&gpu_stream).unwrap();

        assert_eq!(decoded.data_type(), &dtype);
        // TODO should be 3D tensor of shape [1, 20, 20]
        assert_eq!(decoded.shape(), [400]);
    }

    /// A tiled CMYK int8 TIFF is expected to be rejected by `dlpack` with the nvTIFF
    /// "not supported" status error.
    #[tokio::test]
    async fn unimplemented_error() {
        let remote: &str =
            "https://github.com/image-rs/image-tiff/raw/v0.11.2/tests/images/tiled-cmyk-i8.tif";
        let parsed_url = Url::parse(remote).unwrap();
        let (object_store, object_path) = parse_url(&parsed_url).unwrap();
        let tiff_bytes = object_store
            .get(&object_path)
            .await
            .unwrap()
            .bytes()
            .await
            .unwrap();

        // Set up the CUDA stream on GPU:0
        let gpu: Arc<CudaContext> = cudarc::driver::CudaContext::new(0).unwrap();
        let gpu_stream: Arc<CudaStream> = gpu.per_thread_stream();

        let reader = CudaCogReader::new(&tiff_bytes).unwrap();
        let message: String = reader.dlpack(&gpu_stream).err().unwrap().to_string();

        assert_eq!(
            message,
            "Status error: Attempting to decode a TIFF stream that is not supported by the nvTIFF library."
        );
    }

    /// Check the affine transform and the derived pixel-center coordinates of a
    /// 549x549 COG with 200-unit pixels.
    #[tokio::test]
    async fn cudacogreader_transform() {
        let remote: &str =
            "https://github.com/cogeotiff/rio-tiler/raw/8.0.5/tests/fixtures/cog_nodata_nan.tif";
        let parsed_url = Url::parse(remote).unwrap();
        let (object_store, object_path) = parse_url(&parsed_url).unwrap();
        let tiff_bytes = object_store
            .get(&object_path)
            .await
            .unwrap()
            .bytes()
            .await
            .unwrap();

        let reader: CudaCogReader = CudaCogReader::new(&tiff_bytes).unwrap();

        // Affine matrix straight from the pixel scale and tie point tags
        let expected_affine =
            AffineTransform::new(200.0, 0.0, 499_980.0, 0.0, -200.0, 5_300_040.0);
        assert_eq!(reader.transform().unwrap(), expected_affine);

        // Coordinates are shifted by half a pixel relative to the affine origin
        let (xs, ys) = reader.xy_coords().unwrap();
        assert_eq!(xs, Array::linspace(500_080., 609_680., 549));
        assert_eq!(ys, Array::linspace(5_299_940.0, 5_190_340.0, 549));
    }
}