wiiu_swizzle/
lib.rs

1//! # wiiu_swizzle
2//! wiiu_swizzle is a CPU implementation of memory tiling
3//! for texture surfaces for the Wii U GPU hardware.
4//!
5//! Most applications should construct a [Gx2Surface] and use [Gx2Surface::deswizzle]
6//! to correctly handle offsets and parameter changes for different mip levels.
7#![no_std]
8extern crate alloc;
9
10#[cfg(feature = "std")]
11extern crate std;
12
13pub use addrlib::TileMode;
14use addrlib::{
15    hwl_compute_surface_info, ComputeSurfaceAddrFromCoordInput, ComputeSurfaceInfoInput,
16    ComputeSurfaceInfoOutput, SurfaceFlags,
17};
18use alloc::{vec, vec::Vec};
19
20mod addrlib;
21
/// Errors that can occur while converting between tiled and linear memory layouts.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SwizzleError {
    /// The source data does not contain enough bytes.
    NotEnoughData {
        /// The minimum number of bytes required by the operation.
        expected_size: usize,
        /// The number of bytes actually available in the source data.
        actual_size: usize,
    },
}
31
#[cfg(feature = "std")]
impl std::fmt::Display for SwizzleError {
    /// Writes a human readable description of the error to `f`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // The enum only has a single variant, so this pattern is irrefutable.
        let Self::NotEnoughData {
            expected_size: expected,
            actual_size: actual,
        } = self;

        write!(
            f,
            "Not enough data. Expected {} bytes but found {} bytes.",
            expected, actual
        )
    }
}
47
// The crate is `#![no_std]` by default, so the `std::error::Error` impl
// is only available when the "std" feature links the standard library.
#[cfg(feature = "std")]
impl std::error::Error for SwizzleError {}
50
51// TODO: Use try into and avoid panic.
// Declares a public fieldless enum with explicit integer values
// and a `from_repr` function for converting from the raw `u32` value.
//
// The invocation takes exactly one attribute for the enum itself (typically a single
// line doc comment), the enum name, and a comma separated list of `Variant = value`
// entries that may each have any number of attributes.
// The comma after the last entry is required.
macro_rules! c_enum {
    (
        #[$enum_attr:meta]
        $enum_name:ident,
        $($(#[$variant_attr:meta])* $variant_name:ident = $repr:expr),*,
    ) => {
        #[$enum_attr]
        #[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
        #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
        pub enum $enum_name {
            $(
                $(#[$variant_attr])*
                $variant_name = $repr
            ),*
        }

        impl $enum_name {
            /// Returns the variant with the given value or `None` if invalid.
            pub fn from_repr(value: u32) -> Option<Self> {
                match value {
                    $(
                        $repr => Some(Self::$variant_name),
                    )*
                    _ => None
                }
            }
        }
    };
}
77pub(crate) use c_enum;
78
79// TODO: Include all gx2 enum variants?
c_enum! {
    /// GX2AAMode for the number of samples
    AaMode,
    /// 1 sample. The sample count is computed as `1 << value` by the swizzling code.
    X1 = 0,
    /// 2 samples.
    X2 = 1,
    /// 4 samples.
    X4 = 2,
    /// 8 samples.
    X8 = 3,
}
88
89// The GX2 and addrlib enums are the same.
90// https://github.com/decaf-emu/addrlib/blob/194162c47469ce620dd2470eb767ff5e42f5954a/include/addrlib/addrtypes.h#L118
c_enum! {
    // `c_enum!` only accepts a single attribute for the enum itself,
    // so the enum level documentation needs to stay on one `///` line.
    //
    // NOTE(review): judging from the values below, the low byte identifies the
    // channel layout and the upper bits select the number type
    // (0x1 uint, 0x2 snorm, 0x3 sint, 0x4 srgb, 0x8 float) - confirm against GX2 docs.
    /// GX2SurfaceFormat for the format of the image data
    SurfaceFormat,
    /// GX2_SURFACE_FORMAT_INVALID
    Invalid = 0x00000000,
    /// GX2_SURFACE_FORMAT_TC_R8_UNORM
    R8Unorm = 0x00000001,
    /// GX2_SURFACE_FORMAT_TC_R8_UINT
    R8Uint = 0x00000101,
    /// GX2_SURFACE_FORMAT_TC_R8_SNORM
    R8Snorm = 0x00000201,
    /// GX2_SURFACE_FORMAT_TC_R8_SINT
    R8Sint = 0x00000301,
    /// GX2_SURFACE_FORMAT_T_R4_G4_UNORM
    R4G4Unorm = 0x00000002,
    /// GX2_SURFACE_FORMAT_TCD_R16_UNORM
    R16Unorm = 0x00000005,
    /// GX2_SURFACE_FORMAT_TC_R16_UINT
    R16Uint = 0x00000105,
    /// GX2_SURFACE_FORMAT_TC_R16_SNORM
    R16Snorm = 0x00000205,
    /// GX2_SURFACE_FORMAT_TC_R16_SINT
    R16Sint = 0x00000305,
    /// GX2_SURFACE_FORMAT_TC_R16_FLOAT
    R16Float = 0x00000806,
    /// GX2_SURFACE_FORMAT_TC_R8_G8_UNORM
    R8G8Unorm = 0x00000007,
    /// GX2_SURFACE_FORMAT_TC_R8_G8_UINT
    R8G8Uint = 0x00000107,
    /// GX2_SURFACE_FORMAT_TC_R8_G8_SNORM
    R8G8Snorm = 0x00000207,
    /// GX2_SURFACE_FORMAT_TC_R8_G8_SINT
    R8G8Sint = 0x00000307,
    /// GX2_SURFACE_FORMAT_TCS_R5_G6_B5_UNORM
    R5G6B5Unorm = 0x00000008,
    /// GX2_SURFACE_FORMAT_TC_R5_G5_B5_A1_UNORM
    R5G5B5A1Unorm = 0x0000000a,
    /// GX2_SURFACE_FORMAT_TC_R4_G4_B4_A4_UNORM
    R4G4B4A4Unorm = 0x0000000b,
    /// GX2_SURFACE_FORMAT_TC_R32_UINT
    R32Uint = 0x0000010d,
    /// GX2_SURFACE_FORMAT_TC_R32_SINT
    R32Sint = 0x0000030d,
    /// GX2_SURFACE_FORMAT_TCD_R32_FLOAT
    R32Float = 0x0000080e,
    /// GX2_SURFACE_FORMAT_TC_R16_G16_UNORM
    R16G16Unorm = 0x0000000f,
    /// GX2_SURFACE_FORMAT_TC_R16_G16_UINT
    R16G16Uint = 0x0000010f,
    /// GX2_SURFACE_FORMAT_TC_R16_G16_SNORM
    R16G16Snorm = 0x0000020f,
    /// GX2_SURFACE_FORMAT_TC_R16_G16_SINT
    R16G16Sint = 0x0000030f,
    /// GX2_SURFACE_FORMAT_TC_R16_G16_FLOAT
    R16G16Float = 0x00000810,
    /// GX2_SURFACE_FORMAT_D_D24_S8_FLOAT
    D24S8Float = 0x00000811,
    /// GX2_SURFACE_FORMAT_TC_R11_G11_B10_FLOAT
    R11G11B10Float = 0x00000816,
    /// GX2_SURFACE_FORMAT_TCS_R10_G10_B10_A2_UNORM
    R10G10B10A2Unorm = 0x00000019,
    /// GX2_SURFACE_FORMAT_TC_R10_G10_B10_A2_UINT
    R10G10B10A2Uint = 0x00000119,
    /// GX2_SURFACE_FORMAT_TC_R10_G10_B10_A2_SINT
    R10G10B10A2Sint = 0x00000319,
    /// GX2_SURFACE_FORMAT_TCS_R8_G8_B8_A8_UNORM
    R8G8B8A8Unorm = 0x0000001a,
    /// GX2_SURFACE_FORMAT_TC_R8_G8_B8_A8_UINT
    R8G8B8A8Uint = 0x0000011a,
    /// GX2_SURFACE_FORMAT_TC_R8_G8_B8_A8_SNORM
    R8G8B8A8Snorm = 0x0000021a,
    /// GX2_SURFACE_FORMAT_TC_R8_G8_B8_A8_SINT
    R8G8B8A8Sint = 0x0000031a,
    /// GX2_SURFACE_FORMAT_TCS_R8_G8_B8_A8_SRGB
    R8G8B8A8Srgb = 0x0000041a,
    /// GX2_SURFACE_FORMAT_TC_R32_G32_UINT
    R32G32Uint = 0x0000011d,
    /// GX2_SURFACE_FORMAT_TC_R32_G32_SINT
    R32G32Sint = 0x0000031d,
    /// GX2_SURFACE_FORMAT_TC_R32_G32_FLOAT
    R32G32Float = 0x0000081e,
    /// GX2_SURFACE_FORMAT_TC_R16_G16_B16_A16_UNORM
    R16G16B16A16Unorm = 0x0000001f,
    /// GX2_SURFACE_FORMAT_TC_R16_G16_B16_A16_UINT
    R16G16B16A16Uint = 0x0000011f,
    /// GX2_SURFACE_FORMAT_TC_R16_G16_B16_A16_SNORM
    R16G16B16A16Snorm = 0x0000021f,
    /// GX2_SURFACE_FORMAT_TC_R16_G16_B16_A16_SINT
    R16G16B16A16Sint = 0x0000031f,
    /// GX2_SURFACE_FORMAT_TC_R16_G16_B16_A16_FLOAT
    R16G16B16A16Float = 0x00000820,
    /// GX2_SURFACE_FORMAT_TC_R32_G32_B32_A32_UINT
    R32G32B32A32Uint = 0x00000122,
    /// GX2_SURFACE_FORMAT_TC_R32_G32_B32_A32_SINT
    R32G32B32A32Sint = 0x00000322,
    /// GX2_SURFACE_FORMAT_TC_R32_G32_B32_A32_FLOAT
    R32G32B32A32Float = 0x00000823,
    /// GX2_SURFACE_FORMAT_T_BC1_UNORM
    Bc1Unorm = 0x00000031,
    /// GX2_SURFACE_FORMAT_T_BC1_SRGB
    Bc1Srgb = 0x00000431,
    /// GX2_SURFACE_FORMAT_T_BC2_UNORM
    Bc2Unorm = 0x00000032,
    /// GX2_SURFACE_FORMAT_T_BC2_SRGB
    Bc2Srgb = 0x00000432,
    /// GX2_SURFACE_FORMAT_T_BC3_UNORM
    Bc3Unorm = 0x00000033,
    /// GX2_SURFACE_FORMAT_T_BC3_SRGB
    Bc3Srgb = 0x00000433,
    /// GX2_SURFACE_FORMAT_T_BC4_UNORM
    Bc4Unorm = 0x00000034,
    /// GX2_SURFACE_FORMAT_T_BC4_SNORM
    Bc4Snorm = 0x00000234,
    /// GX2_SURFACE_FORMAT_T_BC5_UNORM
    Bc5Unorm = 0x00000035,
    /// GX2_SURFACE_FORMAT_T_BC5_SNORM
    Bc5Snorm = 0x00000235,
}
209
210impl SurfaceFormat {
211    pub fn block_dim(&self) -> (u32, u32) {
212        match self {
213            SurfaceFormat::Bc1Unorm => (4, 4),
214            SurfaceFormat::Bc1Srgb => (4, 4),
215            SurfaceFormat::Bc2Unorm => (4, 4),
216            SurfaceFormat::Bc2Srgb => (4, 4),
217            SurfaceFormat::Bc3Unorm => (4, 4),
218            SurfaceFormat::Bc3Srgb => (4, 4),
219            SurfaceFormat::Bc4Unorm => (4, 4),
220            SurfaceFormat::Bc4Snorm => (4, 4),
221            SurfaceFormat::Bc5Unorm => (4, 4),
222            SurfaceFormat::Bc5Snorm => (4, 4),
223            _ => (1, 1),
224        }
225    }
226
227    // https://github.com/decaf-emu/addrlib/blob/194162c47469ce620dd2470eb767ff5e42f5954a/src/core/addrelemlib.cpp#L139
228    pub fn bytes_per_pixel(&self) -> u32 {
229        match self {
230            SurfaceFormat::Invalid => 0,
231            SurfaceFormat::R8Unorm => 1,
232            SurfaceFormat::R8Uint => 1,
233            SurfaceFormat::R8Snorm => 1,
234            SurfaceFormat::R8Sint => 1,
235            SurfaceFormat::R4G4Unorm => 1,
236            SurfaceFormat::R16Unorm => 2,
237            SurfaceFormat::R16Uint => 2,
238            SurfaceFormat::R16Snorm => 2,
239            SurfaceFormat::R16Sint => 2,
240            SurfaceFormat::R16Float => 2,
241            SurfaceFormat::R8G8Unorm => 2,
242            SurfaceFormat::R8G8Uint => 2,
243            SurfaceFormat::R8G8Snorm => 2,
244            SurfaceFormat::R8G8Sint => 2,
245            SurfaceFormat::R5G6B5Unorm => 2,
246            SurfaceFormat::R5G5B5A1Unorm => 2,
247            SurfaceFormat::R4G4B4A4Unorm => 2,
248            SurfaceFormat::R32Uint => 4,
249            SurfaceFormat::R32Sint => 4,
250            SurfaceFormat::R32Float => 4,
251            SurfaceFormat::R16G16Unorm => 4,
252            SurfaceFormat::R16G16Uint => 4,
253            SurfaceFormat::R16G16Snorm => 4,
254            SurfaceFormat::R16G16Sint => 4,
255            SurfaceFormat::R16G16Float => 4,
256            SurfaceFormat::D24S8Float => 4,
257            SurfaceFormat::R11G11B10Float => 4,
258            SurfaceFormat::R10G10B10A2Unorm => 4,
259            SurfaceFormat::R10G10B10A2Uint => 4,
260            SurfaceFormat::R10G10B10A2Sint => 4,
261            SurfaceFormat::R8G8B8A8Unorm => 4,
262            SurfaceFormat::R8G8B8A8Uint => 4,
263            SurfaceFormat::R8G8B8A8Snorm => 4,
264            SurfaceFormat::R8G8B8A8Sint => 4,
265            SurfaceFormat::R8G8B8A8Srgb => 4,
266            SurfaceFormat::R32G32Uint => 8,
267            SurfaceFormat::R32G32Sint => 8,
268            SurfaceFormat::R32G32Float => 8,
269            SurfaceFormat::R16G16B16A16Unorm => 8,
270            SurfaceFormat::R16G16B16A16Uint => 8,
271            SurfaceFormat::R16G16B16A16Snorm => 8,
272            SurfaceFormat::R16G16B16A16Sint => 8,
273            SurfaceFormat::R16G16B16A16Float => 8,
274            SurfaceFormat::R32G32B32A32Uint => 16,
275            SurfaceFormat::R32G32B32A32Sint => 16,
276            SurfaceFormat::R32G32B32A32Float => 16,
277            SurfaceFormat::Bc1Unorm => 8,
278            SurfaceFormat::Bc1Srgb => 8,
279            SurfaceFormat::Bc2Unorm => 16,
280            SurfaceFormat::Bc2Srgb => 16,
281            SurfaceFormat::Bc3Unorm => 16,
282            SurfaceFormat::Bc3Srgb => 16,
283            SurfaceFormat::Bc4Unorm => 8,
284            SurfaceFormat::Bc4Snorm => 8,
285            SurfaceFormat::Bc5Unorm => 16,
286            SurfaceFormat::Bc5Snorm => 16,
287        }
288    }
289}
290
c_enum! {
    /// GX2SurfaceDim for the dimensionality of the texture surface
    SurfaceDim,
    // NOTE(review): the variant descriptions are inferred from the names
    // and from how `Gx2Surface::deswizzle` uses them - confirm against GX2 docs.
    /// 1D texture surface.
    D1 = 0,
    /// 2D texture surface.
    D2 = 1,
    /// 3D texture surface.
    D3 = 2,
    /// Cube map surface. [Gx2Surface::deswizzle] reorders cube maps from mip major to layer major.
    Cube = 3,
}
299
300// TODO: How to handle array layers?
301// TODO: additional enums?
302// TODO: Show how to split a combined image buffer in docs
/// A view over a Wii U GX2 texture surface to simplify swizzling operations.
///
/// Most of these parameters are likely stored
/// in a texture binary file format in exactly the expected format.
/// If a value is not present in the texture file
/// like [usage](#structfield.usage) or [aa](#structfield.aa),
/// using the recommended default should produce the intended result.
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
#[derive(Debug)]
pub struct Gx2Surface<'a> {
    /// The dimensionality of the surface.
    /// [Gx2Surface::deswizzle] checks for [SurfaceDim::Cube] and [SurfaceDim::D3]
    /// when computing the surface info and reorders the output for cube maps.
    pub dim: SurfaceDim,
    /// The width of the base mip level in pixels.
    pub width: u32,
    /// The height of the base mip level in pixels.
    pub height: u32,
    /// The depth of the base mip level in pixels or the number of array layers.
    /// Cube maps will have a value of 6.
    /// 2D surfaces without any layers should use a value of 1.
    pub depth_or_array_layers: u32,
    /// The number of mipmaps or 1 if there are no additional mipmaps.
    pub mipmap_count: u32,
    /// The format for the image data.
    /// Many texture binary file formats store the GX2 values directly.
    pub format: SurfaceFormat,
    /// The anti-aliasing mode used to determine the number of samples.
    pub aa: AaMode,
    /// Not read by [Gx2Surface::deswizzle].
    // NOTE(review): presumably the GX2 surface usage flags - confirm against the file format.
    pub usage: u32,
    /// The image data for the base mipmap.
    pub image_data: &'a [u8],
    /// The image data for the mipmaps past the base level starting with mip 1.
    /// If there are no mipmaps, simply set this to an empty slice.
    pub mipmap_data: &'a [u8],
    /// The tiling mode requested for the surface.
    /// The mode actually used for a mip level is taken from the computed surface info
    /// since small mips may use micro instead of macro tiling.
    pub tile_mode: TileMode,
    /// The combined swizzle value that is split into the pipe and bank swizzle.
    pub swizzle: u32,
    /// Usually `512 * bytes_per_pixel`.
    /// Not read by [Gx2Surface::deswizzle].
    pub alignment: u32,
    /// Horizontal pitch that typically depends on [width](#structfield.width).
    /// [Gx2Surface::deswizzle] uses the smaller of this value and the computed pitch for each mip.
    pub pitch: u32,
    /// The offsets for each mipmap past the base level starting with mip 1.
    /// Mipmap offsets after mip 1 are relative to the mip 1 offset.
    /// An offset of 0 after the first entry marks the end of the used offsets.
    ///
    /// Many texture binary file formats use this method of encoding offsets.
    pub mipmap_offsets: [u32; 13],
}
347
348// TODO: Also define a swizzle surface?
349impl<'a> Gx2Surface<'a> {
350    /// Convert all layers and mipmaps from tiled to a combined linear vector.
351    pub fn deswizzle(&self) -> Result<Vec<u8>, SwizzleError> {
352        let (block_width, block_height) = self.format.block_dim();
353        let bytes_per_pixel = self.format.bytes_per_pixel();
354
355        let mut data = Vec::new();
356        for mip in 0..self.mipmap_count {
357            let source = if mip == 0 {
358                // The mip 0 data is at the start of the image data.
359                self.image_data
360            } else if mip == 1 {
361                // The slice already accounts for the mip 1 offset.
362                let next_offset = self.mipmap_offsets[mip as usize] as usize;
363                if next_offset != 0 {
364                    &self.mipmap_data[..next_offset]
365                } else {
366                    self.mipmap_data
367                }
368            } else {
369                // Remaining mip levels are relative to the start of the mipmap data.
370                let offset = if mip == 1 {
371                    0
372                } else {
373                    self.mipmap_offsets[mip as usize - 1] as usize
374                };
375                let next_offset = self.mipmap_offsets[mip as usize] as usize;
376                if next_offset != 0 {
377                    &self.mipmap_data[offset..next_offset]
378                } else {
379                    &self.mipmap_data[offset..]
380                }
381            };
382
383            // TODO: How to handle dimensions not divisible by block dimensions?
384            // TODO: cemu uses mipPtr & 0x700 for swizzle for mipmaps?
385            let width = div_round_up(self.width >> mip, block_width);
386            let height = div_round_up(self.height >> mip, block_height);
387
388            // Some parameters change based on dimensions or mip level.
389            // Small mips may use micro instead of macro tiling.
390            // TODO: how to set these parameters?
391            let input = ComputeSurfaceInfoInput {
392                size: source.len() as u32,
393                tile_mode: self.tile_mode,
394                format: self.format,
395                bpp: bytes_per_pixel * u8::BITS,
396                num_samples: 1 << self.aa as u32,
397                width,
398                height,
399                num_slices: self.depth_or_array_layers,
400                slice: 0,
401                mip_level: mip,
402                flags: SurfaceFlags::new(
403                    false,
404                    false,
405                    false,
406                    false,
407                    self.dim == SurfaceDim::Cube,
408                    self.dim == SurfaceDim::D3,
409                    false,
410                    false,
411                    false,
412                    false,
413                    false,
414                    false,
415                    false,
416                    false,
417                    false,
418                    false,
419                    false,
420                    false,
421                    0u8.into(),
422                ),
423                tile_info: Default::default(),
424                tile_type: addrlib::TileType::Displayable,
425                tile_index: 0,
426            };
427            // TODO: Can this use defaults?
428            let mut output = ComputeSurfaceInfoOutput {
429                size: 0,
430                pitch: 0,
431                height: 0,
432                depth: 0,
433                surf_size: 0,
434                tile_mode: self.tile_mode,
435                base_align: 0,
436                pitch_align: 0,
437                height_align: 0,
438                depth_align: 0,
439                bpp: 0,
440                pixel_pitch: 0,
441                pixel_height: 0,
442                pixel_bits: 0,
443                slice_size: 0,
444                pitch_tile_max: 0,
445                height_tile_max: 0,
446                slice_tile_max: 0,
447                tile_info: Default::default(),
448                tile_type: addrlib::TileType::Displayable,
449                tile_index: 0,
450            };
451            hwl_compute_surface_info(&input, &mut output);
452
453            // TODO: Why is output.pitch sometimes too large?
454            let pitch = output.pitch.min(self.pitch);
455
456            // TODO: is this data all layers for each mip?
457            // TODO: Store this as layer major instead of mip major to match dds?
458            // TODO: Should this be depth and also layers?
459            let mip = deswizzle_mipmap(
460                width,
461                height,
462                self.depth_or_array_layers,
463                source,
464                self.swizzle,
465                pitch,
466                output.tile_mode,
467                bytes_per_pixel,
468                self.aa,
469            )?;
470            data.extend_from_slice(&mip);
471        }
472
473        if self.dim == SurfaceDim::Cube {
474            Ok(self.mip_major_to_layer_major(&data, block_width, block_height, bytes_per_pixel))
475        } else {
476            Ok(data)
477        }
478    }
479
480    fn mip_major_to_layer_major(
481        &self,
482        data: &[u8],
483        block_width: u32,
484        block_height: u32,
485        bytes_per_pixel: u32,
486    ) -> Vec<u8> {
487        // Convert from [mip][layer] to [layer][mip] ordering.
488        // TODO: Is there a better way of doing this?
489        let mut new_data = vec![0u8; data.len()];
490
491        let mut mip_offsets: Vec<_> = (0..self.mipmap_count - 1)
492            .map(|mip| {
493                let width = div_round_up(self.width >> mip, block_width);
494                let height = div_round_up(self.height >> mip, block_height);
495                width * height * bytes_per_pixel
496            })
497            .scan(0, |state, x| Some(*state + x))
498            .collect();
499        mip_offsets.insert(0, 0);
500
501        let output_layer_size = (0..self.mipmap_count)
502            .map(|mip| {
503                let width = div_round_up(self.width >> mip, block_width);
504                let height = div_round_up(self.height >> mip, block_height);
505                width * height * bytes_per_pixel
506            })
507            .sum::<u32>();
508
509        let mut mip_offset = 0;
510        for mip in 0..self.mipmap_count {
511            let width = div_round_up(self.width >> mip, block_width);
512            let height = div_round_up(self.height >> mip, block_height);
513            let mip_size = (width * height * bytes_per_pixel) as usize;
514
515            for layer in 0..6 {
516                let layer_offset = width * height * layer * bytes_per_pixel;
517                let input_offset = layer_offset as usize + mip_offset;
518                let mip_data = &data[input_offset..input_offset + mip_size];
519
520                let output_layer_offset = output_layer_size * layer;
521                let output_mip_offset = mip_offsets[mip as usize];
522                let output_offset = (output_layer_offset + output_mip_offset) as usize;
523
524                new_data[output_offset..output_offset + mip_size].copy_from_slice(mip_data);
525            }
526
527            mip_offset += mip_size * 6;
528        }
529        new_data
530    }
531}
532
533// TODO: Docs and examples.
534/// Convert the tiled data in `source` to a combined linear vector.
535///
536/// For block compressed formats, `width` and `height` should be the dimensions in blocks
537/// with `bytes_per_pixel` being the size of a block in bytes.
538#[allow(clippy::too_many_arguments)]
539pub fn deswizzle_mipmap(
540    width: u32,
541    height: u32,
542    depth_or_array_layers: u32,
543    source: &[u8],
544    swizzle: u32,
545    pitch: u32,
546    tile_mode: TileMode,
547    bytes_per_pixel: u32,
548    aa: AaMode,
549) -> Result<Vec<u8>, SwizzleError> {
550    let output_size = width as usize
551        * height as usize
552        * depth_or_array_layers as usize
553        * bytes_per_pixel as usize;
554    if output_size == 0 {
555        return Ok(Vec::new());
556    }
557
558    let expected_size = swizzled_mipmap_size(
559        width,
560        height,
561        depth_or_array_layers,
562        swizzle,
563        pitch,
564        tile_mode,
565        bytes_per_pixel,
566        aa,
567    );
568    if source.len() < expected_size {
569        return Err(SwizzleError::NotEnoughData {
570            expected_size,
571            actual_size: source.len(),
572        });
573    }
574
575    let mut output = vec![0u8; output_size];
576
577    swizzle_surface_inner::<false>(
578        width,
579        height,
580        depth_or_array_layers,
581        source,
582        &mut output,
583        swizzle,
584        pitch,
585        tile_mode,
586        bytes_per_pixel,
587        aa,
588    )?;
589
590    Ok(output)
591}
592
593/// Convert the linear data in `source` to a combined tiled vector.
594///
595/// For block compressed formats, `width` and `height` should be the dimensions in blocks
596/// with `bytes_per_pixel` being the size of a block in bytes.
597#[allow(clippy::too_many_arguments)]
598pub fn swizzle_mipmap(
599    width: u32,
600    height: u32,
601    depth_or_array_layers: u32,
602    source: &[u8],
603    swizzle: u32,
604    pitch: u32,
605    tile_mode: TileMode,
606    bytes_per_pixel: u32,
607    aa: AaMode,
608) -> Result<Vec<u8>, SwizzleError> {
609    // TODO: Is this the correct output size?
610    let output_size = swizzled_mipmap_size(
611        width,
612        height,
613        depth_or_array_layers,
614        swizzle,
615        pitch,
616        tile_mode,
617        bytes_per_pixel,
618        aa,
619    );
620    if output_size == 0 {
621        return Ok(Vec::new());
622    }
623
624    let expected_size =
625        deswizzled_mipmap_size(width, height, depth_or_array_layers, bytes_per_pixel);
626    if source.len() < expected_size {
627        return Err(SwizzleError::NotEnoughData {
628            expected_size,
629            actual_size: source.len(),
630        });
631    }
632
633    let mut output = vec![0u8; output_size];
634
635    swizzle_surface_inner::<true>(
636        width,
637        height,
638        depth_or_array_layers,
639        source,
640        &mut output,
641        swizzle,
642        pitch,
643        tile_mode,
644        bytes_per_pixel,
645        aa,
646    )?;
647
648    Ok(output)
649}
650
/// Divides `x` by `d` and rounds the result up to the next integer.
///
/// Unlike `(x + d - 1) / d`, this does not overflow for values of `x` near `u32::MAX`.
///
/// # Panics
/// Panics if `d` is 0.
fn div_round_up(x: u32, d: u32) -> u32 {
    x / d + u32::from(x % d != 0)
}
654
/// Returns the size in bytes of the linear data for a single mip level.
///
/// This is the product of all the dimensions and the element size.
fn deswizzled_mipmap_size(
    width: u32,
    height: u32,
    depth_or_array_layers: u32,
    bytes_per_pixel: u32,
) -> usize {
    [width, height, depth_or_array_layers, bytes_per_pixel]
        .iter()
        .map(|&factor| factor as usize)
        .product()
}
663
664// TODO: Should this use ComputeSurfaceInfo functions from addrlib?
665#[allow(clippy::too_many_arguments)]
666fn swizzled_mipmap_size(
667    width: u32,
668    height: u32,
669    depth_or_array_layers: u32,
670    swizzle: u32,
671    pitch: u32,
672    tile_mode: TileMode,
673    bytes_per_pixel: u32,
674    aa: AaMode,
675) -> usize {
676    // Addrlib code doesn't handle a bpp of 0.
677    if bytes_per_pixel == 0 {
678        return 0;
679    }
680    let bpp = bytes_per_pixel * u8::BITS;
681
682    // TODO: name in gx2?
683    let (pipe_swizzle, bank_swizzle) = addrlib::pipe_bank_swizzle(swizzle);
684
685    // TODO: How to initialize these parameters?
686    let sample = 0;
687    let num_samples = 1 << aa as u32;
688    let tile_base = 0; // TODO: only used for depth map textures?
689    let comp_bits = 0; // TODO: only used for depth map textures?
690
691    // TODO: How many of these fields are set from functions?
692    // TODO: Find a way to get values used from cemu to create test cases?
693    let p_in = ComputeSurfaceAddrFromCoordInput {
694        x: width.saturating_sub(1),
695        y: height.saturating_sub(1),
696        slice: depth_or_array_layers.saturating_sub(1),
697        sample,
698        bpp,
699        pitch,
700        height,
701        num_slices: depth_or_array_layers,
702        num_samples,
703        tile_mode,
704        is_depth: false,
705        tile_base,
706        comp_bits,
707        pipe_swizzle,
708        bank_swizzle,
709    };
710
711    // TODO: Will the corner always be the largest address?
712    addrlib::dispatch_compute_surface_addrfrom_coord(&p_in) as usize
713}
714
715#[allow(clippy::too_many_arguments)]
716fn swizzle_surface_inner<const SWIZZLE: bool>(
717    width: u32,
718    height: u32,
719    depth_or_array_layers: u32,
720    source: &[u8],
721    output: &mut [u8],
722    swizzle: u32,
723    pitch: u32,
724    tile_mode: TileMode,
725    bytes_per_pixel: u32,
726    aa: AaMode,
727) -> Result<(), SwizzleError> {
728    // TODO: validate dimensions?
729    // TODO: compute surface info to fill in these params?
730    // TODO: rounding or padding of dimensions?
731    // TODO: handle div round up based on block dimensions?
732
733    // TODO: always bytes per pixel * 8?
734    let bpp = bytes_per_pixel * u8::BITS;
735
736    // TODO: name in gx2?
737    let (pipe_swizzle, bank_swizzle) = addrlib::pipe_bank_swizzle(swizzle);
738
739    // TODO: How to initialize these parameters?
740    let sample = 0;
741    let num_samples = 1 << aa as u32; // TODO: is this based on self.aa?
742    let tile_base = 0; // TODO: only used for depth map textures?
743    let comp_bits = 0; // TODO: only used for depth map textures?
744
745    // TODO: addrlib uses input and output structs to "dispatch" swizzling?
746    // TODO: only the input pin values matter?
747    // TODO: cemu uses this structure as well?
748    // TODO: should these define the public API?
749
750    // TODO: Is it correct to use depth and layers as slices?
751    for z in 0..depth_or_array_layers {
752        for y in 0..height {
753            for x in 0..width {
754                // TODO: How many of these fields are set from functions?
755                // TODO: Find a way to get values used from cemu to create test cases?
756                let p_in = ComputeSurfaceAddrFromCoordInput {
757                    x,
758                    y,
759                    slice: z,
760                    sample,
761                    bpp,
762                    pitch,
763                    height,
764                    num_slices: depth_or_array_layers,
765                    num_samples,
766                    tile_mode,
767                    is_depth: false,
768                    tile_base,
769                    comp_bits,
770                    pipe_swizzle,
771                    bank_swizzle,
772                };
773
774                let address = addrlib::dispatch_compute_surface_addrfrom_coord(&p_in) as usize;
775                let linear_address =
776                    ((z * width * height + y * width + x) * bytes_per_pixel) as usize;
777
778                if SWIZZLE {
779                    // TODO: This should never be out of bounds on valid inputs?
780                    output[address..address + bytes_per_pixel as usize].copy_from_slice(
781                        &source[linear_address..linear_address + bytes_per_pixel as usize],
782                    );
783                } else {
784                    // TODO: This should never be out of bounds on valid inputs?
785                    output[linear_address..linear_address + bytes_per_pixel as usize]
786                        .copy_from_slice(&source[address..address + bytes_per_pixel as usize]);
787                }
788            }
789        }
790    }
791    Ok(())
792}
793
// The data file names encode the parameters used for each test:
// dimensions, format, tile mode (tm), pitch (p), and swizzle (s).
#[cfg(test)]
mod tests {
    use super::*;

    // TODO: Add a test for micro tiling.
    // Surfaces without any elements should produce empty output without reading the source.
    #[test]
    fn deswizzle_empty() {
        assert!(deswizzle_mipmap(
            0,
            0,
            0,
            &[],
            853504,
            256,
            TileMode::D2TiledThin1,
            8,
            AaMode::X1
        )
        .unwrap()
        .is_empty());
    }

    // BC1 uses 4x4 blocks of 8 bytes, so the dimensions are given in blocks.
    #[test]
    fn deswizzle_macro_tiled_1024x1024_bc1() {
        let expected = include_bytes!("data/1024x1024_bc1_tm4_p256_s853504_deswizzled.bin");
        let swizzled = include_bytes!("data/1024x1024_bc1_tm4_p256_s853504_swizzled.bin");

        assert_eq!(
            expected,
            &deswizzle_mipmap(
                1024 / 4,
                1024 / 4,
                1,
                swizzled,
                853504,
                256,
                TileMode::D2TiledThin1,
                8,
                AaMode::X1
            )
            .unwrap()[..]
        );
    }

    // A surface with a depth of 16 and 4 bytes per pixel using thick tiling.
    #[test]
    fn deswizzle_macro_tiled_16x16x16_rgba8() {
        let expected = include_bytes!("data/16x16x16_rgba8_tm7_p32_s852224_deswizzled.bin");
        let swizzled = include_bytes!("data/16x16x16_rgba8_tm7_p32_s852224_swizzled.bin");

        assert_eq!(
            expected,
            &deswizzle_mipmap(
                16,
                16,
                16,
                swizzled,
                852224,
                32,
                TileMode::D2TiledThick,
                4,
                AaMode::X1
            )
            .unwrap()[..]
        );
    }

    // The mipmap data starts at the first mipmap offset in the combined file.
    // The remaining offsets are relative to the start of the mipmap data.
    #[test]
    fn deswizzle_surface_256x256_bc1_mipmaps() {
        let expected = include_bytes!("data/256x256_bc1_tm4_p64_s132352_mips8_deswizzled.bin");
        let swizzled = include_bytes!("data/256x256_bc1_tm4_p64_s132352_mips8_swizzled.bin");

        let surface = Gx2Surface {
            dim: SurfaceDim::D2,
            width: 256,
            height: 256,
            depth_or_array_layers: 1,
            mipmap_count: 8,
            format: SurfaceFormat::Bc1Unorm,
            aa: AaMode::X1,
            usage: 1,
            image_data: swizzled,
            mipmap_data: &swizzled[32768..],
            tile_mode: TileMode::D2TiledThin1,
            swizzle: 132352,
            alignment: 4096,
            pitch: 64,
            mipmap_offsets: [
                32768, 9472, 11520, 12032, 12544, 13056, 13568, 0, 0, 0, 0, 0, 0,
            ],
        };
        assert_eq!(expected, &surface.deswizzle().unwrap()[..]);
    }

    // Cube maps have 6 layers and are converted to layer major ordering.
    #[test]
    fn deswizzle_surface_64x64_cube_bc1_mipmaps() {
        let expected = include_bytes!("data/64x64_cube_bc1_tm4_p32_s67328_deswizzled.bin");
        let swizzled = include_bytes!("data/64x64_cube_bc1_tm4_p32_s67328_swizzled.bin");

        let surface = Gx2Surface {
            dim: SurfaceDim::Cube,
            width: 64,
            height: 64,
            depth_or_array_layers: 6,
            mipmap_count: 2,
            format: SurfaceFormat::Bc1Unorm,
            aa: AaMode::X1,
            usage: 1,
            image_data: swizzled,
            mipmap_data: &swizzled[24576..],
            tile_mode: TileMode::D2TiledThin1,
            swizzle: 67328,
            alignment: 4096,
            pitch: 32,
            mipmap_offsets: [24576, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        };
        assert_eq!(expected, &surface.deswizzle().unwrap()[..]);
    }

    // The `from_repr` tests check a valid value and an unused value for each enum.
    #[test]
    fn aa_mode_from_repr() {
        assert_eq!(Some(AaMode::X2), AaMode::from_repr(1));
        assert_eq!(None, AaMode::from_repr(0xff));
    }

    #[test]
    fn surface_dim_from_repr() {
        assert_eq!(Some(SurfaceDim::D2), SurfaceDim::from_repr(1));
        assert_eq!(None, SurfaceDim::from_repr(0xff));
    }

    #[test]
    fn surface_format_from_repr() {
        assert_eq!(Some(SurfaceFormat::Bc5Unorm), SurfaceFormat::from_repr(53));
        assert_eq!(None, SurfaceFormat::from_repr(0xff));
    }

    #[test]
    fn tile_mode_from_repr() {
        assert_eq!(Some(TileMode::D2TiledThin1), TileMode::from_repr(4));
        assert_eq!(None, TileMode::from_repr(0xff));
    }
}