image_texel/
texel.rs

1// Distributed under The MIT License (MIT)
2//
3// Copyright (c) 2019, 2020 The `image-rs` developers
4#![allow(unsafe_code)]
5
6use core::cell::Cell;
7use core::cmp::{Eq, Ord, Ordering, PartialEq, PartialOrd};
8use core::marker::PhantomData;
9use core::{fmt, hash, mem, num, ops, ptr, slice, sync::atomic};
10
11use crate::buf::{atomic_buf, buf, cell_buf, AtomicRef, AtomicSliceRef, TexelRange};
12
/// Marker struct to denote a texel type.
///
/// Can be constructed only for types that have expected alignment and no byte invariants. It
/// always implements `Copy` and `Clone`, regardless of the underlying type and is zero-sized.
///
/// This is the central encapsulation of unsafety in this crate. It utilizes `bytemuck` for a safe
/// interface but permits other types with an unsafe interface, and offers the cast operations
/// without a bound on the `Pod` trait. Note that `Pod` is a pure marker trait; its properties must
/// hold even if it is not explicitly mentioned. If all constructors (safely or unsafely) ensure
/// that its properties hold we can use `Texel` as a witness type for the bound and subsequently
/// write interfaces to take an instance instead of having a static type bound. This achieves two
/// effects:
/// * Firstly, it makes the interface independent of the chosen transmutation crate. Potentially we
///   will have a method to construct the `Texel` via a `core` trait.
/// * Secondly, it allows creating texel of third-party types for which the bound can not be
///   implemented. Crucially, this includes SIMD representations that would be a burden to support
///   directly. And conversely you can also deal with arbitrary existing texel without a bound in
///   your own interfaces!
// Zero-sized: `PhantomData<P>` mentions the parameter without storing any `P`.
pub struct Texel<P: ?Sized>(PhantomData<P>);
32
/// Marker struct to denote that P is transparently wrapped in O.
///
/// The only way to construct it is by accessing its associated constant which only exists when the
/// bound `bytemuck::TransparentWrapper` holds as required. This encodes a type-level set and is
/// a workaround for such bounds not yet being allowed in `const fn`. Expect this type to be
/// deprecated sooner or later.
// Zero-sized witness; see the `CONST` associated constant for the only constructor.
pub struct IsTransparentWrapper<P, O>(PhantomData<(P, O)>);
40
/// Describes a type which can represent a `Texel` and for which this is statically known.
pub trait AsTexel {
    /// Get the texel struct for this type.
    ///
    /// The naive implementation of merely unwrapping the result of `Texel::for_type` **panics** on
    /// any invalid type. This trait should only be implemented when you know for sure that the
    /// type is correct.
    ///
    /// Implementations in this file back the call by a `const` assertion of
    /// `Texel::check_invariants` so that invalid implementations fail at compile time.
    fn texel() -> Texel<Self>;
}
50
// Generates `MaxAligned`, `MaxAtomic`, `MaxCell`, the `AtomicPart` unit type and the
// `MAX_ALIGN` constant from a table of per-architecture alignments. Architectures not
// listed in the table fall back to an alignment of 8.
macro_rules! def_max_align {
    (
        match cfg(target) {
            $($($arch:literal)|* => $num:literal),*,
        }

        $(#[$common_attr:meta])*
        struct MaxAligned(..);

        $(#[$atomic_attr:meta])*
        struct MaxAtomic(..);

        $(#[$cell_attr:meta])*
        struct MaxCell(..);
    ) => {
        /// A byte-like-type that is aligned to the required max alignment.
        ///
        /// This type does not contain padding and implements `Pod`. Generally, the alignment and size
        /// requirement is kept small to avoid overhead.
        $(#[$common_attr])*
        $(
            #[cfg_attr(
                any($(target_arch = $arch),*),
                repr(align($num))
            )]
        )*
        pub struct MaxAligned(pub(crate) [u8; MAX_ALIGN]);

        /* Note: We need to be really careful to avoid peril for several reasons.
         *
         * Firstly, the Rust atomic model forbids us from doing unsynchronized access (stores _or_
         * loads) with differing sizes to the same memory location. For now, and for the
         * foreseeable future. Since we do not synchronize the access to the buffer, we must use
         * the same size everywhere.
         *
         * Secondly, using any type other than `AtomicU8` for these makes it hard for us to slice
         * the buffer at arbitrary points. For true references we might work around this by custom
         * metadata, yet this is not stable. Hence, we _must_ use a non-reference type wrapper for
         * the kind of access we need. Or rather, the initial buffer allocation can deref into a
         * reference to a slice of atomics but to slice it we must use our own type. And all
         * operations are implemented to work on full units of this atomic type.
         *
         * At least for relaxed operations, the larger unit is somewhat equivalent. It's certainly
         * a bit of a balance. Larger units might be more costly from destructive interference
         * between different accesses, but small units are costly due to added instructions.
         *
         * View the selection below as a 'best-effort' really.
         **/
        #[cfg(all(
            not(target_has_atomic = "8"),
            not(target_has_atomic = "16"),
            not(target_has_atomic = "32"),
            not(target_has_atomic = "64"),
        ))]
        compile_error!("Synchronous buffer API requires one atomic unsigned type");

        // Select the *widest* supported atomic integer as the uniform access unit: each
        // arm requires that no wider `target_has_atomic` width is available.
        #[cfg(all(
            target_has_atomic = "8",
            not(target_has_atomic = "16"),
            not(target_has_atomic = "32"),
            not(target_has_atomic = "64"),
        ))]
        pub(crate) type AtomicPart = core::sync::atomic::AtomicU8;
        #[cfg(all(
            target_has_atomic = "16",
            not(target_has_atomic = "32"),
            not(target_has_atomic = "64"),
        ))]
        pub(crate) type AtomicPart = core::sync::atomic::AtomicU16;
        #[cfg(all(
            target_has_atomic = "32",
            not(target_has_atomic = "64"),
        ))]
        pub(crate) type AtomicPart = core::sync::atomic::AtomicU32;
        #[cfg(all(
            target_has_atomic = "64",
        ))]
        pub(crate) type AtomicPart = core::sync::atomic::AtomicU64;

        // Number of atomic units that make up one `MaxAligned` worth of bytes.
        const ATOMIC_PARTS: usize = MAX_ALIGN / core::mem::size_of::<AtomicPart>();

        $(
            #[cfg_attr(
                any($(target_arch = $arch),*),
                repr(align($num))
            )]
        )*
        $(#[$atomic_attr])*
        pub struct MaxAtomic(pub(crate) [AtomicPart; ATOMIC_PARTS]);

        $(
            #[cfg_attr(
                any($(target_arch = $arch),*),
                repr(align($num))
            )]
        )*
        $(#[$cell_attr])*
        pub struct MaxCell(pub(crate) Cell<[u8; MAX_ALIGN]>);

        $(
            #[cfg(
                any($(target_arch = $arch),*),
            )]
            pub(crate) const MAX_ALIGN: usize = $num;
        )*

        // Fallback alignment for architectures not named in the invocation table.
        #[cfg(
            not(any(
                $(any($(target_arch = $arch),*)),*
            )),
        )]
        pub(crate) const MAX_ALIGN: usize = 8;
    }
}
165
def_max_align! {
    match cfg(target) {
        "x86" | "x86_64" => 32,
        "arm" => 16,
        "aarch64" => 16,
        "wasm32" => 16,
    }

    /// A byte-like-type that is aligned to the required max alignment.
    ///
    /// This type does not contain padding and implements `Pod`. Generally, the alignment and size
    /// requirement is kept small to avoid overhead.
    #[derive(Clone, Copy)]
    #[repr(C)]
    struct MaxAligned(..);

    /// Atomic equivalence of [`MaxAligned`].
    ///
    /// This contains an array of the widest atomic unsigned integer type available on the
    /// target (one of [`core::sync::atomic::AtomicU8`] up to `AtomicU64`).
    struct MaxAtomic(..);

    /// A cell of a byte array equivalent to [`MaxAligned`].
    struct MaxCell(..);
}
190
// SAFETY: `MaxAligned` is `repr(C)` over a `[u8; MAX_ALIGN]` — the all-zero bit pattern is
// valid and there is no padding.
unsafe impl bytemuck::Zeroable for MaxAligned {}
// SAFETY: additionally `Copy` (derived above) and every bit pattern is a valid value.
unsafe impl bytemuck::Pod for MaxAligned {}
193
/// Wraps a type by value but removes its alignment requirement.
#[repr(packed(1))]
// Deriving Clone works by Copy, which is why it works at all.
#[derive(Clone, Copy)]
pub struct Unaligned<T>(pub T);

// SAFETY: `packed(1)` guarantees no padding between or after the field; validity and
// zeroability are exactly those of the wrapped `T`, hence the forwarded bounds.
unsafe impl<T: bytemuck::Zeroable> bytemuck::Zeroable for Unaligned<T> {}
unsafe impl<T: bytemuck::Pod> bytemuck::Pod for Unaligned<T> {}
202
203impl<T> From<T> for Unaligned<T> {
204    fn from(value: T) -> Self {
205        Unaligned(value)
206    }
207}
208
209impl<T> Unaligned<T> {
210    /// Unwrap the inner value.
211    ///
212    /// This is the same as accessing the public field, but the function type makes for better type
213    /// inference and allows using that access with [`Option::map`] etc.
214    pub fn into_inner(self) -> T {
215        self.0
216    }
217}
218
// Implements `AsTexel` for a built-in (typically `core::arch` SIMD) type. The `const`
// assertion forces a compile error whenever the type violates the `Texel` invariants,
// which is what justifies the `new_unchecked` call below.
macro_rules! builtin_texel {
    ( $name:ty ) => {
        impl AsTexel for $name {
            fn texel() -> Texel<Self> {
                const _: () = {
                    assert!(Texel::<$name>::check_invariants());
                };

                // SAFETY: the `const` assertion above verified alignment, non-ZST and
                // absence of drop glue at compile time.
                unsafe { Texel::new_unchecked() }
            }
        }
    };
}
232
233pub(crate) mod constants {
234    use super::{AsTexel, MaxAligned, Texel};
235
236    macro_rules! constant_texel {
237        ($(($name:ident, $type:ty)),*) => {
238            $(pub const $name: Texel<$type> = Texel(core::marker::PhantomData) ;
239              impl AsTexel for $type {
240                  fn texel() -> Texel<Self> {
241                      const _: () = {
242                          assert!(Texel::<$type>::check_invariants());
243                      };
244
245                      $name
246                  }
247              }
248              )*
249        }
250    }
251
252    constant_texel!(
253        (I8, i8),
254        (U8, u8),
255        (I16, i16),
256        (U16, u16),
257        (I32, i32),
258        (U32, u32),
259        (F32, f32),
260        (I64, i64),
261        (U64, u64),
262        (F64, f64),
263        (USIZE, usize),
264        (ISIZE, isize),
265        (MAX, MaxAligned)
266    );
267
268    impl<T: AsTexel> AsTexel for [T; 1] {
269        fn texel() -> Texel<[T; 1]> {
270            T::texel().array::<1>()
271        }
272    }
273
274    impl<T: AsTexel> AsTexel for [T; 2] {
275        fn texel() -> Texel<[T; 2]> {
276            T::texel().array::<2>()
277        }
278    }
279
280    impl<T: AsTexel> AsTexel for [T; 3] {
281        fn texel() -> Texel<[T; 3]> {
282            T::texel().array::<3>()
283        }
284    }
285
286    impl<T: AsTexel> AsTexel for [T; 4] {
287        fn texel() -> Texel<[T; 4]> {
288            T::texel().array::<4>()
289        }
290    }
291
292    impl<T: AsTexel> AsTexel for [T; 5] {
293        fn texel() -> Texel<[T; 5]> {
294            T::texel().array::<5>()
295        }
296    }
297
298    impl<T: AsTexel> AsTexel for [T; 6] {
299        fn texel() -> Texel<[T; 6]> {
300            T::texel().array::<6>()
301        }
302    }
303
304    impl<T: AsTexel> AsTexel for [T; 7] {
305        fn texel() -> Texel<[T; 7]> {
306            T::texel().array::<7>()
307        }
308    }
309
310    impl<T: AsTexel> AsTexel for [T; 8] {
311        fn texel() -> Texel<[T; 8]> {
312            T::texel().array::<8>()
313        }
314    }
315
316    impl<T: AsTexel> AsTexel for ::core::num::Wrapping<T> {
317        fn texel() -> Texel<::core::num::Wrapping<T>> {
318            T::texel().num_wrapping()
319        }
320    }
321}
322
#[cfg(target_arch = "x86")]
mod x64 {
    use super::{AsTexel, Texel};
    use core::arch::x86;

    // Fix: `builtin_texel!(x86::__m128)` was previously invoked twice, which expands to
    // two identical `impl AsTexel for __m128` items — a conflicting-implementations
    // compile error (E0119) on `target_arch = "x86"`. Each type is listed exactly once.
    builtin_texel!(x86::__m128);
    builtin_texel!(x86::__m128d);
    builtin_texel!(x86::__m128i);
    builtin_texel!(x86::__m256);
    builtin_texel!(x86::__m256d);
    builtin_texel!(x86::__m256i);
}
337
// SSE/AVX vector types on x86_64; each invocation statically checks the `Texel`
// invariants before registering the type.
#[cfg(target_arch = "x86_64")]
mod x64_64 {
    use super::{AsTexel, Texel};
    use core::arch::x86_64;

    builtin_texel!(x86_64::__m128);
    builtin_texel!(x86_64::__m128d);
    builtin_texel!(x86_64::__m128i);
    builtin_texel!(x86_64::__m256);
    builtin_texel!(x86_64::__m256d);
    builtin_texel!(x86_64::__m256i);
}
350
#[cfg(target_arch = "arm")]
mod arm { /* all vector types on 32-bit arm are still unstable; nothing to register */
}
354
// NEON vector types available as stable on aarch64.
#[cfg(target_arch = "aarch64")]
mod arm {
    use super::{AsTexel, Texel};
    use core::arch::aarch64;

    builtin_texel!(aarch64::float64x1_t);
    builtin_texel!(aarch64::float64x1x2_t);
    builtin_texel!(aarch64::float64x1x3_t);
    builtin_texel!(aarch64::float64x1x4_t);
    builtin_texel!(aarch64::float64x2_t);
    builtin_texel!(aarch64::float64x2x2_t);
    builtin_texel!(aarch64::float64x2x3_t);
    builtin_texel!(aarch64::float64x2x4_t);
}
369
// SIMD vector type on wasm32. The module was previously (mis)named `arm`, evidently a
// copy-paste from the block above; the name is private and only hosts trait impls, so
// renaming it is interface-neutral.
#[cfg(target_arch = "wasm32")]
mod wasm {
    use super::{AsTexel, Texel};
    use core::arch::wasm32;

    builtin_texel!(wasm32::v128);
}
377
378impl<P: bytemuck::Pod> Texel<P> {
379    /// Try to construct an instance of the marker.
380    ///
381    /// If successful, you can freely use it to access the image buffers. This requires:
382    /// - The type must have an alignment of *at most* `MAX_ALIGN`.
383    /// - The type must *not* be a ZST.
384    /// - The type must *not* have any Drop-glue (no drop, any contain not part that is Drop).
385    pub const fn for_type() -> Option<Self> {
386        if Texel::<P>::check_invariants() {
387            Some(Texel(PhantomData))
388        } else {
389            None
390        }
391    }
392}
393
impl<P, O: bytemuck::TransparentWrapper<P>> IsTransparentWrapper<P, O> {
    /// The only constructor; available exactly when the `TransparentWrapper` bound holds.
    pub const CONST: Self = IsTransparentWrapper(PhantomData);
}
397
/// The **only** ways to construct a `buf`, protecting the alignment invariant.
/// Hint: This is an unsized type so there is no safe way of constructing it.
impl buf {
    /// Required alignment for the start of any `buf`.
    pub const ALIGNMENT: usize = MAX_ALIGN;

    /// Wrap bytes in a `buf`.
    ///
    /// The bytes need to be aligned to `ALIGNMENT`; returns `None` otherwise.
    pub fn from_bytes(bytes: &[u8]) -> Option<&Self> {
        if bytes.as_ptr() as usize % Self::ALIGNMENT == 0 {
            // SAFETY: this is an almost trivial cast of unsized references. Additionally, we still
            // guarantee that this is at least aligned to `MAX_ALIGN`.
            Some(unsafe { &*(bytes as *const [u8] as *const Self) })
        } else {
            None
        }
    }

    /// Wrap bytes in a `buf`.
    ///
    /// The bytes need to be aligned to `ALIGNMENT`; returns `None` otherwise.
    pub fn from_bytes_mut(bytes: &mut [u8]) -> Option<&mut Self> {
        if bytes.as_ptr() as usize % Self::ALIGNMENT == 0 {
            // SAFETY: this is an almost trivial cast of unsized references. Additionally, we still
            // guarantee that this is at least aligned to `MAX_ALIGN`.
            Some(unsafe { &mut *(bytes as *mut [u8] as *mut Self) })
        } else {
            None
        }
    }
}
429
impl atomic_buf {
    /// Required alignment for the start of any `atomic_buf`.
    pub const ALIGNMENT: usize = MAX_ALIGN;

    /// Reinterpret a slice of max-aligned atomic units as an `atomic_buf`.
    pub fn from_slice(values: &[MaxAtomic]) -> &Self {
        debug_assert_eq!(values.as_ptr() as usize % Self::ALIGNMENT, 0);
        let ptr = values.as_ptr() as *const AtomicPart;
        let count = values.len() * ATOMIC_PARTS;
        // Safety: these types are binary compatible, they wrap atomics of the same size, and
        // starting at the same address, with a pointer of the same provenance which will be valid
        // for the whole lifetime.
        //
        // This cast relaxes the alignment requirements from `MaxAtomic` to that of the underlying
        // atomic, which allows us to go beyond the public interface.
        //
        // The new size covered by the slice is the same as the input slice, since there are
        // `ATOMIC_PARTS` units within each `MaxAtomic`. The memory invariants of the new type are
        // the same as the old type, which is that we access only with atomic instructions of the
        // size of the `AtomicPart` type.
        let atomics = core::ptr::slice_from_raw_parts::<AtomicPart>(ptr, count);
        // Safety: `atomic_buf` has the same layout as a `[MaxAtomic]` and wraps it transparently.
        unsafe { &*(atomics as *const Self) }
    }

    /// Reinterpret a mutable slice of max-aligned atomic units as a mutable `atomic_buf`.
    pub(crate) fn from_slice_mut(values: &mut [MaxAtomic]) -> &mut Self {
        debug_assert_eq!(values.as_ptr() as usize % Self::ALIGNMENT, 0);
        let ptr = values.as_mut_ptr() as *mut AtomicPart;
        let count = values.len() * ATOMIC_PARTS;
        // Safety: as `from_slice`. We converted the input pointer from a mutable pointer itself,
        // fulfilling the extra uniqueness and ownership requirement.
        let atomics = core::ptr::slice_from_raw_parts_mut::<AtomicPart>(ptr, count);
        // Safety: `atomic_buf` has the same layout as a `[MaxAtomic]` and wraps it transparently.
        unsafe { &mut *(atomics as *mut Self) }
    }

    /// Wrap a sub-slice of bytes from an atomic buffer into a new `atomic_buf`.
    ///
    /// The bytes need to be aligned to `ALIGNMENT`. Returns `None` if this check fails and
    /// returns the newly wrapped buffer in `Some` otherwise.
    pub fn from_bytes(bytes: AtomicSliceRef<u8>) -> Option<&Self> {
        if bytes.start % Self::ALIGNMENT == 0 {
            let offset = bytes.start / core::mem::size_of::<AtomicPart>();
            // NOTE(review): the length is rounded *up* to whole `AtomicPart` units, so the
            // resulting buffer may cover a few trailing bytes beyond the requested range —
            // presumably intended since all access happens in whole units; confirm at callers.
            let len = bytes.len().div_ceil(core::mem::size_of::<AtomicPart>());
            let buffer = &bytes.buf.0[offset..][..len];
            // Safety: these types are binary compatible. The metadata is also the same, as both
            // types encapsulate a slice of `AtomicPart`-sized types.
            Some(unsafe { &*(buffer as *const _ as *const Self) })
        } else {
            None
        }
    }

    /// Wrap bytes in an atomic `buf`.
    ///
    /// The bytes need to be aligned to `ALIGNMENT`. Additionally the length must be a multiple of
    /// the `MaxAtomic` size's units. Returns `None` if these checks fail and returns the newly
    /// wrapped buffer in `Some` otherwise.
    pub fn from_bytes_mut(bytes: &mut [u8]) -> Option<&mut Self> {
        if bytes.as_ptr() as usize % Self::ALIGNMENT != 0 {
            None
        } else if bytes.len() % core::mem::size_of::<MaxAtomic>() != 0 {
            None
        } else {
            let len = bytes.len() / core::mem::size_of::<AtomicPart>();
            let ptr = bytes.as_mut_ptr() as *mut AtomicPart;
            // SAFETY: We fulfill the alignment and length requirements for this cast, i.e. there
            // are enough bytes available in this slice. Additionally, we still guarantee that this
            // is at least aligned to `MAX_ALIGN`. We also have the shared read-write provenance on
            // our pointer that a shared reference to atomic requires.
            let atomics = ptr::slice_from_raw_parts_mut(ptr, len);
            Some(unsafe { &mut *(atomics as *mut Self) })
        }
    }

    /// Wrapper around the unstable `<Atomic*>::get_mut_slice`.
    ///
    /// The `&mut` receiver guarantees exclusive access, so plain byte access is sound.
    pub(crate) fn part_mut_slice(slice: &mut [AtomicPart]) -> &mut [u8] {
        let len = core::mem::size_of_val(slice);
        let ptr = slice.as_mut_ptr() as *mut u8;
        // SAFETY: this is an almost trivial cast of unsized references. Additionally, we still
        // guarantee that this is at least aligned to `MAX_ALIGN`.
        unsafe { slice::from_raw_parts_mut(ptr, len) }
    }
}
512
impl cell_buf {
    /// Required alignment for the start of any `cell_buf`.
    pub const ALIGNMENT: usize = MAX_ALIGN;

    /// Reinterpret a slice of max-aligned cells as a `cell_buf`.
    pub fn from_slice(values: &[MaxCell]) -> &Self {
        debug_assert_eq!(values.as_ptr() as usize % Self::ALIGNMENT, 0);
        let ptr = values.as_ptr() as *const Cell<u8>;
        let count = core::mem::size_of_val(values);
        // Safety: constructs a pointer to a slice validly covering exactly the values in the
        // input. The byte length is determined by `size_of_val` and starting at the same address,
        // with a pointer of the same provenance which will be valid for the whole lifetime. The
        // memory invariants of the new type are the same as the old type: unsynchronized,
        // single-threaded access through `Cell` (neither type is `Sync`).
        let memory = core::ptr::slice_from_raw_parts::<Cell<u8>>(ptr, count);
        // Safety: these types are binary compatible, they wrap memory of the same size.
        // This cast relaxes the alignment requirements from `MaxCell` to those of the underlying
        // bytes, which allows us to go beyond the public interface.
        unsafe { &*(memory as *const Self) }
    }

    /// Reinterpret a mutable slice of max-aligned cells as a mutable `cell_buf`.
    pub(crate) fn from_slice_mut(values: &mut [MaxCell]) -> &mut Self {
        debug_assert_eq!(values.as_ptr() as usize % Self::ALIGNMENT, 0);
        let ptr = values.as_mut_ptr() as *mut Cell<u8>;
        let count = core::mem::size_of_val(values);
        // Safety: as `from_slice`. We converted the input pointer from a mutable pointer itself,
        // fulfilling the extra uniqueness and ownership requirement.
        let memory = core::ptr::slice_from_raw_parts_mut::<Cell<u8>>(ptr, count);
        // Safety: `cell_buf` has the same layout as a `[Cell<u8>]` and wraps it transparently.
        unsafe { &mut *(memory as *mut Self) }
    }

    /// Interpret a slice of bytes in an unsynchronized shared `cell_buf`.
    ///
    /// The bytes need to be aligned to `ALIGNMENT`; returns `None` otherwise.
    pub fn from_bytes(bytes: &[Cell<u8>]) -> Option<&Self> {
        if bytes.as_ptr() as usize % Self::ALIGNMENT == 0 {
            // Safety: these types are binary compatible. The metadata is also the same, as both
            // types encapsulate a slice of `u8`-sized types.
            Some(unsafe { &*(bytes as *const [_] as *const Cell<[u8]> as *const cell_buf) })
        } else {
            None
        }
    }

    /// Wrap bytes in an unsynchronized shared `cell_buf`.
    ///
    /// The bytes need to be aligned to `ALIGNMENT`; returns `None` otherwise. The `&mut`
    /// receiver is immediately shared through `Cell::from_mut`.
    pub fn from_bytes_mut(bytes: &mut [u8]) -> Option<&Self> {
        let slice = Cell::from_mut(bytes).as_slice_of_cells();
        Self::from_bytes(slice)
    }
}
564
impl<P> Texel<P> {
    /// Create a witness certifying `P` as a texel without checks.
    ///
    /// # Safety
    ///
    /// The type `P` must __not__:
    /// * have any validity invariants, i.e. it mustn't contain any padding.
    /// * have any safety invariants. This implies it can be copied.
    /// * have an alignment larger than [`MaxAligned`].
    /// * be a zero-size type.
    ///
    /// Furthermore, tentatively, the type must not have any drop glue. That is its members are all
    /// simple types without Drop implementations. This requirement exists mainly to avoid code
    /// accidentally leaking instances, and ensures that copies created from their byte
    /// representation—which is safe according to the other invariants—do not cause unexpected
    /// effects.
    ///
    /// Note that the alignment requirement with regards to `MaxAligned` is __architecture
    /// dependent__ as the exact bound varies across the `target_arch` feature. Where possible, add
    /// static assertions to each call site of this function.
    ///
    /// [`MaxAligned`]: struct.MaxAligned.html
    pub const unsafe fn new_unchecked() -> Self {
        // Best-effort verification in debug builds; the caller carries the obligation.
        debug_assert!(Self::check_invariants());
        Texel(PhantomData)
    }

    /// Note this isn't exhaustive. Indeed, we have no way to check for padding.
    pub(crate) const fn check_invariants() -> bool {
        mem::align_of::<P>() <= MAX_ALIGN && mem::size_of::<P>() > 0 && !mem::needs_drop::<P>()
    }

    /// Proxy of `core::mem::align_of`.
    pub const fn align(self) -> usize {
        mem::align_of::<P>()
    }

    /// Proxy of `core::mem::size_of`.
    pub const fn size(self) -> usize {
        mem::size_of::<P>()
    }

    /// Publicly visible function to use the guarantee of non-ZST.
    pub const fn size_nz(self) -> core::num::NonZeroUsize {
        match core::num::NonZeroUsize::new(self.size()) {
            // Unreachable: every constructor of `Texel` verifies a non-zero size.
            None => panic!(""),
            Some(num) => num,
        }
    }

    // A number of constructors that are technically unsafe. Note that we could write them as safe
    // code here to pad our stats but they are not checked by the type system so it's risky. Better
    // explain their safety in the code as comments.

    /// Construct a texel for an array of `N` elements of the contained type.
    ///
    /// # Panics
    ///
    /// This function panics when called with `N` equal to 0.
    pub const fn array<const N: usize>(self) -> Texel<[P; N]> {
        if N == 0 {
            panic!()
        }

        // Safety:
        // * has no validity/safety invariants
        // * has the same alignment as P which is not larger than MaxAligned
        unsafe { Texel::new_unchecked() }
    }

    /// Construct a texel for unaligned data of the contained type.
    pub const fn unaligned(self) -> Texel<Unaligned<P>> {
        // Safety:
        // * has no validity/safety invariants
        // * has alignment 1 which is not larger than MaxAligned
        unsafe { Texel::new_unchecked() }
    }

    /// Construct a texel by wrapping into a transparent wrapper.
    ///
    /// TODO: a constructor for `Texel<O>` based on proof of transmutation from &mut P to &mut O,
    /// based on the standard transmutation RFC. This is more flexible than bytemuck's
    /// TransparentWrapper trait.
    pub const fn transparent_wrap<O>(self, _: IsTransparentWrapper<P, O>) -> Texel<O> {
        // Safety:
        // * P and O must have the same invariants, none
        // * P and O have the same alignment
        unsafe { Texel::new_unchecked() }
    }

    /// Construct a texel by unwrapping a transparent wrapper.
    pub const fn transparent_unwrap<O>(self, _: IsTransparentWrapper<O, P>) -> Texel<O> {
        // Safety:
        // * P and O must have the same invariants, none
        // * P and O have the same alignment
        unsafe { Texel::new_unchecked() }
    }

    /// Construct a texel that contains a number in the standard `Wrapping` type.
    pub const fn num_wrapping(self) -> Texel<num::Wrapping<P>> {
        // * Texel<P> = Self certifies the byte properties.
        // * `core::num::Wrapping` is `repr(transparent)`
        unsafe { Texel::new_unchecked() }
    }
}
670
impl<T, const N: usize> Texel<[T; N]> {
    /// Construct a texel, from an array of elements.
    pub const fn array_element(self) -> Texel<T> {
        // Safety:
        // We'll see that all properties are implied by _any_ suitable array.
        // - The type must have an alignment of *at most* `MAX_ALIGN`. Array and inner type have
        //   the same alignment.
        // - The type must *not* be a ZST. The array would otherwise be a ZST.
        // - The type must *not* have any Drop-glue (no drop impl, and no part that is Drop).
        //   The array would otherwise have Drop-glue.
        unsafe { Texel::new_unchecked() }
    }
}
684
/// Protocol for [`Texel::store_atomic_slice_unchecked`] argument. Implementation detail.
///
/// Acts as a cursor over source bytes that is drained one `AtomicPart`-sized chunk at a
/// time. NOTE(review): per-method semantics are inferred from the names (initialize, then
/// first/interior/last unit) — confirm against the `store_atomic_slice_unchecked` body.
trait DataSource {
    fn init(&mut self, init: usize);
    fn load_head(&mut self, val: &mut [u8; MaxAtomic::PART_SIZE]);
    fn load(&mut self, val: &mut [u8; MaxAtomic::PART_SIZE]);
    fn load_tail(&mut self, val: &mut [u8; MaxAtomic::PART_SIZE]);
}
692
693/// Operations that can be performed based on the evidence of Texel.
694impl<P> Texel<P> {
    /// Construct a value of `P` from thin air, with zeroed representation.
    ///
    /// Valid because constructing the `Texel` certified that `P` has no byte invariants.
    pub fn zeroed(self) -> P {
        // SAFETY: by `Texel` being a POD this is a valid representation.
        unsafe { core::mem::zeroed::<P>() }
    }
700
    /// Copy a texel.
    ///
    /// Note that this does not require `Copy` because that requirement was part of the
    /// requirements of constructing this `Texel` witness.
    pub fn copy_val(self, val: &P) -> P {
        // SAFETY: by the constructor, this type can be copied byte-by-byte.
        unsafe { ptr::read(val) }
    }
709
    /// Copy the value out of a cell, without requiring `P: Copy`.
    pub fn copy_cell(self, val: &Cell<P>) -> P {
        // SAFETY: by the constructor, this inner type can be copied byte-by-byte. And `Cell` is a
        // transparent wrapper so it can be read byte-by-byte as well.
        unsafe { ptr::read(val) }.into_inner()
    }
715
    /// Undo a [`Cell::as_slice_of_cells`] call.
    ///
    /// Round-trips through the raw byte view; the alignment check cannot fail since the
    /// input slice already was a valid slice of `Cell<P>`.
    pub fn cell_as_slice(self, val: &[Cell<P>]) -> &Cell<[P]> {
        let new_slice = self
            .try_to_cell(self.cell_bytes(val).as_slice_of_cells())
            .expect("alignment held previously");
        debug_assert_eq!(new_slice.as_slice_of_cells().len(), val.len());
        new_slice
    }
724
    /// Efficiently store a slice of shared read values to cells.
    ///
    /// We choose an outer slice for the parameter only since the standard library offers the
    /// transposition out of the type parameter, but not its inverse yet. Call
    /// [`Cell::as_slice_of_cells`] as needed.
    ///
    /// # Panics
    ///
    /// Panics when `from` and `val` do not have the same length.
    #[track_caller]
    pub fn store_cell_slice(self, val: &[Cell<P>], from: &[P]) {
        assert_eq!(from.len(), val.len());
        // SAFETY: by the constructor, this inner type can be copied byte-by-byte. And `Cell` is a
        // transparent wrapper. By our assertion the slices are of the same length. Note we do not
        // assert these slices to be non-overlapping! We could have `P = Cell<X>` and then it's
        // unclear if Rust allows these to overlap or not. I guess we currently have that `Cell<X>`
        // is never `Copy` so we couldn't have such a `Texel` but alas that negative impl is not
        // guaranteed by any logic I came across.
        unsafe {
            // `ptr::copy` (memmove) rather than `copy_nonoverlapping` for the reason above.
            ptr::copy(
                from.as_ptr(),
                // SAFETY: the slice of `Cell`s is all `UnsafeCell`.
                //
                // <https://github.com/rust-lang/rust/issues/88248#issuecomment-2397394716>
                (val as *const [Cell<P>] as *mut [Cell<P>]).cast(),
                from.len(),
            )
        }
    }
750
    /// Efficiently copy a slice of values from cells to an owned buffer.
    ///
    /// We choose an outer slice for the parameter only since the standard library offers the
    /// transposition out of the type parameter, but not its inverse yet. Call
    /// [`Cell::as_slice_of_cells`] as needed.
    ///
    /// # Panics
    ///
    /// Panics when `val` and `into` do not have the same length.
    #[track_caller]
    pub fn load_cell_slice(self, val: &[Cell<P>], into: &mut [P]) {
        assert_eq!(into.len(), val.len());
        // SAFETY: see `store_cell_slice` but since we have a mutable reference to the target we
        // can assume it does not overlap.
        unsafe {
            ptr::copy_nonoverlapping(
                (val as *const [Cell<P>]).cast(),
                into.as_mut_ptr(),
                into.len(),
            )
        }
    }
769
    /// Load a value from an atomic slice.
    ///
    /// The result is only correct if no concurrent modification occurs. The library promises
    /// *basic soundness* but no particular defined behaviour under parallel modifications to the
    /// memory bytes which describe the value to be loaded.
    ///
    /// Each atomic unit is read at most once.
    pub fn load_atomic(self, val: AtomicRef<P>) -> P {
        // Start from a zeroed value and let the slice loader fill it in place.
        let mut value = self.zeroed();
        let slice = AtomicSliceRef::from_ref(val);
        let into = core::slice::from_ref(Cell::from_mut(&mut value));
        self.load_atomic_slice_unchecked(slice, into);
        value
    }
784
785    /// Load values from an atomic slice.
786    ///
787    /// The results is only correct if no concurrent modification occurs. The library promises
788    /// *basic soundness* but no particular defined behaviour under parallel modifications to the
789    /// memory bytes which describe the value to be loaded.
790    ///
791    /// Each atomic unit is read at most once.
792    ///
793    /// # Panics
794    ///
795    /// This method panics if the slice and the target buffer do not have the same logical length.
796    #[track_caller]
797    pub fn load_atomic_slice(self, val: AtomicSliceRef<P>, into: &mut [P]) {
798        assert_eq!(val.len(), into.len());
799        self.load_atomic_slice_unchecked(val, Cell::from_mut(into).as_slice_of_cells());
800    }
801
802    /// Load values from an atomic slice to a slice of cells.
803    ///
804    /// The results is only correct if no concurrent modification occurs. The library promises
805    /// *basic soundness* but no particular defined behaviour under parallel modifications to the
806    /// memory bytes which describe the value to be loaded.
807    ///
808    /// Each atomic unit is read at most once.
809    ///
810    /// # Panics
811    ///
812    /// This method panics if the slice and the target buffer do not have the same length.
813    #[track_caller]
814    pub fn load_atomic_to_cells(self, val: AtomicSliceRef<P>, into: &[Cell<P>]) {
815        assert_eq!(val.len(), into.len());
816        // Always works, just undoing the `as_slice_of_cells` of the argument.
817        self.load_atomic_slice_unchecked(val, into);
818    }
819
820    fn load_atomic_slice_unchecked(self, val: AtomicSliceRef<P>, into: &[Cell<P>]) {
821        let offset = val.start / core::mem::size_of::<AtomicPart>();
822        let mut initial_skip = val.start % core::mem::size_of::<AtomicPart>();
823        let mut target = self.cell_bytes(into).as_slice_of_cells();
824
825        let mut buffer = val.buf.0[offset..].iter();
826        // By the invariants of `AtomicRef`, that number of bytes is in-bounds.
827        let mut load = buffer.next().unwrap().load(atomic::Ordering::Relaxed);
828
829        loop {
830            let input = &bytemuck::bytes_of(&load)[initial_skip..];
831            let copy_len = input.len().min(target.len());
832            constants::U8.store_cell_slice(&target[..copy_len], &input[..copy_len]);
833            target = &target[copy_len..];
834
835            if target.is_empty() {
836                break;
837            }
838
839            load = buffer.next().unwrap().load(atomic::Ordering::Relaxed);
840            initial_skip = 0;
841        }
842    }
843
844    /// Store a value to an atomic slice.
845    ///
846    /// The results is only correct if no concurrent modification occurs. The library promises
847    /// *basic soundness* but no particular defined behaviour under parallel modifications to the
848    /// memory bytes which describe the value to be store.
849    ///
850    /// Provides the same wait-freeness as the underlying platform for `fetch_*` instructions, that
851    /// is this does not use `compare_exchange_weak`. This implies that concurrent modifications to
852    /// bytes *not* covered by this particular representation will not inherently block progress.
853    pub fn store_atomic(self, val: AtomicRef<P>, value: P) {
854        let slice = AtomicSliceRef::from_ref(val);
855        self.store_atomic_slice(slice, core::slice::from_ref(&value));
856    }
857
    /// Store values to an atomic slice.
    ///
    /// The result is only correct if no concurrent modification occurs. The library promises
    /// *basic soundness* but no particular defined behaviour under parallel modifications to the
    /// memory bytes which describe the values to be stored.
    ///
    /// Provides the same wait-freeness as the underlying platform for `fetch_*` instructions, that
    /// is this does not use `compare_exchange_weak`. This implies that concurrent modifications to
    /// bytes *not* covered by this particular representation will not inherently block progress.
    ///
    /// # Panics
    ///
    /// This method panics if the slice and the source buffer do not have the same logical length.
    #[track_caller]
    pub fn store_atomic_slice(self, val: AtomicSliceRef<P>, source: &[P]) {
        // Adapter that feeds the raw bytes of `source` to the generic store loop: a possibly
        // partial head within the first atomic unit, whole unit-sized chunks, then a possibly
        // partial tail.
        struct SliceSource<'lt> {
            // Number of bytes of the first atomic unit that precede our range.
            skip: usize,
            head: &'lt [u8],
            chunks: core::slice::ChunksExact<'lt, u8>,
            tail: &'lt [u8],
        }

        impl DataSource for SliceSource<'_> {
            fn init(&mut self, init: usize) {
                // `init` is the byte count up to the first atomic boundary; the source may be
                // shorter than that, hence the `min`.
                let len = self.head.len().min(init);
                let (head, body) = self.head.split_at(len);
                self.head = head;
                // Unused when `init == 0` since `load_head` is only invoked for a non-empty head.
                self.skip = MaxAtomic::PART_SIZE - init;

                let chunks = body.chunks_exact(MaxAtomic::PART_SIZE);
                self.tail = chunks.remainder();
                self.chunks = chunks;
            }

            fn load_head(&mut self, val: &mut [u8; MaxAtomic::PART_SIZE]) {
                // Only overwrite the trailing bytes of the first unit that belong to our range.
                let target = &mut val[self.skip..][..self.head.len()];
                target.copy_from_slice(self.head);
            }

            fn load(&mut self, val: &mut [u8; core::mem::size_of::<AtomicPart>()]) {
                if let Some(next) = self.chunks.next() {
                    val.copy_from_slice(next);
                } else {
                    // The store loop calls `load` exactly `body_count` times; running dry here
                    // would indicate a length-accounting bug.
                    debug_assert!(false);
                }
            }

            fn load_tail(&mut self, val: &mut [u8; MaxAtomic::PART_SIZE]) {
                // The tail occupies the leading bytes of the final unit.
                let target = &mut val[..self.tail.len()];
                target.copy_from_slice(self.tail);
            }
        }

        assert_eq!(val.len(), source.len());

        let source = SliceSource {
            head: self.to_bytes(source),
            skip: 0,
            chunks: [].chunks_exact(MaxAtomic::PART_SIZE),
            tail: &[],
        };

        self.store_atomic_slice_unchecked(val, source);
    }
922
923    /// Store values from cells to an atomic slice.
924    ///
925    /// The results is only correct if no concurrent modification occurs. The library promises
926    /// *basic soundness* but no particular defined behaviour under parallel modifications to the
927    /// memory bytes which describe the value to be store.
928    ///
929    /// Provides the same wait-freeness as the underlying platform for `fetch_*` instructions, that
930    /// is this does not use `compare_exchange_weak`. This implies that concurrent modifications to
931    /// bytes *not* covered by this particular representation will not inherently block progress.
932    ///
933    /// # Panics
934    ///
935    /// This method panics if the slice and the source buffer do not have the same logical length.
936    pub fn store_atomic_from_cells(self, val: AtomicSliceRef<P>, source: &[Cell<P>]) {
937        struct CellSource<'lt> {
938            skip: usize,
939            head: &'lt [Cell<u8>],
940            chunks: core::slice::ChunksExact<'lt, Cell<u8>>,
941            tail: &'lt [Cell<u8>],
942        }
943
944        impl DataSource for CellSource<'_> {
945            fn init(&mut self, init: usize) {
946                let len = self.head.len().min(init);
947                let (head, body) = self.head.split_at(len);
948                self.head = head;
949                self.skip = MaxAtomic::PART_SIZE - init;
950
951                let chunks = body.chunks_exact(MaxAtomic::PART_SIZE);
952                self.tail = chunks.remainder();
953                self.chunks = chunks;
954            }
955
956            fn load_head(&mut self, val: &mut [u8; MaxAtomic::PART_SIZE]) {
957                let target = &mut val[self.skip..][..self.head.len()];
958                constants::U8.load_cell_slice(self.head, target);
959            }
960
961            fn load(&mut self, val: &mut [u8; core::mem::size_of::<AtomicPart>()]) {
962                if let Some(next) = self.chunks.next() {
963                    constants::U8.load_cell_slice(next, val);
964                } else {
965                    debug_assert!(false);
966                }
967            }
968
969            fn load_tail(&mut self, val: &mut [u8; MaxAtomic::PART_SIZE]) {
970                let target = &mut val[..self.tail.len()];
971                constants::U8.load_cell_slice(self.tail, target);
972            }
973        }
974
975        assert_eq!(val.len(), source.len());
976
977        assert!(
978            {
979                let lhs = val.as_ptr_range();
980                let rhs = source.as_ptr_range();
981                lhs.end.addr() <= rhs.start.addr() || rhs.end.addr() <= lhs.start.addr()
982            },
983            "Your atomic slice aliases a slice of cells. While this may be permissible if you're \
984            very very careful about these values, you are violating safety invariants by using \
985            these values across non-local API boundaries"
986        );
987
988        let source = CellSource {
989            head: self.cell_bytes(source).as_slice_of_cells(),
990            skip: 0,
991            chunks: [].chunks_exact(MaxAtomic::PART_SIZE),
992            tail: &[],
993        };
994
995        self.store_atomic_slice_unchecked(val, source);
996    }
997
    // Store a data source to a slice, assuming they cover the same number of bytes.
    //
    // The byte range may start and end in the middle of an atomic unit. Those boundary units are
    // merged with `fetch_xor` so bytes outside the range remain untouched, while fully covered
    // units are overwritten with a plain `store`.
    fn store_atomic_slice_unchecked(self, val: AtomicSliceRef<P>, mut from: impl DataSource) {
        // Modify only some bits of an atomic value.
        fn modify_parts_with(
            part: &AtomicPart,
            with: impl FnOnce(&mut [u8; MaxAtomic::PART_SIZE]),
        ) {
            let original = part.load(atomic::Ordering::Relaxed);
            let mut value = original;

            let buffer = bytemuck::bytes_of_mut(&mut value);
            with(buffer.try_into().unwrap());

            // Any bits we did not modify, including those outside our own range, will not get
            // modified by this instruction. This provides the basic conflict guarantee.
            part.fetch_xor(original ^ value, atomic::Ordering::Relaxed);
        }

        // Index of the first atomic unit touched by the range.
        let offset = val.start / MaxAtomic::PART_SIZE;
        let mut buffer = val.buf.0[offset..].iter();

        // How many bytes from the start to first atomic boundary?
        let head_len = val.start.next_multiple_of(MaxAtomic::PART_SIZE) - val.start;
        from.init(head_len);

        // `saturating_sub` covers ranges that end within the first (head) unit.
        let after_head = (val.end - val.start).saturating_sub(head_len);
        // How many bytes is the end from its previous atomic boundary?
        let tail_skip = after_head % MaxAtomic::PART_SIZE;
        let body_count = after_head / MaxAtomic::PART_SIZE;

        if head_len > 0 {
            let into = buffer.next().unwrap();
            modify_parts_with(into, |buffer| from.load_head(buffer));
        }

        let body = buffer.as_slice();
        for part in &body[..body_count] {
            // Here we modify all bytes so just store..
            let mut value = Default::default();
            let buffer = bytemuck::bytes_of_mut(&mut value);
            from.load(buffer.try_into().unwrap());
            part.store(value, atomic::Ordering::Relaxed);
        }

        if tail_skip > 0 {
            // A partial tail implies one more unit follows the fully covered body; in-bounds by
            // the invariants of `AtomicSliceRef`.
            let into = &body[body_count];
            modify_parts_with(into, |buffer| from.load_tail(buffer));
        }
    }
1047
1048    /// Reinterpret a slice of aligned bytes as a slice of the texel.
1049    ///
1050    /// Note that the size (in bytes) of the slice will be shortened if the size of `P` is not a
1051    /// divisor of the input slice's size.
1052    pub fn to_slice<'buf>(self, buffer: &'buf [MaxAligned]) -> &'buf [P] {
1053        self.cast_buf(buf::new(buffer))
1054    }
1055
1056    /// Reinterpret a slice of aligned bytes as a mutable slice of the texel.
1057    ///
1058    /// Note that the size (in bytes) of the slice will be shortened if the size of `P` is not a
1059    /// divisor of the input slice's size.
1060    pub fn to_mut_slice<'buf>(self, buffer: &'buf mut [MaxAligned]) -> &'buf mut [P] {
1061        self.cast_mut_buf(buf::new_mut(buffer))
1062    }
1063
1064    /// Try to reinterpret a slice of bytes as a slice of the texel.
1065    ///
1066    /// This returns `Some` if the buffer is suitably aligned, and `None` otherwise.
1067    pub fn try_to_slice<'buf>(self, bytes: &'buf [u8]) -> Option<&'buf [P]> {
1068        if bytes.as_ptr() as usize % mem::align_of::<P>() == 0 {
1069            // SAFETY:
1070            // - The `pod`-ness is certified by `self`, which makes the bytes a valid
1071            //   representation of P.
1072            // - The total size is at most `bytes` by construction.
1073            let len = bytes.len() / mem::size_of::<P>();
1074            Some(unsafe { &*ptr::slice_from_raw_parts(bytes.as_ptr() as *const P, len) })
1075        } else {
1076            None
1077        }
1078    }
1079
1080    /// Try to reinterpret a slice of bytes as a slice of the texel.
1081    ///
1082    /// This returns `Some` if the buffer is suitably aligned, and `None` otherwise.
1083    pub fn try_to_slice_mut<'buf>(self, bytes: &'buf mut [u8]) -> Option<&'buf mut [P]> {
1084        if let Some(slice) = self.try_to_slice(bytes) {
1085            // SAFETY:
1086            // - The `pod`-ness is certified by `self`, which makes the bytes a valid
1087            //   representation of P. Conversely, it makes any P valid as bytes.
1088            let len = slice.len();
1089            Some(unsafe { &mut *ptr::slice_from_raw_parts_mut(bytes.as_mut_ptr() as *mut P, len) })
1090        } else {
1091            None
1092        }
1093    }
1094
1095    /// Interpret a byte slice as unaligned values of another type.
1096    ///
1097    /// This is essentially a call to [`Texel::to_slice`] however the specific output type
1098    /// selection ensures that it always succeeds.
1099    ///
1100    /// # Examples
1101    ///
1102    /// ```
1103    /// use image_texel::texels::{U8, U64};
1104    ///
1105    /// // This buffer is not guaranteed to be aligned!
1106    /// let raw_buffer = [0u16, 1, 2, 3].map(u16::to_be_bytes);
1107    /// let raw_bytes = U8.array().to_bytes(&raw_buffer);
1108    ///
1109    /// let unaligned = U64.to_unaligned_slice(raw_bytes);
1110    /// // Forces a copy. `texel.unaligned().copy` would work, too.
1111    /// assert_eq!(u64::from_be(unaligned[0].0), 0x0000_0001_0002_0003);
1112    /// ```
1113    pub fn to_unaligned_slice<'buf>(self, bytes: &'buf [u8]) -> &'buf [Unaligned<P>] {
1114        self.unaligned().try_to_slice(bytes).unwrap()
1115    }
1116
1117    /// Interpret a mutable byte slice as unaligned values of another type.
1118    ///
1119    /// # Examples
1120    ///
1121    /// ```
1122    /// use image_texel::texels::{U16, U64};
1123    ///
1124    /// // This buffer is not guaranteed to be aligned!
1125    /// let mut raw_buffer = [0u16; 4];
1126    /// let raw_bytes = U16.to_mut_bytes(&mut raw_buffer);
1127    ///
1128    /// let unaligned = U64.to_unaligned_slice_mut(raw_bytes);
1129    /// unaligned[0].0 = u64::from_be(0x0000_0001_0002_0003);
1130    /// assert_eq!(raw_buffer.map(u16::from_be), [0, 1, 2, 3]);
1131    /// ```
1132    pub fn to_unaligned_slice_mut<'buf>(self, bytes: &'buf mut [u8]) -> &'buf mut [Unaligned<P>] {
1133        self.unaligned().try_to_slice_mut(bytes).unwrap()
1134    }
1135
1136    /// Reinterpret a shared slice as a some particular type.
1137    ///
1138    /// Note that the size (in bytes) of the slice will be shortened if the size of `P` is not a
1139    /// divisor of the input slice's size.
1140    pub fn to_cell<'buf>(self, buffer: &'buf [MaxCell]) -> &'buf Cell<[P]> {
1141        cell_buf::from_slice(buffer).as_texels(self)
1142    }
1143
1144    /// Reinterpret a slice of texel as memory.
1145    ///
1146    /// Note that you can convert a reference to a single value by [`core::slice::from_ref`].
1147    pub fn try_to_cell<'buf>(self, bytes: &'buf [Cell<u8>]) -> Option<&'buf Cell<[P]>> {
1148        // Safety:
1149        // - The `pod`-ness certified by `self` ensures the cast of the contents of the memory is
1150        //   valid. All representations are a valid P and conversely and P is valid as bytes. Since
1151        //   Cell is a transparent wrapper the types are compatible.
1152        // - We uphold the share invariants of `Cell`, which are trivial (less than those required
1153        //   and provided by a shared reference).
1154        if bytes.as_ptr() as usize % mem::align_of::<P>() == 0 {
1155            let len = bytes.len() / mem::size_of::<P>();
1156            let ptr = ptr::slice_from_raw_parts(bytes.as_ptr() as *const P, len);
1157            Some(unsafe { &*(ptr as *const Cell<[P]>) })
1158        } else {
1159            None
1160        }
1161    }
1162
1163    /// Interpret a slice of cells as unaligned cells of another type.
1164    ///
1165    /// # Examples
1166    ///
1167    /// ```
1168    /// use core::cell::Cell;
1169    /// use image_texel::texels::{U16, U64};
1170    ///
1171    /// // This buffer is not guaranteed to be aligned to u64!
1172    /// let mut raw_buffer = [0u16; 4].map(Cell::new);
1173    /// let raw_bytes = U16.cell_bytes(&raw_buffer).as_slice_of_cells();
1174    ///
1175    /// // Write a u64 value anyways.
1176    /// let unaligned = U64.to_unaligned_cell(raw_bytes).as_slice_of_cells();
1177    /// unaligned[0].set(u64::from_be(0x0000_0001_0002_0003).into());
1178    ///
1179    /// let raw_buffer = raw_buffer.map(Cell::into_inner);
1180    /// assert_eq!(raw_buffer.map(u16::from_be), [0, 1, 2, 3]);
1181    /// ```
1182    pub fn to_unaligned_cell<'buf>(self, bytes: &'buf [Cell<u8>]) -> &'buf Cell<[Unaligned<P>]> {
1183        self.unaligned().try_to_cell(bytes).unwrap()
1184    }
1185
    /// Reinterpret a slice of atomically access memory with a type annotation.
    ///
    /// Returns `None` if the start of the byte range is not aligned for `P`. Otherwise the end
    /// of the range is rounded down to an alignment boundary of `P`.
    pub fn try_to_atomic<'buf>(
        self,
        bytes: AtomicSliceRef<'buf, u8>,
    ) -> Option<AtomicSliceRef<'buf, P>> {
        if bytes.start % mem::align_of::<P>() == 0 {
            // NOTE(review): `end` is truncated by `align_of::<P>()` here, whereas `try_to_slice`
            // truncates by `size_of::<P>()`. Confirm this is intended for types whose size is
            // not equal to their alignment — `end - start` may then not be a multiple of the
            // texel size.
            let end = bytes.end - bytes.end % mem::align_of::<P>();
            Some(AtomicSliceRef {
                buf: bytes.buf,
                start: bytes.start,
                end,
                texel: self,
            })
        } else {
            None
        }
    }
1203
1204    /// Interpret a slice of cells as unaligned atomic values of another type.
1205    ///
1206    /// # Examples
1207    ///
1208    /// ```
1209    /// use image_texel::texels::atomic_buf;
1210    /// use image_texel::texels::{MaxAtomic, U16, U64};
1211    ///
1212    /// let underlying = [MaxAtomic::zero(); 1];
1213    /// let raw_buffer = atomic_buf::new(&underlying[..]);
1214    ///
1215    /// // Get a partial slice of it, that is is not aligned to u64.
1216    /// let u16_slice = raw_buffer.index(U16.to_range(1..5).unwrap());
1217    /// let raw_bytes = U16.atomic_bytes(u16_slice);
1218    ///
1219    /// // Re-Interpret that as an unaligned slice of u64 values.
1220    /// let unaligned = U64.to_unaligned_atomic(raw_bytes);
1221    ///
1222    /// std::thread::scope(|scope| {
1223    ///     scope.spawn(|| {
1224    ///         // Write a u64 value.
1225    ///         U64.unaligned().store_atomic(
1226    ///             unaligned.index_one(0),
1227    ///             u64::from_be(0x0000_0001_0002_0003).into()
1228    ///         )
1229    ///    });
1230    /// });
1231    ///
1232    /// // Load from the buffer we've written to atomically.
1233    /// let mut values = [0; 4];
1234    /// U16.load_atomic_slice(u16_slice, &mut values[..]);
1235    /// assert_eq!(values.map(u16::from_be), [0u16, 1, 2, 3]);
1236    /// ```
1237    pub fn to_unaligned_atomic<'buf>(
1238        self,
1239        bytes: AtomicSliceRef<'buf, u8>,
1240    ) -> AtomicSliceRef<'buf, Unaligned<P>> {
1241        self.unaligned().try_to_atomic(bytes).unwrap()
1242    }
1243
1244    /// Reinterpret a slice of texel as memory.
1245    ///
1246    /// Note that you can convert a reference to a single value by [`core::slice::from_ref`].
1247    pub fn to_bytes<'buf>(self, texel: &'buf [P]) -> &'buf [u8] {
1248        // Safety:
1249        // * lifetime is not changed
1250        // * keeps the exact same size
1251        // * validity for byte reading checked by Texel constructor
1252        unsafe { slice::from_raw_parts(texel.as_ptr() as *const u8, mem::size_of_val(texel)) }
1253    }
1254
1255    /// Reinterpret a mutable slice of texel as memory.
1256    ///
1257    /// Note that you can convert a reference to a single value by [`core::slice::from_mut`].
1258    pub fn to_mut_bytes<'buf>(self, texel: &'buf mut [P]) -> &'buf mut [u8] {
1259        // Safety:
1260        // * lifetime is not changed
1261        // * keeps the exact same size
1262        // * validity as bytes checked by Texel constructor
1263        unsafe { slice::from_raw_parts_mut(texel.as_mut_ptr() as *mut u8, mem::size_of_val(texel)) }
1264    }
1265
1266    /// Reinterpret a slice of texel as memory.
1267    ///
1268    /// Note that you can convert a reference to a single value by [`core::slice::from_ref`].
1269    pub fn cell_bytes<'buf>(self, texel: &'buf [Cell<P>]) -> &'buf Cell<[u8]> {
1270        let ptr: *const [u8] =
1271            { ptr::slice_from_raw_parts(texel.as_ptr() as *const u8, mem::size_of_val(texel)) };
1272
1273        // Safety:
1274        // * lifetime is not changed
1275        // * kept the exact same size
1276        // * validity for byte representations both ways checked by Texel constructor
1277        unsafe { &*(ptr as *const Cell<[u8]>) }
1278    }
1279
1280    /// Reinterpret a slice of atomically modified texels as atomic bytes.
1281    pub fn atomic_bytes<'buf>(self, texel: AtomicSliceRef<'buf, P>) -> AtomicSliceRef<'buf, u8> {
1282        AtomicSliceRef {
1283            buf: texel.buf,
1284            start: texel.start,
1285            end: texel.end,
1286            texel: constants::U8,
1287        }
1288    }
1289
    /// Copy texel values between two cell slices, which are allowed to overlap.
    ///
    /// # Panics
    ///
    /// Panics if the two slices differ in length.
    #[track_caller]
    pub(crate) fn cell_memory_copy(self, a: &[Cell<P>], b: &[Cell<P>]) {
        assert_eq!(a.len(), b.len());
        // Safety:
        // - the source is readable for `len` units
        // - the target is writable for `len` items
        // - the Texel certifies that this copy creates valid values
        //
        // We could not do this as `b_to_slice.copy_from_slice(a_to_slice)` since that would assert
        // a non-overlap between the two that need no hold in general.
        unsafe { ptr::copy::<P>(a.as_ptr() as *const P, b.as_ptr() as *mut P, a.len()) };
    }
1302
    /// Compare two cell slices by memory, not by any content equality.
    ///
    /// Returns `false` immediately when the two slices differ in byte size.
    ///
    /// TODO: expose this, but under what name?
    pub(crate) fn cell_memory_eq<'a, 'b>(self, a: &'a [Cell<P>], b: &'b [Cell<P>]) -> bool {
        let len = mem::size_of_val(a);

        if len != mem::size_of_val(b) {
            return false;
        }

        // Safety: the same reasoning applies for both.
        // - this covers the exact memory range as the underlying slice of cells.
        // - the Texel certifies it is initialized memory.
        // - the lifetime is the same.
        // - the memory in the slice is not mutated. This is a little more subtle but `Cell` is not
        //   `Sync` so this thread is the only that could modify those contents currently as we
        //   have a reference to those contents. But also in this thread this function _is
        //   currently running_ and so it suffices that it does not to modify the contents. It does
        //   not access the slice through the cell in any way.
        // - the total size is at most `isize::MAX` since it was already a reference to it.
        let lhs: &'a [u8] = unsafe { slice::from_raw_parts(a.as_ptr() as *const u8, len) };
        let rhs: &'b [u8] = unsafe { slice::from_raw_parts(b.as_ptr() as *const u8, len) };

        lhs == rhs
    }
1328
    /// Compare a slices with untyped memory.
    ///
    /// Returns `false` immediately when the byte sizes differ.
    ///
    /// TODO: expose this, but under what name?
    pub(crate) fn cell_bytes_eq<'a, 'b>(self, a: &'a [Cell<P>], rhs: &[u8]) -> bool {
        let len = mem::size_of_val(a);

        if len != mem::size_of_val(rhs) {
            return false;
        }

        // Safety: see `cell_memory_eq`.
        let lhs: &'a [u8] = unsafe { slice::from_raw_parts(a.as_ptr() as *const u8, len) };

        // Really these two should not be overlapping! If the compiler knew, maybe a better memory
        // compare that is more aware of the cache effects of loading? But to be honest it should
        // not matter much.
        debug_assert!({
            let a_range = lhs.as_ptr_range();
            let b_range = rhs.as_ptr_range();

            a_range.end <= b_range.start || b_range.end <= a_range.start
        });

        lhs == rhs
    }
1354
    /// Copy the bytes of one atomic slice into another of the same logical length.
    ///
    /// # Panics
    ///
    /// Panics if the two slices differ in length.
    #[track_caller]
    pub(crate) fn atomic_memory_move(self, a: AtomicSliceRef<'_, P>, b: AtomicSliceRef<'_, P>) {
        // Adapter feeding bytes loaded from the source atomic slice to the generic store loop.
        struct SliceSource<'lt> {
            // Number of bytes of the first atomic unit that precede our range.
            skip: usize,
            head: AtomicSliceRef<'lt, u8>,
            // FIXME: the loads are straddling boundaries. Each side may be copied twice in the
            // effort of loading. Also iterating like this incurs some bounds checks. It's very
            // suboptimal. But the soundness of this whole thing scares me so let's not over
            // optimize before we know atomic-to-atomic copy is actually needed to be very fast.
            chunks: AtomicSliceRef<'lt, u8>,
            tail: AtomicSliceRef<'lt, u8>,
        }

        impl DataSource for SliceSource<'_> {
            fn init(&mut self, init: usize) {
                let len = self.head.len().min(init);
                let (head, body) = self.head.split_at(len);
                self.head = head;
                self.skip = MaxAtomic::PART_SIZE - init;

                // `PART_SIZE` is the size of an atomic integer type, a power of two, so masking
                // rounds the body length down to a whole number of parts.
                let chunks_len = body.len() & !(MaxAtomic::PART_SIZE - 1);
                let (chunks, tail) = body.split_at(chunks_len);

                self.chunks = chunks;
                self.tail = tail;
            }

            fn load_head(&mut self, val: &mut [u8; MaxAtomic::PART_SIZE]) {
                let target = &mut val[self.skip..][..self.head.len()];
                constants::U8.load_atomic_slice(self.head, target);
            }

            fn load(&mut self, val: &mut [u8; MaxAtomic::PART_SIZE]) {
                if let Some(next) = self.chunks.get(..MaxAtomic::PART_SIZE) {
                    self.chunks = self.chunks.get(MaxAtomic::PART_SIZE..).unwrap();
                    constants::U8.load_atomic_slice(next, val);
                } else {
                    // The store loop calls `load` once per fully covered part; running dry here
                    // would indicate a length-accounting bug.
                    debug_assert!(false);
                }
            }

            fn load_tail(&mut self, val: &mut [u8; MaxAtomic::PART_SIZE]) {
                let target = &mut val[..self.tail.len()];
                constants::U8.load_atomic_slice(self.tail, target);
            }
        }

        assert_eq!(a.len(), b.len());

        let source = SliceSource {
            head: self.atomic_bytes(a),
            skip: 0,
            chunks: atomic_buf::new(&[]).as_texels(constants::U8),
            tail: atomic_buf::new(&[]).as_texels(constants::U8),
        };

        self.store_atomic_slice_unchecked(b, source);
    }
1413
1414    pub(crate) fn cast_buf<'buf>(self, buffer: &'buf buf) -> &'buf [P] {
1415        debug_assert_eq!(buffer.as_ptr() as usize % mem::align_of::<MaxAligned>(), 0);
1416        debug_assert_eq!(buffer.as_ptr() as usize % mem::align_of::<P>(), 0);
1417        // Safety:
1418        // * data is valid for reads as memory size is not enlarged
1419        // * lifetime is not changed
1420        // * validity for arbitrary data as required by Texel constructor
1421        // * alignment checked by Texel constructor
1422        // * the size fits in an allocation, see first bullet point.
1423        unsafe {
1424            slice::from_raw_parts(
1425                buffer.as_ptr() as *const P,
1426                buffer.len() / self.size_nz().get(),
1427            )
1428        }
1429    }
1430
1431    pub(crate) fn cast_mut_buf<'buf>(self, buffer: &'buf mut buf) -> &'buf mut [P] {
1432        debug_assert_eq!(buffer.as_ptr() as usize % mem::align_of::<MaxAligned>(), 0);
1433        debug_assert_eq!(buffer.as_ptr() as usize % mem::align_of::<P>(), 0);
1434        // Safety:
1435        // * data is valid for reads and writes as memory size is not enlarged
1436        // * lifetime is not changed
1437        // * validity for arbitrary data as required by Texel constructor
1438        // * alignment checked by Texel constructor
1439        // * the size fits in an allocation, see first bullet point.
1440        unsafe {
1441            slice::from_raw_parts_mut(
1442                buffer.as_mut_ptr() as *mut P,
1443                buffer.len() / self.size_nz().get(),
1444            )
1445        }
1446    }
1447
    /// Construct a range indexing to a slice of this texel.
    ///
    /// Returns `None` when the range is not valid for this texel type.
    ///
    /// See [`TexelRange::new`] as this is just a proxy.
    ///
    /// ```
    /// use image_texel::{texels::{U16, buf}, TexelBuffer};
    ///
    /// let buffer = TexelBuffer::with_elements(&[1u32, 2, 3, 4]);
    /// let range = U16.to_range(4..8).unwrap();
    /// let u16_view = &buffer.as_buf()[range];
    ///
    /// assert_eq!(u16_view.len(), 4);
    /// // This view extends over the `3u32` and `4u32` elements.
    /// // Results depend on native endianess of the `u32` type.
    /// assert!(u16_view[0] == 3 || u16_view[1] == 3);
    /// assert!(u16_view[2] == 4 || u16_view[3] == 4);
    /// ```
    pub fn to_range(self, range: ops::Range<usize>) -> Option<TexelRange<P>> {
        TexelRange::new(self, range)
    }
1468
    /// Construct a range indexing to a slice of this texel by bytes.
    ///
    /// Returns `None` when the byte range is not valid for this texel type.
    ///
    /// See [`TexelRange::from_byte_range`] as this is just a proxy.
    pub fn to_byte_range(self, range: ops::Range<usize>) -> Option<TexelRange<P>> {
        TexelRange::from_byte_range(self, range)
    }
1475}
1476
// Compile-time checks that `MaxAtomic` and `MaxAligned` agree in size and in
// alignment. Code elsewhere in this module treats the two as layout-equivalent
// views of the same bytes, so a mismatch must fail the build.
const _: () = {
    // These functions exist only to make a failure readable: when one of the
    // checks below trips, the compile error points at the named call, which
    // states which property (size or alignment) was violated.
    const fn atomic_is_size_equivalent_of_aligned() {}
    const fn atomic_is_align_equivalent_of_aligned() {}

    // Pre-`const panic` static-assertion trick: index a one-element array with
    // the negated condition cast to `usize`. If the condition holds the index
    // is 0 and the expression is a no-op; if it fails the index is 1 and the
    // out-of-bounds access aborts compilation.
    [atomic_is_size_equivalent_of_aligned()]
        [!(core::mem::size_of::<MaxAtomic>() == core::mem::size_of::<MaxAligned>()) as usize];

    [atomic_is_align_equivalent_of_aligned()]
        [!(core::mem::align_of::<MaxAtomic>() == core::mem::align_of::<MaxAligned>()) as usize];
};
1487
1488impl MaxAtomic {
1489    pub(crate) const PART_SIZE: usize = core::mem::size_of::<AtomicPart>();
1490
1491    /// Create a vector of atomic zero-bytes.
1492    pub const fn zero() -> Self {
1493        const Z: AtomicPart = AtomicPart::new(0);
1494        MaxAtomic([Z; ATOMIC_PARTS])
1495    }
1496
1497    /// Create a vector from values initialized synchronously.
1498    pub fn new(contents: MaxAligned) -> Self {
1499        let mut result = Self::zero();
1500        let from = bytemuck::bytes_of(&contents);
1501        let from = from.chunks_exact(core::mem::size_of::<AtomicPart>());
1502
1503        for (part, src) in result.0.iter_mut().zip(from) {
1504            let to = bytemuck::bytes_of_mut(AtomicPart::get_mut(part));
1505            to.copy_from_slice(src);
1506        }
1507
1508        result
1509    }
1510
1511    /// Unwrap an owned value.
1512    pub fn into_inner(mut self) -> MaxAligned {
1513        let mut result = MaxAligned([0; MAX_ALIGN]);
1514        let from = bytemuck::bytes_of_mut(&mut result);
1515        let from = from.chunks_exact_mut(core::mem::size_of::<AtomicPart>());
1516
1517        for (part, to) in self.0.iter_mut().zip(from) {
1518            let src = bytemuck::bytes_of(AtomicPart::get_mut(part));
1519            to.copy_from_slice(src);
1520        }
1521
1522        result
1523    }
1524
1525    /// Load the data into an owned value.
1526    pub fn load(&self, ordering: atomic::Ordering) -> MaxAligned {
1527        let mut result = MaxAligned([0; MAX_ALIGN]);
1528        let from = bytemuck::bytes_of_mut(&mut result);
1529        let from = from.chunks_exact_mut(core::mem::size_of::<AtomicPart>());
1530
1531        for (part, to) in self.0.iter().zip(from) {
1532            let data = part.load(ordering);
1533            let src = bytemuck::bytes_of(&data);
1534            to.copy_from_slice(src);
1535        }
1536
1537        result
1538    }
1539}
1540
1541impl MaxCell {
1542    /// Create a vector of atomic zero-bytes.
1543    pub const fn zero() -> Self {
1544        MaxCell(Cell::new([0; MAX_ALIGN]))
1545    }
1546
1547    /// Create a vector from values initialized synchronously.
1548    pub fn new(contents: MaxAligned) -> Self {
1549        MaxCell(Cell::new(contents.0))
1550    }
1551
1552    /// Overwrite the contents with new information from another cell.
1553    pub fn set(&self, newval: &Self) {
1554        self.0.set(newval.0.get())
1555    }
1556
1557    /// Read the current contents from this cell into an owned value.
1558    pub fn get(&self) -> MaxAligned {
1559        MaxAligned(self.0.get())
1560    }
1561
1562    /// Unwrap an owned value.
1563    pub fn into_inner(self) -> MaxAligned {
1564        MaxAligned(self.0.into_inner())
1565    }
1566}
1567
1568/// This is a pure marker type.
1569impl<P> Clone for Texel<P> {
1570    fn clone(&self) -> Self {
1571        Texel(PhantomData)
1572    }
1573}
1574
1575impl<P> PartialEq for Texel<P> {
1576    fn eq(&self, _: &Self) -> bool {
1577        true
1578    }
1579}
1580
// `eq` is constantly `true`, so equality is trivially reflexive, symmetric,
// and transitive as `Eq` requires.
impl<P> Eq for Texel<P> {}
1582
1583impl<P> PartialOrd for Texel<P> {
1584    fn partial_cmp(&self, _: &Self) -> Option<Ordering> {
1585        Some(Ordering::Equal)
1586    }
1587}
1588
1589impl<P> Ord for Texel<P> {
1590    fn cmp(&self, _: &Self) -> Ordering {
1591        Ordering::Equal
1592    }
1593}
1594
/// This is a pure marker type.
///
/// `Texel` wraps only a `PhantomData`, so it is zero-sized and copying is
/// free regardless of `P`.
impl<P> Copy for Texel<P> {}
1597
1598impl<P> fmt::Debug for Texel<P> {
1599    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1600        f.debug_struct("Texel")
1601            .field("size", &self.size())
1602            .field("align", &self.align())
1603            .finish()
1604    }
1605}
1606
// All `Texel<P>` values compare equal, so hashing writes nothing to the
// hasher; every value hashes identically, keeping `Hash` consistent with the
// always-true `PartialEq` implementation.
impl<P> hash::Hash for Texel<P> {
    fn hash<H: hash::Hasher>(&self, _: &mut H) {}
}