image_texel/texel.rs
1// Distributed under The MIT License (MIT)
2//
3// Copyright (c) 2019, 2020 The `image-rs` developers
4#![allow(unsafe_code)]
5
6use core::cell::Cell;
7use core::cmp::{Eq, Ord, Ordering, PartialEq, PartialOrd};
8use core::marker::PhantomData;
9use core::{fmt, hash, mem, num, ops, ptr, slice, sync::atomic};
10
11use crate::buf::{atomic_buf, buf, cell_buf, AtomicRef, AtomicSliceRef, TexelRange};
12
/// Marker struct to denote a texel type.
///
/// Can be constructed only for types that have expected alignment and no byte invariants. It
/// always implements `Copy` and `Clone`, regardless of the underlying type and is zero-sized.
///
/// This is the central encapsulation of unsafety in this crate. It utilizes `bytemuck` for a safe
/// interface but permits other types with an unsafe interface, and offers the cast operations
/// without a bound on the `Pod` trait. Note that `Pod` is a pure marker trait; its properties must
/// hold even if it is not explicitly mentioned. If all constructors (safely or unsafely) ensure
/// that its properties hold we can use `Texel` as a witness type for the bound and subsequently
/// write interfaces to take an instance instead of having a static type bound. This achieves two
/// effects:
/// * Firstly, it makes the interface independent of the chosen transmutation crate. Potentially we
///   will have a method to construct the `Texel` via a `core` trait.
/// * Secondly, it allows creating texel of third-party types for which the bound can not be
///   implemented. Crucially, this includes SIMD representations that would be a burden to support
///   directly. And conversely you can also deal with arbitrary existing texel without a bound in
///   your own interfaces!
// `?Sized` lets the witness name unsized types; `PhantomData` keeps the
// parameter used without storing any value, so the witness stays zero-sized.
pub struct Texel<P: ?Sized>(PhantomData<P>);
32
/// Marker struct to denote that P is transparently wrapped in O.
///
/// The only way to construct it is by accessing its associated constant which only exists when the
/// bound `bytemuck::TransparentWrapper` holds as required. This encodes a type-level set and is
/// a workaround for such bounds not yet being allowed in `const fn`. Expect this type to be
/// deprecated sooner or later.
// Like `Texel`, this is a zero-sized proof token; see `CONST` below.
pub struct IsTransparentWrapper<P, O>(PhantomData<(P, O)>);
40
/// Describes a type which can represent a `Texel` and for which this is statically known.
pub trait AsTexel {
    /// Get the texel struct for this type.
    ///
    /// The naive implementation of merely unwrapping the result of `Texel::for_type` **panics** on
    /// any invalid type. This trait should only be implemented when you know for sure that the
    /// type is correct. Implementations in this crate back the call with a
    /// compile-time `const` assertion instead, turning mistakes into build errors.
    fn texel() -> Texel<Self>;
}
50
// Generates the `MaxAligned`/`MaxAtomic`/`MaxCell` trio plus the `MAX_ALIGN`
// constant from a single table of per-architecture alignments, so the three
// types can never disagree about the alignment value.
macro_rules! def_max_align {
    (
        match cfg(target) {
            $($($arch:literal)|* => $num:literal),*,
        }

        $(#[$common_attr:meta])*
        struct MaxAligned(..);

        $(#[$atomic_attr:meta])*
        struct MaxAtomic(..);

        $(#[$cell_attr:meta])*
        struct MaxCell(..);
    ) => {
        /// A byte-like-type that is aligned to the required max alignment.
        ///
        /// This type does not contain padding and implements `Pod`. Generally, the alignment and size
        /// requirement is kept small to avoid overhead.
        $(#[$common_attr])*
        $(
            #[cfg_attr(
                any($(target_arch = $arch),*),
                repr(align($num))
            )]
        )*
        pub struct MaxAligned(pub(crate) [u8; MAX_ALIGN]);

        /* Note: We need to be really careful to avoid peril for several reasons.
         *
         * Firstly, the Rust atomic model forbids us from doing unsynchronized access (stores _or_
         * loads) with differing sizes to the same memory location. For now, and for the
         * foreseeable future. Since we do not synchronize the access to the buffer, we must use
         * the same size everywhere.
         *
         * Secondly, using any type other than `AtomicU8` for these makes it hard for us to slice
         * the buffer at arbitrary points. For true references we might work around this by custom
         * metadata, yet this is not stable. Hence, we _must_ use a non-reference type wrapper for
         * the kind of access we need. Or rather, the initial buffer allocation can deref into a
         * reference to a slice of atomics but to slice it we must use our own type. And all
         * operations are implemented to work on full units of this atomic type.
         *
         * At least for relaxed operations, the larger unit is somewhat equivalent. It's certainly
         * at bit of a balance. Larger units might be more costly from destructive interference
         * between different accesses, but small units are costly due to added instructions.
         *
         * View the selection below as a 'best-effort' really.
         **/
        // No atomic type at all: the synchronous buffer API cannot exist.
        #[cfg(all(
            not(target_has_atomic = "8"),
            not(target_has_atomic = "16"),
            not(target_has_atomic = "32"),
            not(target_has_atomic = "64"),
        ))]
        compile_error!("Synchronous buffer API requires one atomic unsigned type");

        // The cascade below picks the *widest* available atomic unsigned type
        // as the uniform access unit (`AtomicPart`).
        #[cfg(all(
            target_has_atomic = "8",
            not(target_has_atomic = "16"),
            not(target_has_atomic = "32"),
            not(target_has_atomic = "64"),
        ))]
        pub(crate) type AtomicPart = core::sync::atomic::AtomicU8;
        #[cfg(all(
            target_has_atomic = "16",
            not(target_has_atomic = "32"),
            not(target_has_atomic = "64"),
        ))]
        pub(crate) type AtomicPart = core::sync::atomic::AtomicU16;
        #[cfg(all(
            target_has_atomic = "32",
            not(target_has_atomic = "64"),
        ))]
        pub(crate) type AtomicPart = core::sync::atomic::AtomicU32;
        #[cfg(all(
            target_has_atomic = "64",
        ))]
        pub(crate) type AtomicPart = core::sync::atomic::AtomicU64;

        // How many `AtomicPart` units cover one `MaxAligned` unit exactly.
        const ATOMIC_PARTS: usize = MAX_ALIGN / core::mem::size_of::<AtomicPart>();

        $(
            #[cfg_attr(
                any($(target_arch = $arch),*),
                repr(align($num))
            )]
        )*
        $(#[$atomic_attr])*
        pub struct MaxAtomic(pub(crate) [AtomicPart; ATOMIC_PARTS]);

        $(
            #[cfg_attr(
                any($(target_arch = $arch),*),
                repr(align($num))
            )]
        )*
        $(#[$cell_attr])*
        pub struct MaxCell(pub(crate) Cell<[u8; MAX_ALIGN]>);

        // One `MAX_ALIGN` constant per architecture row in the table …
        $(
            #[cfg(
                any($(target_arch = $arch),*),
            )]
            pub(crate) const MAX_ALIGN: usize = $num;
        )*

        // … and a conservative fallback for every architecture not listed.
        #[cfg(
            not(any(
                $(any($(target_arch = $arch),*)),*
            )),
        )]
        pub(crate) const MAX_ALIGN: usize = 8;
    }
}
165
def_max_align! {
    match cfg(target) {
        // 32 covers AVX/AVX2 vector types; 16 covers the 128-bit SIMD units.
        "x86" | "x86_64" => 32,
        "arm" => 16,
        "aarch64" => 16,
        "wasm32" => 16,
    }

    /// A byte-like-type that is aligned to the required max alignment.
    ///
    /// This type does not contain padding and implements `Pod`. Generally, the alignment and size
    /// requirement is kept small to avoid overhead.
    #[derive(Clone, Copy)]
    #[repr(C)]
    struct MaxAligned(..);

    /// Atomic equivalence of [`MaxAligned`].
    ///
    /// This contains instances of the platform-dependent `AtomicPart` type, the
    /// widest available atomic unsigned integer (`AtomicU8` up to `AtomicU64`).
    struct MaxAtomic(..);

    /// A cell of a byte array equivalent to [`MaxAligned`].
    struct MaxCell(..);
}
190
// SAFETY: `MaxAligned` is a plain `[u8; MAX_ALIGN]` — every bit pattern
// (including all-zeroes) is valid and there is no padding.
unsafe impl bytemuck::Zeroable for MaxAligned {}
unsafe impl bytemuck::Pod for MaxAligned {}
193
/// Wraps a type by value but removes its alignment requirement.
#[repr(packed(1))]
// Deriving Clone works by Copy, which is why it works at all.
#[derive(Clone, Copy)]
pub struct Unaligned<T>(pub T);

// SAFETY: a `packed(1)` wrapper introduces no padding; being zeroable or pod
// is therefore exactly the property of the wrapped `T`, which the bound requires.
unsafe impl<T: bytemuck::Zeroable> bytemuck::Zeroable for Unaligned<T> {}
unsafe impl<T: bytemuck::Pod> bytemuck::Pod for Unaligned<T> {}
202
203impl<T> From<T> for Unaligned<T> {
204 fn from(value: T) -> Self {
205 Unaligned(value)
206 }
207}
208
209impl<T> Unaligned<T> {
210 /// Unwrap the inner value.
211 ///
212 /// This is the same as accessing the public field, but the function type makes for better type
213 /// inference and allows using that access with [`Option::map`] etc.
214 pub fn into_inner(self) -> T {
215 self.0
216 }
217}
218
// Implements `AsTexel` for a built-in (`core::arch`) type, backed by a
// compile-time proof that the type satisfies the `Texel` invariants.
macro_rules! builtin_texel {
    ( $name:ty ) => {
        impl AsTexel for $name {
            fn texel() -> Texel<Self> {
                // Evaluated at compile time: a failing check becomes a build
                // error instead of a runtime panic.
                const _: () = {
                    assert!(Texel::<$name>::check_invariants());
                };

                // SAFETY: invariants proven by the constant assertion above.
                unsafe { Texel::new_unchecked() }
            }
        }
    };
}
232
233pub(crate) mod constants {
234 use super::{AsTexel, MaxAligned, Texel};
235
236 macro_rules! constant_texel {
237 ($(($name:ident, $type:ty)),*) => {
238 $(pub const $name: Texel<$type> = Texel(core::marker::PhantomData) ;
239 impl AsTexel for $type {
240 fn texel() -> Texel<Self> {
241 const _: () = {
242 assert!(Texel::<$type>::check_invariants());
243 };
244
245 $name
246 }
247 }
248 )*
249 }
250 }
251
252 constant_texel!(
253 (I8, i8),
254 (U8, u8),
255 (I16, i16),
256 (U16, u16),
257 (I32, i32),
258 (U32, u32),
259 (F32, f32),
260 (I64, i64),
261 (U64, u64),
262 (F64, f64),
263 (USIZE, usize),
264 (ISIZE, isize),
265 (MAX, MaxAligned)
266 );
267
268 impl<T: AsTexel> AsTexel for [T; 1] {
269 fn texel() -> Texel<[T; 1]> {
270 T::texel().array::<1>()
271 }
272 }
273
274 impl<T: AsTexel> AsTexel for [T; 2] {
275 fn texel() -> Texel<[T; 2]> {
276 T::texel().array::<2>()
277 }
278 }
279
280 impl<T: AsTexel> AsTexel for [T; 3] {
281 fn texel() -> Texel<[T; 3]> {
282 T::texel().array::<3>()
283 }
284 }
285
286 impl<T: AsTexel> AsTexel for [T; 4] {
287 fn texel() -> Texel<[T; 4]> {
288 T::texel().array::<4>()
289 }
290 }
291
292 impl<T: AsTexel> AsTexel for [T; 5] {
293 fn texel() -> Texel<[T; 5]> {
294 T::texel().array::<5>()
295 }
296 }
297
298 impl<T: AsTexel> AsTexel for [T; 6] {
299 fn texel() -> Texel<[T; 6]> {
300 T::texel().array::<6>()
301 }
302 }
303
304 impl<T: AsTexel> AsTexel for [T; 7] {
305 fn texel() -> Texel<[T; 7]> {
306 T::texel().array::<7>()
307 }
308 }
309
310 impl<T: AsTexel> AsTexel for [T; 8] {
311 fn texel() -> Texel<[T; 8]> {
312 T::texel().array::<8>()
313 }
314 }
315
316 impl<T: AsTexel> AsTexel for ::core::num::Wrapping<T> {
317 fn texel() -> Texel<::core::num::Wrapping<T>> {
318 T::texel().num_wrapping()
319 }
320 }
321}
322
#[cfg(target_arch = "x86")]
mod x64 {
    use super::{AsTexel, Texel};
    use core::arch::x86;

    // `AsTexel` for the stable x86 SIMD vector types. Note: each type must
    // appear exactly once — a duplicated `builtin_texel!` expands to two
    // `impl AsTexel for __m128` blocks, a conflicting-implementation compile
    // error (E0119) on x86 targets.
    builtin_texel!(x86::__m128);
    builtin_texel!(x86::__m128d);
    builtin_texel!(x86::__m128i);
    builtin_texel!(x86::__m256);
    builtin_texel!(x86::__m256d);
    builtin_texel!(x86::__m256i);
}
337
#[cfg(target_arch = "x86_64")]
mod x64_64 {
    use super::{AsTexel, Texel};
    use core::arch::x86_64;

    // `AsTexel` for the stable x86_64 SIMD vector types (SSE and AVX widths).
    builtin_texel!(x86_64::__m128);
    builtin_texel!(x86_64::__m128d);
    builtin_texel!(x86_64::__m128i);
    builtin_texel!(x86_64::__m256);
    builtin_texel!(x86_64::__m256d);
    builtin_texel!(x86_64::__m256i);
}
350
// 32-bit ARM: no impls because the `core::arch::arm` SIMD types are unstable.
#[cfg(target_arch = "arm")]
mod arm { /* all types unstable */
}
354
#[cfg(target_arch = "aarch64")]
mod arm {
    use super::{AsTexel, Texel};
    use core::arch::aarch64;

    // `AsTexel` for the stable AArch64 NEON float64 vector types.
    builtin_texel!(aarch64::float64x1_t);
    builtin_texel!(aarch64::float64x1x2_t);
    builtin_texel!(aarch64::float64x1x3_t);
    builtin_texel!(aarch64::float64x1x4_t);
    builtin_texel!(aarch64::float64x2_t);
    builtin_texel!(aarch64::float64x2x2_t);
    builtin_texel!(aarch64::float64x2x3_t);
    builtin_texel!(aarch64::float64x2x4_t);
}
369
// Renamed from `arm` (copy-paste leftover): this module covers wasm32. The
// module is private, so the rename is invisible to callers; the trait impl it
// produces is unchanged.
#[cfg(target_arch = "wasm32")]
mod wasm {
    use super::{AsTexel, Texel};
    use core::arch::wasm32;

    // `AsTexel` for the stable WebAssembly 128-bit vector type.
    builtin_texel!(wasm32::v128);
}
377
378impl<P: bytemuck::Pod> Texel<P> {
379 /// Try to construct an instance of the marker.
380 ///
381 /// If successful, you can freely use it to access the image buffers. This requires:
382 /// - The type must have an alignment of *at most* `MAX_ALIGN`.
383 /// - The type must *not* be a ZST.
384 /// - The type must *not* have any Drop-glue (no drop, any contain not part that is Drop).
385 pub const fn for_type() -> Option<Self> {
386 if Texel::<P>::check_invariants() {
387 Some(Texel(PhantomData))
388 } else {
389 None
390 }
391 }
392}
393
impl<P, O: bytemuck::TransparentWrapper<P>> IsTransparentWrapper<P, O> {
    // The sole constructor: nameable only when the `TransparentWrapper` bound
    // holds, which is what makes the marker a usable proof token.
    pub const CONST: Self = IsTransparentWrapper(PhantomData);
}
397
398/// The **only** ways to construct a `buf`, protecting the alignment invariant.
399/// Hint: This is an unsized type so there is no safe way of constructing it.
400impl buf {
401 pub const ALIGNMENT: usize = MAX_ALIGN;
402
403 /// Wrap bytes in a `buf`.
404 ///
405 /// The bytes need to be aligned to `ALIGNMENT`.
406 pub fn from_bytes(bytes: &[u8]) -> Option<&Self> {
407 if bytes.as_ptr() as usize % Self::ALIGNMENT == 0 {
408 // SAFETY: this is an almost trivial cast of unsized references. Additionally, we still
409 // guarantee that this is at least aligned to `MAX_ALIGN`.
410 Some(unsafe { &*(bytes as *const [u8] as *const Self) })
411 } else {
412 None
413 }
414 }
415
416 /// Wrap bytes in a `buf`.
417 ///
418 /// The bytes need to be aligned to `ALIGNMENT`.
419 pub fn from_bytes_mut(bytes: &mut [u8]) -> Option<&mut Self> {
420 if bytes.as_ptr() as usize % Self::ALIGNMENT == 0 {
421 // SAFETY: this is an almost trivial cast of unsized references. Additionally, we still
422 // guarantee that this is at least aligned to `MAX_ALIGN`.
423 Some(unsafe { &mut *(bytes as *mut [u8] as *mut Self) })
424 } else {
425 None
426 }
427 }
428}
429
impl atomic_buf {
    pub const ALIGNMENT: usize = MAX_ALIGN;

    /// Reinterpret a slice of max-aligned atomic units as an `atomic_buf`.
    pub fn from_slice(values: &[MaxAtomic]) -> &Self {
        debug_assert_eq!(values.as_ptr() as usize % Self::ALIGNMENT, 0);
        let ptr = values.as_ptr() as *const AtomicPart;
        let count = values.len() * ATOMIC_PARTS;
        // Safety: these types are binary compatible, they wrap atomics of the same size, and
        // starting at the same address, with a pointer of the same provenance which will be valid
        // for the whole lifetime.
        //
        // This case relaxes the alignment requirements from `MaxAtomic` to that of the underlying
        // atomic, which allows us to go beyond the public interface.
        //
        // The new size covered by the slice is the same as the input slice, since there are
        // `ATOMIC_PARTS` units within each `MaxAtomic`. The memory invariants of the new type are
        // the same as the old type, which is that we access only with atomics instructions of the
        // size of the `AtomicPart` type.
        let atomics = core::ptr::slice_from_raw_parts::<AtomicPart>(ptr, count);
        // Safety: `atomic_buf` has the same layout as a `[MaxAtomic]` and wraps it transparently.
        unsafe { &*(atomics as *const Self) }
    }

    /// Mutable variant of [`Self::from_slice`].
    pub(crate) fn from_slice_mut(values: &mut [MaxAtomic]) -> &mut Self {
        debug_assert_eq!(values.as_ptr() as usize % Self::ALIGNMENT, 0);
        let ptr = values.as_mut_ptr() as *mut AtomicPart;
        let count = values.len() * ATOMIC_PARTS;
        // Safety: as `from_slice`. We converted the input pointer from a mutable pointer itself,
        // fulfilling the extra uniqueness and ownership requirement.
        let atomics = core::ptr::slice_from_raw_parts_mut::<AtomicPart>(ptr, count);
        // Safety: `atomic_buf` has the same layout as a `[MaxAtomic]` and wraps it transparently.
        unsafe { &mut *(atomics as *mut Self) }
    }

    /// Wrap a sub-slice of bytes from an atomic buffer into a new `atomic_buf`.
    ///
    /// The bytes need to be aligned to `ALIGNMENT`. Returns `None` if these checks fail and return
    /// the newly wrapped buffer in `Some` otherwise.
    pub fn from_bytes(bytes: AtomicSliceRef<u8>) -> Option<&Self> {
        if bytes.start % Self::ALIGNMENT == 0 {
            // Convert the byte range into a range of whole `AtomicPart` units;
            // `div_ceil` rounds up so a partial trailing unit is still covered.
            let offset = bytes.start / core::mem::size_of::<AtomicPart>();
            let len = bytes.len().div_ceil(core::mem::size_of::<AtomicPart>());
            let buffer = &bytes.buf.0[offset..][..len];
            // Safety: these types are binary compatible. The metadata is also the same, as both
            // types encapsulate a slice of `AtomicPart`-sized types.
            Some(unsafe { &*(buffer as *const _ as *const Self) })
        } else {
            None
        }
    }

    /// Wrap bytes in an atomic `buf`.
    ///
    /// The bytes need to be aligned to `ALIGNMENT`. Additionally the length must be a multiple of
    /// the `MaxAtomic` size's units. Returns `None` if these checks fail and return the newly
    /// wrapped buffer in `Some` otherwise.
    pub fn from_bytes_mut(bytes: &mut [u8]) -> Option<&mut Self> {
        if bytes.as_ptr() as usize % Self::ALIGNMENT != 0 {
            None
        } else if bytes.len() % core::mem::size_of::<MaxAtomic>() != 0 {
            None
        } else {
            let len = bytes.len() / core::mem::size_of::<AtomicPart>();
            let ptr = bytes.as_mut_ptr() as *mut AtomicPart;
            // SAFETY: We fulfill the alignment and length requirements for this cast, i.e. there
            // are enough bytes available in this slice. Additionally, we still guarantee that this
            // is at least aligned to `MAX_ALIGN`. We also have the shared read-write provenance on
            // our pointer that a shared reference to atomic requires.
            let atomics = ptr::slice_from_raw_parts_mut(ptr, len);
            Some(unsafe { &mut *(atomics as *mut Self) })
        }
    }

    /// Wrapper around the unstable `<Atomic*>::get_mut_slice`.
    pub(crate) fn part_mut_slice(slice: &mut [AtomicPart]) -> &mut [u8] {
        // `size_of_val` gives the byte length covered by the atomic slice.
        let len = core::mem::size_of_val(slice);
        let ptr = slice.as_mut_ptr() as *mut u8;
        // SAFETY: this is an almost trivial cast of unsized references. Additionally, we still
        // guarantee that this is at least aligned to `MAX_ALIGN`.
        unsafe { slice::from_raw_parts_mut(ptr, len) }
    }
}
512
impl cell_buf {
    pub const ALIGNMENT: usize = MAX_ALIGN;

    /// Reinterpret a slice of max-aligned byte cells as a `cell_buf`.
    pub fn from_slice(values: &[MaxCell]) -> &Self {
        debug_assert_eq!(values.as_ptr() as usize % Self::ALIGNMENT, 0);
        let ptr = values.as_ptr() as *const Cell<u8>;
        let count = core::mem::size_of_val(values);
        // Safety: constructs a pointer to a slice validly covering exactly the values in the
        // input. The byte length is determined by `size_of_val` (and `Cell<u8>` is one byte, so
        // the element count equals the byte count), starting at the same address, with a pointer
        // of the same provenance which will be valid for the whole lifetime.
        let memory = core::ptr::slice_from_raw_parts::<Cell<u8>>(ptr, count);
        // Safety: these types are binary compatible, they wrap memory of the same size. This cast
        // relaxes the alignment requirement from `MaxCell` to that of `Cell<u8>`, which allows us
        // to go beyond the public interface. The memory invariant — unsynchronized interior
        // mutability through `Cell` — is the same on both sides.
        unsafe { &*(memory as *const Self) }
    }

    /// Mutable variant of [`Self::from_slice`].
    pub(crate) fn from_slice_mut(values: &mut [MaxCell]) -> &mut Self {
        debug_assert_eq!(values.as_ptr() as usize % Self::ALIGNMENT, 0);
        let ptr = values.as_mut_ptr() as *mut Cell<u8>;
        let count = core::mem::size_of_val(values);
        // Safety: as `from_slice`. We converted the input pointer from a mutable pointer itself,
        // fulfilling the extra uniqueness and ownership requirement.
        let memory = core::ptr::slice_from_raw_parts_mut::<Cell<u8>>(ptr, count);
        // Safety: `cell_buf` has the same layout as a `[Cell<u8>]` and wraps it transparently.
        unsafe { &mut *(memory as *mut Self) }
    }

    /// Interpret a slice of bytes in an unsynchronized shared `cell_buf`.
    ///
    /// The bytes need to be aligned to `ALIGNMENT`.
    pub fn from_bytes(bytes: &[Cell<u8>]) -> Option<&Self> {
        if bytes.as_ptr() as usize % Self::ALIGNMENT == 0 {
            // Safety: these types are binary compatible. The metadata is also the same, as both
            // types encapsulate a slice of `u8`-sized types.
            Some(unsafe { &*(bytes as *const [_] as *const Cell<[u8]> as *const cell_buf) })
        } else {
            None
        }
    }

    /// Wrap bytes in an unsynchronized shared `cell_buf`.
    ///
    /// The bytes need to be aligned to `ALIGNMENT`. Note the shared (`&Self`)
    /// return: `Cell` access is interior mutability, so a shared wrapper is
    /// already sufficient for writes.
    pub fn from_bytes_mut(bytes: &mut [u8]) -> Option<&Self> {
        // `Cell::from_mut` + `as_slice_of_cells` is the safe std route from
        // `&mut [u8]` to `&[Cell<u8>]`.
        let slice = Cell::from_mut(bytes).as_slice_of_cells();
        Self::from_bytes(slice)
    }
}
564
impl<P> Texel<P> {
    /// Create a witness certifying `P` as a texel without checks.
    ///
    /// # Safety
    ///
    /// The type `P` must __not__:
    /// * have any validity invariants, i.e. it mustn't contain any padding.
    /// * have any safety invariants. This implies it can be copied.
    /// * have an alignment larger than [`MaxAligned`].
    /// * be a zero-size type.
    ///
    /// Furthermore, tentatively, the type must not have any drop glue. That is its members are all
    /// simple types without Drop implementations. This requirement exists mainly to avoid code
    /// accidentally leaking instances, and ensures that copies created from their byte
    /// representation—which is safe according to the other invariants—do not cause unexpected
    /// effects.
    ///
    /// Note that the alignment requirement with regards to `MaxAligned` is __architecture
    /// dependent__ as the exact bound varies across the `target_arch` feature. Where possible, add
    /// static assertions to each call site of this function.
    ///
    /// [`MaxAligned`]: struct.MaxAligned.html
    pub const unsafe fn new_unchecked() -> Self {
        // Best-effort check in debug builds; the caller remains responsible.
        debug_assert!(Self::check_invariants());
        Texel(PhantomData)
    }

    /// Note this isn't exhaustive. Indeed, we have no way to check for padding.
    pub(crate) const fn check_invariants() -> bool {
        mem::align_of::<P>() <= MAX_ALIGN && mem::size_of::<P>() > 0 && !mem::needs_drop::<P>()
    }

    /// Proxy of `core::mem::align_of`.
    pub const fn align(self) -> usize {
        mem::align_of::<P>()
    }

    /// Proxy of `core::mem::size_of`.
    pub const fn size(self) -> usize {
        mem::size_of::<P>()
    }

    /// Publicly visible function to use the guarantee of non-ZST.
    ///
    /// The `None` arm is unreachable because constructors reject zero-sized
    /// types; the `panic!` only exists to satisfy `const fn` exhaustiveness.
    pub const fn size_nz(self) -> core::num::NonZeroUsize {
        match core::num::NonZeroUsize::new(self.size()) {
            None => panic!(""),
            Some(num) => num,
        }
    }

    // A number of constructors that are technically unsafe. Note that we could write them as safe
    // code here to pad our stats but they are not checked by the type system so it's risky. Better
    // explain their safety in the code as comments.

    /// Construct a texel for an array of `N` elements of this texel type.
    ///
    /// # Panics
    ///
    /// This function panics when called with `N` equal to 0, since a texel
    /// must not be zero-sized.
    pub const fn array<const N: usize>(self) -> Texel<[P; N]> {
        if N == 0 {
            panic!()
        }

        // Safety:
        // * has no validity/safety invariants
        // * has the same alignment as P which is not larger then MaxAligned
        unsafe { Texel::new_unchecked() }
    }

    /// Construct a texel for unaligned data of the contained type.
    pub const fn unaligned(self) -> Texel<Unaligned<P>> {
        // Safety:
        // * has no validity/safety invariants
        // * has alignment 1 which is not larger than MaxAligned
        unsafe { Texel::new_unchecked() }
    }

    /// Construct a texel by wrapping into a transparent wrapper.
    ///
    /// TODO: a constructor for `Texel<O>` based on proof of transmutation from &mut P to &mut O,
    /// based on the standard transmutation RFC. This is more flexible than bytemuck's
    /// TransparentWrapper trait.
    pub const fn transparent_wrap<O>(self, _: IsTransparentWrapper<P, O>) -> Texel<O> {
        // Safety:
        // * P and O must have the same invariants, none
        // * P and O have the same alignment
        unsafe { Texel::new_unchecked() }
    }

    /// Construct a texel by unwrapping a transparent wrapper.
    pub const fn transparent_unwrap<O>(self, _: IsTransparentWrapper<O, P>) -> Texel<O> {
        // Safety:
        // * P and O must have the same invariants, none
        // * P and O have the same alignment
        unsafe { Texel::new_unchecked() }
    }

    /// Construct a texel that contains a number in the standard `Wrapping` type.
    pub const fn num_wrapping(self) -> Texel<num::Wrapping<P>> {
        // * Texel<P> = Self certifies the byte properties.
        // * `core::num::Wrapping` is `repr(transparent)`.
        unsafe { Texel::new_unchecked() }
    }
}
670
impl<T, const N: usize> Texel<[T; N]> {
    /// Construct a texel for the element type, from an array-of-elements texel.
    pub const fn array_element(self) -> Texel<T> {
        // Safety:
        // We'll see that all properties are implied by _any_ suitable array.
        // - The type must have an alignment of *at most* `MAX_ALIGN`. Array and inner type have
        //   the same alignment.
        // - The type must *not* be a ZST. The array would otherwise be a ZST.
        // - The type must *not* have any Drop-glue. The array would otherwise
        //   have Drop-glue as well.
        unsafe { Texel::new_unchecked() }
    }
}
684
/// Protocol for [`Texel::store_atomic_slice_unchecked`] argument. Implementation detail.
///
/// NOTE(review): the consumer of this trait is not fully visible in this file
/// chunk; from the `SliceSource` implementation it appears `init` partitions
/// the source into a partial head unit, whole `AtomicPart`-sized chunks, and a
/// partial tail unit, which `load_head`/`load`/`load_tail` then copy into
/// part-sized scratch buffers — confirm against the caller.
trait DataSource {
    fn init(&mut self, init: usize);
    fn load_head(&mut self, val: &mut [u8; MaxAtomic::PART_SIZE]);
    fn load(&mut self, val: &mut [u8; MaxAtomic::PART_SIZE]);
    fn load_tail(&mut self, val: &mut [u8; MaxAtomic::PART_SIZE]);
}
692
693/// Operations that can be performed based on the evidence of Texel.
694impl<P> Texel<P> {
    /// Construct a value of `P` from thin air, with zeroed representation.
    pub fn zeroed(self) -> P {
        // SAFETY: by `Texel` being a POD witness (no validity invariants),
        // the all-zeroes bit pattern is a valid representation of `P`.
        unsafe { core::mem::zeroed::<P>() }
    }
700
    /// Copy a texel.
    ///
    /// Note that this does not require `Copy` because that requirement was part of the
    /// requirements of constructing this `Texel` witness.
    pub fn copy_val(self, val: &P) -> P {
        // SAFETY: by the constructor, this type can be copied byte-by-byte,
        // and has no drop glue that a bitwise duplicate could double-run.
        unsafe { ptr::read(val) }
    }
709
    /// Copy a texel out of a shared `Cell`.
    pub fn copy_cell(self, val: &Cell<P>) -> P {
        // SAFETY: by the constructor, this inner type can be copied byte-by-byte. And `Cell` is a
        // transparent wrapper so it can be read byte-by-byte as well.
        unsafe { ptr::read(val) }.into_inner()
    }
715
    /// Undo a [`Cell::as_slice_of_cells`] call.
    pub fn cell_as_slice(self, val: &[Cell<P>]) -> &Cell<[P]> {
        // Round-trip through the byte view (`cell_bytes`/`try_to_cell` are
        // defined elsewhere in this crate); alignment held for the input, so
        // the re-wrap cannot fail.
        let new_slice = self
            .try_to_cell(self.cell_bytes(val).as_slice_of_cells())
            .expect("alignment held previously");
        debug_assert_eq!(new_slice.as_slice_of_cells().len(), val.len());
        new_slice
    }
724
    /// Efficiently store a slice of shared read values to cells.
    ///
    /// We choose an outer slice for the parameter only since the standard library offers the
    /// transposition out of the type parameter, but not its inverse yet. Call
    /// [`Cell::as_slice_of_cells`] as needed.
    ///
    /// # Panics
    ///
    /// Panics when `from` and `val` differ in length.
    #[track_caller]
    pub fn store_cell_slice(self, val: &[Cell<P>], from: &[P]) {
        assert_eq!(from.len(), val.len());
        // SAFETY: by the constructor, this inner type can be copied byte-by-byte. And `Cell` is a
        // transparent wrapper. By our assertion the slices are of the same length. Note we do not
        // assert these slices to be non-overlapping! We could have `P = Cell<X>` and then it's
        // unclear if Rust allows these to overlap or not. I guess we currently have that `Cell<X>`
        // is never `Copy` so we couldn't have such a `Texel` but alas that negative impl is not
        // guaranteed by any logic I came across.
        unsafe {
            // `ptr::copy` (memmove semantics) tolerates overlap, per the note above.
            ptr::copy(
                from.as_ptr(),
                // SAFETY: the slice of `Cell`s is all `UnsafeCell`.
                //
                // <https://github.com/rust-lang/rust/issues/88248#issuecomment-2397394716>
                (val as *const [Cell<P>] as *mut [Cell<P>]).cast(),
                from.len(),
            )
        }
    }
750
    /// Efficiently copy a slice of values from cells to an owned buffer.
    ///
    /// We choose an outer slice for the parameter only since the standard library offers the
    /// transposition out of the type parameter, but not its inverse yet. Call
    /// [`Cell::as_slice_of_cells`] as needed.
    ///
    /// # Panics
    ///
    /// Panics when `into` and `val` differ in length.
    #[track_caller]
    pub fn load_cell_slice(self, val: &[Cell<P>], into: &mut [P]) {
        assert_eq!(into.len(), val.len());
        // SAFETY: see `store_cell_slice` but since we have a mutable reference to the target we
        // can assume it does not overlap, hence the `copy_nonoverlapping`.
        unsafe {
            ptr::copy_nonoverlapping(
                (val as *const [Cell<P>]).cast(),
                into.as_mut_ptr(),
                into.len(),
            )
        }
    }
769
    /// Load a value from an atomic slice.
    ///
    /// The results is only correct if no concurrent modification occurs. The library promises
    /// *basic soundness* but no particular defined behaviour under parallel modifications to the
    /// memory bytes which describe the value to be loaded.
    ///
    /// Each atomic unit is read at most once.
    pub fn load_atomic(self, val: AtomicRef<P>) -> P {
        // Start from a valid zeroed value and overwrite its bytes through a
        // one-element cell slice, reusing the slice loading path.
        let mut value = self.zeroed();
        let slice = AtomicSliceRef::from_ref(val);
        let into = core::slice::from_ref(Cell::from_mut(&mut value));
        self.load_atomic_slice_unchecked(slice, into);
        value
    }
784
    /// Load values from an atomic slice.
    ///
    /// The results is only correct if no concurrent modification occurs. The library promises
    /// *basic soundness* but no particular defined behaviour under parallel modifications to the
    /// memory bytes which describe the value to be loaded.
    ///
    /// Each atomic unit is read at most once.
    ///
    /// # Panics
    ///
    /// This method panics if the slice and the target buffer do not have the same logical length.
    #[track_caller]
    pub fn load_atomic_slice(self, val: AtomicSliceRef<P>, into: &mut [P]) {
        assert_eq!(val.len(), into.len());
        // `Cell::from_mut` + `as_slice_of_cells` adapts `&mut [P]` to the
        // `&[Cell<P>]` shape the unchecked worker expects.
        self.load_atomic_slice_unchecked(val, Cell::from_mut(into).as_slice_of_cells());
    }
801
    /// Load values from an atomic slice to a slice of cells.
    ///
    /// The results is only correct if no concurrent modification occurs. The library promises
    /// *basic soundness* but no particular defined behaviour under parallel modifications to the
    /// memory bytes which describe the value to be loaded.
    ///
    /// Each atomic unit is read at most once.
    ///
    /// # Panics
    ///
    /// This method panics if the slice and the target buffer do not have the same length.
    #[track_caller]
    pub fn load_atomic_to_cells(self, val: AtomicSliceRef<P>, into: &[Cell<P>]) {
        assert_eq!(val.len(), into.len());
        // Always works, just undoing the `as_slice_of_cells` of the argument.
        self.load_atomic_slice_unchecked(val, into);
    }
819
    // Worker for the `load_atomic*` entry points: copies the byte range that
    // `val` describes into the target cells, reading each `AtomicPart` unit at
    // most once with a relaxed load. Length equality was asserted by callers.
    fn load_atomic_slice_unchecked(self, val: AtomicSliceRef<P>, into: &[Cell<P>]) {
        // `val.start` is a byte offset; split it into whole atomic units and a
        // byte remainder within the first unit.
        let offset = val.start / core::mem::size_of::<AtomicPart>();
        let mut initial_skip = val.start % core::mem::size_of::<AtomicPart>();
        let mut target = self.cell_bytes(into).as_slice_of_cells();

        let mut buffer = val.buf.0[offset..].iter();
        // By the invariants of `AtomicRef`, that number of bytes is in-bounds.
        // NOTE(review): this unconditionally loads one unit before the loop;
        // verify the invariants exclude an empty `into`, otherwise this
        // `unwrap` could trip on an empty buffer tail.
        let mut load = buffer.next().unwrap().load(atomic::Ordering::Relaxed);

        loop {
            // View the loaded unit as bytes, skipping the lead-in on the first
            // iteration only, and copy as much as the target still needs.
            let input = &bytemuck::bytes_of(&load)[initial_skip..];
            let copy_len = input.len().min(target.len());
            constants::U8.store_cell_slice(&target[..copy_len], &input[..copy_len]);
            target = &target[copy_len..];

            if target.is_empty() {
                break;
            }

            load = buffer.next().unwrap().load(atomic::Ordering::Relaxed);
            initial_skip = 0;
        }
    }
843
    /// Store a value to an atomic slice.
    ///
    /// The results is only correct if no concurrent modification occurs. The library promises
    /// *basic soundness* but no particular defined behaviour under parallel modifications to the
    /// memory bytes which describe the value to be store.
    ///
    /// Provides the same wait-freeness as the underlying platform for `fetch_*` instructions, that
    /// is this does not use `compare_exchange_weak`. This implies that concurrent modifications to
    /// bytes *not* covered by this particular representation will not inherently block progress.
    pub fn store_atomic(self, val: AtomicRef<P>, value: P) {
        // Delegate to the slice variant with one-element views on both sides.
        let slice = AtomicSliceRef::from_ref(val);
        self.store_atomic_slice(slice, core::slice::from_ref(&value));
    }
857
    /// Store values to an atomic slice.
    ///
    /// The result is only correct if no concurrent modification occurs. The library promises
    /// *basic soundness* but no particular defined behaviour under parallel modifications to the
    /// memory bytes which describe the values to be stored.
    ///
    /// Provides the same wait-freeness as the underlying platform for `fetch_*` instructions, that
    /// is this does not use `compare_exchange_weak`. This implies that concurrent modifications to
    /// bytes *not* covered by this particular representation will not inherently block progress.
    ///
    /// # Panics
    ///
    /// This method panics if the slice and the source buffer do not have the same logical length.
    #[track_caller]
    pub fn store_atomic_slice(self, val: AtomicSliceRef<P>, source: &[P]) {
        // Adapter feeding the bytes of `source` to `store_atomic_slice_unchecked`,
        // partitioned into an unaligned head, whole part-sized chunks, and a tail.
        struct SliceSource<'lt> {
            // Byte position within the first atomic part at which the head bytes start.
            skip: usize,
            head: &'lt [u8],
            chunks: core::slice::ChunksExact<'lt, u8>,
            tail: &'lt [u8],
        }

        impl DataSource for SliceSource<'_> {
            fn init(&mut self, init: usize) {
                // `init` is the byte count up to the first part boundary of the target.
                // If the source is shorter than that, the head is the entire source.
                let len = self.head.len().min(init);
                let (head, body) = self.head.split_at(len);
                self.head = head;
                self.skip = MaxAtomic::PART_SIZE - init;

                // Whole parts become chunks; any leftover bytes form the tail.
                let chunks = body.chunks_exact(MaxAtomic::PART_SIZE);
                self.tail = chunks.remainder();
                self.chunks = chunks;
            }

            fn load_head(&mut self, val: &mut [u8; MaxAtomic::PART_SIZE]) {
                // Head bytes occupy the end of the first part, starting at `skip`.
                let target = &mut val[self.skip..][..self.head.len()];
                target.copy_from_slice(self.head);
            }

            fn load(&mut self, val: &mut [u8; core::mem::size_of::<AtomicPart>()]) {
                // Called exactly once per whole chunk; exhaustion would be a logic error.
                if let Some(next) = self.chunks.next() {
                    val.copy_from_slice(next);
                } else {
                    debug_assert!(false);
                }
            }

            fn load_tail(&mut self, val: &mut [u8; MaxAtomic::PART_SIZE]) {
                // Tail bytes occupy the beginning of the final part.
                let target = &mut val[..self.tail.len()];
                target.copy_from_slice(self.tail);
            }
        }

        assert_eq!(val.len(), source.len());

        // The placeholder fields are overwritten by `init` before any `load_*` call.
        let source = SliceSource {
            head: self.to_bytes(source),
            skip: 0,
            chunks: [].chunks_exact(MaxAtomic::PART_SIZE),
            tail: &[],
        };

        self.store_atomic_slice_unchecked(val, source);
    }
922
    /// Store values from cells to an atomic slice.
    ///
    /// The result is only correct if no concurrent modification occurs. The library promises
    /// *basic soundness* but no particular defined behaviour under parallel modifications to the
    /// memory bytes which describe the values to be stored.
    ///
    /// Provides the same wait-freeness as the underlying platform for `fetch_*` instructions, that
    /// is this does not use `compare_exchange_weak`. This implies that concurrent modifications to
    /// bytes *not* covered by this particular representation will not inherently block progress.
    ///
    /// # Panics
    ///
    /// This method panics if the slice and the source buffer do not have the same logical length.
    pub fn store_atomic_from_cells(self, val: AtomicSliceRef<P>, source: &[Cell<P>]) {
        // Adapter analogous to `SliceSource` in `store_atomic_slice`, except the
        // source bytes live behind `Cell` and are read via `load_cell_slice`.
        struct CellSource<'lt> {
            // Byte position within the first atomic part at which the head bytes start.
            skip: usize,
            head: &'lt [Cell<u8>],
            chunks: core::slice::ChunksExact<'lt, Cell<u8>>,
            tail: &'lt [Cell<u8>],
        }

        impl DataSource for CellSource<'_> {
            fn init(&mut self, init: usize) {
                // `init` is the byte count up to the first part boundary of the target.
                let len = self.head.len().min(init);
                let (head, body) = self.head.split_at(len);
                self.head = head;
                self.skip = MaxAtomic::PART_SIZE - init;

                // Whole parts become chunks; any leftover cells form the tail.
                let chunks = body.chunks_exact(MaxAtomic::PART_SIZE);
                self.tail = chunks.remainder();
                self.chunks = chunks;
            }

            fn load_head(&mut self, val: &mut [u8; MaxAtomic::PART_SIZE]) {
                // Head bytes occupy the end of the first part, starting at `skip`.
                let target = &mut val[self.skip..][..self.head.len()];
                constants::U8.load_cell_slice(self.head, target);
            }

            fn load(&mut self, val: &mut [u8; core::mem::size_of::<AtomicPart>()]) {
                // Called exactly once per whole chunk; exhaustion would be a logic error.
                if let Some(next) = self.chunks.next() {
                    constants::U8.load_cell_slice(next, val);
                } else {
                    debug_assert!(false);
                }
            }

            fn load_tail(&mut self, val: &mut [u8; MaxAtomic::PART_SIZE]) {
                // Tail bytes occupy the beginning of the final part.
                let target = &mut val[..self.tail.len()];
                constants::U8.load_cell_slice(self.tail, target);
            }
        }

        assert_eq!(val.len(), source.len());

        // Reject overlap between the atomic target and the cell source outright;
        // see the panic message for the rationale.
        assert!(
            {
                let lhs = val.as_ptr_range();
                let rhs = source.as_ptr_range();
                lhs.end.addr() <= rhs.start.addr() || rhs.end.addr() <= lhs.start.addr()
            },
            "Your atomic slice aliases a slice of cells. While this may be permissible if you're \
            very very careful about these values, you are violating safety invariants by using \
            these values across non-local API boundaries"
        );

        // The placeholder fields are overwritten by `init` before any `load_*` call.
        let source = CellSource {
            head: self.cell_bytes(source).as_slice_of_cells(),
            skip: 0,
            chunks: [].chunks_exact(MaxAtomic::PART_SIZE),
            tail: &[],
        };

        self.store_atomic_slice_unchecked(val, source);
    }
997
    // Store a data source to a slice, assuming they cover the same number of bytes.
    //
    // The target range is split into (at most) three sections: a partially covered
    // first part, fully covered middle parts, and a partially covered last part.
    // Partial parts use read-modify-write; full parts use a plain store.
    fn store_atomic_slice_unchecked(self, val: AtomicSliceRef<P>, mut from: impl DataSource) {
        // Modify only some bits of an atomic value.
        fn modify_parts_with(
            part: &AtomicPart,
            with: impl FnOnce(&mut [u8; MaxAtomic::PART_SIZE]),
        ) {
            let original = part.load(atomic::Ordering::Relaxed);
            let mut value = original;

            // Let the callback edit a byte-level copy of the loaded value.
            let buffer = bytemuck::bytes_of_mut(&mut value);
            with(buffer.try_into().unwrap());

            // Any bits we did not modify, including those outside our own range, will not get
            // modified by this instruction. This provides the basic conflict guarantee.
            part.fetch_xor(original ^ value, atomic::Ordering::Relaxed);
        }

        // Index of the atomic part containing the first byte of the range.
        let offset = val.start / MaxAtomic::PART_SIZE;
        let mut buffer = val.buf.0[offset..].iter();

        // How many bytes from the start to first atomic boundary?
        let head_len = val.start.next_multiple_of(MaxAtomic::PART_SIZE) - val.start;
        from.init(head_len);

        let after_head = (val.end - val.start).saturating_sub(head_len);
        // How many bytes is the end from its previous atomic boundary?
        let tail_skip = after_head % MaxAtomic::PART_SIZE;
        let body_count = after_head / MaxAtomic::PART_SIZE;

        if head_len > 0 {
            // The first part is only partially covered: read-modify-write it.
            let into = buffer.next().unwrap();
            modify_parts_with(into, |buffer| from.load_head(buffer));
        }

        let body = buffer.as_slice();
        for part in &body[..body_count] {
            // Here we modify all bytes so just store..
            let mut value = Default::default();
            let buffer = bytemuck::bytes_of_mut(&mut value);
            from.load(buffer.try_into().unwrap());
            part.store(value, atomic::Ordering::Relaxed);
        }

        if tail_skip > 0 {
            // The last part is only partially covered: read-modify-write it.
            let into = &body[body_count];
            modify_parts_with(into, |buffer| from.load_tail(buffer));
        }
    }
1047
1048 /// Reinterpret a slice of aligned bytes as a slice of the texel.
1049 ///
1050 /// Note that the size (in bytes) of the slice will be shortened if the size of `P` is not a
1051 /// divisor of the input slice's size.
1052 pub fn to_slice<'buf>(self, buffer: &'buf [MaxAligned]) -> &'buf [P] {
1053 self.cast_buf(buf::new(buffer))
1054 }
1055
1056 /// Reinterpret a slice of aligned bytes as a mutable slice of the texel.
1057 ///
1058 /// Note that the size (in bytes) of the slice will be shortened if the size of `P` is not a
1059 /// divisor of the input slice's size.
1060 pub fn to_mut_slice<'buf>(self, buffer: &'buf mut [MaxAligned]) -> &'buf mut [P] {
1061 self.cast_mut_buf(buf::new_mut(buffer))
1062 }
1063
1064 /// Try to reinterpret a slice of bytes as a slice of the texel.
1065 ///
1066 /// This returns `Some` if the buffer is suitably aligned, and `None` otherwise.
1067 pub fn try_to_slice<'buf>(self, bytes: &'buf [u8]) -> Option<&'buf [P]> {
1068 if bytes.as_ptr() as usize % mem::align_of::<P>() == 0 {
1069 // SAFETY:
1070 // - The `pod`-ness is certified by `self`, which makes the bytes a valid
1071 // representation of P.
1072 // - The total size is at most `bytes` by construction.
1073 let len = bytes.len() / mem::size_of::<P>();
1074 Some(unsafe { &*ptr::slice_from_raw_parts(bytes.as_ptr() as *const P, len) })
1075 } else {
1076 None
1077 }
1078 }
1079
1080 /// Try to reinterpret a slice of bytes as a slice of the texel.
1081 ///
1082 /// This returns `Some` if the buffer is suitably aligned, and `None` otherwise.
1083 pub fn try_to_slice_mut<'buf>(self, bytes: &'buf mut [u8]) -> Option<&'buf mut [P]> {
1084 if let Some(slice) = self.try_to_slice(bytes) {
1085 // SAFETY:
1086 // - The `pod`-ness is certified by `self`, which makes the bytes a valid
1087 // representation of P. Conversely, it makes any P valid as bytes.
1088 let len = slice.len();
1089 Some(unsafe { &mut *ptr::slice_from_raw_parts_mut(bytes.as_mut_ptr() as *mut P, len) })
1090 } else {
1091 None
1092 }
1093 }
1094
1095 /// Interpret a byte slice as unaligned values of another type.
1096 ///
1097 /// This is essentially a call to [`Texel::to_slice`] however the specific output type
1098 /// selection ensures that it always succeeds.
1099 ///
1100 /// # Examples
1101 ///
1102 /// ```
1103 /// use image_texel::texels::{U8, U64};
1104 ///
1105 /// // This buffer is not guaranteed to be aligned!
1106 /// let raw_buffer = [0u16, 1, 2, 3].map(u16::to_be_bytes);
1107 /// let raw_bytes = U8.array().to_bytes(&raw_buffer);
1108 ///
1109 /// let unaligned = U64.to_unaligned_slice(raw_bytes);
1110 /// // Forces a copy. `texel.unaligned().copy` would work, too.
1111 /// assert_eq!(u64::from_be(unaligned[0].0), 0x0000_0001_0002_0003);
1112 /// ```
1113 pub fn to_unaligned_slice<'buf>(self, bytes: &'buf [u8]) -> &'buf [Unaligned<P>] {
1114 self.unaligned().try_to_slice(bytes).unwrap()
1115 }
1116
1117 /// Interpret a mutable byte slice as unaligned values of another type.
1118 ///
1119 /// # Examples
1120 ///
1121 /// ```
1122 /// use image_texel::texels::{U16, U64};
1123 ///
1124 /// // This buffer is not guaranteed to be aligned!
1125 /// let mut raw_buffer = [0u16; 4];
1126 /// let raw_bytes = U16.to_mut_bytes(&mut raw_buffer);
1127 ///
1128 /// let unaligned = U64.to_unaligned_slice_mut(raw_bytes);
1129 /// unaligned[0].0 = u64::from_be(0x0000_0001_0002_0003);
1130 /// assert_eq!(raw_buffer.map(u16::from_be), [0, 1, 2, 3]);
1131 /// ```
1132 pub fn to_unaligned_slice_mut<'buf>(self, bytes: &'buf mut [u8]) -> &'buf mut [Unaligned<P>] {
1133 self.unaligned().try_to_slice_mut(bytes).unwrap()
1134 }
1135
1136 /// Reinterpret a shared slice as a some particular type.
1137 ///
1138 /// Note that the size (in bytes) of the slice will be shortened if the size of `P` is not a
1139 /// divisor of the input slice's size.
1140 pub fn to_cell<'buf>(self, buffer: &'buf [MaxCell]) -> &'buf Cell<[P]> {
1141 cell_buf::from_slice(buffer).as_texels(self)
1142 }
1143
1144 /// Reinterpret a slice of texel as memory.
1145 ///
1146 /// Note that you can convert a reference to a single value by [`core::slice::from_ref`].
1147 pub fn try_to_cell<'buf>(self, bytes: &'buf [Cell<u8>]) -> Option<&'buf Cell<[P]>> {
1148 // Safety:
1149 // - The `pod`-ness certified by `self` ensures the cast of the contents of the memory is
1150 // valid. All representations are a valid P and conversely and P is valid as bytes. Since
1151 // Cell is a transparent wrapper the types are compatible.
1152 // - We uphold the share invariants of `Cell`, which are trivial (less than those required
1153 // and provided by a shared reference).
1154 if bytes.as_ptr() as usize % mem::align_of::<P>() == 0 {
1155 let len = bytes.len() / mem::size_of::<P>();
1156 let ptr = ptr::slice_from_raw_parts(bytes.as_ptr() as *const P, len);
1157 Some(unsafe { &*(ptr as *const Cell<[P]>) })
1158 } else {
1159 None
1160 }
1161 }
1162
1163 /// Interpret a slice of cells as unaligned cells of another type.
1164 ///
1165 /// # Examples
1166 ///
1167 /// ```
1168 /// use core::cell::Cell;
1169 /// use image_texel::texels::{U16, U64};
1170 ///
1171 /// // This buffer is not guaranteed to be aligned to u64!
1172 /// let mut raw_buffer = [0u16; 4].map(Cell::new);
1173 /// let raw_bytes = U16.cell_bytes(&raw_buffer).as_slice_of_cells();
1174 ///
1175 /// // Write a u64 value anyways.
1176 /// let unaligned = U64.to_unaligned_cell(raw_bytes).as_slice_of_cells();
1177 /// unaligned[0].set(u64::from_be(0x0000_0001_0002_0003).into());
1178 ///
1179 /// let raw_buffer = raw_buffer.map(Cell::into_inner);
1180 /// assert_eq!(raw_buffer.map(u16::from_be), [0, 1, 2, 3]);
1181 /// ```
1182 pub fn to_unaligned_cell<'buf>(self, bytes: &'buf [Cell<u8>]) -> &'buf Cell<[Unaligned<P>]> {
1183 self.unaligned().try_to_cell(bytes).unwrap()
1184 }
1185
1186 /// Reinterpret a slice of atomically access memory with a type annotation.
1187 pub fn try_to_atomic<'buf>(
1188 self,
1189 bytes: AtomicSliceRef<'buf, u8>,
1190 ) -> Option<AtomicSliceRef<'buf, P>> {
1191 if bytes.start % mem::align_of::<P>() == 0 {
1192 let end = bytes.end - bytes.end % mem::align_of::<P>();
1193 Some(AtomicSliceRef {
1194 buf: bytes.buf,
1195 start: bytes.start,
1196 end,
1197 texel: self,
1198 })
1199 } else {
1200 None
1201 }
1202 }
1203
1204 /// Interpret a slice of cells as unaligned atomic values of another type.
1205 ///
1206 /// # Examples
1207 ///
1208 /// ```
1209 /// use image_texel::texels::atomic_buf;
1210 /// use image_texel::texels::{MaxAtomic, U16, U64};
1211 ///
1212 /// let underlying = [MaxAtomic::zero(); 1];
1213 /// let raw_buffer = atomic_buf::new(&underlying[..]);
1214 ///
1215 /// // Get a partial slice of it, that is is not aligned to u64.
1216 /// let u16_slice = raw_buffer.index(U16.to_range(1..5).unwrap());
1217 /// let raw_bytes = U16.atomic_bytes(u16_slice);
1218 ///
1219 /// // Re-Interpret that as an unaligned slice of u64 values.
1220 /// let unaligned = U64.to_unaligned_atomic(raw_bytes);
1221 ///
1222 /// std::thread::scope(|scope| {
1223 /// scope.spawn(|| {
1224 /// // Write a u64 value.
1225 /// U64.unaligned().store_atomic(
1226 /// unaligned.index_one(0),
1227 /// u64::from_be(0x0000_0001_0002_0003).into()
1228 /// )
1229 /// });
1230 /// });
1231 ///
1232 /// // Load from the buffer we've written to atomically.
1233 /// let mut values = [0; 4];
1234 /// U16.load_atomic_slice(u16_slice, &mut values[..]);
1235 /// assert_eq!(values.map(u16::from_be), [0u16, 1, 2, 3]);
1236 /// ```
1237 pub fn to_unaligned_atomic<'buf>(
1238 self,
1239 bytes: AtomicSliceRef<'buf, u8>,
1240 ) -> AtomicSliceRef<'buf, Unaligned<P>> {
1241 self.unaligned().try_to_atomic(bytes).unwrap()
1242 }
1243
1244 /// Reinterpret a slice of texel as memory.
1245 ///
1246 /// Note that you can convert a reference to a single value by [`core::slice::from_ref`].
1247 pub fn to_bytes<'buf>(self, texel: &'buf [P]) -> &'buf [u8] {
1248 // Safety:
1249 // * lifetime is not changed
1250 // * keeps the exact same size
1251 // * validity for byte reading checked by Texel constructor
1252 unsafe { slice::from_raw_parts(texel.as_ptr() as *const u8, mem::size_of_val(texel)) }
1253 }
1254
1255 /// Reinterpret a mutable slice of texel as memory.
1256 ///
1257 /// Note that you can convert a reference to a single value by [`core::slice::from_mut`].
1258 pub fn to_mut_bytes<'buf>(self, texel: &'buf mut [P]) -> &'buf mut [u8] {
1259 // Safety:
1260 // * lifetime is not changed
1261 // * keeps the exact same size
1262 // * validity as bytes checked by Texel constructor
1263 unsafe { slice::from_raw_parts_mut(texel.as_mut_ptr() as *mut u8, mem::size_of_val(texel)) }
1264 }
1265
1266 /// Reinterpret a slice of texel as memory.
1267 ///
1268 /// Note that you can convert a reference to a single value by [`core::slice::from_ref`].
1269 pub fn cell_bytes<'buf>(self, texel: &'buf [Cell<P>]) -> &'buf Cell<[u8]> {
1270 let ptr: *const [u8] =
1271 { ptr::slice_from_raw_parts(texel.as_ptr() as *const u8, mem::size_of_val(texel)) };
1272
1273 // Safety:
1274 // * lifetime is not changed
1275 // * kept the exact same size
1276 // * validity for byte representations both ways checked by Texel constructor
1277 unsafe { &*(ptr as *const Cell<[u8]>) }
1278 }
1279
1280 /// Reinterpret a slice of atomically modified texels as atomic bytes.
1281 pub fn atomic_bytes<'buf>(self, texel: AtomicSliceRef<'buf, P>) -> AtomicSliceRef<'buf, u8> {
1282 AtomicSliceRef {
1283 buf: texel.buf,
1284 start: texel.start,
1285 end: texel.end,
1286 texel: constants::U8,
1287 }
1288 }
1289
    // Copy the contents of `a` into `b`. The two cell slices may overlap, which is
    // why this goes through `ptr::copy` (memmove semantics).
    #[track_caller]
    pub(crate) fn cell_memory_copy(self, a: &[Cell<P>], b: &[Cell<P>]) {
        assert_eq!(a.len(), b.len());
        // Safety:
        // - the source is readable for `len` units
        // - the target is writable for `len` items
        // - the Texel certifies that this copy creates valid values
        //
        // We could not do this as `b_to_slice.copy_from_slice(a_to_slice)` since that would assert
        // a non-overlap between the two that need no hold in general.
        unsafe { ptr::copy::<P>(a.as_ptr() as *const P, b.as_ptr() as *mut P, a.len()) };
    }
1302
    /// Compare two cell slices by memory, not by any content equality.
    ///
    /// TODO: expose this, but under what name?
    pub(crate) fn cell_memory_eq<'a, 'b>(self, a: &'a [Cell<P>], b: &'b [Cell<P>]) -> bool {
        let len = mem::size_of_val(a);

        // Differing byte lengths can never compare equal.
        if len != mem::size_of_val(b) {
            return false;
        }

        // Safety: the same reasoning applies for both.
        // - this covers the exact memory range as the underlying slice of cells.
        // - the Texel certifies it is initialized memory.
        // - the lifetime is the same.
        // - the memory in the slice is not mutated. This is a little more subtle but `Cell` is not
        //   `Sync` so this thread is the only that could modify those contents currently as we
        //   have a reference to those contents. But also in this thread this function _is
        //   currently running_ and so it suffices that it does not to modify the contents. It does
        //   not access the slice through the cell in any way.
        // - the total size is at most `isize::MAX` since it was already a reference to it.
        let lhs: &'a [u8] = unsafe { slice::from_raw_parts(a.as_ptr() as *const u8, len) };
        let rhs: &'b [u8] = unsafe { slice::from_raw_parts(b.as_ptr() as *const u8, len) };

        // Plain byte-wise comparison of the underlying memory.
        lhs == rhs
    }
1328
1329 /// Compare a slices with untyped memory.
1330 ///
1331 /// TODO: expose this, but under what name?
1332 pub(crate) fn cell_bytes_eq<'a, 'b>(self, a: &'a [Cell<P>], rhs: &[u8]) -> bool {
1333 let len = mem::size_of_val(a);
1334
1335 if len != mem::size_of_val(rhs) {
1336 return false;
1337 }
1338
1339 // Safety: see `cell_memory_eq`.
1340 let lhs: &'a [u8] = unsafe { slice::from_raw_parts(a.as_ptr() as *const u8, len) };
1341
1342 // Really these two should not be overlapping! If the compiler knew, maybe a better memory
1343 // compare that is more aware of the cache effects of loading? But to be honest it should
1344 // not matter much.
1345 debug_assert!({
1346 let a_range = lhs.as_ptr_range();
1347 let b_range = rhs.as_ptr_range();
1348
1349 a_range.end <= b_range.start || b_range.end <= a_range.start
1350 });
1351
1352 lhs == rhs
1353 }
1354
    // Copy the bytes of atomic slice `a` into atomic slice `b` of the same logical
    // length. Loads and stores are per-part and relaxed; the result is only well
    // defined in the absence of concurrent writers.
    #[track_caller]
    pub(crate) fn atomic_memory_move(self, a: AtomicSliceRef<'_, P>, b: AtomicSliceRef<'_, P>) {
        // Adapter feeding bytes loaded from the atomic source into the generic
        // store loop, partitioned into head, whole parts, and tail of the target.
        struct SliceSource<'lt> {
            // Byte position within the first atomic part at which the head bytes start.
            skip: usize,
            head: AtomicSliceRef<'lt, u8>,
            // FIXME: the loads are straddling boundaries. Each side may be copied twice in the
            // effort of loading. Also iterating like this incurs some bounds checks. It's very
            // suboptimal. But the soundness of this whole thing scares me so let's not over
            // optimize before we know atomic-to-atomic copy is actually needed to be very fast.
            chunks: AtomicSliceRef<'lt, u8>,
            tail: AtomicSliceRef<'lt, u8>,
        }

        impl DataSource for SliceSource<'_> {
            fn init(&mut self, init: usize) {
                // `init` is the byte count up to the first part boundary of the target.
                let len = self.head.len().min(init);
                let (head, body) = self.head.split_at(len);
                self.head = head;
                self.skip = MaxAtomic::PART_SIZE - init;

                // Round the body down to whole parts via masking; this relies on
                // `PART_SIZE` being a power of two (the size of an atomic integer).
                let chunks_len = body.len() & !(MaxAtomic::PART_SIZE - 1);
                let (chunks, tail) = body.split_at(chunks_len);

                self.chunks = chunks;
                self.tail = tail;
            }

            fn load_head(&mut self, val: &mut [u8; MaxAtomic::PART_SIZE]) {
                // Head bytes occupy the end of the first part, starting at `skip`.
                let target = &mut val[self.skip..][..self.head.len()];
                constants::U8.load_atomic_slice(self.head, target);
            }

            fn load(&mut self, val: &mut [u8; MaxAtomic::PART_SIZE]) {
                // Consume exactly one part-sized chunk; exhaustion would be a logic error.
                if let Some(next) = self.chunks.get(..MaxAtomic::PART_SIZE) {
                    self.chunks = self.chunks.get(MaxAtomic::PART_SIZE..).unwrap();
                    constants::U8.load_atomic_slice(next, val);
                } else {
                    debug_assert!(false);
                }
            }

            fn load_tail(&mut self, val: &mut [u8; MaxAtomic::PART_SIZE]) {
                // Tail bytes occupy the beginning of the final part.
                let target = &mut val[..self.tail.len()];
                constants::U8.load_atomic_slice(self.tail, target);
            }
        }

        assert_eq!(a.len(), b.len());

        // The placeholder fields are overwritten by `init` before any `load_*` call.
        let source = SliceSource {
            head: self.atomic_bytes(a),
            skip: 0,
            chunks: atomic_buf::new(&[]).as_texels(constants::U8),
            tail: atomic_buf::new(&[]).as_texels(constants::U8),
        };

        self.store_atomic_slice_unchecked(b, source);
    }
1413
1414 pub(crate) fn cast_buf<'buf>(self, buffer: &'buf buf) -> &'buf [P] {
1415 debug_assert_eq!(buffer.as_ptr() as usize % mem::align_of::<MaxAligned>(), 0);
1416 debug_assert_eq!(buffer.as_ptr() as usize % mem::align_of::<P>(), 0);
1417 // Safety:
1418 // * data is valid for reads as memory size is not enlarged
1419 // * lifetime is not changed
1420 // * validity for arbitrary data as required by Texel constructor
1421 // * alignment checked by Texel constructor
1422 // * the size fits in an allocation, see first bullet point.
1423 unsafe {
1424 slice::from_raw_parts(
1425 buffer.as_ptr() as *const P,
1426 buffer.len() / self.size_nz().get(),
1427 )
1428 }
1429 }
1430
1431 pub(crate) fn cast_mut_buf<'buf>(self, buffer: &'buf mut buf) -> &'buf mut [P] {
1432 debug_assert_eq!(buffer.as_ptr() as usize % mem::align_of::<MaxAligned>(), 0);
1433 debug_assert_eq!(buffer.as_ptr() as usize % mem::align_of::<P>(), 0);
1434 // Safety:
1435 // * data is valid for reads and writes as memory size is not enlarged
1436 // * lifetime is not changed
1437 // * validity for arbitrary data as required by Texel constructor
1438 // * alignment checked by Texel constructor
1439 // * the size fits in an allocation, see first bullet point.
1440 unsafe {
1441 slice::from_raw_parts_mut(
1442 buffer.as_mut_ptr() as *mut P,
1443 buffer.len() / self.size_nz().get(),
1444 )
1445 }
1446 }
1447
    /// Construct a range indexing to a slice of this texel.
    ///
    /// See [`TexelRange::new`] as this is just a proxy.
    ///
    /// ```
    /// use image_texel::{texels::{U16, buf}, TexelBuffer};
    ///
    /// let buffer = TexelBuffer::with_elements(&[1u32, 2, 3, 4]);
    /// let range = U16.to_range(4..8).unwrap();
    /// let u16_view = &buffer.as_buf()[range];
    ///
    /// assert_eq!(u16_view.len(), 4);
    /// // This view extends over the `3u32` and `4u32` elements.
    /// // Results depend on native endianess of the `u32` type.
    /// assert!(u16_view[0] == 3 || u16_view[1] == 3);
    /// assert!(u16_view[2] == 4 || u16_view[3] == 4);
    /// ```
    pub fn to_range(self, range: ops::Range<usize>) -> Option<TexelRange<P>> {
        // Pure delegation; `TexelRange::new` decides whether the range is valid.
        TexelRange::new(self, range)
    }
1468
    /// Construct a range indexing to a slice of this texel by bytes.
    ///
    /// See [`TexelRange::from_byte_range`] as this is just a proxy.
    pub fn to_byte_range(self, range: ops::Range<usize>) -> Option<TexelRange<P>> {
        // Pure delegation; `TexelRange::from_byte_range` decides validity.
        TexelRange::from_byte_range(self, range)
    }
1475}
1476
// Compile-time assertions that `MaxAtomic` and `MaxAligned` agree in size and in
// alignment. The `[f()][cond as usize]` construct indexes a one-element array: if
// the negated comparison is true the index is 1, which fails const evaluation.
// The named helper functions exist so the compile error message names the
// violated property.
const _: () = {
    const fn atomic_is_size_equivalent_of_aligned() {}
    const fn atomic_is_align_equivalent_of_aligned() {}

    [atomic_is_size_equivalent_of_aligned()]
        [!(core::mem::size_of::<MaxAtomic>() == core::mem::size_of::<MaxAligned>()) as usize];

    [atomic_is_align_equivalent_of_aligned()]
        [!(core::mem::align_of::<MaxAtomic>() == core::mem::align_of::<MaxAligned>()) as usize];
};
1487
1488impl MaxAtomic {
1489 pub(crate) const PART_SIZE: usize = core::mem::size_of::<AtomicPart>();
1490
1491 /// Create a vector of atomic zero-bytes.
1492 pub const fn zero() -> Self {
1493 const Z: AtomicPart = AtomicPart::new(0);
1494 MaxAtomic([Z; ATOMIC_PARTS])
1495 }
1496
1497 /// Create a vector from values initialized synchronously.
1498 pub fn new(contents: MaxAligned) -> Self {
1499 let mut result = Self::zero();
1500 let from = bytemuck::bytes_of(&contents);
1501 let from = from.chunks_exact(core::mem::size_of::<AtomicPart>());
1502
1503 for (part, src) in result.0.iter_mut().zip(from) {
1504 let to = bytemuck::bytes_of_mut(AtomicPart::get_mut(part));
1505 to.copy_from_slice(src);
1506 }
1507
1508 result
1509 }
1510
1511 /// Unwrap an owned value.
1512 pub fn into_inner(mut self) -> MaxAligned {
1513 let mut result = MaxAligned([0; MAX_ALIGN]);
1514 let from = bytemuck::bytes_of_mut(&mut result);
1515 let from = from.chunks_exact_mut(core::mem::size_of::<AtomicPart>());
1516
1517 for (part, to) in self.0.iter_mut().zip(from) {
1518 let src = bytemuck::bytes_of(AtomicPart::get_mut(part));
1519 to.copy_from_slice(src);
1520 }
1521
1522 result
1523 }
1524
1525 /// Load the data into an owned value.
1526 pub fn load(&self, ordering: atomic::Ordering) -> MaxAligned {
1527 let mut result = MaxAligned([0; MAX_ALIGN]);
1528 let from = bytemuck::bytes_of_mut(&mut result);
1529 let from = from.chunks_exact_mut(core::mem::size_of::<AtomicPart>());
1530
1531 for (part, to) in self.0.iter().zip(from) {
1532 let data = part.load(ordering);
1533 let src = bytemuck::bytes_of(&data);
1534 to.copy_from_slice(src);
1535 }
1536
1537 result
1538 }
1539}
1540
1541impl MaxCell {
1542 /// Create a vector of atomic zero-bytes.
1543 pub const fn zero() -> Self {
1544 MaxCell(Cell::new([0; MAX_ALIGN]))
1545 }
1546
1547 /// Create a vector from values initialized synchronously.
1548 pub fn new(contents: MaxAligned) -> Self {
1549 MaxCell(Cell::new(contents.0))
1550 }
1551
1552 /// Overwrite the contents with new information from another cell.
1553 pub fn set(&self, newval: &Self) {
1554 self.0.set(newval.0.get())
1555 }
1556
1557 /// Read the current contents from this cell into an owned value.
1558 pub fn get(&self) -> MaxAligned {
1559 MaxAligned(self.0.get())
1560 }
1561
1562 /// Unwrap an owned value.
1563 pub fn into_inner(self) -> MaxAligned {
1564 MaxAligned(self.0.into_inner())
1565 }
1566}
1567
1568/// This is a pure marker type.
1569impl<P> Clone for Texel<P> {
1570 fn clone(&self) -> Self {
1571 Texel(PhantomData)
1572 }
1573}
1574
impl<P> PartialEq for Texel<P> {
    // All `Texel<P>` values are identical zero-sized witnesses for the same `P`,
    // so equality is unconditionally true.
    fn eq(&self, _: &Self) -> bool {
        true
    }
}
1580
// `eq` is total and reflexive (always `true`), so `Eq` holds.
impl<P> Eq for Texel<P> {}
1582
1583impl<P> PartialOrd for Texel<P> {
1584 fn partial_cmp(&self, _: &Self) -> Option<Ordering> {
1585 Some(Ordering::Equal)
1586 }
1587}
1588
impl<P> Ord for Texel<P> {
    // There is exactly one value of the zero-sized marker per `P`, so the total
    // order is trivial.
    fn cmp(&self, _: &Self) -> Ordering {
        Ordering::Equal
    }
}
1594
/// This is a pure marker type.
///
/// `Copy` holds for any `P`, without a bound, since no value of `P` is stored.
impl<P> Copy for Texel<P> {}
1597
impl<P> fmt::Debug for Texel<P> {
    // Render the layout the marker witnesses (size and alignment) rather than the
    // zero-sized value itself, which carries no runtime data.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.debug_struct("Texel")
            .field("size", &self.size())
            .field("align", &self.align())
            .finish()
    }
}
1606
1607impl<P> hash::Hash for Texel<P> {
1608 fn hash<H: hash::Hasher>(&self, _: &mut H) {}
1609}