//! ## Basic Invariants
//!
//! - `Bits` have a nonzero bitwidth specified by a `NonZeroUsize`. Requiring a
//!   nonzero width eliminates several edge cases and ambiguities this crate
//!   would otherwise have to handle.
//! - `Bits` are stored in little endian order with the same requirements as
//!   `[Digit]`. The number of `Digit`s is the minimum needed to store all bits.
//!   If the bitwidth is not a multiple of `Digit::BITS`, then there will be
//!   some unused bits in the last `Digit`. For example, a bitwidth of 100 bits
//!   takes up 2 digits (if `Digit::BITS == 64`): 64 bits in the first digit, 36
//!   bits in the least significant bits of the second digit, and 28 unused bits
//!   in the remainder of the second digit.
//! - Unused bits are zeroed. Note that this is not a safety critical invariant.
//!   Setting unused bits via `Bits::as_mut_slice` or `Bits::last_mut` will not
//!   cause Rust UB, but it may result in arithmetically incorrect results or
//!   panics from the functions on `Bits`. Arbitrary bits in the last digit can
//!   be set temporarily, but [Bits::clear_unused_bits] should be run before
//!   reaching a function that expects all these invariants to hold.
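//!
//! As a rough worked example of the storage layout (a sketch assuming
//! `Digit::BITS == 64`; the actual `Digit` width varies by target):
//!
//! ```
//! let bw = 100usize;
//! let digit_bits = 64usize;
//! // ceiling division gives the minimum number of digits needed
//! let digits = (bw + digit_bits - 1) / digit_bits;
//! assert_eq!(digits, 2);
//! // the bits of the last digit beyond the bitwidth are the unused bits
//! let unused = digits * digit_bits - bw;
//! assert_eq!(unused, 28);
//! ```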

use core::{
    fmt,
    hash::{Hash, Hasher},
    mem,
    num::NonZeroUsize,
    ops::Range,
    ptr,
};

use awint_internals::*;
use const_fn::const_fn;

/// A reference to the bits in an `InlAwi`, `ExtAwi`, or other backing
/// construct. If a function is written just in terms of `Bits`, it can work on
/// mixed references to `InlAwi`s, `ExtAwi`s, and `FP<B>`s.
/// `const` big integer arithmetic is possible if the backing type is `InlAwi`
/// and the "const_support" flag is enabled.
///
/// `Bits` do **not** know signedness. Instead, the methods on `Bits` are
/// specified to interpret the bits as unsigned or signed two's complement
/// integers. If a method's documentation does not mention signedness, it either
/// works for both kinds or views the bits as a plain bit string with no
/// integral properties.
///
/// See the [`awint_core` crate level documentation](crate) for understanding
/// two's complement and numerical limits.
///
/// # Note
///
/// Function names of the form `*_` with a trailing underscore are shorthand for
/// `*_assign`, denoting an in-place assignment operation where the left hand
/// side is used as an input before being overwritten with the output. This is
/// used instead of the typical 2-input 1-new-output kind of function, because:
/// - `Bits` cannot allocate without choosing a storage type
/// - In most cases during the course of a computation, one value will not be
///   needed after being used once as an input, so it can take the left hand
///   side `self` position of these in-place assignment operations.
/// - For large bitwidth `Bits`, only two streams of addresses have to be
///   considered by the CPU
/// - In cases where we do need buffering, copying to some temporary is the
///   fastest kind of operation (and in the future an optimizing macro for this
///   is planned)
///
/// Unless otherwise specified, functions on `Bits` that return an `Option<()>`
/// return `None` if the input bitwidths are not equal to each other. The `Bits`
/// have been left unchanged if `None` is returned.
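///
/// For example, a small sketch of the convention, using the `inlawi!` macro
/// and the bitwise `or_` operation with the same `awint` facade imports as the
/// other examples in this documentation:
///
/// ```
/// use awint::{inlawi, Bits, InlAwi};
/// // `x` is used as an input and then overwritten in place with the result
/// let mut x = inlawi!(0x5u4);
/// let y = inlawi!(0xau4);
/// x.or_(&y).unwrap();
/// assert_eq!(x.to_u8(), 0xf);
/// // mismatched bitwidths return `None` and leave `x` unchanged
/// assert!(x.or_(&inlawi!(0u8)).is_none());
/// assert_eq!(x.to_u8(), 0xf);
/// ```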
///
/// # Portability
///
/// This crate strives to maintain deterministic outputs across architectures
/// with different `usize::BITS`, `Digit::BITS`, and different endiannesses. The
/// [Bits::u8_slice_] function, the [Bits::to_u8_slice] function, the
/// serialization impls enabled by `serde_support`, the strings produced by the
/// `const` serialization functions, and functions like `bits_to_string_radix`
/// in the `awint_ext` crate are all portable and should be used when sending
/// representations of `Bits` between architectures.
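///
/// For example, a sketch of a round trip through a little endian byte buffer
/// using [Bits::to_u8_slice] and [Bits::u8_slice_] (the 13 byte buffer length
/// is an arbitrary choice that covers the 100 bit example width):
///
/// ```
/// use awint::{inlawi, Bits, InlAwi};
/// let x = inlawi!(0x123456789abcu100);
/// let mut buf = [0u8; 13];
/// // write the little endian bytes of `x` into `buf`
/// x.to_u8_slice(&mut buf);
/// let mut y = inlawi!(0u100);
/// // reassign `y` from the bytes, which works the same on any architecture
/// y.u8_slice_(&buf);
/// assert!(x.const_eq(&y).unwrap());
/// ```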
///
/// The `rand_` function enabled by `rand_support` uses a
/// deterministic byte oriented implementation to avoid portability issues as
/// long as the rng itself is portable.
///
/// The [core::hash::Hash] implementation is _not_ deterministic across
/// platforms and may not even be deterministic across compiler versions. This
/// is because of technical problems, and the standard library docs say it is
/// not intended to be portable anyway.
///
/// There are many functions that depend on `Digit`, `usize`, and
/// `NonZeroUsize`. In cases where the `usize` describes the bitwidth, a bit
/// shift, or a bit position, the user should not need to worry about
/// portability, since if the values are close to `usize::MAX`, the user is
/// already close to running out of possible memory anyway.
///
/// There are a few usages of `Digit` that are actual
/// views into a contiguous range of bits inside `Bits`, such as
/// `Bits::as_slice`, `Bits::first`, and `Bits::get_digit` (which are all hidden
/// from the documentation; refer to the source code of `bits.rs` if needed).
/// Most end users should not use these, since they have a strong
/// dependence on the size of `Digit`. These functions are actual views into the
/// inner building blocks of this crate that other functions are built around in
/// such a way that they are portable (e.g. the addition functions may
/// internally operate on differing numbers of `Digit`s depending on the
/// size of `Digit`, but the end result looks the same to users on different
/// architectures). The only reason these functions are exposed is that someone
/// may want to write their own custom performant algorithms with as few
/// abstractions as possible in the way.
///
/// Visible functions that are not portable in general, but that always start
/// from the zeroth bit or a given bit position, like [Bits::digit_cin_mul_],
/// [Bits::digit_udivide_], or [Bits::digit_or_], are portable as long as the
/// digit inputs and/or outputs are restricted to `0..=u8::MAX` or special care
/// is taken.
#[repr(transparent)]
pub struct Bits {
    /// # Raw Invariants
    ///
    /// We have chosen `Bits` to be a DST in order to avoid double indirection
    /// (`&mut Bits` would otherwise be a pointer to a `Bits` struct which in
    /// turn had a pointer inside itself to the actual digits). A DST also lets
    /// us harness the power of Rust's desugaring and inference surrounding
    /// other DSTs.
    ///
    /// In addition to the minimum number of digits required to store all the
    /// bits, there are one or more metadata digits on the end of the slice
    /// responsible for storing the actual bitwidth. The length field on the
    /// `[Digit]` DST is the total number of digits in the slice, including
    /// regular digits and the metadata digits. This design decision was made to
    /// prevent invoking UB from a fake slice whose length field stores the
    /// bitwidth instead of the true slice length. Even if we completely avoid
    /// all Rust core methods on slices (and thus avoid practical UB by avoiding
    /// standard slice functions expecting a standard length field), Miri can
    /// still detect a
    /// fake slice being made (even if we completely avoid
    /// `core::ptr::slice_from_raw_parts` with the jankest of transmutation
    /// shenanigans).
    ///
    /// The metadata bitwidth is on the end of the slice, because accesses of
    /// the bitwidth also commonly access the last digit right next to it
    /// through `clear_unused_bits`. This means good cache locality even if the
    /// slice is huge and interior digits are rarely accessed. Storing the
    /// bitwidth at the beginning of the slice instead (which is what Rust does
    /// if we add the bitwidth directly as a field in the `Bits` DST) would lead
    /// to extra offsetting operations being done to skip the first digit
    /// pointed to by the pointer in the DST.
    ///
    /// The unfortunate consequence is that taking no-copy digitwise subslices
    /// of `Bits` in the same general way that you can take subslices of
    /// regular Rust slices is not possible. `subdigits_mut!` almost achieves it
    /// by temporarily replacing digits with the needed metadata where the end
    /// of the subslice is, running a closure on the subslice, and
    /// then replacing the digit at the end. However, we have the concatenation
    /// macros which cover almost every bitfield transformation one could want.
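    ///
    /// As a rough illustration (assuming `Digit::BITS == 64` and a single
    /// metadata digit), a `bw() == 100` value is laid out as `[bits 0..=63,
    /// bits 64..=99 plus 28 zeroed unused bits, metadata digit storing the
    /// bitwidth 100]`, so the raw length is 3 while `len()` is 2.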
    raw: [Digit],
}

/// `Bits` is safe to send between threads since it does not own
/// aliasing memory and has no reference counting mechanism like `Rc`.
unsafe impl Send for Bits {}

/// `Bits` is safe to share between threads since it does not own
/// aliasing memory and has no mutable internal state like `Cell` or `RefCell`.
unsafe impl Sync for Bits {}

/// # Basic functions
impl<'a> Bits {
    /// # Safety
    ///
    /// `raw_ptr` and `raw_len` should satisfy the raw invariants (not just the
    /// regular invariants, but those that account for the extra metadata
    /// digits) of `Bits` along with [standard alignment and initialization
    /// conditions](core::slice::from_raw_parts_mut). `Bits` itself does not
    /// allocate or deallocate memory. It is expected that the caller has a
    /// struct with a proper `Drop` implementation, creates the `Bits` from that
    /// struct, and ensures that the struct is borrowed for the duration of
    /// the `Bits` lifetime. The memory referenced by `raw_ptr` must not be
    /// accessed through any other reference for the duration of lifetime `'a`.
    #[doc(hidden)]
    #[inline]
    #[const_fn(cfg(feature = "const_support"))]
    #[must_use]
    pub const unsafe fn from_raw_parts(raw_ptr: *const Digit, raw_len: usize) -> &'a Self {
        // Safety: `Bits` follows standard slice initialization invariants and is marked
        // `#[repr(transparent)]`. The explicit lifetimes make sure they do not become
        // unbounded.
        unsafe { mem::transmute::<&[Digit], &Bits>(&*ptr::slice_from_raw_parts(raw_ptr, raw_len)) }
    }

    /// # Safety
    ///
    /// see [Bits::from_raw_parts]
    #[doc(hidden)]
    #[inline]
    #[const_fn(cfg(feature = "const_support"))]
    #[must_use]
    pub const unsafe fn from_raw_parts_mut(raw_ptr: *mut Digit, raw_len: usize) -> &'a mut Self {
        // Safety: `Bits` follows standard slice initialization invariants and is marked
        // `#[repr(transparent)]`. The explicit lifetimes make sure they do not become
        // unbounded.
        unsafe {
            mem::transmute::<&mut [Digit], &mut Bits>(&mut *ptr::slice_from_raw_parts_mut(
                raw_ptr, raw_len,
            ))
        }
    }

    /// Returns the argument of this function. This exists so that the macros in
    /// `awint_macros` work on all storage types and `Bits` without needing to
    /// determine the type.
    #[doc(hidden)]
    #[inline]
    #[must_use]
    pub const fn const_as_ref(&'a self) -> &'a Bits {
        self
    }

    /// Returns the argument of this function. This exists so that the macros in
    /// `awint_macros` work on all storage types and `Bits` without needing to
    /// determine the type.
    #[doc(hidden)]
    #[inline]
    #[const_fn(cfg(feature = "const_support"))]
    #[must_use]
    pub const fn const_as_mut(&'a mut self) -> &'a mut Bits {
        self
    }

    /// Returns a raw pointer to the underlying bit storage. The caller must
    /// ensure that the `Bits` outlives the pointer this function returns.
    /// The underlying memory should never be written to.
    #[doc(hidden)]
    #[inline]
    #[must_use]
    pub const fn as_ptr(&self) -> *const Digit {
        self.raw.as_ptr()
    }

    /// Returns a raw pointer to the underlying bit storage. The caller must
    /// ensure that the `Bits` outlives the pointer this function returns.
    #[doc(hidden)]
    #[inline]
    #[const_fn(cfg(feature = "const_support"))]
    #[must_use]
    pub const fn as_mut_ptr(&mut self) -> *mut Digit {
        self.raw.as_mut_ptr()
    }

    /// Returns the raw total length of `self`, including the bitwidth metadata.
    #[doc(hidden)]
    #[inline]
    #[must_use]
    pub const fn raw_len(&self) -> usize {
        self.raw.len()
    }

    /// This allows access of all digits including the bitwidth digits.
    ///
    /// # Safety
    ///
    /// `i < self.raw_len()` should hold true
    #[doc(hidden)]
    #[inline]
    #[const_fn(cfg(feature = "const_support"))]
    #[must_use]
    pub(crate) const unsafe fn raw_get_unchecked(&self, i: usize) -> Digit {
        debug_assert!(i < self.raw_len());
        // Safety: `i` is bounded by `raw_len`
        unsafe { *self.as_ptr().add(i) }
    }

    /// This allows mutable access of all digits, including the bitwidth digits.
    ///
    /// # Safety
    ///
    /// `i < self.raw_len()` should hold true
    #[doc(hidden)]
    #[inline]
    #[const_fn(cfg(feature = "const_support"))]
    #[must_use]
    pub(crate) const unsafe fn raw_get_unchecked_mut(&'a mut self, i: usize) -> &'a mut Digit {
        debug_assert!(i < self.raw_len());
        // Safety: `i` is bounded by `raw_len`. The lifetimes are bounded.
        unsafe { &mut *self.as_mut_ptr().add(i) }
    }

    /// Returns the bitwidth as a `NonZeroUsize`
    #[inline]
    #[const_fn(cfg(feature = "const_support"))]
    #[must_use]
    #[allow(clippy::unnecessary_cast)] // if `Digit == usize` clippy fires
    pub const fn nzbw(&self) -> NonZeroUsize {
        unsafe {
            let mut w = 0usize;
            // Safety: The bitwidth is stored in the metadata digits. The bitwidth is
            // nonzero if invariants were maintained.
            let raw_len = self.raw_len();
            const_for!(i in {0..METADATA_DIGITS} {
                w |= (self.raw_get_unchecked(i + raw_len - METADATA_DIGITS) as usize) << (i * BITS);
            });
            // If something with zeroing allocation or mutations accidentally breaks during
            // development, it will probably manifest itself here
            debug_assert!(w != 0);
            NonZeroUsize::new_unchecked(w)
        }
    }

    /// Returns the bitwidth as a `usize`
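    ///
    /// For example, using the `inlawi!` macro as in the other examples in this
    /// documentation:
    ///
    /// ```
    /// use awint::{inlawi, Bits, InlAwi};
    /// assert_eq!(inlawi!(0u100).bw(), 100);
    /// ```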
    #[inline]
    #[const_fn(cfg(feature = "const_support"))]
    #[must_use]
    pub const fn bw(&self) -> usize {
        self.nzbw().get()
    }

    /// # Safety
    ///
    /// `i < self.len()` should hold true
    #[doc(hidden)]
    #[inline]
    #[const_fn(cfg(feature = "const_support"))]
    #[must_use]
    pub const unsafe fn get_unchecked(&self, i: usize) -> Digit {
        debug_assert!(i < self.len());
        // Safety: `i < self.len()` means the access is within the slice
        unsafe { self.raw_get_unchecked(i) }
    }

    /// # Safety
    ///
    /// `i < self.len()` should hold true
    #[doc(hidden)]
    #[inline]
    #[const_fn(cfg(feature = "const_support"))]
    #[must_use]
    pub const unsafe fn get_unchecked_mut(&'a mut self, i: usize) -> &'a mut Digit {
        debug_assert!(i < self.len());
        // Safety: The bounds of this are a subset of `raw_get_unchecked_mut`
        unsafe { self.raw_get_unchecked_mut(i) }
    }

    /// Returns the exact number of `Digit`s needed to store all bits.
    #[doc(hidden)]
    #[inline]
    #[must_use]
    pub const fn len(&self) -> usize {
        // Safety: The length of the raw slice is the number of regular `Digit`s
        // plus the metadata digits. To get the number of regular digits, we
        // just subtract the metadata digits.
        self.raw_len() - METADATA_DIGITS
    }

    /// Returns the number of unused bits.
    #[doc(hidden)]
    #[inline]
    #[const_fn(cfg(feature = "const_support"))]
    #[must_use]
    pub const fn unused(&self) -> usize {
        if self.extra() == 0 {
            0
        } else {
            BITS - self.extra()
        }
    }

    /// Returns the bitwidth modulo `Digit::BITS`. If there are unused bits,
    /// this equals `Digit::BITS - self.unused()`; if the bitwidth is a multiple
    /// of `Digit::BITS` (so that there are no unused bits), this is zero.
    #[doc(hidden)]
    #[inline]
    #[const_fn(cfg(feature = "const_support"))]
    #[must_use]
    pub const fn extra(&self) -> usize {
        extra(self.nzbw())
    }

    /// Returns the first `Digit`
    #[doc(hidden)]
    #[inline]
    #[const_fn(cfg(feature = "const_support"))]
    #[must_use]
    pub const fn first(&self) -> Digit {
        // Safety: There is at least one digit since bitwidth has a nonzero invariant
        unsafe { self.get_unchecked(0) }
    }

    /// Returns a mutable reference to the first `Digit`
    #[doc(hidden)]
    #[inline]
    #[const_fn(cfg(feature = "const_support"))]
    #[must_use]
    pub const fn first_mut(&'a mut self) -> &'a mut Digit {
        // Safety: There is at least one digit since bitwidth has a nonzero invariant
        unsafe { self.get_unchecked_mut(0) }
    }

    /// Returns the last `Digit`
    #[doc(hidden)]
    #[inline]
    #[const_fn(cfg(feature = "const_support"))]
    #[must_use]
    pub const fn last(&self) -> Digit {
        // Safety: There is at least one digit since bitwidth has a nonzero invariant
        unsafe { self.get_unchecked(self.len() - 1) }
    }

    /// Returns a mutable reference to the last `Digit`
    #[doc(hidden)]
    #[inline]
    #[const_fn(cfg(feature = "const_support"))]
    #[must_use]
    pub const fn last_mut(&'a mut self) -> &'a mut Digit {
        // Safety: There is at least one digit since bitwidth has a nonzero invariant
        unsafe { self.get_unchecked_mut(self.len() - 1) }
    }

    /// Clears the unused bits. This is only needed if you are using certain
    /// hidden functions to write to the digits directly.
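    ///
    /// For example, a sketch that uses the hidden [Bits::last_mut] accessor to
    /// deliberately set unused bits before clearing them:
    ///
    /// ```
    /// use awint::{inlawi, Bits, InlAwi};
    /// let mut x = inlawi!(0u7);
    /// // set every bit of the last digit, including the unused ones
    /// *x.last_mut() = !0;
    /// x.clear_unused_bits();
    /// // only the 7 bits within the bitwidth remain set
    /// assert_eq!(x.to_u8(), 0x7f);
    /// ```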
    #[doc(hidden)]
    #[inline]
    #[const_fn(cfg(feature = "const_support"))]
    pub const fn clear_unused_bits(&mut self) {
        if self.extra() == 0 {
            return // There are no unused bits
        }
        *self.last_mut() &= MAX >> (BITS - self.extra());
    }

    /// Some functions cannot handle set unused bits, so this acts as a quick
    /// way to check if unused bits are indeed clear.
    ///
    /// # Panics
    ///
    /// Panics if unused bits are set.
    #[doc(hidden)]
    #[track_caller]
    #[const_fn(cfg(feature = "const_support"))]
    pub const fn assert_cleared_unused_bits(&self) {
        let one: Digit = 1;
        if (self.extra() != 0) && (self.last() >= one.wrapping_shl(self.extra() as u32)) {
            panic!(
                "unused bits are set in a `Bits` struct, they have been set with one of the \
                 hidden functions and not properly unset with `Bits::clear_unused_bits`"
            );
        }
    }

    /// Returns a reference to all of the underlying bits of `self`, including
    /// unused bits.
    ///
    /// # Note
    ///
    /// If the `Bits` has unused bits, those bits will always be set to zero,
    /// even if the `Bits` are intended to be a sign extended integer.
    #[doc(hidden)]
    #[inline]
    #[const_fn(cfg(feature = "const_support"))]
    #[must_use]
    pub const fn as_slice(&'a self) -> &'a [Digit] {
        // Safety: `Bits` already follows standard slice initialization invariants. This
        // acquires a subslice that includes everything except for the metadata digits.
        unsafe { &*ptr::slice_from_raw_parts(self.as_ptr(), self.len()) }
    }

    /// Same as [Bits::as_slice] except it also includes the metadata digits
    #[doc(hidden)]
    #[inline]
    #[const_fn(cfg(feature = "const_support"))]
    #[must_use]
    pub const fn as_raw_slice(&'a self) -> &'a [Digit] {
        // Safety: `Bits` already follows standard slice initialization invariants. This
        // acquires the full raw slice, including the metadata digits.
        unsafe { &*ptr::slice_from_raw_parts(self.as_ptr(), self.raw_len()) }
    }

    /// Returns a mutable reference to all of the underlying bits of `self`,
    /// including unused bits.
    ///
    /// # Note
    ///
    /// Unused bits can be temporarily set but should be cleared before they
    /// are used by another function that expects the standard `Bits` invariants
    /// to be upheld. Set unused bits will not cause Rust undefined behavior,
    /// but may cause incorrect arithmetical results or panics.
    #[doc(hidden)]
    #[inline]
    #[const_fn(cfg(feature = "const_support"))]
    #[must_use]
    pub const fn as_mut_slice(&'a mut self) -> &'a mut [Digit] {
        // Safety: `Bits` already follows standard slice initialization invariants. This
        // acquires a subslice that excludes the metadata digits, so that the
        // bitwidth cannot be mutated.
        unsafe { &mut *ptr::slice_from_raw_parts_mut(self.as_mut_ptr(), self.len()) }
    }

    /// Returns a `u8` slice reference to the underlying bits of `self`,
    /// including unused bits (which occur if `self.bw()` is not a multiple of
    /// `Digit::BITS`).
    ///
    /// # Note
    ///
    /// If the `Bits` has unused bits, those bits will always be set to zero,
    /// even if the `Bits` are intended to be a sign extended integer.
    ///
    /// # Portability
    ///
    /// This function is highly non-portable across architectures, see the
    /// source code of [Bits::rand_] for how to handle this
    #[doc(hidden)]
    #[const_fn(cfg(feature = "const_support"))]
    #[must_use]
    pub const fn as_bytes_full_width_nonportable(&'a self) -> &'a [u8] {
        // Previously, this function used to be called "portable" because it was
        // intended as a way to get a slice view of `Bits` independent of `Digit` width.
        // It worked with unused bits by making the slice length such that the unused
        // bits were only in the last byte, which at first glance is portable. However,
        // I completely forgot about big-endian systems. Not taking a full width of the
        // byte slice can result in significant bytes being completely disregarded.
        let size_in_u8 = self.len() * DIGIT_BYTES;
        // Safety: Adding on to what is satisfied in `as_slice`, [Digit] can always be
        // divided into [u8] and the correct length is calculated above. If the bitwidth
        // is not a multiple of eight, there must be at least enough unused bits to form
        // one more byte. This is returned as a reference with a constrained lifetime,
        // so we can't run into any deallocation alignment UB.
        unsafe { &*ptr::slice_from_raw_parts(self.as_ptr() as *const u8, size_in_u8) }
    }

    /// Returns a mutable `u8` slice reference to the underlying bits of
    /// `self`, including unused bits (which occur if `self.bw()` is not a
    /// multiple of `Digit::BITS`).
    ///
    /// # Note
    ///
    /// Unused bits can be temporarily set but should be cleared before they
    /// are used by another function that expects the standard `Bits` invariants
    /// to be upheld. Set unused bits will not cause Rust undefined behavior,
    /// but may cause incorrect arithmetical results or panics.
    ///
    /// # Portability
    ///
    /// This function is highly non-portable across architectures, see the
    /// source code of [Bits::rand_] for how to handle this
    #[doc(hidden)]
    #[const_fn(cfg(feature = "const_support"))]
    #[must_use]
    #[inline(always)] // this is needed for `unstable_from_u8_slice`
    pub const fn as_mut_bytes_full_width_nonportable(&'a mut self) -> &'a mut [u8] {
        let size_in_u8 = self.len() * DIGIT_BYTES;
        // Safety: Same reasoning as `as_bytes_full_width_nonportable`
        unsafe { &mut *ptr::slice_from_raw_parts_mut(self.as_mut_ptr() as *mut u8, size_in_u8) }
    }

    /// Assigns the bits of `buf` to `self`. If `(buf.len() * 8) > self.bw()`
    /// then the corresponding bits in `buf` beyond `self.bw()` are ignored. If
    /// `(buf.len() * 8) < self.bw()` then the rest of the bits in `self` are
    /// zeroed. This function is portable across target architecture pointer
    /// sizes and endianness.
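    ///
    /// For example, a sketch with an arbitrary 3 byte buffer assigned to a 16
    /// bit value:
    ///
    /// ```
    /// use awint::{inlawi, Bits, InlAwi};
    /// let mut x = inlawi!(0u16);
    /// // bytes are little endian; the third byte is beyond `x.bw()` and ignored
    /// x.u8_slice_(&[0xcd, 0xab, 0xff]);
    /// assert_eq!(x.to_u16(), 0xabcd);
    /// ```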
    #[const_fn(cfg(feature = "const_support"))]
    pub const fn u8_slice_(&'a mut self, buf: &[u8]) {
        let self_byte_width = self.len() * DIGIT_BYTES;
        let min_width = if self_byte_width < buf.len() {
            self_byte_width
        } else {
            buf.len()
        };
        // start of digits that will not be completely overwritten
        let start = min_width / DIGIT_BYTES;
        unsafe {
            // zero out first.
            self.digit_set(false, start..self.len(), false);
            // Safety: `src` is valid for reads at least up to `min_width`, `dst` is valid
            // for writes at least up to `min_width`, they are aligned, and are
            // nonoverlapping because `self` is a mutable reference.
            ptr::copy_nonoverlapping(
                buf.as_ptr(),
                self.as_mut_bytes_full_width_nonportable().as_mut_ptr(),
                min_width,
            );
            // `start` can be `self.len()`, so cap it
            let cap = if start >= self.len() {
                self.len()
            } else {
                start + 1
            };
            const_for!(i in {0..cap} {
                // correct for big endian, otherwise no-op
                *self.get_unchecked_mut(i) = Digit::from_le(self.get_unchecked(i));
            });
        }
        self.clear_unused_bits();
    }

    /// Assigns the bits of `self` to `buf`. If `(buf.len() * 8) > self.bw()`
    /// then the corresponding bits in `buf` beyond `self.bw()` are zeroed. If
    /// `(buf.len() * 8) < self.bw()` then the bits of `self` beyond the buffer
    /// are simply not written. This function is portable across target
    /// architecture pointer sizes and endianness.
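    ///
    /// For example, a sketch mirroring the [Bits::u8_slice_] example:
    ///
    /// ```
    /// use awint::{inlawi, Bits, InlAwi};
    /// let x = inlawi!(0xabcdu16);
    /// let mut buf = [0xff_u8; 4];
    /// x.to_u8_slice(&mut buf);
    /// // little endian bytes, with the bytes beyond `x.bw()` zeroed
    /// assert_eq!(buf, [0xcd, 0xab, 0, 0]);
    /// ```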
    #[const_fn(cfg(feature = "const_support"))]
    pub const fn to_u8_slice(&'a self, buf: &mut [u8]) {
        let self_byte_width = self.len() * DIGIT_BYTES;
        let min_width = if self_byte_width < buf.len() {
            self_byte_width
        } else {
            buf.len()
        };
        #[cfg(target_endian = "little")]
        {
            unsafe {
                // Safety: `src` is valid for reads at least up to `min_width`, `dst` is valid
                // for writes at least up to `min_width`, they are aligned, and are
                // nonoverlapping because `buf` is a mutable reference.
                ptr::copy_nonoverlapping(
                    self.as_bytes_full_width_nonportable().as_ptr(),
                    buf.as_mut_ptr(),
                    min_width,
                );
            }
        }
        #[cfg(target_endian = "big")]
        {
            const_for!(i in {0..self.len()} {
                let x = self.as_slice()[i];
                let start = i * DIGIT_BYTES;
                let end = if (start + DIGIT_BYTES) > buf.len() {
                    buf.len()
                } else {
                    start + DIGIT_BYTES
                };
                let mut s = 0;
                const_for!(j in {start..end} {
                    buf[j] = (x >> s) as u8;
                    s += 8;
                });
            });
        }
        unsafe {
            // zero remaining bytes.
            // Safety: `min_width` cannot be more than `buf.len()`
            ptr::write_bytes(buf.as_mut_ptr().add(min_width), 0, buf.len() - min_width);
        }
    }

    /// # Safety
    ///
    /// `range` must satisfy `range.start <= range.end` and `range.end <=
    /// self.len()`
    #[doc(hidden)]
    #[inline]
    #[const_fn(cfg(feature = "const_support"))]
    pub const unsafe fn digit_set(
        &mut self,
        val: bool,
        range: Range<usize>,
        clear_unused_bits: bool,
    ) {
        debug_assert!(range.end <= self.len());
        debug_assert!(range.start <= range.end);
        //let byte = if val { u8::MAX } else { 0 };
        //ptr::write_bytes(
        //    self.as_mut_ptr().add(range.start),
        //    byte,
        //    range.end - range.start,
        //);
        let digit = if val { MAX } else { 0 };
        unsafe_for_each_mut!(
            self,
            x,
            { range.start..range.end }
            { *x = digit },
            clear_unused_bits
        );
    }

    /// Gets one `Digit` from `self` starting at the bit index `start`.
    /// Bits that extend beyond `self.bw()` are zeroed.
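    ///
    /// For example (portable because the checked part of the output is kept
    /// within the `u8` range):
    ///
    /// ```
    /// use awint::{inlawi, Bits, InlAwi};
    /// let x = inlawi!(0xabcdu16);
    /// // the digit starting at bit index 4 has 0xbc in its lowest 8 bits
    /// assert_eq!(x.get_digit(4) as u8, 0xbc);
    /// ```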
    #[doc(hidden)]
    #[const_fn(cfg(feature = "const_support"))]
    #[must_use]
    pub const fn get_digit(&self, start: usize) -> Digit {
        let digits = digits_u(start);
        let bits = extra_u(start);
        let mut tmp = 0;
        // Safety: The checks avoid indexing beyond `self.len() - 1`
        unsafe {
            if digits < self.len() {
                tmp = self.get_unchecked(digits) >> bits;
                if bits != 0 && ((digits + 1) < self.len()) {
                    tmp |= self.get_unchecked(digits + 1) << (BITS - bits);
                }
            }
            tmp
        }
    }

    /// Gets two `Digit`s from `self` starting at the bit index `start`, and
    /// returns them in little endian order. Bits that extend beyond `self.bw()`
    /// are zeroed.
    #[doc(hidden)]
    #[const_fn(cfg(feature = "const_support"))]
    #[must_use]
    pub const fn get_double_digit(&self, start: usize) -> (Digit, Digit) {
        let digits = digits_u(start);
        let bits = extra_u(start);
        let mut first = 0;
        let mut second = 0;
        // Safety: The checks avoid indexing beyond `self.len() - 1`
        unsafe {
            if digits < self.len() {
                first = self.get_unchecked(digits) >> bits;
                if (digits + 1) < self.len() {
                    let mid = self.get_unchecked(digits + 1);
                    if bits == 0 {
                        second = mid;
                    } else {
                        first |= mid << (BITS - bits);
                        second = mid >> bits;
                        if (digits + 2) < self.len() {
                            second |= self.get_unchecked(digits + 2) << (BITS - bits);
                        }
                    };
                }
            }
            (first, second)
        }
    }
}

impl fmt::Debug for Bits {
    /// Forwards to the `LowerHex` impl. We cannot use decimal because it would
    /// require allocation.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        fmt::LowerHex::fmt(self, f)
    }
}

impl fmt::Display for Bits {
    /// Forwards to the `Debug` impl
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        fmt::Debug::fmt(self, f)
    }
}

impl fmt::LowerHex for Bits {
    /// Lowercase hexadecimal formatting.
    ///
    /// ```
    /// use awint::{inlawi, Bits, InlAwi};
    /// assert_eq!(
    ///     format!("{:x}", inlawi!(0xfedcba9876543210u100)),
    ///     "0xfedcba98_76543210_u100"
    /// );
    /// ```
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        self.debug_format_hexadecimal(f, false)
    }
}

impl fmt::UpperHex for Bits {
    /// Uppercase hexadecimal formatting.
    ///
    /// ```
    /// use awint::{inlawi, Bits, InlAwi};
    /// assert_eq!(
    ///     format!("{:X}", inlawi!(0xFEDCBA9876543210u100)),
    ///     "0xFEDCBA98_76543210_u100"
    /// );
    /// ```
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        self.debug_format_hexadecimal(f, true)
    }
}

impl fmt::Octal for Bits {
    /// Octal formatting.
    ///
    /// ```
    /// use awint::{inlawi, Bits, InlAwi};
    /// assert_eq!(
    ///     format!("{:o}", inlawi!(0o776543210u100)),
    ///     "0o7_76543210_u100"
    /// );
    /// ```
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        self.debug_format_octal(f)
    }
}

impl fmt::Binary for Bits {
    /// Binary formatting.
    ///
    /// ```
    /// use awint::{inlawi, Bits, InlAwi};
    /// assert_eq!(format!("{:b}", inlawi!(11000101)), "0b11000101_u8");
    /// ```
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        self.debug_format_binary(f)
    }
}

impl fmt::Pointer for Bits {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let ptr = self.as_ptr();
        fmt::Pointer::fmt(&ptr, f)
    }
}

impl Hash for Bits {
    /// Note: this impl is not portable across platforms (see the Portability
    /// section of the `Bits` documentation).
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.bw().hash(state);
        self.as_slice().hash(state);
    }
}

#[cfg(feature = "zeroize_support")]
impl zeroize::Zeroize for Bits {
    fn zeroize(&mut self) {
        self.as_mut_slice().zeroize()
    }
}