byte_set/
lib.rs

1//! <div align="center">
2//!     <h1>
3//!         <a href="https://github.com/nvzqz/byte-set-rs">
4//!             ByteSet
5//!         </a>
6//!     </h1>
7//!     <a href="https://crates.io/crates/byte_set">
8//!         <img src="https://img.shields.io/crates/v/byte_set.svg" alt="Crates.io">
9//!         <img src="https://img.shields.io/crates/d/byte_set.svg" alt="Downloads">
10//!     </a>
11//!     <a href="https://docs.rs/byte_set">
12//!         <img src="https://docs.rs/byte_set/badge.svg" alt="docs.rs">
13//!     </a>
14//!     <a href="https://github.com/nvzqz/byte-set-rs/actions?query=workflow%3ACI">
15//!         <img src="https://github.com/nvzqz/byte-set-rs/workflows/CI/badge.svg" alt="Build Status">
16//!     </a>
17//!     <br><br>
18//! </div>
19//!
20//! Efficient sets of bytes for Rust, brought to you by [@NikolaiVazquez]!
21//!
22//! The star of the show is [`ByteSet`]: an allocation-free sorted set. It is a
23//! *much faster* alternative to [`HashSet<u8>`], [`BTreeSet<u8>`], and other
24//! types for a variety of scenarios. See ["Implementation"](#implementation)
25//! for a peek under the hood.
26//!
27//! If you found this library useful, please consider [sponsoring me on
28//! GitHub](https://github.com/sponsors/nvzqz)!
29//!
30//! ## Table of Contents
31//!
32//! 1. [Usage](#usage)
33//! 2. [Examples](#examples)
34//!    1. [`ByteSet` Type](#byteset-type)
35//!       1. [Insert](#insert)
36//!       2. [Extend](#extend)
37//!       3. [Remove](#remove)
38//!       4. [Iterate](#iterate)
39//!       5. [Contains](#contains)
40//!       6. [Subset](#subset)
41//!       7. [Min and Max](#min-and-max)
42//!    2. [`byte_set!` Macro](#byte_set-macro)
43//! 3. [Implementation](#implementation)
44//! 4. [Benchmarks](#benchmarks)
45//! 5. [Ecosystem Integrations](#ecosystem-integrations)
46//!    1. [`rand`](#rand)
47//!    2. [`serde`](#serde)
48//! 6. [License](#license)
49//!
50//! ## Usage
51//!
52//! This library is available [on crates.io][crate] and can be used by adding
53//! the following to your project's [`Cargo.toml`]:
54//!
55//! ```toml
56//! [dependencies]
57//! byte_set = "0.1.3"
58//! ```
59//!
60//! To import the [`byte_set!`] macro, add this to your crate root (`main.rs` or
61//! `lib.rs`):
62//!
63//! ```rust
64//! use byte_set::byte_set;
65//! ```
66//!
67//! If you're not using [Rust 2018 edition][2018], it must be imported
68//! differently:
69//!
70//! ```rust
71//! #[macro_use]
72//! extern crate byte_set;
73//! # fn main() {}
74//! ```
75//!
76//! ## Examples
77//!
78//! ### `ByteSet` Type
79//!
80//! First, let's import [`ByteSet`]:
81//!
82//! ```rust
83//! use byte_set::ByteSet;
84//! ```
85//!
86//! Here's how you create an empty set:
87//!
88//! ```rust
89//! # use byte_set::ByteSet;
90//! let bytes = ByteSet::new();
91//! ```
92//!
93//! You can create a set filled with all bytes (0 through 255) just as easily:
94//!
95//! ```rust
96//! # use byte_set::ByteSet;
97//! let bytes = ByteSet::full();
98//! ```
99//!
100//! Ok, let's see what we can do with this. Note that this isn't the only
101//! available functionality. See [`ByteSet`] for a complete list.
102//!
103//! #### Insert
104//!
105//! Use [`insert`] to include a single byte, by mutating the [`ByteSet`]
106//! in-place:
107//!
108//! ```rust
109//! # use byte_set::ByteSet;
110//! let mut bytes = ByteSet::new();
111//! bytes.insert(255);
112//! ```
113//!
114//! Use [`inserting`] as an immutable alternative, by passing the calling
115//! [`ByteSet`] by value:
116//!
117//! ```rust
118//! # use byte_set::ByteSet;
119//! let bytes = ByteSet::new().inserting(255);
120//! ```
121//!
122//! Use [`insert_all`] to include all bytes of another [`ByteSet`], by mutating
123//! the [`ByteSet`] in-place:
124//!
125//! ```rust
126//! # use byte_set::ByteSet;
127//! let mut alphabet = ByteSet::ASCII_UPPERCASE;
128//! alphabet.insert_all(ByteSet::ASCII_LOWERCASE);
129//!
130//! assert_eq!(alphabet, ByteSet::ASCII_ALPHABETIC);
131//! ```
132//!
133//! Use [`inserting_all`] as an immutable alternative, by passing the calling
134//! [`ByteSet`] by value:
135//!
136//! ```rust
137//! # use byte_set::ByteSet;
138//! let alphabet = ByteSet::ASCII_UPPERCASE.inserting_all(ByteSet::ASCII_LOWERCASE);
139//!
140//! assert_eq!(alphabet, ByteSet::ASCII_ALPHABETIC);
141//! ```
142//!
143//! #### Extend
144//!
145//! Rather than call [`insert`] in a loop, [`extend`] simplifies inserting from
146//! an iterator:
147//!
148//! ```rust
149//! # use byte_set::ByteSet;
150//! fn take_string(bytes: &mut ByteSet, s: &str) {
151//!     bytes.extend(s.as_bytes());
152//! }
153//! ```
154//!
155//! Because this iterates over the entire input, it is *much* more efficient to
156//! use [`insert_all`] instead of [`extend`] when inserting another [`ByteSet`].
157//!
158//! #### Remove
159//!
160//! Use [`remove`] to exclude a single byte by mutating the set in-place:
161//!
162//! ```rust
163//! # use byte_set::ByteSet;
164//! let mut bytes = ByteSet::full();
165//! bytes.remove(255);
166//! ```
167//!
168//! Use [`removing`] as an immutable alternative, by passing the calling
169//! [`ByteSet`] by value:
170//!
171//! ```rust
172//! # use byte_set::ByteSet;
173//! let bytes = ByteSet::full().removing(255);
174//! ```
175//!
176//! Use [`remove_all`] to exclude all bytes of another [`ByteSet`], by mutating
177//! the [`ByteSet`] in-place:
178//!
179//! ```rust
180//! # use byte_set::ByteSet;
181//! let mut alphabet = ByteSet::ASCII_ALPHANUMERIC;
182//! alphabet.remove_all(ByteSet::ASCII_DIGIT);
183//!
184//! assert_eq!(alphabet, ByteSet::ASCII_ALPHABETIC);
185//! ```
186//!
187//! Use [`removing_all`] as an immutable alternative, by passing the calling
188//! [`ByteSet`] by value:
189//!
190//! ```rust
191//! # use byte_set::ByteSet;
192//! let alphabet = ByteSet::ASCII_ALPHANUMERIC.removing_all(ByteSet::ASCII_DIGIT);
193//!
194//! assert_eq!(alphabet, ByteSet::ASCII_ALPHABETIC);
195//! ```
196//!
197//! #### Iterate
198//!
199//! Iterating can be done with just a `for` loop, and goes in order from least
200//! to greatest:
201//!
202//! ```rust
203//! # use byte_set::ByteSet;
204//! # fn do_work(_: u8) {}
205//! fn small_to_big(bytes: ByteSet) {
206//!     for byte in bytes {
207//!         do_work(byte);
208//!     }
209//! }
210//! ```
211//!
212//! Iterating in reverse is slightly more verbose, and goes in order from
213//! greatest to least:
214//!
215//! ```rust
216//! # use byte_set::ByteSet;
217//! # fn do_work(_: u8) {}
218//! fn big_to_small(bytes: ByteSet) {
219//!     for byte in bytes.into_iter().rev() {
220//!         do_work(byte);
221//!     }
222//! }
223//! ```
224//!
225//! #### Contains
226//!
227//! It wouldn't really be a set if you couldn't check if it has specific items.
228//!
229//! Use [`contains`] to check a single byte:
230//!
231//! ```rust
232//! # use byte_set::ByteSet;
233//! fn has_null(bytes: &ByteSet) -> bool {
234//!     bytes.contains(0)
235//! }
236//! ```
237//!
238//! Use [`contains_any`] to check for any matches in another [`ByteSet`]:
239//!
240//! ```rust
241//! # use byte_set::ByteSet;
242//! fn intersects(a: &ByteSet, b: &ByteSet) -> bool {
243//!     a.contains_any(b)
244//! }
245//! ```
246//!
247//! #### Subset
248//!
249//! Use [`is_subset`] to check that all of the bytes in a [`ByteSet`] are
250//! contained in another:
251//!
252//! ```rust
253//! # use byte_set::ByteSet;
254//! fn test(a: &ByteSet, b: &ByteSet) {
255//!     assert!(a.is_subset(b));
256//!
257//!     // Always passes because every set is a subset of itself.
258//!     assert!(a.is_subset(a));
259//! }
260//! ```
261//!
262//! Use [`is_strict_subset`] to check [`is_subset`] *and* that the sets are not
263//! the same:
264//!
265//! ```rust
266//! # use byte_set::ByteSet;
267//! fn test(a: &ByteSet, b: &ByteSet) {
268//!     assert!(a.is_strict_subset(b));
269//!
270//!     // `a` is equal to itself.
271//!     assert!(!a.is_strict_subset(a));
272//! }
273//! ```
274//!
275//! For the sake of completion, there is also [`is_superset`] and
276//! [`is_strict_superset`], which call these functions with `a` and `b`
277//! switched.
278//!
279//! #### Min and Max
280//!
281//! Use [`first`] to get the smallest byte and [`last`] to get the biggest byte:
282//!
283//! ```rust
284//! # use byte_set::ByteSet;
285//! fn sanity_check(bytes: &ByteSet) {
286//!     if let (Some(first), Some(last)) = (bytes.first(), bytes.last()) {
287//!         assert!(first <= last);
288//!     } else {
289//!         // `bytes` is empty.
290//!     }
291//! }
292//! ```
293//!
294//! These are the first and last bytes returned when iterating.
295//!
296//! ### `byte_set!` Macro
297//!
298//! [`byte_set!`] enables you to create a [`ByteSet`] with the same syntax as
299//! [`vec!`] or array expressions:
300//!
301//! ```rust
302//! # use byte_set::byte_set;
303//! let bytes = byte_set![1, 2, 3, b'x', b'y', b'z'];
304//! ```
305//!
306//! It even works at compile-time in a `const` expression:
307//!
308//! ```rust
309//! # use byte_set::{ByteSet, byte_set};
310//! const WHOA: ByteSet = byte_set![b'w', b'h', b'o', b'a'];
311//!
312//! static ABC: ByteSet = byte_set![b'a', b'c', b'c'];
313//! ```
314//!
315//! ## Implementation
316//!
317//! [`ByteSet`] is implemented as a 256-bit mask where each bit corresponds to a
318//! byte value. The first (least significant) bit in the mask represents the
319//! first byte (0) in the set. Likewise, the last last (most significant) bit
320//! represents the last byte (255).
321//!
322//! Given the following [`ByteSet`]:
323//!
324//! ```rust
325//! # use byte_set::byte_set;
326//! let bytes = byte_set![0, 1, 4, 5, 244];
327//! ```
328//!
329//! The in-memory representation of `bytes` would look like:
330//!
331//! ```text
332//!  Byte: 0 1 2 3 4 5 6 7 ... 253 244 255
333//! Value: 1 1 0 0 1 1 0 0 ...  0   1   0
334//! ```
335//!
336//! This bit mask is composed of either `[u64; 4]` or `[u32; 8]` depending on
337//! the target CPU (see [#3]). Because this comes out to only 32 bytes,
338//! [`ByteSet`] implements [`Copy`].
339//!
340//! ## Benchmarks
341//!
342//! I will upload benchmarks run from my machine soon.
343//!
344//! In the meantime, you can benchmark this library by running:
345//!
346//! ```sh
347//! cargo bench
348//! ```
349//!
350//! By default, this will benchmark [`ByteSet`] along with various other types
351//! to compare performance. Note that this will take **a long time** (about 1
352//! hour and 30 minutes).
353//!
354//! Benchmark only [`ByteSet`] by running:
355//!
356//! ```sh
357//! cargo bench ByteSet
358//! ```
359//!
360//! This takes about 15 minutes, so maybe grab a coffee in the meantime.
361//!
362//! Benchmark a specific [`ByteSet`] operation by running:
363//!
364//! ```sh
365//! cargo bench $operation/ByteSet
366//! ```
367//!
368//! See `/benches/benchmarks` for strings that can be used for `$operation`.
369//!
370//! Note that `cargo bench` takes a regular expression, so `Contains (Random)`
371//! will not work because the parentheses are treated as a capture group. To
372//! match parentheses, escape them: `Contains \(Random\)`.
373//!
374//! ## Ecosystem Integrations
375//!
376//! This library has extended functionality for some popular crates.
377//!
378//! ### `rand`
379//!
380//! Use the `rand` (or `rand_core`) feature in your [`Cargo.toml`] to enable
381//! random [`ByteSet`] generation:
382//!
383//! ```toml
384//! [dependencies.byte_set]
385//! version = "0.1.3"
386//! features = ["rand"]
387//! ```
388//!
389//! This makes the following possible:
390//!
391//! ```rust,ignore
392//! let bytes = rand::random::<ByteSet>();
393//!
394//! // Same as above.
395//! let bytes = ByteSet::rand(rand::thread_rng());
396//!
397//! // Handle failure instead of panicking.
398//! match ByteSet::try_rand(rand::rngs::OsRng) {
399//!     Ok(bytes)  => // ...
400//!     Err(error) => // ...
401//! }
402//! ```
403//!
404//! ### `serde`
405//!
406//! Use the `serde` feature in your [`Cargo.toml`] to enable [`Serialize`] and
407//! [`Deserialize`] for [`ByteSet`]:
408//!
409//! ```toml
410//! [dependencies.byte_set]
411//! version = "0.1.3"
412//! features = ["serde"]
413//! ```
414//!
415//! This makes the following possible:
416//!
417//! ```rust,ignore
418//! use serde::{Serialize, Deserialize};
419//!
420//! #[derive(Serialize, Deserialize)]
421//! struct MyValue {
422//!     bytes: ByteSet
423//! }
424//! ```
425//!
426//! [`ByteSet`] can be serialized into a `u8` sequence, and deserialized from
427//! `&[u8]` or a `u8` sequence.
428//!
429//! Read more about using `serde` at [serde.rs](https://serde.rs/).
430//!
431//! ## License
432//!
433//! This project is released under either:
434//!
435//! - [MIT License](https://github.com/nvzqz/byte-set-rs/blob/master/LICENSE-MIT)
436//!
437//! - [Apache License (Version 2.0)](https://github.com/nvzqz/byte-set-rs/blob/master/LICENSE-APACHE)
438//!
439//! at your choosing.
440//!
441//! [@NikolaiVazquez]: https://twitter.com/NikolaiVazquez
442//!
443//! [`Cargo.toml`]: https://doc.rust-lang.org/cargo/reference/manifest.html
444//! [2018]:         https://blog.rust-lang.org/2018/12/06/Rust-1.31-and-rust-2018.html#rust-2018
445//! [crate]:        https://crates.io/crates/byte_set
446//!
447//! [`BTreeSet<u8>`]:   https://doc.rust-lang.org/std/collections/struct.BTreeSet.html
448//! [`Copy`]:           https://doc.rust-lang.org/std/marker/trait.Copy.html
449//! [`HashSet<u8>`]:    https://doc.rust-lang.org/std/collections/struct.HashSet.html
450//! [`u8`]:             https://doc.rust-lang.org/std/primitive.u8.html
451//! [`vec!`]:           https://doc.rust-lang.org/std/macro.vec.html
452//!
453//! [`Serialize`]:   https://docs.rs/serde/1.*/serde/trait.Serialize.html
454//! [`Deserialize`]: https://docs.rs/serde/1.*/serde/trait.Deserialize.html
455//!
456//! [#3]: https://github.com/nvzqz/byte-set-rs/issues/3
457//!
458//! [`byte_set!`]:          macro.byte_set.html
459//! [`ByteSet`]:            struct.ByteSet.html
460//! [`contains_any`]:       struct.ByteSet.html#method.contains_any
461//! [`contains`]:           struct.ByteSet.html#method.contains
462//! [`extend`]:             struct.ByteSet.html#impl-Extend%3Cu8%3E
463//! [`first`]:              struct.ByteSet.html#method.first
464//! [`insert_all`]:         struct.ByteSet.html#method.insert_all
465//! [`insert`]:             struct.ByteSet.html#method.insert
466//! [`inserting_all`]:      struct.ByteSet.html#method.inserting_all
467//! [`inserting`]:          struct.ByteSet.html#method.inserting
468//! [`last`]:               struct.ByteSet.html#method.last
469//! [`remove_all`]:         struct.ByteSet.html#method.remove_all
470//! [`remove`]:             struct.ByteSet.html#method.remove
471//! [`removing_all`]:       struct.ByteSet.html#method.removing_all
472//! [`removing`]:           struct.ByteSet.html#method.removing
473//! [`is_strict_subset`]:   struct.ByteSet.html#method.is_strict_subset
474//! [`is_subset`]:          struct.ByteSet.html#method.is_subset
475//! [`is_strict_superset`]: struct.ByteSet.html#method.is_strict_superset
476//! [`is_superset`]:        struct.ByteSet.html#method.is_superset
477
478#![cfg_attr(docsrs, feature(doc_cfg))]
479#![cfg_attr(not(test), no_std)]
480#![warn(missing_docs)]
481
482use core::{cmp, fmt, hash, iter::FromIterator, mem, ops, slice};
483
484// Makes `ByteSet::{rand,try_rand}` simpler to express.
485#[cfg(feature = "rand")]
486use rand as rand_core;
487
488#[macro_use]
489mod macros;
490
491#[cfg(test)]
492#[macro_use]
493mod tests_macros;
494
495#[cfg(test)]
496mod tests;
497
498pub(crate) mod chunk;
499pub(crate) use chunk::Chunk;
500
501mod iter;
502pub use iter::Iter;
503
504const SLOT_SIZE: usize = mem::size_of::<Chunk>();
505
506const NUM_SLOTS: usize = 256 / 8 / SLOT_SIZE;
507
508const LAST_SLOT_INDEX: usize = NUM_SLOTS - 1;
509
510/// An efficient, general-purpose set of [`u8`]s.
511///
512/// # Implementation
513///
514/// This is a 256-bit mask where a byte is contained based on whether its bit is
515/// enabled. The first (least significant) bit in the mask represents the first
516/// byte in the set. Likewise, the last last (most significant) bit represents
517/// the last byte.
518///
519/// The mask is composed a of "chunk" array. Each chunk is either 64 or 32 bits
520/// wide, depending on the target architecture. As of right now, this is based
521/// on native register size. This may change in the future based on target
522/// features that enable better performance.
523///
524/// [`u8`]: https://doc.rust-lang.org/std/primitive.u8.html
525#[derive(Clone, Copy, PartialEq, Eq)]
526#[repr(transparent)]
527pub struct ByteSet([Chunk; NUM_SLOTS]);
528
529/// Returns the chunk index for `byte` and the bit shift for that chunk.
530#[inline]
531const fn chunk_index_and_shift(byte: u8) -> (usize, usize) {
532    let byte = byte as usize;
533
534    #[cfg(target_pointer_width = "64")]
535    let index = byte >> 6;
536    #[cfg(target_pointer_width = "64")]
537    let shift = byte & 0b0011_1111;
538
539    #[cfg(not(target_pointer_width = "64"))]
540    let index = byte >> 5;
541    #[cfg(not(target_pointer_width = "64"))]
542    let shift = byte & 0b0001_1111;
543
544    (index, shift)
545}
546
547#[cfg(test)]
548impl ByteSet {
549    /// Returns a formatting proxy for the binary representation of `self`.
550    ///
551    /// `fmt::Binary` is not currently implemented for `ByteSet` because of the
552    /// extra work to support formatting options.
553    pub(crate) fn fmt_binary<'a>(&'a self) -> impl fmt::Display + 'a {
554        struct Formatted<'a>(&'a ByteSet);
555
556        impl fmt::Display for Formatted<'_> {
557            fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
558                for chunk in &(self.0).0 {
559                    #[cfg(target_pointer_width = "64")]
560                    write!(f, "{:064b}", chunk)?;
561
562                    #[cfg(not(target_pointer_width = "64"))]
563                    write!(f, "{:032b}", chunk)?;
564                }
565                Ok(())
566            }
567        }
568
569        Formatted(self)
570    }
571}
572
573impl ByteSet {
574    /// Returns a set containing no bytes.
575    #[inline]
576    #[must_use]
577    pub const fn new() -> Self {
578        Self([0; NUM_SLOTS])
579    }
580
581    /// Returns a set containing all bytes (0-255).
582    #[inline]
583    #[must_use]
584    pub const fn full() -> Self {
585        Self([Chunk::max_value(); NUM_SLOTS])
586    }
587
588    /// Returns a set containing only `byte`.
589    ///
590    /// # Examples
591    ///
592    /// ```
593    /// # use byte_set::ByteSet;
594    /// let byte = 42;
595    /// let set = ByteSet::from_byte(byte);
596    ///
597    /// assert_eq!(set.first(), Some(byte));
598    /// assert_eq!(set.last(), Some(byte));
599    /// ```
600    #[inline]
601    #[must_use]
602    pub const fn from_byte(byte: u8) -> Self {
603        byte_set![byte]
604    }
605
606    /// Construct a ByteSet from a `RangeTo` value, i.e. `..x`
607    #[inline]
608    #[must_use]
609    pub const fn from_range_to(range: ops::RangeTo<u8>) -> Self {
610        const fn chunk_for(
611            this_chunk: usize,
612            byte_chunk: usize,
613            shift: usize,
614        ) -> Chunk {
615            // the following code is equivalent to
616            // if this_chunk == byte_chunk {
617            //     value
618            // } else if this_chunk < byte_chunk {
619            //     Chunk::max_value()
620            // } else {
621            //     0
622            // }
623            //
624            // Once `if` works in const, this can be cleaned up
625            // https://github.com/rust-lang/rust/pull/72437
626            let value: Chunk = (1 << shift) - 1;
627            let is_equal = (this_chunk == byte_chunk) as usize;
628            let is_less_than = (this_chunk < byte_chunk) as usize;
629            let if_unequal = [0, Chunk::max_value()][is_less_than];
630
631            [if_unequal, value][is_equal]
632        }
633        let (index, shift) = chunk_index_and_shift(range.end);
634        #[cfg(target_pointer_width = "64")]
635        let array = [
636            chunk_for(0, index, shift),
637            chunk_for(1, index, shift),
638            chunk_for(2, index, shift),
639            chunk_for(3, index, shift),
640        ];
641        #[cfg(not(target_pointer_width = "64"))]
642        let array = [
643            chunk_for(0, index, shift),
644            chunk_for(1, index, shift),
645            chunk_for(2, index, shift),
646            chunk_for(3, index, shift),
647            chunk_for(4, index, shift),
648            chunk_for(5, index, shift),
649            chunk_for(6, index, shift),
650            chunk_for(7, index, shift),
651        ];
652        ByteSet(array)
653    }
654
655    /// Construct a ByteSet from a `RangeToInclusive` value, i.e. `..=x`
656    #[inline]
657    #[must_use]
658    pub const fn from_range_to_inclusive(
659        range: ops::RangeToInclusive<u8>,
660    ) -> Self {
661        [
662            Self::full(),
663            Self::from_range_to(..(range.end.wrapping_add(1))),
664        ][(range.end != 255) as usize]
665    }
666
667    /// Construct a ByteSet from a `RangeFrom` value, i.e. `x..`
668    #[inline]
669    #[must_use]
670    pub const fn from_range_from(range: ops::RangeFrom<u8>) -> Self {
671        Self::from_range_to(..range.start).not()
672    }
673
674    /// Construct a ByteSet from a `RangeToInclusive` value, i.e. `x..y`
675    #[inline]
676    #[must_use]
677    pub const fn from_range(range: ops::Range<u8>) -> Self {
678        Self::from_range_from(range.start..)
679            .intersection(Self::from_range_to(..range.end))
680    }
681
682    /// Construct a ByteSet from a `RangeInclusive` value, i.e. `x..=y`
683    #[inline]
684    #[must_use]
685    pub const fn from_range_inclusive(range: ops::RangeInclusive<u8>) -> Self {
686        Self::from_range_from(*range.start()..)
687            .intersection(Self::from_range_to_inclusive(..=*range.end()))
688    }
689
690    /// Returns a set containing uniformly-distributed random bytes from `rng`.
691    ///
692    /// This uses [`fill_bytes`] under the hood.
693    ///
694    /// [`fill_bytes`]: https://docs.rs/rand_core/0.5.*/rand_core/trait.RngCore.html#tymethod.fill_bytes
695    #[cfg(any(feature = "rand", feature = "rand_core"))]
696    #[cfg_attr(docsrs, doc(cfg(any(feature = "rand", feature = "rand_core"))))]
697    #[inline]
698    pub fn rand<R: rand_core::RngCore>(mut rng: R) -> Self {
699        let mut set = Self::new();
700        rng.fill_bytes(set.as_bytes_mut());
701        set
702    }
703
704    /// Returns a set containing uniformly-distributed random bytes from `rng`,
705    /// or `Err` if `rng` failed.
706    ///
707    /// This uses [`try_fill_bytes`] under the hood.
708    ///
709    /// [`try_fill_bytes`]: https://docs.rs/rand_core/0.5.*/rand_core/trait.RngCore.html#tymethod.try_fill_bytes
710    #[cfg(any(feature = "rand", feature = "rand_core"))]
711    #[cfg_attr(docsrs, doc(cfg(any(feature = "rand", feature = "rand_core"))))]
712    #[inline]
713    pub fn try_rand<R: rand_core::RngCore>(
714        mut rng: R,
715    ) -> Result<Self, rand_core::Error> {
716        let mut set = Self::new();
717        rng.try_fill_bytes(set.as_bytes_mut())?;
718        Ok(set)
719    }
720
721    /// Returns `true` if `self` contains no bytes.
722    ///
723    /// This is more efficient than checking `self.len() == 0`.
724    #[inline]
725    #[must_use]
726    #[allow(clippy::let_and_return)]
727    pub const fn is_empty(&self) -> bool {
728        let is_empty = (self.0[0] == 0)
729            & (self.0[1] == 0)
730            & (self.0[2] == 0)
731            & (self.0[3] == 0);
732
733        #[cfg(not(target_pointer_width = "64"))]
734        {
735            is_empty
736                & (self.0[4] == 0)
737                & (self.0[5] == 0)
738                & (self.0[6] == 0)
739                & (self.0[7] == 0)
740        }
741
742        #[cfg(target_pointer_width = "64")]
743        is_empty
744    }
745
746    /// Returns `true` if `self` contains all bytes.
747    ///
748    /// This is more efficient than checking `self.len() == 256`.
749    #[inline]
750    #[must_use]
751    #[allow(clippy::let_and_return)]
752    pub const fn is_full(&self) -> bool {
753        let is_full = (self.0[0] == !0)
754            & (self.0[1] == !0)
755            & (self.0[2] == !0)
756            & (self.0[3] == !0);
757
758        #[cfg(not(target_pointer_width = "64"))]
759        {
760            is_full
761                & (self.0[4] == !0)
762                & (self.0[5] == !0)
763                & (self.0[6] == !0)
764                & (self.0[7] == !0)
765        }
766
767        #[cfg(target_pointer_width = "64")]
768        is_full
769    }
770
771    /// Returns the number of bytes contained in `self`.
772    #[cfg_attr(target_feature = "popcnt", inline)]
773    #[must_use]
774    #[allow(clippy::let_and_return)]
775    pub const fn len(&self) -> usize {
776        let len = (self.0[0].count_ones() as usize)
777            + (self.0[1].count_ones() as usize)
778            + (self.0[2].count_ones() as usize)
779            + (self.0[3].count_ones() as usize);
780
781        #[cfg(not(target_pointer_width = "64"))]
782        {
783            len + (self.0[4].count_ones() as usize)
784                + (self.0[5].count_ones() as usize)
785                + (self.0[6].count_ones() as usize)
786                + (self.0[7].count_ones() as usize)
787        }
788
789        #[cfg(target_pointer_width = "64")]
790        len
791    }
792
793    /// Removes all bytes from `self`.
794    #[inline]
795    pub fn clear(&mut self) {
796        *self = ByteSet::new();
797    }
798
799    /// Returns the first (least) byte in `self`, or `None` if `self` is empty.
800    pub fn first(&self) -> Option<u8> {
801        for (i, &chunk) in self.0.iter().enumerate() {
802            if let Some(lsb) = chunk::lsb(chunk) {
803                return Some(lsb + (i * chunk::INDEX_OFFSET) as u8);
804            }
805        }
806        None
807    }
808
809    /// Removes the first (least) byte in `self` and returns it, or `None` if
810    /// `self` is empty.
811    pub fn pop_first(&mut self) -> Option<u8> {
812        for (i, chunk) in self.0.iter_mut().enumerate() {
813            if let Some(lsb) = chunk::pop_lsb(chunk) {
814                return Some(lsb + (i * chunk::INDEX_OFFSET) as u8);
815            }
816        }
817        None
818    }
819
820    /// Returns the last (greatest) byte in `self`, or `None` if `self` is
821    /// empty.
822    pub fn last(&self) -> Option<u8> {
823        for (i, &chunk) in self.0.iter().rev().enumerate() {
824            if let Some(msb) = chunk::msb(chunk) {
825                let i = LAST_SLOT_INDEX - i;
826                return Some(msb + (i * chunk::INDEX_OFFSET) as u8);
827            }
828        }
829        None
830    }
831
832    /// Removes the last (least) byte in `self` and returns it, or `None` if
833    /// `self` is empty.
834    pub fn pop_last(&mut self) -> Option<u8> {
835        for (i, chunk) in self.0.iter_mut().rev().enumerate() {
836            if let Some(msb) = chunk::pop_msb(chunk) {
837                let i = LAST_SLOT_INDEX - i;
838                return Some(msb + (i * chunk::INDEX_OFFSET) as u8);
839            }
840        }
841        None
842    }
843
844    /// Inserts `byte` into `self` in-place.
845    ///
846    /// Unlike [`HashSet::insert`] and [`BTreeSet::insert`], this does not
847    /// return a `bool` for whether `byte` was not present. This is because it's
848    /// just as efficient to call [`contains`](#method.contains) before.
849    ///
850    /// [`HashSet::insert`]:  https://doc.rust-lang.org/std/collections/struct.HashSet.html#method.insert
851    /// [`BTreeSet::insert`]: https://doc.rust-lang.org/std/collections/struct.BTreeSet.html#method.insert
852    #[inline]
853    pub fn insert(&mut self, byte: u8) {
854        let (index, shift) = chunk_index_and_shift(byte);
855
856        self.0[index] |= 1 << shift;
857    }
858
859    /// Inserts all bytes of `other` into `self` in-place.
860    #[inline]
861    pub fn insert_all(&mut self, other: Self) {
862        self.0[0] |= other.0[0];
863        self.0[1] |= other.0[1];
864        self.0[2] |= other.0[2];
865        self.0[3] |= other.0[3];
866
867        #[cfg(not(target_pointer_width = "64"))]
868        {
869            self.0[4] |= other.0[4];
870            self.0[5] |= other.0[5];
871            self.0[6] |= other.0[6];
872            self.0[7] |= other.0[7];
873        }
874    }
875
876    /// Returns a copy of `self` with `byte` inserted.
877    #[inline]
878    #[must_use]
879    pub const fn inserting(mut self, byte: u8) -> Self {
880        let (index, shift) = chunk_index_and_shift(byte);
881
882        self.0[index] |= 1 << shift;
883        self
884    }
885
886    /// Returns a copy of `self` with all of `other` inserted.
887    ///
888    /// This is equivalent to the [`union`](#method.union) method.
889    #[inline]
890    #[must_use]
891    pub const fn inserting_all(self, other: Self) -> Self {
892        self.union(other)
893    }
894
895    /// Removes `byte` from `self` in-place.
896    ///
897    /// Unlike [`HashSet::remove`] and [`BTreeSet::remove`], this does not
898    /// return a `bool` for whether `byte` was present. This is because it's
899    /// just as efficient to call [`contains`](#method.contains) before.
900    ///
901    /// [`HashSet::remove`]:  https://doc.rust-lang.org/std/collections/struct.HashSet.html#method.remove
902    /// [`BTreeSet::remove`]: https://doc.rust-lang.org/std/collections/struct.BTreeSet.html#method.remove
903    #[inline]
904    pub fn remove(&mut self, byte: u8) {
905        let (index, shift) = chunk_index_and_shift(byte);
906
907        self.0[index] &= !(1 << shift);
908    }
909
910    /// Removes all bytes of `other` from `self` in-place.
911    #[inline]
912    pub fn remove_all(&mut self, other: Self) {
913        *self &= !other;
914    }
915
916    /// Returns a copy of `self` with `byte` removed.
917    #[inline]
918    #[must_use]
919    pub const fn removing(mut self, byte: u8) -> Self {
920        let (index, shift) = chunk_index_and_shift(byte);
921
922        self.0[index] &= !(1 << shift);
923        self
924    }
925
926    /// Returns a copy of `self` with `byte` removed.
927    #[inline]
928    #[must_use]
929    pub const fn removing_all(self, other: Self) -> Self {
930        self.difference(other)
931    }
932
933    /// Sets `byte` in `self` to `enabled` in-place.
934    #[inline]
935    pub fn set(&mut self, byte: u8, enabled: bool) {
936        let (index, shift) = chunk_index_and_shift(byte);
937        let chunk = self.0[index];
938
939        self.0[index] = (chunk & !(1 << shift)) | ((enabled as Chunk) << shift);
940    }
941
942    /// Returns a copy of `self` with `byte` set to `enabled`.
943    #[inline]
944    #[must_use]
945    pub const fn setting(mut self, byte: u8, enabled: bool) -> Self {
946        let (index, shift) = chunk_index_and_shift(byte);
947        let chunk = self.0[index];
948
949        self.0[index] = (chunk & !(1 << shift)) | ((enabled as Chunk) << shift);
950        self
951    }
952
953    /// Returns `true` if `byte` is contained in `self`.
954    #[inline]
955    #[must_use]
956    pub const fn contains(&self, byte: u8) -> bool {
957        let (index, shift) = chunk_index_and_shift(byte);
958
959        self.0[index] & (1 << shift) != 0
960    }
961
962    #[inline]
963    #[must_use]
964    const fn chunk_and_or(&self, other: &Self) -> Chunk {
965        map_reduce_chunks!(self, other, &, |)
966    }
967
968    /// Returns `true` if `self` contains any bytes in `other`.
969    #[inline]
970    #[must_use]
971    // Not `const` because it may be later improved with SIMD intrinsics.
972    pub fn contains_any(&self, other: &Self) -> bool {
973        self.chunk_and_or(other) != 0
974    }
975
976    #[inline]
977    const fn _is_subset(&self, other: &Self) -> bool {
978        self.intersection(*other).eq(self)
979    }
980
981    /// Returns `true` if `other` contains all bytes in `self`.
982    #[inline]
983    #[must_use]
984    // Not `const` because it may be later improved with SIMD intrinsics.
985    pub fn is_subset(&self, other: &Self) -> bool {
986        self._is_subset(other)
987    }
988
989    /// Returns `true` if `other` contains all bytes in `self` and at least one
990    /// other byte not contained in `self`.
991    ///
992    /// This is also known as a "proper subset".
993    #[must_use]
994    // Not inlined because lots of code is generated on x86.
995    // Not `const` because it may be later improved with SIMD intrinsics.
996    pub fn is_strict_subset(&self, other: &Self) -> bool {
997        // On x86, checking inequality first produces less code and uses fewer
998        // registers.
999        self.ne(other) && self.is_subset(other)
1000    }
1001
1002    /// Returns `true` if `self` contains all bytes in `other`.
1003    #[inline]
1004    #[must_use]
1005    pub fn is_superset(&self, other: &Self) -> bool {
1006        other.is_subset(self)
1007    }
1008
1009    /// Returns `true` if `self` contains all bytes in `other` and at least one
1010    /// other byte not contained in `other`.
1011    ///
1012    /// This is also known as a "proper superset".
1013    #[inline]
1014    #[must_use]
1015    pub fn is_strict_superset(&self, other: &Self) -> bool {
1016        other.is_strict_subset(self)
1017    }
1018
1019    /// Returns `true` if `self` and `other` have no bytes in common.
1020    #[inline]
1021    #[must_use]
1022    // Not `const` because it may be later improved with SIMD intrinsics.
1023    pub fn is_disjoint(&self, other: &Self) -> bool {
1024        self.intersection(*other).is_empty()
1025    }
1026
1027    /// Returns a set with the bytes contained in `self`, but not in `other`.
1028    #[inline]
1029    #[must_use]
1030    pub const fn difference(self, other: Self) -> Self {
1031        self.intersection(other.not())
1032    }
1033
1034    /// Returns a set with the bytes contained in `self` or `other`, but not in
1035    /// both.
1036    #[inline]
1037    #[must_use]
1038    pub const fn symmetric_difference(self, other: Self) -> Self {
1039        map_chunks!(self, ^, other)
1040    }
1041
1042    /// Returns a set with the bytes contained both in `self` and `other`.
1043    #[inline]
1044    #[must_use]
1045    pub const fn intersection(self, other: Self) -> Self {
1046        map_chunks!(self, &, other)
1047    }
1048
1049    /// Returns a new set with the bytes contained in `self` or `other`.
1050    #[inline]
1051    #[must_use]
1052    pub const fn union(self, other: Self) -> Self {
1053        map_chunks!(self, |, other)
1054    }
1055
1056    /// Returns a new set with the bytes not contained in `self`.
1057    ///
1058    /// This exists because the [`Not`] trait cannot be used in `const` yet.
1059    ///
1060    /// [`Not`]: https://doc.rust-lang.org/std/ops/trait.Not.html
1061    #[inline]
1062    #[must_use]
1063    #[allow(clippy::should_implement_trait)]
1064    pub const fn not(self) -> Self {
1065        map_chunks!(self, !)
1066    }
1067
1068    /// Returns `self` with its bits reversed.
1069    ///
1070    /// This is equivalent to checking for `255 - b` in all subsequent searches
1071    /// of `b`.
1072    #[must_use]
1073    // The `rbit` instruction makes this reasonable to inline.
1074    #[cfg_attr(target_arch = "aarch64", inline)]
1075    // Not inlined because lots of code is generated on x86.
1076    pub const fn reverse_bits(self) -> Self {
1077        Self([
1078            #[cfg(not(target_pointer_width = "64"))]
1079            self.0[7].reverse_bits(),
1080            #[cfg(not(target_pointer_width = "64"))]
1081            self.0[6].reverse_bits(),
1082            #[cfg(not(target_pointer_width = "64"))]
1083            self.0[5].reverse_bits(),
1084            #[cfg(not(target_pointer_width = "64"))]
1085            self.0[4].reverse_bits(),
1086            self.0[3].reverse_bits(),
1087            self.0[2].reverse_bits(),
1088            self.0[1].reverse_bits(),
1089            self.0[0].reverse_bits(),
1090        ])
1091    }
1092
1093    /// Returns `true` if `self` and `other` contain the same bytes.
1094    ///
1095    /// This exists because `PartialEq` is currently unstable in `const`.
1096    #[inline]
1097    #[must_use]
1098    #[allow(clippy::should_implement_trait)]
1099    #[allow(clippy::let_and_return)]
1100    pub const fn eq(&self, other: &Self) -> bool {
1101        let eq = (self.0[0] == other.0[0])
1102            & (self.0[1] == other.0[1])
1103            & (self.0[2] == other.0[2])
1104            & (self.0[3] == other.0[3]);
1105
1106        #[cfg(not(target_pointer_width = "64"))]
1107        {
1108            eq & (self.0[4] == other.0[4])
1109                & (self.0[5] == other.0[5])
1110                & (self.0[6] == other.0[6])
1111                & (self.0[7] == other.0[7])
1112        }
1113
1114        #[cfg(target_pointer_width = "64")]
1115        eq
1116    }
1117
1118    /// Returns `true` if `self` and `other` do not contain the same bytes.
1119    ///
1120    /// This exists because `PartialEq` is currently unstable in `const`.
1121    #[inline]
1122    #[must_use]
1123    #[allow(clippy::should_implement_trait)]
1124    pub const fn ne(&self, other: &Self) -> bool {
1125        !self.eq(other)
1126    }
1127}
1128
1129/// Operations related to the ASCII character set.
1130impl ByteSet {
1131    /// The set of all ASCII characters: U+0000 NULL ..= U+007F DELETE.
1132    ///
1133    /// # Examples
1134    ///
1135    /// This contains all bytes for which [`u8::is_ascii`] returns `true`:
1136    ///
1137    /// ```
1138    /// # use byte_set::ByteSet;
1139    /// for byte in ByteSet::ASCII {
1140    ///     assert!(byte.is_ascii());
1141    /// }
1142    ///
1143    /// for byte in !ByteSet::ASCII {
1144    ///     assert!(!byte.is_ascii());
1145    /// }
1146    /// ```
1147    ///
1148    /// [`u8::is_ascii`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii
1149    pub const ASCII: Self = {
1150        #[cfg(target_pointer_width = "64")]
1151        {
1152            Self([!0, !0, 0, 0])
1153        }
1154
1155        #[cfg(not(target_pointer_width = "64"))]
1156        {
1157            Self([!0, !0, !0, !0, 0, 0, 0, 0])
1158        }
1159    };
1160
1161    /// The set of all ASCII alphabetic characters:
1162    ///
1163    /// - U+0041 'A' ..= U+005A 'Z'
1164    /// - U+0061 'a' ..= U+007A 'z'
1165    ///
1166    /// # Examples
1167    ///
1168    /// This contains all bytes for which [`u8::is_ascii_alphabetic`] returns
1169    /// `true`:
1170    ///
1171    /// ```
1172    /// # use byte_set::ByteSet;
1173    /// for byte in ByteSet::ASCII_ALPHABETIC {
1174    ///     assert!(byte.is_ascii_alphabetic());
1175    /// }
1176    ///
1177    /// for byte in !ByteSet::ASCII_ALPHABETIC {
1178    ///     assert!(!byte.is_ascii_alphabetic());
1179    /// }
1180    /// ```
1181    ///
1182    /// [`u8::is_ascii_alphabetic`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_alphabetic
1183    pub const ASCII_ALPHABETIC: Self =
1184        Self::ASCII_LOWERCASE.inserting_all(Self::ASCII_UPPERCASE);
1185
1186    /// The set of all ASCII uppercase characters: U+0041 'A' ..= U+005A 'Z'.
1187    ///
1188    /// # Examples
1189    ///
1190    /// This contains all bytes for which [`u8::is_ascii_uppercase`] returns
1191    /// `true`:
1192    ///
1193    /// ```
1194    /// # use byte_set::ByteSet;
1195    /// for byte in ByteSet::ASCII_UPPERCASE {
1196    ///     assert!(byte.is_ascii_uppercase());
1197    /// }
1198    ///
1199    /// for byte in !ByteSet::ASCII_UPPERCASE {
1200    ///     assert!(!byte.is_ascii_uppercase());
1201    /// }
1202    /// ```
1203    ///
1204    /// [`u8::is_ascii_uppercase`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_uppercase
1205    pub const ASCII_UPPERCASE: Self = Self::from_range_inclusive(b'A'..=b'Z');
1206
1207    /// The set of all ASCII lowercase characters: U+0061 'a' ..= U+007A 'z'.
1208    ///
1209    /// # Examples
1210    ///
1211    /// This contains all bytes for which [`u8::is_ascii_lowercase`] returns
1212    /// `true`:
1213    ///
1214    /// ```
1215    /// # use byte_set::ByteSet;
1216    /// for byte in ByteSet::ASCII_LOWERCASE {
1217    ///     assert!(byte.is_ascii_lowercase());
1218    /// }
1219    ///
1220    /// for byte in !ByteSet::ASCII_LOWERCASE {
1221    ///     assert!(!byte.is_ascii_lowercase());
1222    /// }
1223    /// ```
1224    ///
1225    /// [`u8::is_ascii_lowercase`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_lowercase
1226    pub const ASCII_LOWERCASE: Self = Self::from_range_inclusive(b'a'..=b'z');
1227
1228    /// The set of all ASCII alphanumeric characters:
1229    ///
1230    /// - U+0041 'A' ..= U+005A 'Z'
1231    /// - U+0061 'a' ..= U+007A 'z'
1232    /// - U+0030 '0' ..= U+0039 '9'
1233    ///
1234    /// # Examples
1235    ///
1236    /// This contains all bytes for which [`u8::is_ascii_alphanumeric`] returns
1237    /// `true`:
1238    ///
1239    /// ```
1240    /// # use byte_set::ByteSet;
1241    /// for byte in ByteSet::ASCII_ALPHANUMERIC {
1242    ///     assert!(byte.is_ascii_alphanumeric());
1243    /// }
1244    ///
1245    /// for byte in !ByteSet::ASCII_ALPHANUMERIC {
1246    ///     assert!(!byte.is_ascii_alphanumeric());
1247    /// }
1248    /// ```
1249    ///
1250    /// [`u8::is_ascii_alphanumeric`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_alphanumeric
1251    pub const ASCII_ALPHANUMERIC: Self =
1252        Self::ASCII_ALPHABETIC.inserting_all(Self::ASCII_DIGIT);
1253
1254    /// The set of all ASCII decimal digits: U+0030 '0' ..= U+0039 '9'.
1255    ///
1256    /// # Examples
1257    ///
1258    /// This contains all bytes for which [`u8::is_ascii_digit`] returns `true`:
1259    ///
1260    /// ```
1261    /// # use byte_set::ByteSet;
1262    /// for byte in ByteSet::ASCII_DIGIT {
1263    ///     assert!(byte.is_ascii_digit());
1264    /// }
1265    ///
1266    /// for byte in !ByteSet::ASCII_DIGIT {
1267    ///     assert!(!byte.is_ascii_digit());
1268    /// }
1269    /// ```
1270    ///
1271    /// [`u8::is_ascii_digit`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_digit
1272    pub const ASCII_DIGIT: Self = Self::from_range_inclusive(b'0'..=b'9');
1273
1274    /// The set of all ASCII hexadecimal digits:
1275    ///
1276    /// - U+0030 '0' ..= U+0039 '9'
1277    /// - U+0041 'A' ..= U+0046 'F'
1278    /// - U+0061 'a' ..= U+0066 'f'
1279    ///
1280    /// # Examples
1281    ///
1282    /// This contains all bytes for which [`u8::is_ascii_hexdigit`] returns
1283    /// `true`:
1284    ///
1285    /// ```
1286    /// # use byte_set::ByteSet;
1287    /// for byte in ByteSet::ASCII_HEXDIGIT {
1288    ///     assert!(byte.is_ascii_hexdigit());
1289    /// }
1290    ///
1291    /// for byte in !ByteSet::ASCII_HEXDIGIT {
1292    ///     assert!(!byte.is_ascii_hexdigit());
1293    /// }
1294    /// ```
1295    ///
1296    /// [`u8::is_ascii_hexdigit`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_hexdigit
1297    pub const ASCII_HEXDIGIT: Self = Self::ASCII_DIGIT
1298        .inserting_all(Self::from_range_inclusive(b'A'..=b'F'))
1299        .inserting_all(Self::from_range_inclusive(b'a'..=b'f'));
1300
1301    /// The set of all ASCII punctuation characters:
1302    ///
1303    /// - U+0021 ..= U+002F `! " # $ % & ' ( ) * + , - . /`
1304    /// - U+003A ..= U+0040 `: ; < = > ? @`
1305    /// - U+005B ..= U+0060 ``[ \ ] ^ _ ` ``
1306    /// - U+007B ..= U+007E `{ | } ~`
1307    ///
1308    /// # Examples
1309    ///
1310    /// This contains all bytes for which [`u8::is_ascii_punctuation`] returns
1311    /// `true`:
1312    ///
1313    /// ```
1314    /// # use byte_set::ByteSet;
1315    /// for byte in ByteSet::ASCII_PUNCTUATION {
1316    ///     assert!(byte.is_ascii_punctuation());
1317    /// }
1318    ///
1319    /// for byte in !ByteSet::ASCII_PUNCTUATION {
1320    ///     assert!(!byte.is_ascii_punctuation());
1321    /// }
1322    /// ```
1323    ///
1324    /// [`u8::is_ascii_punctuation`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_punctuation
1325    pub const ASCII_PUNCTUATION: Self = byte_set![
1326        b'!', b'"', b'#', b'$', b'%', b'&', b'\'', b'(', b')', b'*', b'+',
1327        b',', b'-', b'.', b'/', b':', b';', b'<', b'=', b'>', b'?', b'@', b'[',
1328        b'\\', b']', b'^', b'_', b'`', b'{', b'|', b'}', b'~',
1329    ];
1330
1331    /// The set of all ASCII graphic characters: U+0021 '!' ..= U+007E '~'.
1332    ///
1333    /// # Examples
1334    ///
1335    /// This contains all bytes for which [`u8::is_ascii_graphic`] returns
1336    /// `true`:
1337    ///
1338    /// ```
1339    /// # use byte_set::ByteSet;
1340    /// for byte in ByteSet::ASCII_GRAPHIC {
1341    ///     assert!(byte.is_ascii_graphic());
1342    /// }
1343    ///
1344    /// for byte in !ByteSet::ASCII_GRAPHIC {
1345    ///     assert!(!byte.is_ascii_graphic());
1346    /// }
1347    /// ```
1348    ///
1349    /// [`u8::is_ascii_graphic`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_graphic
1350    pub const ASCII_GRAPHIC: Self =
1351        Self::ASCII_ALPHANUMERIC.inserting_all(Self::ASCII_PUNCTUATION);
1352
1353    /// The set of all ASCII whitespace characters:
1354    ///
1355    /// - U+0020 SPACE
1356    /// - U+0009 HORIZONTAL TAB
1357    /// - U+000A LINE FEED
1358    /// - U+000C FORM FEED
1359    /// - U+000D CARRIAGE RETURN
1360    ///
1361    /// # Examples
1362    ///
1363    /// This contains all bytes for which [`u8::is_ascii_whitespace`] returns
1364    /// `true`:
1365    ///
1366    /// ```
1367    /// # use byte_set::ByteSet;
1368    /// for byte in ByteSet::ASCII_WHITESPACE {
1369    ///     assert!(byte.is_ascii_whitespace());
1370    /// }
1371    ///
1372    /// for byte in !ByteSet::ASCII_WHITESPACE {
1373    ///     assert!(!byte.is_ascii_whitespace());
1374    /// }
1375    /// ```
1376    ///
1377    /// [`u8::is_ascii_whitespace`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_whitespace
1378    pub const ASCII_WHITESPACE: Self =
1379        byte_set![b'\t', b'\n', 0x0C, b'\r', b' '];
1380
1381    /// The set of all ASCII control characters:
1382    ///
1383    /// - U+0000 NUL ..= U+001F UNIT SEPARATOR
1384    /// - U+007F DELETE.
1385    ///
1386    /// Note that most ASCII whitespace characters are control characters, but
1387    /// SPACE is not.
1388    ///
1389    /// # Examples
1390    ///
1391    /// This contains all bytes for which [`u8::is_ascii_control`] returns
1392    /// `true`:
1393    ///
1394    /// ```
1395    /// # use byte_set::ByteSet;
1396    /// for byte in ByteSet::ASCII_CONTROL {
1397    ///     assert!(byte.is_ascii_control());
1398    /// }
1399    ///
1400    /// for byte in !ByteSet::ASCII_CONTROL {
1401    ///     assert!(!byte.is_ascii_control());
1402    /// }
1403    /// ```
1404    ///
1405    /// [`u8::is_ascii_whitespace`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_whitespace
1406    pub const ASCII_CONTROL: Self =
1407        Self::from_range_inclusive(0..=0x1F).inserting(0x7F);
1408
1409    /// Returns `true` if [`u8::is_ascii`] returns `true` for all bytes in
1410    /// `self`.
1411    ///
1412    /// This is significantly more efficient than checking each byte in `self`
1413    /// individually.
1414    ///
1415    /// [`u8::is_ascii`]:
1416    /// https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii
1417    #[inline]
1418    #[must_use]
1419    pub const fn is_ascii(&self) -> bool {
1420        self._is_subset(&Self::ASCII)
1421    }
1422
1423    /// Returns `true` if [`u8::is_ascii_alphabetic`] returns `true` for all
1424    /// bytes in `self`.
1425    ///
1426    /// This is significantly more efficient than checking each byte in `self`
1427    /// individually.
1428    ///
1429    /// [`u8::is_ascii_alphabetic`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_alphabetic
1430    #[inline]
1431    #[must_use]
1432    pub const fn is_ascii_alphabetic(&self) -> bool {
1433        self._is_subset(&Self::ASCII_ALPHABETIC)
1434    }
1435
1436    /// Returns `true` if [`u8::is_ascii_uppercase`] returns `true` for all
1437    /// bytes in `self`.
1438    ///
1439    /// This is significantly more efficient than checking each byte in `self`
1440    /// individually.
1441    ///
1442    /// [`u8::is_ascii_uppercase`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_uppercase
1443    #[inline]
1444    #[must_use]
1445    pub const fn is_ascii_uppercase(&self) -> bool {
1446        self._is_subset(&Self::ASCII_UPPERCASE)
1447    }
1448
1449    /// Returns `true` if [`u8::is_ascii_lowercase`] returns `true` for all
1450    /// bytes in `self`.
1451    ///
1452    /// This is significantly more efficient than checking each byte in `self`
1453    /// individually.
1454    ///
1455    /// [`u8::is_ascii_lowercase`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_lowercase
1456    #[inline]
1457    #[must_use]
1458    pub const fn is_ascii_lowercase(&self) -> bool {
1459        self._is_subset(&Self::ASCII_LOWERCASE)
1460    }
1461
1462    /// Returns `true` if [`u8::is_ascii_alphanumeric`] returns `true` for all
1463    /// bytes in `self`.
1464    ///
1465    /// This is significantly more efficient than checking each byte in `self`
1466    /// individually.
1467    ///
1468    /// [`u8::is_ascii_alphanumeric`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_alphanumeric
1469    #[inline]
1470    #[must_use]
1471    pub const fn is_ascii_alphanumeric(&self) -> bool {
1472        self._is_subset(&Self::ASCII_ALPHANUMERIC)
1473    }
1474
1475    /// Returns `true` if [`u8::is_ascii_digit`] returns `true` for all bytes in
1476    /// `self`.
1477    ///
1478    /// This is significantly more efficient than checking each byte in `self`
1479    /// individually.
1480    ///
1481    /// [`u8::is_ascii_digit`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_digit
1482    #[inline]
1483    #[must_use]
1484    pub const fn is_ascii_digit(&self) -> bool {
1485        self._is_subset(&Self::ASCII_DIGIT)
1486    }
1487
1488    /// Returns `true` if [`u8::is_ascii_hexdigit`] returns `true` for all bytes
1489    /// in `self`.
1490    ///
1491    /// This is significantly more efficient than checking each byte in `self`
1492    /// individually.
1493    ///
1494    /// [`u8::is_ascii_hexdigit`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_hexdigit
1495    #[inline]
1496    #[must_use]
1497    pub const fn is_ascii_hexdigit(&self) -> bool {
1498        self._is_subset(&Self::ASCII_HEXDIGIT)
1499    }
1500
1501    /// Returns `true` if [`u8::is_ascii_punctuation`] returns `true` for all
1502    /// bytes in `self`.
1503    ///
1504    /// This is significantly more efficient than checking each byte in `self`
1505    /// individually.
1506    ///
1507    /// [`u8::is_ascii_punctuation`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_punctuation
1508    #[inline]
1509    #[must_use]
1510    pub const fn is_ascii_punctuation(&self) -> bool {
1511        self._is_subset(&Self::ASCII_PUNCTUATION)
1512    }
1513
1514    /// Returns `true` if [`u8::is_ascii_graphic`] returns `true` for all bytes
1515    /// in `self`.
1516    ///
1517    /// This is significantly more efficient than checking each byte in `self`
1518    /// individually.
1519    ///
1520    /// [`u8::is_ascii_graphic`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_graphic
1521    #[inline]
1522    #[must_use]
1523    pub const fn is_ascii_graphic(&self) -> bool {
1524        self._is_subset(&Self::ASCII_GRAPHIC)
1525    }
1526
1527    /// Returns `true` if [`u8::is_ascii_whitespace`] returns `true` for all
1528    /// bytes in `self`.
1529    ///
1530    /// This is significantly more efficient than checking each byte in `self`
1531    /// individually.
1532    ///
1533    /// [`u8::is_ascii_whitespace`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_whitespace
1534    #[inline]
1535    #[must_use]
1536    pub const fn is_ascii_whitespace(&self) -> bool {
1537        self._is_subset(&Self::ASCII_WHITESPACE)
1538    }
1539
1540    /// Returns `true` if [`u8::is_ascii_control`] returns `true` for all bytes
1541    /// in `self`.
1542    ///
1543    /// This is significantly more efficient than checking each byte in `self`
1544    /// individually.
1545    ///
1546    /// [`u8::is_ascii_control`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_control
1547    #[inline]
1548    #[must_use]
1549    pub const fn is_ascii_control(&self) -> bool {
1550        self._is_subset(&Self::ASCII_CONTROL)
1551    }
1552}
1553
1554/// Operations over the internal memory representation.
1555///
1556/// There are currently no stability guarantees over the internal bytes. This is
1557/// being tracked in [#8](https://github.com/nvzqz/byte-set-rs/issues/8).
1558impl ByteSet {
1559    const SIZE: usize = mem::size_of::<Self>();
1560
1561    /// Returns a shared reference to the underlying bytes of `self`.
1562    #[inline]
1563    pub fn as_bytes(&self) -> &[u8; Self::SIZE] {
1564        unsafe { &*self.0.as_ptr().cast() }
1565    }
1566
1567    /// Returns a mutable reference to the underlying bytes of `self`.
1568    #[inline]
1569    pub fn as_bytes_mut(&mut self) -> &mut [u8; Self::SIZE] {
1570        unsafe { &mut *self.0.as_mut_ptr().cast() }
1571    }
1572
1573    /// Returns a shared reference to the underlying bytes of `slice`.
1574    #[inline]
1575    pub fn slice_as_bytes(slice: &[Self]) -> &[u8] {
1576        let ptr = slice.as_ptr().cast::<u8>();
1577        let len = slice.len() * Self::SIZE;
1578        unsafe { slice::from_raw_parts(ptr, len) }
1579    }
1580
1581    /// Returns a mutable reference to the underlying bytes of `slice`.
1582    #[inline]
1583    pub fn slice_as_bytes_mut(slice: &mut [Self]) -> &mut [u8] {
1584        let ptr = slice.as_mut_ptr().cast::<u8>();
1585        let len = slice.len() * Self::SIZE;
1586        unsafe { slice::from_raw_parts_mut(ptr, len) }
1587    }
1588}
1589
1590impl Default for ByteSet {
1591    #[inline]
1592    fn default() -> Self {
1593        Self::new()
1594    }
1595}
1596
1597impl From<u8> for ByteSet {
1598    #[inline]
1599    fn from(byte: u8) -> ByteSet {
1600        ByteSet::from_byte(byte)
1601    }
1602}
1603
1604impl From<&[u8]> for ByteSet {
1605    #[inline]
1606    fn from(bytes: &[u8]) -> Self {
1607        let mut set = ByteSet::new();
1608        set.extend(bytes);
1609        set
1610    }
1611}
1612
1613impl From<&str> for ByteSet {
1614    #[inline]
1615    fn from(s: &str) -> Self {
1616        s.as_bytes().into()
1617    }
1618}
1619
1620impl From<ops::Range<u8>> for ByteSet {
1621    #[inline]
1622    fn from(range: ops::Range<u8>) -> Self {
1623        Self::from_range(range)
1624    }
1625}
1626
1627impl From<ops::RangeTo<u8>> for ByteSet {
1628    #[inline]
1629    fn from(range: ops::RangeTo<u8>) -> Self {
1630        Self::from_range_to(range)
1631    }
1632}
1633
1634impl From<ops::RangeFrom<u8>> for ByteSet {
1635    #[inline]
1636    fn from(range: ops::RangeFrom<u8>) -> Self {
1637        Self::from_range_from(range)
1638    }
1639}
1640
1641impl From<ops::RangeInclusive<u8>> for ByteSet {
1642    #[inline]
1643    fn from(range: ops::RangeInclusive<u8>) -> Self {
1644        Self::from_range_inclusive(range)
1645    }
1646}
1647
1648impl From<ops::RangeToInclusive<u8>> for ByteSet {
1649    #[inline]
1650    fn from(range: ops::RangeToInclusive<u8>) -> Self {
1651        Self::from_range_to_inclusive(range)
1652    }
1653}
1654
1655impl From<ops::RangeFull> for ByteSet {
1656    #[inline]
1657    fn from(_: ops::RangeFull) -> Self {
1658        Self::full()
1659    }
1660}
1661
1662impl Extend<u8> for ByteSet {
1663    fn extend<T: IntoIterator<Item = u8>>(&mut self, iter: T) {
1664        iter.into_iter().for_each(|byte| self.insert(byte));
1665    }
1666}
1667
1668impl<'a> Extend<&'a u8> for ByteSet {
1669    fn extend<T: IntoIterator<Item = &'a u8>>(&mut self, iter: T) {
1670        self.extend(iter.into_iter().cloned());
1671    }
1672}
1673
1674impl FromIterator<u8> for ByteSet {
1675    fn from_iter<T: IntoIterator<Item = u8>>(iter: T) -> Self {
1676        // Make sure to use `insert` over `inserting` to not copy so many bytes
1677        // on each iteration.
1678        let mut set = ByteSet::new();
1679        set.extend(iter);
1680        set
1681    }
1682}
1683
1684impl<'a> FromIterator<&'a u8> for ByteSet {
1685    fn from_iter<T: IntoIterator<Item = &'a u8>>(iter: T) -> Self {
1686        iter.into_iter().cloned().collect()
1687    }
1688}
1689
1690impl IntoIterator for ByteSet {
1691    type Item = u8;
1692    type IntoIter = Iter;
1693
1694    #[inline]
1695    fn into_iter(self) -> Self::IntoIter {
1696        self.into()
1697    }
1698}
1699
1700impl fmt::Debug for ByteSet {
1701    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1702        f.debug_set().entries(*self).finish()
1703    }
1704}
1705
1706// This is implemented manually over `[u8; 32]` because it seems to produce
1707// better code than `[usize; N]`.
1708impl PartialOrd for ByteSet {
1709    #[inline]
1710    fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
1711        Some(self.cmp(other))
1712    }
1713
1714    #[inline]
1715    fn lt(&self, other: &Self) -> bool {
1716        self.as_bytes().lt(other.as_bytes())
1717    }
1718
1719    #[inline]
1720    fn le(&self, other: &Self) -> bool {
1721        self.as_bytes().le(other.as_bytes())
1722    }
1723
1724    #[inline]
1725    fn gt(&self, other: &Self) -> bool {
1726        self.as_bytes().gt(other.as_bytes())
1727    }
1728
1729    #[inline]
1730    fn ge(&self, other: &Self) -> bool {
1731        self.as_bytes().ge(other.as_bytes())
1732    }
1733}
1734
1735impl Ord for ByteSet {
1736    #[inline]
1737    fn cmp(&self, other: &Self) -> cmp::Ordering {
1738        self.as_bytes().cmp(other.as_bytes())
1739    }
1740}
1741
1742#[allow(clippy::derive_hash_xor_eq)]
1743impl hash::Hash for ByteSet {
1744    #[inline]
1745    fn hash<H: hash::Hasher>(&self, state: &mut H) {
1746        self.as_bytes().hash(state)
1747    }
1748
1749    #[inline]
1750    fn hash_slice<H: hash::Hasher>(data: &[Self], state: &mut H) {
1751        Self::slice_as_bytes(data).hash(state)
1752    }
1753}
1754
1755impl ops::Sub for ByteSet {
1756    type Output = Self;
1757
1758    #[inline]
1759    fn sub(self, rhs: Self) -> Self::Output {
1760        self.removing_all(rhs)
1761    }
1762}
1763
1764impl ops::SubAssign for ByteSet {
1765    #[inline]
1766    fn sub_assign(&mut self, rhs: Self) {
1767        self.remove_all(rhs);
1768    }
1769}
1770
1771impl ops::BitAnd for ByteSet {
1772    type Output = Self;
1773
1774    #[inline]
1775    fn bitand(self, rhs: Self) -> Self::Output {
1776        self.intersection(rhs)
1777    }
1778}
1779
1780impl ops::BitAndAssign for ByteSet {
1781    #[inline]
1782    fn bitand_assign(&mut self, rhs: Self) {
1783        *self = *self & rhs;
1784    }
1785}
1786
1787impl ops::BitOr for ByteSet {
1788    type Output = Self;
1789
1790    #[inline]
1791    fn bitor(self, rhs: Self) -> Self::Output {
1792        self.inserting_all(rhs)
1793    }
1794}
1795
1796impl ops::BitOrAssign for ByteSet {
1797    #[inline]
1798    fn bitor_assign(&mut self, rhs: Self) {
1799        self.insert_all(rhs);
1800    }
1801}
1802
1803impl ops::BitXor for ByteSet {
1804    type Output = Self;
1805
1806    #[inline]
1807    fn bitxor(self, rhs: Self) -> Self::Output {
1808        self.symmetric_difference(rhs)
1809    }
1810}
1811
1812impl ops::BitXorAssign for ByteSet {
1813    #[inline]
1814    fn bitxor_assign(&mut self, rhs: Self) {
1815        *self = *self ^ rhs;
1816    }
1817}
1818
1819impl ops::Not for ByteSet {
1820    type Output = Self;
1821
1822    #[inline]
1823    fn not(self) -> Self::Output {
1824        ByteSet::not(self)
1825    }
1826}
1827
1828// Enables `rand::random::<ByteSet>()`.
1829#[cfg(feature = "rand")]
1830#[cfg_attr(docsrs, doc(cfg(feature = "rand")))]
1831impl rand::distributions::Distribution<ByteSet>
1832    for rand::distributions::Standard
1833{
1834    #[inline]
1835    fn sample<R: rand::Rng + ?Sized>(&self, rng: &mut R) -> ByteSet {
1836        ByteSet::rand(rng)
1837    }
1838}
1839
1840#[cfg(feature = "serde")]
1841#[cfg_attr(docsrs, doc(cfg(feature = "serde")))]
1842impl serde::Serialize for ByteSet {
1843    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
1844    where
1845        S: serde::Serializer,
1846    {
1847        use serde::ser::SerializeSeq;
1848
1849        let mut seq = serializer.serialize_seq(Some(self.len()))?;
1850        for byte in *self {
1851            seq.serialize_element(&byte)?;
1852        }
1853        seq.end()
1854    }
1855}
1856
1857#[cfg(feature = "serde")]
1858#[cfg_attr(docsrs, doc(cfg(feature = "serde")))]
1859impl<'de> serde::Deserialize<'de> for ByteSet {
1860    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
1861    where
1862        D: serde::Deserializer<'de>,
1863    {
1864        struct ByteSetVisitor;
1865
1866        impl<'de> serde::de::Visitor<'de> for ByteSetVisitor {
1867            type Value = ByteSet;
1868
1869            fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result {
1870                f.write_str("a set of bytes")
1871            }
1872
1873            fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E>
1874            where
1875                E: serde::de::Error,
1876            {
1877                Ok(v.into())
1878            }
1879
1880            fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
1881            where
1882                A: serde::de::SeqAccess<'de>,
1883            {
1884                let mut set = ByteSet::new();
1885                while let Some(byte) = seq.next_element::<u8>()? {
1886                    set.insert(byte);
1887                }
1888                Ok(set)
1889            }
1890        }
1891
1892        deserializer.deserialize_seq(ByteSetVisitor)
1893    }
1894}
byte_set/lib.rs

byte_set/
lib.rs