byte_set/lib.rs
1//! <div align="center">
2//! <h1>
3//! <a href="https://github.com/nvzqz/byte-set-rs">
4//! ByteSet
5//! </a>
6//! </h1>
7//! <a href="https://crates.io/crates/byte_set">
8//! <img src="https://img.shields.io/crates/v/byte_set.svg" alt="Crates.io">
9//! <img src="https://img.shields.io/crates/d/byte_set.svg" alt="Downloads">
10//! </a>
11//! <a href="https://docs.rs/byte_set">
12//! <img src="https://docs.rs/byte_set/badge.svg" alt="docs.rs">
13//! </a>
14//! <a href="https://github.com/nvzqz/byte-set-rs/actions?query=workflow%3ACI">
15//! <img src="https://github.com/nvzqz/byte-set-rs/workflows/CI/badge.svg" alt="Build Status">
16//! </a>
17//! <br><br>
18//! </div>
19//!
20//! Efficient sets of bytes for Rust, brought to you by [@NikolaiVazquez]!
21//!
22//! The star of the show is [`ByteSet`]: an allocation-free sorted set. It is a
23//! *much faster* alternative to [`HashSet<u8>`], [`BTreeSet<u8>`], and other
24//! types for a variety of scenarios. See ["Implementation"](#implementation)
25//! for a peek under the hood.
26//!
27//! If you found this library useful, please consider [sponsoring me on
28//! GitHub](https://github.com/sponsors/nvzqz)!
29//!
30//! ## Table of Contents
31//!
32//! 1. [Usage](#usage)
33//! 2. [Examples](#examples)
34//! 1. [`ByteSet` Type](#byteset-type)
35//! 1. [Insert](#insert)
36//! 2. [Extend](#extend)
37//! 3. [Remove](#remove)
38//! 4. [Iterate](#iterate)
39//! 5. [Contains](#contains)
40//! 6. [Subset](#subset)
41//! 7. [Min and Max](#min-and-max)
42//! 2. [`byte_set!` Macro](#byte_set-macro)
43//! 3. [Implementation](#implementation)
44//! 4. [Benchmarks](#benchmarks)
45//! 5. [Ecosystem Integrations](#ecosystem-integrations)
46//! 1. [`rand`](#rand)
47//! 2. [`serde`](#serde)
48//! 6. [License](#license)
49//!
50//! ## Usage
51//!
52//! This library is available [on crates.io][crate] and can be used by adding
53//! the following to your project's [`Cargo.toml`]:
54//!
55//! ```toml
56//! [dependencies]
57//! byte_set = "0.1.3"
58//! ```
59//!
60//! To import the [`byte_set!`] macro, add this to your crate root (`main.rs` or
61//! `lib.rs`):
62//!
63//! ```rust
64//! use byte_set::byte_set;
65//! ```
66//!
67//! If you're not using [Rust 2018 edition][2018], it must be imported
68//! differently:
69//!
70//! ```rust
71//! #[macro_use]
72//! extern crate byte_set;
73//! # fn main() {}
74//! ```
75//!
76//! ## Examples
77//!
78//! ### `ByteSet` Type
79//!
80//! First, let's import [`ByteSet`]:
81//!
82//! ```rust
83//! use byte_set::ByteSet;
84//! ```
85//!
86//! Here's how you create an empty set:
87//!
88//! ```rust
89//! # use byte_set::ByteSet;
90//! let bytes = ByteSet::new();
91//! ```
92//!
93//! You can create a set filled with all bytes (0 through 255) just as easily:
94//!
95//! ```rust
96//! # use byte_set::ByteSet;
97//! let bytes = ByteSet::full();
98//! ```
99//!
100//! Ok, let's see what we can do with this. Note that this isn't the only
101//! available functionality. See [`ByteSet`] for a complete list.
102//!
103//! #### Insert
104//!
105//! Use [`insert`] to include a single byte, by mutating the [`ByteSet`]
106//! in-place:
107//!
108//! ```rust
109//! # use byte_set::ByteSet;
110//! let mut bytes = ByteSet::new();
111//! bytes.insert(255);
112//! ```
113//!
114//! Use [`inserting`] as an immutable alternative, by passing the calling
115//! [`ByteSet`] by value:
116//!
117//! ```rust
118//! # use byte_set::ByteSet;
119//! let bytes = ByteSet::new().inserting(255);
120//! ```
121//!
122//! Use [`insert_all`] to include all bytes of another [`ByteSet`], by mutating
123//! the [`ByteSet`] in-place:
124//!
125//! ```rust
126//! # use byte_set::ByteSet;
127//! let mut alphabet = ByteSet::ASCII_UPPERCASE;
128//! alphabet.insert_all(ByteSet::ASCII_LOWERCASE);
129//!
130//! assert_eq!(alphabet, ByteSet::ASCII_ALPHABETIC);
131//! ```
132//!
133//! Use [`inserting_all`] as an immutable alternative, by passing the calling
134//! [`ByteSet`] by value:
135//!
136//! ```rust
137//! # use byte_set::ByteSet;
138//! let alphabet = ByteSet::ASCII_UPPERCASE.inserting_all(ByteSet::ASCII_LOWERCASE);
139//!
140//! assert_eq!(alphabet, ByteSet::ASCII_ALPHABETIC);
141//! ```
142//!
143//! #### Extend
144//!
145//! Rather than call [`insert`] in a loop, [`extend`] simplifies inserting from
146//! an iterator:
147//!
148//! ```rust
149//! # use byte_set::ByteSet;
150//! fn take_string(bytes: &mut ByteSet, s: &str) {
151//! bytes.extend(s.as_bytes());
152//! }
153//! ```
154//!
155//! Because this iterates over the entire input, it is *much* more efficient to
156//! use [`insert_all`] instead of [`extend`] when inserting another [`ByteSet`].
157//!
158//! #### Remove
159//!
160//! Use [`remove`] to exclude a single byte by mutating the set in-place:
161//!
162//! ```rust
163//! # use byte_set::ByteSet;
164//! let mut bytes = ByteSet::full();
165//! bytes.remove(255);
166//! ```
167//!
168//! Use [`removing`] as an immutable alternative, by passing the calling
169//! [`ByteSet`] by value:
170//!
171//! ```rust
172//! # use byte_set::ByteSet;
173//! let bytes = ByteSet::full().removing(255);
174//! ```
175//!
176//! Use [`remove_all`] to exclude all bytes of another [`ByteSet`], by mutating
177//! the [`ByteSet`] in-place:
178//!
179//! ```rust
180//! # use byte_set::ByteSet;
181//! let mut alphabet = ByteSet::ASCII_ALPHANUMERIC;
182//! alphabet.remove_all(ByteSet::ASCII_DIGIT);
183//!
184//! assert_eq!(alphabet, ByteSet::ASCII_ALPHABETIC);
185//! ```
186//!
187//! Use [`removing_all`] as an immutable alternative, by passing the calling
188//! [`ByteSet`] by value:
189//!
190//! ```rust
191//! # use byte_set::ByteSet;
192//! let alphabet = ByteSet::ASCII_ALPHANUMERIC.removing_all(ByteSet::ASCII_DIGIT);
193//!
194//! assert_eq!(alphabet, ByteSet::ASCII_ALPHABETIC);
195//! ```
196//!
197//! #### Iterate
198//!
199//! Iterating can be done with just a `for` loop, and goes in order from least
200//! to greatest:
201//!
202//! ```rust
203//! # use byte_set::ByteSet;
204//! # fn do_work(_: u8) {}
205//! fn small_to_big(bytes: ByteSet) {
206//! for byte in bytes {
207//! do_work(byte);
208//! }
209//! }
210//! ```
211//!
212//! Iterating in reverse is slightly more verbose, and goes in order from
213//! greatest to least:
214//!
215//! ```rust
216//! # use byte_set::ByteSet;
217//! # fn do_work(_: u8) {}
218//! fn big_to_small(bytes: ByteSet) {
219//! for byte in bytes.into_iter().rev() {
220//! do_work(byte);
221//! }
222//! }
223//! ```
224//!
225//! #### Contains
226//!
227//! It wouldn't really be a set if you couldn't check if it has specific items.
228//!
229//! Use [`contains`] to check a single byte:
230//!
231//! ```rust
232//! # use byte_set::ByteSet;
233//! fn has_null(bytes: &ByteSet) -> bool {
234//! bytes.contains(0)
235//! }
236//! ```
237//!
238//! Use [`contains_any`] to check for any matches in another [`ByteSet`]:
239//!
240//! ```rust
241//! # use byte_set::ByteSet;
242//! fn intersects(a: &ByteSet, b: &ByteSet) -> bool {
243//! a.contains_any(b)
244//! }
245//! ```
246//!
247//! #### Subset
248//!
249//! Use [`is_subset`] to check that all of the bytes in a [`ByteSet`] are
250//! contained in another:
251//!
252//! ```rust
253//! # use byte_set::ByteSet;
254//! fn test(a: &ByteSet, b: &ByteSet) {
255//! assert!(a.is_subset(b));
256//!
257//! // Always passes because every set is a subset of itself.
258//! assert!(a.is_subset(a));
259//! }
260//! ```
261//!
262//! Use [`is_strict_subset`] to check [`is_subset`] *and* that the sets are not
263//! the same:
264//!
265//! ```rust
266//! # use byte_set::ByteSet;
267//! fn test(a: &ByteSet, b: &ByteSet) {
268//! assert!(a.is_strict_subset(b));
269//!
270//! // `a` is equal to itself.
271//! assert!(!a.is_strict_subset(a));
272//! }
273//! ```
274//!
275//! For the sake of completion, there is also [`is_superset`] and
276//! [`is_strict_superset`], which call these functions with `a` and `b`
277//! switched.
278//!
279//! #### Min and Max
280//!
281//! Use [`first`] to get the smallest byte and [`last`] to get the biggest byte:
282//!
283//! ```rust
284//! # use byte_set::ByteSet;
285//! fn sanity_check(bytes: &ByteSet) {
286//! if let (Some(first), Some(last)) = (bytes.first(), bytes.last()) {
287//! assert!(first <= last);
288//! } else {
289//! // `bytes` is empty.
290//! }
291//! }
292//! ```
293//!
294//! These are the first and last bytes returned when iterating.
295//!
296//! ### `byte_set!` Macro
297//!
298//! [`byte_set!`] enables you to create a [`ByteSet`] with the same syntax as
299//! [`vec!`] or array expressions:
300//!
301//! ```rust
302//! # use byte_set::byte_set;
303//! let bytes = byte_set![1, 2, 3, b'x', b'y', b'z'];
304//! ```
305//!
306//! It even works at compile-time in a `const` expression:
307//!
308//! ```rust
309//! # use byte_set::{ByteSet, byte_set};
310//! const WHOA: ByteSet = byte_set![b'w', b'h', b'o', b'a'];
311//!
312//! static ABC: ByteSet = byte_set![b'a', b'c', b'c'];
313//! ```
314//!
315//! ## Implementation
316//!
317//! [`ByteSet`] is implemented as a 256-bit mask where each bit corresponds to a
318//! byte value. The first (least significant) bit in the mask represents the
319//! first byte (0) in the set. Likewise, the last last (most significant) bit
320//! represents the last byte (255).
321//!
322//! Given the following [`ByteSet`]:
323//!
324//! ```rust
325//! # use byte_set::byte_set;
326//! let bytes = byte_set![0, 1, 4, 5, 244];
327//! ```
328//!
329//! The in-memory representation of `bytes` would look like:
330//!
331//! ```text
332//! Byte: 0 1 2 3 4 5 6 7 ... 253 244 255
333//! Value: 1 1 0 0 1 1 0 0 ... 0 1 0
334//! ```
335//!
336//! This bit mask is composed of either `[u64; 4]` or `[u32; 8]` depending on
337//! the target CPU (see [#3]). Because this comes out to only 32 bytes,
338//! [`ByteSet`] implements [`Copy`].
339//!
340//! ## Benchmarks
341//!
342//! I will upload benchmarks run from my machine soon.
343//!
344//! In the meantime, you can benchmark this library by running:
345//!
346//! ```sh
347//! cargo bench
348//! ```
349//!
350//! By default, this will benchmark [`ByteSet`] along with various other types
351//! to compare performance. Note that this will take **a long time** (about 1
352//! hour and 30 minutes).
353//!
354//! Benchmark only [`ByteSet`] by running:
355//!
356//! ```sh
357//! cargo bench ByteSet
358//! ```
359//!
360//! This takes about 15 minutes, so maybe grab a coffee in the meantime.
361//!
362//! Benchmark a specific [`ByteSet`] operation by running:
363//!
364//! ```sh
365//! cargo bench $operation/ByteSet
366//! ```
367//!
368//! See `/benches/benchmarks` for strings that can be used for `$operation`.
369//!
370//! Note that `cargo bench` takes a regular expression, so `Contains (Random)`
371//! will not work because the parentheses are treated as a capture group. To
372//! match parentheses, escape them: `Contains \(Random\)`.
373//!
374//! ## Ecosystem Integrations
375//!
376//! This library has extended functionality for some popular crates.
377//!
378//! ### `rand`
379//!
380//! Use the `rand` (or `rand_core`) feature in your [`Cargo.toml`] to enable
381//! random [`ByteSet`] generation:
382//!
383//! ```toml
384//! [dependencies.byte_set]
385//! version = "0.1.3"
386//! features = ["rand"]
387//! ```
388//!
389//! This makes the following possible:
390//!
391//! ```rust,ignore
392//! let bytes = rand::random::<ByteSet>();
393//!
394//! // Same as above.
395//! let bytes = ByteSet::rand(rand::thread_rng());
396//!
397//! // Handle failure instead of panicking.
398//! match ByteSet::try_rand(rand::rngs::OsRng) {
399//! Ok(bytes) => // ...
400//! Err(error) => // ...
401//! }
402//! ```
403//!
404//! ### `serde`
405//!
406//! Use the `serde` feature in your [`Cargo.toml`] to enable [`Serialize`] and
407//! [`Deserialize`] for [`ByteSet`]:
408//!
409//! ```toml
410//! [dependencies.byte_set]
411//! version = "0.1.3"
412//! features = ["serde"]
413//! ```
414//!
415//! This makes the following possible:
416//!
417//! ```rust,ignore
418//! use serde::{Serialize, Deserialize};
419//!
420//! #[derive(Serialize, Deserialize)]
421//! struct MyValue {
422//! bytes: ByteSet
423//! }
424//! ```
425//!
426//! [`ByteSet`] can be serialized into a `u8` sequence, and deserialized from
427//! `&[u8]` or a `u8` sequence.
428//!
429//! Read more about using `serde` at [serde.rs](https://serde.rs/).
430//!
431//! ## License
432//!
433//! This project is released under either:
434//!
435//! - [MIT License](https://github.com/nvzqz/byte-set-rs/blob/master/LICENSE-MIT)
436//!
437//! - [Apache License (Version 2.0)](https://github.com/nvzqz/byte-set-rs/blob/master/LICENSE-APACHE)
438//!
439//! at your choosing.
440//!
441//! [@NikolaiVazquez]: https://twitter.com/NikolaiVazquez
442//!
443//! [`Cargo.toml`]: https://doc.rust-lang.org/cargo/reference/manifest.html
444//! [2018]: https://blog.rust-lang.org/2018/12/06/Rust-1.31-and-rust-2018.html#rust-2018
445//! [crate]: https://crates.io/crates/byte_set
446//!
447//! [`BTreeSet<u8>`]: https://doc.rust-lang.org/std/collections/struct.BTreeSet.html
448//! [`Copy`]: https://doc.rust-lang.org/std/marker/trait.Copy.html
449//! [`HashSet<u8>`]: https://doc.rust-lang.org/std/collections/struct.HashSet.html
450//! [`u8`]: https://doc.rust-lang.org/std/primitive.u8.html
451//! [`vec!`]: https://doc.rust-lang.org/std/macro.vec.html
452//!
453//! [`Serialize`]: https://docs.rs/serde/1.*/serde/trait.Serialize.html
454//! [`Deserialize`]: https://docs.rs/serde/1.*/serde/trait.Deserialize.html
455//!
456//! [#3]: https://github.com/nvzqz/byte-set-rs/issues/3
457//!
458//! [`byte_set!`]: macro.byte_set.html
459//! [`ByteSet`]: struct.ByteSet.html
460//! [`contains_any`]: struct.ByteSet.html#method.contains_any
461//! [`contains`]: struct.ByteSet.html#method.contains
462//! [`extend`]: struct.ByteSet.html#impl-Extend%3Cu8%3E
463//! [`first`]: struct.ByteSet.html#method.first
464//! [`insert_all`]: struct.ByteSet.html#method.insert_all
465//! [`insert`]: struct.ByteSet.html#method.insert
466//! [`inserting_all`]: struct.ByteSet.html#method.inserting_all
467//! [`inserting`]: struct.ByteSet.html#method.inserting
468//! [`last`]: struct.ByteSet.html#method.last
469//! [`remove_all`]: struct.ByteSet.html#method.remove_all
470//! [`remove`]: struct.ByteSet.html#method.remove
471//! [`removing_all`]: struct.ByteSet.html#method.removing_all
472//! [`removing`]: struct.ByteSet.html#method.removing
473//! [`is_strict_subset`]: struct.ByteSet.html#method.is_strict_subset
474//! [`is_subset`]: struct.ByteSet.html#method.is_subset
475//! [`is_strict_superset`]: struct.ByteSet.html#method.is_strict_superset
476//! [`is_superset`]: struct.ByteSet.html#method.is_superset
477
478#![cfg_attr(docsrs, feature(doc_cfg))]
479#![cfg_attr(not(test), no_std)]
480#![warn(missing_docs)]
481
482use core::{cmp, fmt, hash, iter::FromIterator, mem, ops, slice};
483
484// Makes `ByteSet::{rand,try_rand}` simpler to express.
485#[cfg(feature = "rand")]
486use rand as rand_core;
487
488#[macro_use]
489mod macros;
490
491#[cfg(test)]
492#[macro_use]
493mod tests_macros;
494
495#[cfg(test)]
496mod tests;
497
498pub(crate) mod chunk;
499pub(crate) use chunk::Chunk;
500
501mod iter;
502pub use iter::Iter;
503
504const SLOT_SIZE: usize = mem::size_of::<Chunk>();
505
506const NUM_SLOTS: usize = 256 / 8 / SLOT_SIZE;
507
508const LAST_SLOT_INDEX: usize = NUM_SLOTS - 1;
509
510/// An efficient, general-purpose set of [`u8`]s.
511///
512/// # Implementation
513///
514/// This is a 256-bit mask where a byte is contained based on whether its bit is
515/// enabled. The first (least significant) bit in the mask represents the first
516/// byte in the set. Likewise, the last last (most significant) bit represents
517/// the last byte.
518///
519/// The mask is composed a of "chunk" array. Each chunk is either 64 or 32 bits
520/// wide, depending on the target architecture. As of right now, this is based
521/// on native register size. This may change in the future based on target
522/// features that enable better performance.
523///
524/// [`u8`]: https://doc.rust-lang.org/std/primitive.u8.html
525#[derive(Clone, Copy, PartialEq, Eq)]
526#[repr(transparent)]
527pub struct ByteSet([Chunk; NUM_SLOTS]);
528
529/// Returns the chunk index for `byte` and the bit shift for that chunk.
530#[inline]
531const fn chunk_index_and_shift(byte: u8) -> (usize, usize) {
532 let byte = byte as usize;
533
534 #[cfg(target_pointer_width = "64")]
535 let index = byte >> 6;
536 #[cfg(target_pointer_width = "64")]
537 let shift = byte & 0b0011_1111;
538
539 #[cfg(not(target_pointer_width = "64"))]
540 let index = byte >> 5;
541 #[cfg(not(target_pointer_width = "64"))]
542 let shift = byte & 0b0001_1111;
543
544 (index, shift)
545}
546
547#[cfg(test)]
548impl ByteSet {
549 /// Returns a formatting proxy for the binary representation of `self`.
550 ///
551 /// `fmt::Binary` is not currently implemented for `ByteSet` because of the
552 /// extra work to support formatting options.
553 pub(crate) fn fmt_binary<'a>(&'a self) -> impl fmt::Display + 'a {
554 struct Formatted<'a>(&'a ByteSet);
555
556 impl fmt::Display for Formatted<'_> {
557 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
558 for chunk in &(self.0).0 {
559 #[cfg(target_pointer_width = "64")]
560 write!(f, "{:064b}", chunk)?;
561
562 #[cfg(not(target_pointer_width = "64"))]
563 write!(f, "{:032b}", chunk)?;
564 }
565 Ok(())
566 }
567 }
568
569 Formatted(self)
570 }
571}
572
573impl ByteSet {
574 /// Returns a set containing no bytes.
575 #[inline]
576 #[must_use]
577 pub const fn new() -> Self {
578 Self([0; NUM_SLOTS])
579 }
580
581 /// Returns a set containing all bytes (0-255).
582 #[inline]
583 #[must_use]
584 pub const fn full() -> Self {
585 Self([Chunk::max_value(); NUM_SLOTS])
586 }
587
588 /// Returns a set containing only `byte`.
589 ///
590 /// # Examples
591 ///
592 /// ```
593 /// # use byte_set::ByteSet;
594 /// let byte = 42;
595 /// let set = ByteSet::from_byte(byte);
596 ///
597 /// assert_eq!(set.first(), Some(byte));
598 /// assert_eq!(set.last(), Some(byte));
599 /// ```
600 #[inline]
601 #[must_use]
602 pub const fn from_byte(byte: u8) -> Self {
603 byte_set![byte]
604 }
605
606 /// Construct a ByteSet from a `RangeTo` value, i.e. `..x`
607 #[inline]
608 #[must_use]
609 pub const fn from_range_to(range: ops::RangeTo<u8>) -> Self {
610 const fn chunk_for(
611 this_chunk: usize,
612 byte_chunk: usize,
613 shift: usize,
614 ) -> Chunk {
615 // the following code is equivalent to
616 // if this_chunk == byte_chunk {
617 // value
618 // } else if this_chunk < byte_chunk {
619 // Chunk::max_value()
620 // } else {
621 // 0
622 // }
623 //
624 // Once `if` works in const, this can be cleaned up
625 // https://github.com/rust-lang/rust/pull/72437
626 let value: Chunk = (1 << shift) - 1;
627 let is_equal = (this_chunk == byte_chunk) as usize;
628 let is_less_than = (this_chunk < byte_chunk) as usize;
629 let if_unequal = [0, Chunk::max_value()][is_less_than];
630
631 [if_unequal, value][is_equal]
632 }
633 let (index, shift) = chunk_index_and_shift(range.end);
634 #[cfg(target_pointer_width = "64")]
635 let array = [
636 chunk_for(0, index, shift),
637 chunk_for(1, index, shift),
638 chunk_for(2, index, shift),
639 chunk_for(3, index, shift),
640 ];
641 #[cfg(not(target_pointer_width = "64"))]
642 let array = [
643 chunk_for(0, index, shift),
644 chunk_for(1, index, shift),
645 chunk_for(2, index, shift),
646 chunk_for(3, index, shift),
647 chunk_for(4, index, shift),
648 chunk_for(5, index, shift),
649 chunk_for(6, index, shift),
650 chunk_for(7, index, shift),
651 ];
652 ByteSet(array)
653 }
654
655 /// Construct a ByteSet from a `RangeToInclusive` value, i.e. `..=x`
656 #[inline]
657 #[must_use]
658 pub const fn from_range_to_inclusive(
659 range: ops::RangeToInclusive<u8>,
660 ) -> Self {
661 [
662 Self::full(),
663 Self::from_range_to(..(range.end.wrapping_add(1))),
664 ][(range.end != 255) as usize]
665 }
666
667 /// Construct a ByteSet from a `RangeFrom` value, i.e. `x..`
668 #[inline]
669 #[must_use]
670 pub const fn from_range_from(range: ops::RangeFrom<u8>) -> Self {
671 Self::from_range_to(..range.start).not()
672 }
673
674 /// Construct a ByteSet from a `RangeToInclusive` value, i.e. `x..y`
675 #[inline]
676 #[must_use]
677 pub const fn from_range(range: ops::Range<u8>) -> Self {
678 Self::from_range_from(range.start..)
679 .intersection(Self::from_range_to(..range.end))
680 }
681
682 /// Construct a ByteSet from a `RangeInclusive` value, i.e. `x..=y`
683 #[inline]
684 #[must_use]
685 pub const fn from_range_inclusive(range: ops::RangeInclusive<u8>) -> Self {
686 Self::from_range_from(*range.start()..)
687 .intersection(Self::from_range_to_inclusive(..=*range.end()))
688 }
689
690 /// Returns a set containing uniformly-distributed random bytes from `rng`.
691 ///
692 /// This uses [`fill_bytes`] under the hood.
693 ///
694 /// [`fill_bytes`]: https://docs.rs/rand_core/0.5.*/rand_core/trait.RngCore.html#tymethod.fill_bytes
695 #[cfg(any(feature = "rand", feature = "rand_core"))]
696 #[cfg_attr(docsrs, doc(cfg(any(feature = "rand", feature = "rand_core"))))]
697 #[inline]
698 pub fn rand<R: rand_core::RngCore>(mut rng: R) -> Self {
699 let mut set = Self::new();
700 rng.fill_bytes(set.as_bytes_mut());
701 set
702 }
703
704 /// Returns a set containing uniformly-distributed random bytes from `rng`,
705 /// or `Err` if `rng` failed.
706 ///
707 /// This uses [`try_fill_bytes`] under the hood.
708 ///
709 /// [`try_fill_bytes`]: https://docs.rs/rand_core/0.5.*/rand_core/trait.RngCore.html#tymethod.try_fill_bytes
710 #[cfg(any(feature = "rand", feature = "rand_core"))]
711 #[cfg_attr(docsrs, doc(cfg(any(feature = "rand", feature = "rand_core"))))]
712 #[inline]
713 pub fn try_rand<R: rand_core::RngCore>(
714 mut rng: R,
715 ) -> Result<Self, rand_core::Error> {
716 let mut set = Self::new();
717 rng.try_fill_bytes(set.as_bytes_mut())?;
718 Ok(set)
719 }
720
721 /// Returns `true` if `self` contains no bytes.
722 ///
723 /// This is more efficient than checking `self.len() == 0`.
724 #[inline]
725 #[must_use]
726 #[allow(clippy::let_and_return)]
727 pub const fn is_empty(&self) -> bool {
728 let is_empty = (self.0[0] == 0)
729 & (self.0[1] == 0)
730 & (self.0[2] == 0)
731 & (self.0[3] == 0);
732
733 #[cfg(not(target_pointer_width = "64"))]
734 {
735 is_empty
736 & (self.0[4] == 0)
737 & (self.0[5] == 0)
738 & (self.0[6] == 0)
739 & (self.0[7] == 0)
740 }
741
742 #[cfg(target_pointer_width = "64")]
743 is_empty
744 }
745
746 /// Returns `true` if `self` contains all bytes.
747 ///
748 /// This is more efficient than checking `self.len() == 256`.
749 #[inline]
750 #[must_use]
751 #[allow(clippy::let_and_return)]
752 pub const fn is_full(&self) -> bool {
753 let is_full = (self.0[0] == !0)
754 & (self.0[1] == !0)
755 & (self.0[2] == !0)
756 & (self.0[3] == !0);
757
758 #[cfg(not(target_pointer_width = "64"))]
759 {
760 is_full
761 & (self.0[4] == !0)
762 & (self.0[5] == !0)
763 & (self.0[6] == !0)
764 & (self.0[7] == !0)
765 }
766
767 #[cfg(target_pointer_width = "64")]
768 is_full
769 }
770
771 /// Returns the number of bytes contained in `self`.
772 #[cfg_attr(target_feature = "popcnt", inline)]
773 #[must_use]
774 #[allow(clippy::let_and_return)]
775 pub const fn len(&self) -> usize {
776 let len = (self.0[0].count_ones() as usize)
777 + (self.0[1].count_ones() as usize)
778 + (self.0[2].count_ones() as usize)
779 + (self.0[3].count_ones() as usize);
780
781 #[cfg(not(target_pointer_width = "64"))]
782 {
783 len + (self.0[4].count_ones() as usize)
784 + (self.0[5].count_ones() as usize)
785 + (self.0[6].count_ones() as usize)
786 + (self.0[7].count_ones() as usize)
787 }
788
789 #[cfg(target_pointer_width = "64")]
790 len
791 }
792
793 /// Removes all bytes from `self`.
794 #[inline]
795 pub fn clear(&mut self) {
796 *self = ByteSet::new();
797 }
798
799 /// Returns the first (least) byte in `self`, or `None` if `self` is empty.
800 pub fn first(&self) -> Option<u8> {
801 for (i, &chunk) in self.0.iter().enumerate() {
802 if let Some(lsb) = chunk::lsb(chunk) {
803 return Some(lsb + (i * chunk::INDEX_OFFSET) as u8);
804 }
805 }
806 None
807 }
808
809 /// Removes the first (least) byte in `self` and returns it, or `None` if
810 /// `self` is empty.
811 pub fn pop_first(&mut self) -> Option<u8> {
812 for (i, chunk) in self.0.iter_mut().enumerate() {
813 if let Some(lsb) = chunk::pop_lsb(chunk) {
814 return Some(lsb + (i * chunk::INDEX_OFFSET) as u8);
815 }
816 }
817 None
818 }
819
820 /// Returns the last (greatest) byte in `self`, or `None` if `self` is
821 /// empty.
822 pub fn last(&self) -> Option<u8> {
823 for (i, &chunk) in self.0.iter().rev().enumerate() {
824 if let Some(msb) = chunk::msb(chunk) {
825 let i = LAST_SLOT_INDEX - i;
826 return Some(msb + (i * chunk::INDEX_OFFSET) as u8);
827 }
828 }
829 None
830 }
831
832 /// Removes the last (least) byte in `self` and returns it, or `None` if
833 /// `self` is empty.
834 pub fn pop_last(&mut self) -> Option<u8> {
835 for (i, chunk) in self.0.iter_mut().rev().enumerate() {
836 if let Some(msb) = chunk::pop_msb(chunk) {
837 let i = LAST_SLOT_INDEX - i;
838 return Some(msb + (i * chunk::INDEX_OFFSET) as u8);
839 }
840 }
841 None
842 }
843
844 /// Inserts `byte` into `self` in-place.
845 ///
846 /// Unlike [`HashSet::insert`] and [`BTreeSet::insert`], this does not
847 /// return a `bool` for whether `byte` was not present. This is because it's
848 /// just as efficient to call [`contains`](#method.contains) before.
849 ///
850 /// [`HashSet::insert`]: https://doc.rust-lang.org/std/collections/struct.HashSet.html#method.insert
851 /// [`BTreeSet::insert`]: https://doc.rust-lang.org/std/collections/struct.BTreeSet.html#method.insert
852 #[inline]
853 pub fn insert(&mut self, byte: u8) {
854 let (index, shift) = chunk_index_and_shift(byte);
855
856 self.0[index] |= 1 << shift;
857 }
858
859 /// Inserts all bytes of `other` into `self` in-place.
860 #[inline]
861 pub fn insert_all(&mut self, other: Self) {
862 self.0[0] |= other.0[0];
863 self.0[1] |= other.0[1];
864 self.0[2] |= other.0[2];
865 self.0[3] |= other.0[3];
866
867 #[cfg(not(target_pointer_width = "64"))]
868 {
869 self.0[4] |= other.0[4];
870 self.0[5] |= other.0[5];
871 self.0[6] |= other.0[6];
872 self.0[7] |= other.0[7];
873 }
874 }
875
876 /// Returns a copy of `self` with `byte` inserted.
877 #[inline]
878 #[must_use]
879 pub const fn inserting(mut self, byte: u8) -> Self {
880 let (index, shift) = chunk_index_and_shift(byte);
881
882 self.0[index] |= 1 << shift;
883 self
884 }
885
886 /// Returns a copy of `self` with all of `other` inserted.
887 ///
888 /// This is equivalent to the [`union`](#method.union) method.
889 #[inline]
890 #[must_use]
891 pub const fn inserting_all(self, other: Self) -> Self {
892 self.union(other)
893 }
894
895 /// Removes `byte` from `self` in-place.
896 ///
897 /// Unlike [`HashSet::remove`] and [`BTreeSet::remove`], this does not
898 /// return a `bool` for whether `byte` was present. This is because it's
899 /// just as efficient to call [`contains`](#method.contains) before.
900 ///
901 /// [`HashSet::remove`]: https://doc.rust-lang.org/std/collections/struct.HashSet.html#method.remove
902 /// [`BTreeSet::remove`]: https://doc.rust-lang.org/std/collections/struct.BTreeSet.html#method.remove
903 #[inline]
904 pub fn remove(&mut self, byte: u8) {
905 let (index, shift) = chunk_index_and_shift(byte);
906
907 self.0[index] &= !(1 << shift);
908 }
909
910 /// Removes all bytes of `other` from `self` in-place.
911 #[inline]
912 pub fn remove_all(&mut self, other: Self) {
913 *self &= !other;
914 }
915
916 /// Returns a copy of `self` with `byte` removed.
917 #[inline]
918 #[must_use]
919 pub const fn removing(mut self, byte: u8) -> Self {
920 let (index, shift) = chunk_index_and_shift(byte);
921
922 self.0[index] &= !(1 << shift);
923 self
924 }
925
926 /// Returns a copy of `self` with `byte` removed.
927 #[inline]
928 #[must_use]
929 pub const fn removing_all(self, other: Self) -> Self {
930 self.difference(other)
931 }
932
933 /// Sets `byte` in `self` to `enabled` in-place.
934 #[inline]
935 pub fn set(&mut self, byte: u8, enabled: bool) {
936 let (index, shift) = chunk_index_and_shift(byte);
937 let chunk = self.0[index];
938
939 self.0[index] = (chunk & !(1 << shift)) | ((enabled as Chunk) << shift);
940 }
941
942 /// Returns a copy of `self` with `byte` set to `enabled`.
943 #[inline]
944 #[must_use]
945 pub const fn setting(mut self, byte: u8, enabled: bool) -> Self {
946 let (index, shift) = chunk_index_and_shift(byte);
947 let chunk = self.0[index];
948
949 self.0[index] = (chunk & !(1 << shift)) | ((enabled as Chunk) << shift);
950 self
951 }
952
953 /// Returns `true` if `byte` is contained in `self`.
954 #[inline]
955 #[must_use]
956 pub const fn contains(&self, byte: u8) -> bool {
957 let (index, shift) = chunk_index_and_shift(byte);
958
959 self.0[index] & (1 << shift) != 0
960 }
961
962 #[inline]
963 #[must_use]
964 const fn chunk_and_or(&self, other: &Self) -> Chunk {
965 map_reduce_chunks!(self, other, &, |)
966 }
967
968 /// Returns `true` if `self` contains any bytes in `other`.
969 #[inline]
970 #[must_use]
971 // Not `const` because it may be later improved with SIMD intrinsics.
972 pub fn contains_any(&self, other: &Self) -> bool {
973 self.chunk_and_or(other) != 0
974 }
975
976 #[inline]
977 const fn _is_subset(&self, other: &Self) -> bool {
978 self.intersection(*other).eq(self)
979 }
980
981 /// Returns `true` if `other` contains all bytes in `self`.
982 #[inline]
983 #[must_use]
984 // Not `const` because it may be later improved with SIMD intrinsics.
985 pub fn is_subset(&self, other: &Self) -> bool {
986 self._is_subset(other)
987 }
988
989 /// Returns `true` if `other` contains all bytes in `self` and at least one
990 /// other byte not contained in `self`.
991 ///
992 /// This is also known as a "proper subset".
993 #[must_use]
994 // Not inlined because lots of code is generated on x86.
995 // Not `const` because it may be later improved with SIMD intrinsics.
996 pub fn is_strict_subset(&self, other: &Self) -> bool {
997 // On x86, checking inequality first produces less code and uses fewer
998 // registers.
999 self.ne(other) && self.is_subset(other)
1000 }
1001
1002 /// Returns `true` if `self` contains all bytes in `other`.
1003 #[inline]
1004 #[must_use]
1005 pub fn is_superset(&self, other: &Self) -> bool {
1006 other.is_subset(self)
1007 }
1008
1009 /// Returns `true` if `self` contains all bytes in `other` and at least one
1010 /// other byte not contained in `other`.
1011 ///
1012 /// This is also known as a "proper superset".
1013 #[inline]
1014 #[must_use]
1015 pub fn is_strict_superset(&self, other: &Self) -> bool {
1016 other.is_strict_subset(self)
1017 }
1018
1019 /// Returns `true` if `self` and `other` have no bytes in common.
1020 #[inline]
1021 #[must_use]
1022 // Not `const` because it may be later improved with SIMD intrinsics.
1023 pub fn is_disjoint(&self, other: &Self) -> bool {
1024 self.intersection(*other).is_empty()
1025 }
1026
1027 /// Returns a set with the bytes contained in `self`, but not in `other`.
1028 #[inline]
1029 #[must_use]
1030 pub const fn difference(self, other: Self) -> Self {
1031 self.intersection(other.not())
1032 }
1033
1034 /// Returns a set with the bytes contained in `self` or `other`, but not in
1035 /// both.
1036 #[inline]
1037 #[must_use]
1038 pub const fn symmetric_difference(self, other: Self) -> Self {
1039 map_chunks!(self, ^, other)
1040 }
1041
1042 /// Returns a set with the bytes contained both in `self` and `other`.
1043 #[inline]
1044 #[must_use]
1045 pub const fn intersection(self, other: Self) -> Self {
1046 map_chunks!(self, &, other)
1047 }
1048
1049 /// Returns a new set with the bytes contained in `self` or `other`.
1050 #[inline]
1051 #[must_use]
1052 pub const fn union(self, other: Self) -> Self {
1053 map_chunks!(self, |, other)
1054 }
1055
1056 /// Returns a new set with the bytes not contained in `self`.
1057 ///
1058 /// This exists because the [`Not`] trait cannot be used in `const` yet.
1059 ///
1060 /// [`Not`]: https://doc.rust-lang.org/std/ops/trait.Not.html
1061 #[inline]
1062 #[must_use]
1063 #[allow(clippy::should_implement_trait)]
1064 pub const fn not(self) -> Self {
1065 map_chunks!(self, !)
1066 }
1067
1068 /// Returns `self` with its bits reversed.
1069 ///
1070 /// This is equivalent to checking for `255 - b` in all subsequent searches
1071 /// of `b`.
1072 #[must_use]
1073 // The `rbit` instruction makes this reasonable to inline.
1074 #[cfg_attr(target_arch = "aarch64", inline)]
1075 // Not inlined because lots of code is generated on x86.
1076 pub const fn reverse_bits(self) -> Self {
1077 Self([
1078 #[cfg(not(target_pointer_width = "64"))]
1079 self.0[7].reverse_bits(),
1080 #[cfg(not(target_pointer_width = "64"))]
1081 self.0[6].reverse_bits(),
1082 #[cfg(not(target_pointer_width = "64"))]
1083 self.0[5].reverse_bits(),
1084 #[cfg(not(target_pointer_width = "64"))]
1085 self.0[4].reverse_bits(),
1086 self.0[3].reverse_bits(),
1087 self.0[2].reverse_bits(),
1088 self.0[1].reverse_bits(),
1089 self.0[0].reverse_bits(),
1090 ])
1091 }
1092
1093 /// Returns `true` if `self` and `other` contain the same bytes.
1094 ///
1095 /// This exists because `PartialEq` is currently unstable in `const`.
1096 #[inline]
1097 #[must_use]
1098 #[allow(clippy::should_implement_trait)]
1099 #[allow(clippy::let_and_return)]
1100 pub const fn eq(&self, other: &Self) -> bool {
1101 let eq = (self.0[0] == other.0[0])
1102 & (self.0[1] == other.0[1])
1103 & (self.0[2] == other.0[2])
1104 & (self.0[3] == other.0[3]);
1105
1106 #[cfg(not(target_pointer_width = "64"))]
1107 {
1108 eq & (self.0[4] == other.0[4])
1109 & (self.0[5] == other.0[5])
1110 & (self.0[6] == other.0[6])
1111 & (self.0[7] == other.0[7])
1112 }
1113
1114 #[cfg(target_pointer_width = "64")]
1115 eq
1116 }
1117
1118 /// Returns `true` if `self` and `other` do not contain the same bytes.
1119 ///
1120 /// This exists because `PartialEq` is currently unstable in `const`.
1121 #[inline]
1122 #[must_use]
1123 #[allow(clippy::should_implement_trait)]
1124 pub const fn ne(&self, other: &Self) -> bool {
1125 !self.eq(other)
1126 }
1127}
1128
1129/// Operations related to the ASCII character set.
1130impl ByteSet {
1131 /// The set of all ASCII characters: U+0000 NULL ..= U+007F DELETE.
1132 ///
1133 /// # Examples
1134 ///
1135 /// This contains all bytes for which [`u8::is_ascii`] returns `true`:
1136 ///
1137 /// ```
1138 /// # use byte_set::ByteSet;
1139 /// for byte in ByteSet::ASCII {
1140 /// assert!(byte.is_ascii());
1141 /// }
1142 ///
1143 /// for byte in !ByteSet::ASCII {
1144 /// assert!(!byte.is_ascii());
1145 /// }
1146 /// ```
1147 ///
1148 /// [`u8::is_ascii`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii
1149 pub const ASCII: Self = {
1150 #[cfg(target_pointer_width = "64")]
1151 {
1152 Self([!0, !0, 0, 0])
1153 }
1154
1155 #[cfg(not(target_pointer_width = "64"))]
1156 {
1157 Self([!0, !0, !0, !0, 0, 0, 0, 0])
1158 }
1159 };
1160
1161 /// The set of all ASCII alphabetic characters:
1162 ///
1163 /// - U+0041 'A' ..= U+005A 'Z'
1164 /// - U+0061 'a' ..= U+007A 'z'
1165 ///
1166 /// # Examples
1167 ///
1168 /// This contains all bytes for which [`u8::is_ascii_alphabetic`] returns
1169 /// `true`:
1170 ///
1171 /// ```
1172 /// # use byte_set::ByteSet;
1173 /// for byte in ByteSet::ASCII_ALPHABETIC {
1174 /// assert!(byte.is_ascii_alphabetic());
1175 /// }
1176 ///
1177 /// for byte in !ByteSet::ASCII_ALPHABETIC {
1178 /// assert!(!byte.is_ascii_alphabetic());
1179 /// }
1180 /// ```
1181 ///
1182 /// [`u8::is_ascii_alphabetic`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_alphabetic
1183 pub const ASCII_ALPHABETIC: Self =
1184 Self::ASCII_LOWERCASE.inserting_all(Self::ASCII_UPPERCASE);
1185
1186 /// The set of all ASCII uppercase characters: U+0041 'A' ..= U+005A 'Z'.
1187 ///
1188 /// # Examples
1189 ///
1190 /// This contains all bytes for which [`u8::is_ascii_uppercase`] returns
1191 /// `true`:
1192 ///
1193 /// ```
1194 /// # use byte_set::ByteSet;
1195 /// for byte in ByteSet::ASCII_UPPERCASE {
1196 /// assert!(byte.is_ascii_uppercase());
1197 /// }
1198 ///
1199 /// for byte in !ByteSet::ASCII_UPPERCASE {
1200 /// assert!(!byte.is_ascii_uppercase());
1201 /// }
1202 /// ```
1203 ///
1204 /// [`u8::is_ascii_uppercase`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_uppercase
1205 pub const ASCII_UPPERCASE: Self = Self::from_range_inclusive(b'A'..=b'Z');
1206
1207 /// The set of all ASCII lowercase characters: U+0061 'a' ..= U+007A 'z'.
1208 ///
1209 /// # Examples
1210 ///
1211 /// This contains all bytes for which [`u8::is_ascii_lowercase`] returns
1212 /// `true`:
1213 ///
1214 /// ```
1215 /// # use byte_set::ByteSet;
1216 /// for byte in ByteSet::ASCII_LOWERCASE {
1217 /// assert!(byte.is_ascii_lowercase());
1218 /// }
1219 ///
1220 /// for byte in !ByteSet::ASCII_LOWERCASE {
1221 /// assert!(!byte.is_ascii_lowercase());
1222 /// }
1223 /// ```
1224 ///
1225 /// [`u8::is_ascii_lowercase`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_lowercase
1226 pub const ASCII_LOWERCASE: Self = Self::from_range_inclusive(b'a'..=b'z');
1227
1228 /// The set of all ASCII alphanumeric characters:
1229 ///
1230 /// - U+0041 'A' ..= U+005A 'Z'
1231 /// - U+0061 'a' ..= U+007A 'z'
1232 /// - U+0030 '0' ..= U+0039 '9'
1233 ///
1234 /// # Examples
1235 ///
1236 /// This contains all bytes for which [`u8::is_ascii_alphanumeric`] returns
1237 /// `true`:
1238 ///
1239 /// ```
1240 /// # use byte_set::ByteSet;
1241 /// for byte in ByteSet::ASCII_ALPHANUMERIC {
1242 /// assert!(byte.is_ascii_alphanumeric());
1243 /// }
1244 ///
1245 /// for byte in !ByteSet::ASCII_ALPHANUMERIC {
1246 /// assert!(!byte.is_ascii_alphanumeric());
1247 /// }
1248 /// ```
1249 ///
1250 /// [`u8::is_ascii_alphanumeric`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_alphanumeric
1251 pub const ASCII_ALPHANUMERIC: Self =
1252 Self::ASCII_ALPHABETIC.inserting_all(Self::ASCII_DIGIT);
1253
1254 /// The set of all ASCII decimal digits: U+0030 '0' ..= U+0039 '9'.
1255 ///
1256 /// # Examples
1257 ///
1258 /// This contains all bytes for which [`u8::is_ascii_digit`] returns `true`:
1259 ///
1260 /// ```
1261 /// # use byte_set::ByteSet;
1262 /// for byte in ByteSet::ASCII_DIGIT {
1263 /// assert!(byte.is_ascii_digit());
1264 /// }
1265 ///
1266 /// for byte in !ByteSet::ASCII_DIGIT {
1267 /// assert!(!byte.is_ascii_digit());
1268 /// }
1269 /// ```
1270 ///
1271 /// [`u8::is_ascii_digit`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_digit
1272 pub const ASCII_DIGIT: Self = Self::from_range_inclusive(b'0'..=b'9');
1273
1274 /// The set of all ASCII hexadecimal digits:
1275 ///
1276 /// - U+0030 '0' ..= U+0039 '9'
1277 /// - U+0041 'A' ..= U+0046 'F'
1278 /// - U+0061 'a' ..= U+0066 'f'
1279 ///
1280 /// # Examples
1281 ///
1282 /// This contains all bytes for which [`u8::is_ascii_hexdigit`] returns
1283 /// `true`:
1284 ///
1285 /// ```
1286 /// # use byte_set::ByteSet;
1287 /// for byte in ByteSet::ASCII_HEXDIGIT {
1288 /// assert!(byte.is_ascii_hexdigit());
1289 /// }
1290 ///
1291 /// for byte in !ByteSet::ASCII_HEXDIGIT {
1292 /// assert!(!byte.is_ascii_hexdigit());
1293 /// }
1294 /// ```
1295 ///
1296 /// [`u8::is_ascii_hexdigit`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_hexdigit
1297 pub const ASCII_HEXDIGIT: Self = Self::ASCII_DIGIT
1298 .inserting_all(Self::from_range_inclusive(b'A'..=b'F'))
1299 .inserting_all(Self::from_range_inclusive(b'a'..=b'f'));
1300
1301 /// The set of all ASCII punctuation characters:
1302 ///
1303 /// - U+0021 ..= U+002F `! " # $ % & ' ( ) * + , - . /`
1304 /// - U+003A ..= U+0040 `: ; < = > ? @`
1305 /// - U+005B ..= U+0060 ``[ \ ] ^ _ ` ``
1306 /// - U+007B ..= U+007E `{ | } ~`
1307 ///
1308 /// # Examples
1309 ///
1310 /// This contains all bytes for which [`u8::is_ascii_punctuation`] returns
1311 /// `true`:
1312 ///
1313 /// ```
1314 /// # use byte_set::ByteSet;
1315 /// for byte in ByteSet::ASCII_PUNCTUATION {
1316 /// assert!(byte.is_ascii_punctuation());
1317 /// }
1318 ///
1319 /// for byte in !ByteSet::ASCII_PUNCTUATION {
1320 /// assert!(!byte.is_ascii_punctuation());
1321 /// }
1322 /// ```
1323 ///
1324 /// [`u8::is_ascii_punctuation`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_punctuation
1325 pub const ASCII_PUNCTUATION: Self = byte_set![
1326 b'!', b'"', b'#', b'$', b'%', b'&', b'\'', b'(', b')', b'*', b'+',
1327 b',', b'-', b'.', b'/', b':', b';', b'<', b'=', b'>', b'?', b'@', b'[',
1328 b'\\', b']', b'^', b'_', b'`', b'{', b'|', b'}', b'~',
1329 ];
1330
1331 /// The set of all ASCII graphic characters: U+0021 '!' ..= U+007E '~'.
1332 ///
1333 /// # Examples
1334 ///
1335 /// This contains all bytes for which [`u8::is_ascii_graphic`] returns
1336 /// `true`:
1337 ///
1338 /// ```
1339 /// # use byte_set::ByteSet;
1340 /// for byte in ByteSet::ASCII_GRAPHIC {
1341 /// assert!(byte.is_ascii_graphic());
1342 /// }
1343 ///
1344 /// for byte in !ByteSet::ASCII_GRAPHIC {
1345 /// assert!(!byte.is_ascii_graphic());
1346 /// }
1347 /// ```
1348 ///
1349 /// [`u8::is_ascii_graphic`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_graphic
1350 pub const ASCII_GRAPHIC: Self =
1351 Self::ASCII_ALPHANUMERIC.inserting_all(Self::ASCII_PUNCTUATION);
1352
1353 /// The set of all ASCII whitespace characters:
1354 ///
1355 /// - U+0020 SPACE
1356 /// - U+0009 HORIZONTAL TAB
1357 /// - U+000A LINE FEED
1358 /// - U+000C FORM FEED
1359 /// - U+000D CARRIAGE RETURN
1360 ///
1361 /// # Examples
1362 ///
1363 /// This contains all bytes for which [`u8::is_ascii_whitespace`] returns
1364 /// `true`:
1365 ///
1366 /// ```
1367 /// # use byte_set::ByteSet;
1368 /// for byte in ByteSet::ASCII_WHITESPACE {
1369 /// assert!(byte.is_ascii_whitespace());
1370 /// }
1371 ///
1372 /// for byte in !ByteSet::ASCII_WHITESPACE {
1373 /// assert!(!byte.is_ascii_whitespace());
1374 /// }
1375 /// ```
1376 ///
1377 /// [`u8::is_ascii_whitespace`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_whitespace
1378 pub const ASCII_WHITESPACE: Self =
1379 byte_set![b'\t', b'\n', 0x0C, b'\r', b' '];
1380
1381 /// The set of all ASCII control characters:
1382 ///
1383 /// - U+0000 NUL ..= U+001F UNIT SEPARATOR
1384 /// - U+007F DELETE.
1385 ///
1386 /// Note that most ASCII whitespace characters are control characters, but
1387 /// SPACE is not.
1388 ///
1389 /// # Examples
1390 ///
1391 /// This contains all bytes for which [`u8::is_ascii_control`] returns
1392 /// `true`:
1393 ///
1394 /// ```
1395 /// # use byte_set::ByteSet;
1396 /// for byte in ByteSet::ASCII_CONTROL {
1397 /// assert!(byte.is_ascii_control());
1398 /// }
1399 ///
1400 /// for byte in !ByteSet::ASCII_CONTROL {
1401 /// assert!(!byte.is_ascii_control());
1402 /// }
1403 /// ```
1404 ///
1405 /// [`u8::is_ascii_whitespace`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_whitespace
1406 pub const ASCII_CONTROL: Self =
1407 Self::from_range_inclusive(0..=0x1F).inserting(0x7F);
1408
1409 /// Returns `true` if [`u8::is_ascii`] returns `true` for all bytes in
1410 /// `self`.
1411 ///
1412 /// This is significantly more efficient than checking each byte in `self`
1413 /// individually.
1414 ///
1415 /// [`u8::is_ascii`]:
1416 /// https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii
1417 #[inline]
1418 #[must_use]
1419 pub const fn is_ascii(&self) -> bool {
1420 self._is_subset(&Self::ASCII)
1421 }
1422
1423 /// Returns `true` if [`u8::is_ascii_alphabetic`] returns `true` for all
1424 /// bytes in `self`.
1425 ///
1426 /// This is significantly more efficient than checking each byte in `self`
1427 /// individually.
1428 ///
1429 /// [`u8::is_ascii_alphabetic`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_alphabetic
1430 #[inline]
1431 #[must_use]
1432 pub const fn is_ascii_alphabetic(&self) -> bool {
1433 self._is_subset(&Self::ASCII_ALPHABETIC)
1434 }
1435
1436 /// Returns `true` if [`u8::is_ascii_uppercase`] returns `true` for all
1437 /// bytes in `self`.
1438 ///
1439 /// This is significantly more efficient than checking each byte in `self`
1440 /// individually.
1441 ///
1442 /// [`u8::is_ascii_uppercase`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_uppercase
1443 #[inline]
1444 #[must_use]
1445 pub const fn is_ascii_uppercase(&self) -> bool {
1446 self._is_subset(&Self::ASCII_UPPERCASE)
1447 }
1448
1449 /// Returns `true` if [`u8::is_ascii_lowercase`] returns `true` for all
1450 /// bytes in `self`.
1451 ///
1452 /// This is significantly more efficient than checking each byte in `self`
1453 /// individually.
1454 ///
1455 /// [`u8::is_ascii_lowercase`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_lowercase
1456 #[inline]
1457 #[must_use]
1458 pub const fn is_ascii_lowercase(&self) -> bool {
1459 self._is_subset(&Self::ASCII_LOWERCASE)
1460 }
1461
1462 /// Returns `true` if [`u8::is_ascii_alphanumeric`] returns `true` for all
1463 /// bytes in `self`.
1464 ///
1465 /// This is significantly more efficient than checking each byte in `self`
1466 /// individually.
1467 ///
1468 /// [`u8::is_ascii_alphanumeric`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_alphanumeric
1469 #[inline]
1470 #[must_use]
1471 pub const fn is_ascii_alphanumeric(&self) -> bool {
1472 self._is_subset(&Self::ASCII_ALPHANUMERIC)
1473 }
1474
1475 /// Returns `true` if [`u8::is_ascii_digit`] returns `true` for all bytes in
1476 /// `self`.
1477 ///
1478 /// This is significantly more efficient than checking each byte in `self`
1479 /// individually.
1480 ///
1481 /// [`u8::is_ascii_digit`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_digit
1482 #[inline]
1483 #[must_use]
1484 pub const fn is_ascii_digit(&self) -> bool {
1485 self._is_subset(&Self::ASCII_DIGIT)
1486 }
1487
1488 /// Returns `true` if [`u8::is_ascii_hexdigit`] returns `true` for all bytes
1489 /// in `self`.
1490 ///
1491 /// This is significantly more efficient than checking each byte in `self`
1492 /// individually.
1493 ///
1494 /// [`u8::is_ascii_hexdigit`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_hexdigit
1495 #[inline]
1496 #[must_use]
1497 pub const fn is_ascii_hexdigit(&self) -> bool {
1498 self._is_subset(&Self::ASCII_HEXDIGIT)
1499 }
1500
1501 /// Returns `true` if [`u8::is_ascii_punctuation`] returns `true` for all
1502 /// bytes in `self`.
1503 ///
1504 /// This is significantly more efficient than checking each byte in `self`
1505 /// individually.
1506 ///
1507 /// [`u8::is_ascii_punctuation`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_punctuation
1508 #[inline]
1509 #[must_use]
1510 pub const fn is_ascii_punctuation(&self) -> bool {
1511 self._is_subset(&Self::ASCII_PUNCTUATION)
1512 }
1513
1514 /// Returns `true` if [`u8::is_ascii_graphic`] returns `true` for all bytes
1515 /// in `self`.
1516 ///
1517 /// This is significantly more efficient than checking each byte in `self`
1518 /// individually.
1519 ///
1520 /// [`u8::is_ascii_graphic`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_graphic
1521 #[inline]
1522 #[must_use]
1523 pub const fn is_ascii_graphic(&self) -> bool {
1524 self._is_subset(&Self::ASCII_GRAPHIC)
1525 }
1526
1527 /// Returns `true` if [`u8::is_ascii_whitespace`] returns `true` for all
1528 /// bytes in `self`.
1529 ///
1530 /// This is significantly more efficient than checking each byte in `self`
1531 /// individually.
1532 ///
1533 /// [`u8::is_ascii_whitespace`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_whitespace
1534 #[inline]
1535 #[must_use]
1536 pub const fn is_ascii_whitespace(&self) -> bool {
1537 self._is_subset(&Self::ASCII_WHITESPACE)
1538 }
1539
1540 /// Returns `true` if [`u8::is_ascii_control`] returns `true` for all bytes
1541 /// in `self`.
1542 ///
1543 /// This is significantly more efficient than checking each byte in `self`
1544 /// individually.
1545 ///
1546 /// [`u8::is_ascii_control`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_control
1547 #[inline]
1548 #[must_use]
1549 pub const fn is_ascii_control(&self) -> bool {
1550 self._is_subset(&Self::ASCII_CONTROL)
1551 }
1552}
1553
1554/// Operations over the internal memory representation.
1555///
1556/// There are currently no stability guarantees over the internal bytes. This is
1557/// being tracked in [#8](https://github.com/nvzqz/byte-set-rs/issues/8).
1558impl ByteSet {
1559 const SIZE: usize = mem::size_of::<Self>();
1560
1561 /// Returns a shared reference to the underlying bytes of `self`.
1562 #[inline]
1563 pub fn as_bytes(&self) -> &[u8; Self::SIZE] {
1564 unsafe { &*self.0.as_ptr().cast() }
1565 }
1566
1567 /// Returns a mutable reference to the underlying bytes of `self`.
1568 #[inline]
1569 pub fn as_bytes_mut(&mut self) -> &mut [u8; Self::SIZE] {
1570 unsafe { &mut *self.0.as_mut_ptr().cast() }
1571 }
1572
1573 /// Returns a shared reference to the underlying bytes of `slice`.
1574 #[inline]
1575 pub fn slice_as_bytes(slice: &[Self]) -> &[u8] {
1576 let ptr = slice.as_ptr().cast::<u8>();
1577 let len = slice.len() * Self::SIZE;
1578 unsafe { slice::from_raw_parts(ptr, len) }
1579 }
1580
1581 /// Returns a mutable reference to the underlying bytes of `slice`.
1582 #[inline]
1583 pub fn slice_as_bytes_mut(slice: &mut [Self]) -> &mut [u8] {
1584 let ptr = slice.as_mut_ptr().cast::<u8>();
1585 let len = slice.len() * Self::SIZE;
1586 unsafe { slice::from_raw_parts_mut(ptr, len) }
1587 }
1588}
1589
1590impl Default for ByteSet {
1591 #[inline]
1592 fn default() -> Self {
1593 Self::new()
1594 }
1595}
1596
1597impl From<u8> for ByteSet {
1598 #[inline]
1599 fn from(byte: u8) -> ByteSet {
1600 ByteSet::from_byte(byte)
1601 }
1602}
1603
1604impl From<&[u8]> for ByteSet {
1605 #[inline]
1606 fn from(bytes: &[u8]) -> Self {
1607 let mut set = ByteSet::new();
1608 set.extend(bytes);
1609 set
1610 }
1611}
1612
1613impl From<&str> for ByteSet {
1614 #[inline]
1615 fn from(s: &str) -> Self {
1616 s.as_bytes().into()
1617 }
1618}
1619
1620impl From<ops::Range<u8>> for ByteSet {
1621 #[inline]
1622 fn from(range: ops::Range<u8>) -> Self {
1623 Self::from_range(range)
1624 }
1625}
1626
1627impl From<ops::RangeTo<u8>> for ByteSet {
1628 #[inline]
1629 fn from(range: ops::RangeTo<u8>) -> Self {
1630 Self::from_range_to(range)
1631 }
1632}
1633
1634impl From<ops::RangeFrom<u8>> for ByteSet {
1635 #[inline]
1636 fn from(range: ops::RangeFrom<u8>) -> Self {
1637 Self::from_range_from(range)
1638 }
1639}
1640
1641impl From<ops::RangeInclusive<u8>> for ByteSet {
1642 #[inline]
1643 fn from(range: ops::RangeInclusive<u8>) -> Self {
1644 Self::from_range_inclusive(range)
1645 }
1646}
1647
1648impl From<ops::RangeToInclusive<u8>> for ByteSet {
1649 #[inline]
1650 fn from(range: ops::RangeToInclusive<u8>) -> Self {
1651 Self::from_range_to_inclusive(range)
1652 }
1653}
1654
1655impl From<ops::RangeFull> for ByteSet {
1656 #[inline]
1657 fn from(_: ops::RangeFull) -> Self {
1658 Self::full()
1659 }
1660}
1661
1662impl Extend<u8> for ByteSet {
1663 fn extend<T: IntoIterator<Item = u8>>(&mut self, iter: T) {
1664 iter.into_iter().for_each(|byte| self.insert(byte));
1665 }
1666}
1667
1668impl<'a> Extend<&'a u8> for ByteSet {
1669 fn extend<T: IntoIterator<Item = &'a u8>>(&mut self, iter: T) {
1670 self.extend(iter.into_iter().cloned());
1671 }
1672}
1673
1674impl FromIterator<u8> for ByteSet {
1675 fn from_iter<T: IntoIterator<Item = u8>>(iter: T) -> Self {
1676 // Make sure to use `insert` over `inserting` to not copy so many bytes
1677 // on each iteration.
1678 let mut set = ByteSet::new();
1679 set.extend(iter);
1680 set
1681 }
1682}
1683
1684impl<'a> FromIterator<&'a u8> for ByteSet {
1685 fn from_iter<T: IntoIterator<Item = &'a u8>>(iter: T) -> Self {
1686 iter.into_iter().cloned().collect()
1687 }
1688}
1689
1690impl IntoIterator for ByteSet {
1691 type Item = u8;
1692 type IntoIter = Iter;
1693
1694 #[inline]
1695 fn into_iter(self) -> Self::IntoIter {
1696 self.into()
1697 }
1698}
1699
1700impl fmt::Debug for ByteSet {
1701 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1702 f.debug_set().entries(*self).finish()
1703 }
1704}
1705
1706// This is implemented manually over `[u8; 32]` because it seems to produce
1707// better code than `[usize; N]`.
1708impl PartialOrd for ByteSet {
1709 #[inline]
1710 fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
1711 Some(self.cmp(other))
1712 }
1713
1714 #[inline]
1715 fn lt(&self, other: &Self) -> bool {
1716 self.as_bytes().lt(other.as_bytes())
1717 }
1718
1719 #[inline]
1720 fn le(&self, other: &Self) -> bool {
1721 self.as_bytes().le(other.as_bytes())
1722 }
1723
1724 #[inline]
1725 fn gt(&self, other: &Self) -> bool {
1726 self.as_bytes().gt(other.as_bytes())
1727 }
1728
1729 #[inline]
1730 fn ge(&self, other: &Self) -> bool {
1731 self.as_bytes().ge(other.as_bytes())
1732 }
1733}
1734
1735impl Ord for ByteSet {
1736 #[inline]
1737 fn cmp(&self, other: &Self) -> cmp::Ordering {
1738 self.as_bytes().cmp(other.as_bytes())
1739 }
1740}
1741
1742#[allow(clippy::derive_hash_xor_eq)]
1743impl hash::Hash for ByteSet {
1744 #[inline]
1745 fn hash<H: hash::Hasher>(&self, state: &mut H) {
1746 self.as_bytes().hash(state)
1747 }
1748
1749 #[inline]
1750 fn hash_slice<H: hash::Hasher>(data: &[Self], state: &mut H) {
1751 Self::slice_as_bytes(data).hash(state)
1752 }
1753}
1754
1755impl ops::Sub for ByteSet {
1756 type Output = Self;
1757
1758 #[inline]
1759 fn sub(self, rhs: Self) -> Self::Output {
1760 self.removing_all(rhs)
1761 }
1762}
1763
1764impl ops::SubAssign for ByteSet {
1765 #[inline]
1766 fn sub_assign(&mut self, rhs: Self) {
1767 self.remove_all(rhs);
1768 }
1769}
1770
1771impl ops::BitAnd for ByteSet {
1772 type Output = Self;
1773
1774 #[inline]
1775 fn bitand(self, rhs: Self) -> Self::Output {
1776 self.intersection(rhs)
1777 }
1778}
1779
1780impl ops::BitAndAssign for ByteSet {
1781 #[inline]
1782 fn bitand_assign(&mut self, rhs: Self) {
1783 *self = *self & rhs;
1784 }
1785}
1786
1787impl ops::BitOr for ByteSet {
1788 type Output = Self;
1789
1790 #[inline]
1791 fn bitor(self, rhs: Self) -> Self::Output {
1792 self.inserting_all(rhs)
1793 }
1794}
1795
1796impl ops::BitOrAssign for ByteSet {
1797 #[inline]
1798 fn bitor_assign(&mut self, rhs: Self) {
1799 self.insert_all(rhs);
1800 }
1801}
1802
1803impl ops::BitXor for ByteSet {
1804 type Output = Self;
1805
1806 #[inline]
1807 fn bitxor(self, rhs: Self) -> Self::Output {
1808 self.symmetric_difference(rhs)
1809 }
1810}
1811
1812impl ops::BitXorAssign for ByteSet {
1813 #[inline]
1814 fn bitxor_assign(&mut self, rhs: Self) {
1815 *self = *self ^ rhs;
1816 }
1817}
1818
1819impl ops::Not for ByteSet {
1820 type Output = Self;
1821
1822 #[inline]
1823 fn not(self) -> Self::Output {
1824 ByteSet::not(self)
1825 }
1826}
1827
1828// Enables `rand::random::<ByteSet>()`.
1829#[cfg(feature = "rand")]
1830#[cfg_attr(docsrs, doc(cfg(feature = "rand")))]
1831impl rand::distributions::Distribution<ByteSet>
1832 for rand::distributions::Standard
1833{
1834 #[inline]
1835 fn sample<R: rand::Rng + ?Sized>(&self, rng: &mut R) -> ByteSet {
1836 ByteSet::rand(rng)
1837 }
1838}
1839
1840#[cfg(feature = "serde")]
1841#[cfg_attr(docsrs, doc(cfg(feature = "serde")))]
1842impl serde::Serialize for ByteSet {
1843 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
1844 where
1845 S: serde::Serializer,
1846 {
1847 use serde::ser::SerializeSeq;
1848
1849 let mut seq = serializer.serialize_seq(Some(self.len()))?;
1850 for byte in *self {
1851 seq.serialize_element(&byte)?;
1852 }
1853 seq.end()
1854 }
1855}
1856
1857#[cfg(feature = "serde")]
1858#[cfg_attr(docsrs, doc(cfg(feature = "serde")))]
1859impl<'de> serde::Deserialize<'de> for ByteSet {
1860 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
1861 where
1862 D: serde::Deserializer<'de>,
1863 {
1864 struct ByteSetVisitor;
1865
1866 impl<'de> serde::de::Visitor<'de> for ByteSetVisitor {
1867 type Value = ByteSet;
1868
1869 fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result {
1870 f.write_str("a set of bytes")
1871 }
1872
1873 fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E>
1874 where
1875 E: serde::de::Error,
1876 {
1877 Ok(v.into())
1878 }
1879
1880 fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
1881 where
1882 A: serde::de::SeqAccess<'de>,
1883 {
1884 let mut set = ByteSet::new();
1885 while let Some(byte) = seq.next_element::<u8>()? {
1886 set.insert(byte);
1887 }
1888 Ok(set)
1889 }
1890 }
1891
1892 deserializer.deserialize_seq(ByteSetVisitor)
1893 }
1894}