format_bytes/
lib.rs

1/*!
2This crate exposes a procedural macro that allows you to format bytestrings.
3For more background on why you would want to do that,
4[read this article](https://octobus.net/blog/2020-06-05-not-everything-is-utf8.html).
5
6## Usage
7
8Add this to your `Cargo.toml`:
9
10```toml
11[dependencies]
12format-bytes = "0.1"
13```
14
15then use the macro like so:
16
17```rust
18use format_bytes::format_bytes;
19
20fn main() {
21    assert_eq!(
22        format_bytes!(b"look at those {} bytes", &[0u8, 1, 2]),
23        b"look at those \x00\x01\x02 bytes"
24    );
25}
26```
27
28See more examples of how it works on the documentation of
29[`format_bytes!` itself](https://docs.rs/format-bytes/\*\/format_bytes/macro.format_bytes.html).
30
31## Missing features
32
33* Named arguments, but they should be added in a future version
34* Python-like "f-string" functionality is not planned because of its more
35complex implementation and limited actual benefit
36* ``format!``-like padding helpers: if the need manifests itself, they might
37appear
38
39
40## Why not 1.0?
41
42Not until named arguments have landed and the macro gets a bit of mileage (it
43will be used in [Mercurial](https://www.mercurial-scm.org)).
44*/
45
46use std::fmt;
47use std::io;
48
/// Builds a `Vec<u8>` by interpolating runtime expressions into a format bytestring.
///
/// The first argument is the format bytestring, which must be a bytestring
/// literal. Every `{}` it contains is a placeholder.
///
/// The remaining arguments replace the `{}` placeholders in the order given.
/// Only positional arguments are supported for now; a future version
/// should add support for named arguments.
///
/// Each of these additional arguments may be of any type that implements
/// the [`DisplayBytes`] trait.
///
/// # Examples
///
/// ```
/// use format_bytes::format_bytes;
///
/// assert_eq!(format_bytes!(b""), b"");
/// assert_eq!(format_bytes!(b"here"), b"here");
/// assert_eq!(format_bytes!(b"this {{ escapes {{"), b"this {{ escapes {{");
/// assert_eq!(format_bytes!(b"also this {{}}"), b"also this {{}}");
/// assert_eq!(format_bytes!(b"this works {{{}}}", b"a"), b"this works {{a}}");
/// assert_eq!(
///     format_bytes!(b"look at those {} bytes", &[0u8, 1, 2]),
///     b"look at those \x00\x01\x02 bytes"
/// );
///
/// let bytes = vec![0u8, 1, 2];
///
/// assert_eq!(
///     format_bytes!(b"look at those {} bytes", bytes),
///     b"look at those \x00\x01\x02 bytes"
/// );
/// assert_eq!(
///     format_bytes!(b"{}.{}.{}.{}", 1_i32, 2_u8, 3_f32, &4),
///     b"1.2.3.4"
/// );
/// assert_eq!(
///     format_bytes!(b"what about this very very long message {}?", "here".as_bytes()),
///     b"what about this very very long message here?".to_vec()
/// );
/// assert_eq!(format_bytes!(b"{}", std::borrow::Cow::Borrowed("cow".as_bytes())), b"cow");
/// ```
#[macro_export]
macro_rules! format_bytes {
    ($($tokens: tt)*) => {{
        let mut buffer: Vec<u8> = Vec::new();
        // `impl std::io::Write for Vec<u8>` never returns an error,
        // so this `unwrap` is not expected to panic:
        $crate::write_bytes!(&mut buffer, $($tokens)*).unwrap();
        buffer
    }}
}
104
105#[doc(hidden)]
106pub use format_bytes_macros::_write_bytes;
107
/// Like [`format_bytes!`], but writes to a stream given as an additional first argument.
///
/// The stream is an expression that can be written to through [`std::io::Write`],
/// for example `&mut Vec<u8>`, `&mut &mut [u8]` or `&mut dyn std::io::Write`.
/// The format bytestring and the values to interpolate follow the stream,
/// exactly as in [`format_bytes!`].
///
/// The macro returns [`std::io::Result<()>`](std::io::Result).
///
/// # Examples
///
/// ```
/// use format_bytes::write_bytes;
///
/// const BUFFER_LEN: usize = 20;
/// let mut buffer = [0_u8; BUFFER_LEN];
/// let mut slice = &mut buffer[..];
///
/// write_bytes!(&mut slice, b"{}", 3.14).unwrap();
///
/// // `impl std::io::Write for &mut [u8]` reassigns the slice to the unwritten remainder:
/// let written = BUFFER_LEN - slice.len();
/// assert_eq!(&buffer[..written], b"3.14");
/// ```
#[macro_export]
macro_rules! write_bytes {
    // All tokens are forwarded untouched to the `_write_bytes` macro
    // re-exported from `format_bytes_macros`.
    ($($args: tt)*) => {
        $crate::_write_bytes!($($args)*)
    };
}
134
/// Let types decide how to format themselves for presentation to users in a byte-stream output.
///
/// Similar to `std::fmt::Display`, but the output stream is bytes instead of Unicode.
///
/// When output is presented to users, it is decoded with an unspecified character encoding
/// that is presumed to be ASCII-compatible.
///
/// Implementers should return any error from `output` (e.g. with the `?` operator),
/// and not emit other errors.
///
/// # Example
///
/// A typical `impl` for a struct with multiple fields might use the `write_bytes` macro:
///
/// ```
/// use format_bytes::{DisplayBytes, write_bytes};
///
/// struct Point2D { x: f32, y: f32 }
///
/// impl DisplayBytes for Point2D {
///     fn display_bytes(
///         &self,
///         out: &mut dyn std::io::Write,
///     ) -> std::io::Result<()> {
///         write_bytes!(out, b"x = {}, y = {}", self.x, self.y)
///     }
/// }
/// ```
pub trait DisplayBytes {
    /// Writes the byte representation of `self` to `output`.
    ///
    /// # Errors
    ///
    /// Implementations are expected to propagate any error returned by `output`
    /// and not to produce errors of their own.
    fn display_bytes(&self, output: &mut dyn io::Write) -> io::Result<()>;
}
166
167macro_rules! impl_through_deref {
168    // Macro hygiene requires the `$Inner` ident to be an input
169    // so it matches corresponding idents in `$Wrapper` types:
170    ($Inner: ident => $( $Wrapper: ty, )*) => {
171        $(
172            /// Forward to the inner type.
173            impl<$Inner: ?Sized + DisplayBytes> DisplayBytes for $Wrapper {
174                #[inline]
175                fn display_bytes(&self, output: &mut dyn io::Write) -> io::Result<()> {
176                    (**self).display_bytes(output)
177                }
178            }
179        )*
180    };
181}
182
183impl_through_deref! {
184    Inner =>
185    &'_ Inner,
186    &'_ mut Inner,
187    Box<Inner>,
188    std::rc::Rc<Inner>,
189    std::sync::Arc<Inner>,
190}
191
192/// Forward to the inner type.
193impl<Inner> DisplayBytes for std::borrow::Cow<'_, Inner>
194where
195    Inner: ?Sized + ToOwned + DisplayBytes,
196{
197    #[inline]
198    fn display_bytes(&self, output: &mut dyn io::Write) -> io::Result<()> {
199        (**self).display_bytes(output)
200    }
201}
202
203macro_rules! impl_for_byte_string {
204    ($($Ty: ty),+) => {
205        $(
206            /// Byte strings are "formatted" as-is.
207            impl DisplayBytes for $Ty {
208                #[inline]
209                fn display_bytes(&self, output: &mut dyn io::Write) -> io::Result<()> {
210                    output.write_all(self)
211                }
212            }
213        )+
214    };
215}
216
217impl_for_byte_string!([u8], Vec<u8>);
218
219macro_rules! impl_for_arrays {
220    ($( $LEN: expr )+) => {
221        impl_for_byte_string! {
222            $( [u8; $LEN] ),+
223        }
224    };
225}
226
227impl_for_arrays! {
228    0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
229    17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32
230    33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
231    49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
232}
233
234/// Adaptor for types that implement `std::fmt::Display`. The Unicode output is encoded as UTF-8.
235///
236/// # Example
237///
238/// ```rust
239/// use format_bytes::{format_bytes, Utf8};
240///
241/// assert_eq!(format_bytes!(b"{}", Utf8("è_é")), b"\xc3\xa8_\xc3\xa9");
242/// ```
243pub struct Utf8<Inner>(pub Inner);
244
245impl<Inner: fmt::Display> DisplayBytes for Utf8<Inner> {
246    fn display_bytes(&self, output: &mut dyn io::Write) -> io::Result<()> {
247        // Calling `Display::fmt` requires a `Formatter` which we can’t create directly,
248        // so let’s go through `write!` with an adaptor type.
249        struct Adapter<'a> {
250            output: &'a mut dyn io::Write,
251            result: io::Result<()>,
252        }
253
254        impl fmt::Write for Adapter<'_> {
255            fn write_str(&mut self, s: &str) -> fmt::Result {
256                if self.result.is_err() {
257                    return Err(fmt::Error);
258                }
259                let utf8 = s.as_bytes();
260                match self.output.write_all(utf8) {
261                    Ok(()) => Ok(()),
262                    Err(error) => {
263                        // `fmt::Error` cannot carry any data so stash the error
264                        self.result = Err(error);
265                        Err(fmt::Error)
266                    }
267                }
268            }
269        }
270
271        let mut adapter = Adapter {
272            output,
273            result: Ok(()),
274        };
275        {
276            // `write!` requires this import: https://github.com/rust-lang/rust/issues/21826
277            use fmt::Write;
278            write!(adapter, "{}", self.0)
279                // Recover stashed error
280                .map_err(|fmt::Error| adapter.result.unwrap_err())
281        }
282    }
283}
284
285macro_rules! impl_ascii_only {
286    ($( $Ty: ident )*) => {
287        $(
288            /// Format to ASCII bytes with `std::fmt::Display`.
289            ///
290            /// The `Display` impl for this type only emits ASCII characters,
291            /// so it’s less useful than in the general case
292            /// to make users explicitly opt-in to UTF-8 encoding.
293            impl DisplayBytes for $Ty {
294                #[inline]
295                fn display_bytes(&self, output: &mut dyn io::Write) -> io::Result<()> {
296                    Utf8(self).display_bytes(output)
297                }
298            }
299        )*
300    };
301}
302
303impl_ascii_only! {
304    u8 u16 u32 u64 u128 usize
305    i8 i16 i32 i64 i128 isize
306    f32 f64
307}
308
309/// Format a sequence of values with the given separator repeated
310/// between any two consecutive values, but not at the start or end of the sequence.
311///
312/// The return value can be formatted with `DisplayBytes` *once*.
313/// Formatting consumes the input iterator. Formatting again will produce an empty output.
314///
315/// # Example
316///
317/// ```
318/// use format_bytes::{format_bytes, join};
319///
320/// let formatted = format_bytes!(b"Got {}.", join(&[4, 3, 2], b" and "));
321/// assert_eq!(formatted, b"Got 4 and 3 and 2.");
322/// ```
323pub fn join(
324    iterable: impl IntoIterator<Item = impl DisplayBytes>,
325    separator: impl DisplayBytes,
326) -> impl DisplayBytes {
327    Join {
328        iter: std::cell::Cell::new(Some(iterable.into_iter())),
329        separator,
330    }
331}
332
333struct Join<I, S> {
334    iter: std::cell::Cell<Option<I>>,
335    separator: S,
336}
337
338impl<I, T, S> DisplayBytes for Join<I, S>
339where
340    I: Iterator<Item = T>,
341    T: DisplayBytes,
342    S: DisplayBytes,
343{
344    fn display_bytes(&self, output: &mut dyn io::Write) -> io::Result<()> {
345        if let Some(mut iter) = self.iter.take() {
346            if let Some(first) = iter.next() {
347                first.display_bytes(output)?;
348                for item in iter {
349                    self.separator.display_bytes(output)?;
350                    item.display_bytes(output)?;
351                }
352            }
353        }
354        Ok(())
355    }
356}