format_bytes/lib.rs
1/*!
2This crate exposes a procedural macro that allows you to format bytestrings.
3For more background on why you would want to do that,
4[read this article](https://octobus.net/blog/2020-06-05-not-everything-is-utf8.html).
5
6## Usage
7
8Add this to your `Cargo.toml`:
9
10```toml
11[dependencies]
12format-bytes = "0.1"
13```
14
15then use the macro like so:
16
17```rust
18use format_bytes::format_bytes;
19
20fn main() {
21 assert_eq!(
22 format_bytes!(b"look at those {} bytes", &[0u8, 1, 2]),
23 b"look at those \x00\x01\x02 bytes"
24 );
25}
26```
27
28See more examples of how it works on the documentation of
29[`format_bytes!` itself](https://docs.rs/format-bytes/\*\/format_bytes/macro.format_bytes.html).
30
31## Missing features
32
33* Named arguments, but they should be added in a future version
34* Python-like "f-string" functionality is not planned because of its more
35complex implementation and limited actual benefit
36* ``format!``-like padding helpers: if the need manifests itself, they might
37appear
38
39
40## Why not 1.0?
41
42Not until named arguments have landed and the macro gets a bit of mileage (it
43will be used in [Mercurial](https://www.mercurial-scm.org)).
44*/
45
46use std::fmt;
47use std::io;
48
/// Creates a `Vec<u8>` using interpolation of runtime expressions.
///
/// The first argument `format_bytes!` receives is a format bytestring.
/// This must be a bytestring literal. The power of the formatting string
/// is in the `{}`s contained.
///
/// Additional arguments passed to `format_bytes!` replace the `{}`s
/// within the formatting bytestring in the order given. It only supports
/// positional arguments for now, but a future version should add support
/// for named arguments.
///
/// These additional arguments may have any type that implements
/// the [`DisplayBytes`] trait.
///
/// # Panics
///
/// The result of the underlying [`write_bytes!`] call is unwrapped.
/// Writing into a `Vec<u8>` cannot fail by itself, so a panic is only possible
/// if that call reports an error from another source, such as a
/// [`DisplayBytes`] implementation that returns one.
///
/// # Examples
///
/// ```
/// use format_bytes::format_bytes;
///
/// assert_eq!(format_bytes!(b""), b"");
/// assert_eq!(format_bytes!(b"here"), b"here");
/// assert_eq!(format_bytes!(b"this {{ escapes {{"), b"this {{ escapes {{");
/// assert_eq!(format_bytes!(b"also this {{}}"), b"also this {{}}");
/// assert_eq!(format_bytes!(b"this works {{{}}}", b"a"), b"this works {{a}}");
/// assert_eq!(
///     format_bytes!(b"look at those {} bytes", &[0u8, 1, 2]),
///     b"look at those \x00\x01\x02 bytes"
/// );
///
/// let bytes = vec![0u8, 1, 2];
///
/// assert_eq!(
///     format_bytes!(b"look at those {} bytes", bytes),
///     b"look at those \x00\x01\x02 bytes"
/// );
/// assert_eq!(
///     format_bytes!(b"{}.{}.{}.{}", 1_i32, 2_u8, 3_f32, &4),
///     b"1.2.3.4"
/// );
/// assert_eq!(
///     format_bytes!(b"what about this very very long message {}?", "here".as_bytes()),
///     b"what about this very very long message here?".to_vec()
/// );
/// assert_eq!(format_bytes!(b"{}", std::borrow::Cow::Borrowed("cow".as_bytes())), b"cow");
/// ```
#[macro_export]
macro_rules! format_bytes {
    ($($args: tt)*) => {{
        // Absolute path: the expansion must not depend on what `Vec` names at the call site.
        let mut vec = ::std::vec::Vec::<u8>::new();
        $crate::write_bytes!(&mut vec, $($args)*)
            // `impl std::io::Write for Vec<u8>` never errors, so the sink itself
            // cannot be the cause of a failure here.
            .expect("formatting into a `Vec<u8>` reported an error");
        vec
    }}
}
104
105#[doc(hidden)]
106pub use format_bytes_macros::_write_bytes;
107
/// Like [`format_bytes!`], but writes to a stream given as an additional first argument.
///
/// The stream is an expression that is written to through [`std::io::Write`],
/// typically a mutable reference to a writer such as `&mut Vec<u8>`,
/// a `&mut &mut [u8]` (see the example below) or a `&mut dyn std::io::Write`.
/// The remaining arguments are the same as for [`format_bytes!`]: a format bytestring
/// followed by values that implement the [`DisplayBytes`] trait.
///
/// The macro returns [`std::io::Result<()>`](std::io::Result).
///
/// # Examples
///
/// ```
/// use format_bytes::write_bytes;
///
/// const BUFFER_LEN: usize = 20;
/// let mut buffer = [0_u8; BUFFER_LEN];
/// let mut slice = &mut buffer[..];
///
/// write_bytes!(&mut slice, b"{}", 3.14).unwrap();
///
/// // `impl std::io::Write for &mut [u8]` reassigns the slice to the unwritten remainder:
/// let written = BUFFER_LEN - slice.len();
/// assert_eq!(&buffer[..written], b"3.14");
/// ```
#[macro_export]
macro_rules! write_bytes {
    // Every token is forwarded untouched to the hidden `_write_bytes!` macro
    // that this crate re-exports.
    ($($args: tt)*) => {
        $crate::_write_bytes!($($args)*)
    };
}
134
/// Byte-oriented counterpart of `std::fmt::Display`.
///
/// A type implementing this trait decides how it is rendered for presentation to users
/// on a byte stream, rather than on a stream of Unicode text.
///
/// The bytes that are eventually shown to users get decoded with an unspecified
/// character encoding that is presumed to be ASCII-compatible.
///
/// Implementations should propagate every error reported by `output`
/// (the `?` operator is enough) and should not produce other errors.
///
/// # Example
///
/// A struct with several fields will typically delegate to the `write_bytes` macro:
///
/// ```
/// use format_bytes::{DisplayBytes, write_bytes};
///
/// struct Point2D { x: f32, y: f32 }
///
/// impl DisplayBytes for Point2D {
///     fn display_bytes(
///         &self,
///         out: &mut dyn std::io::Write,
///     ) -> std::io::Result<()> {
///         write_bytes!(out, b"x = {}, y = {}", self.x, self.y)
///     }
/// }
/// ```
pub trait DisplayBytes {
    /// Writes the byte representation of `self` to `output`.
    fn display_bytes(&self, output: &mut dyn io::Write) -> io::Result<()>;
}
166
167macro_rules! impl_through_deref {
168 // Macro hygiene requires the `$Inner` ident to be an input
169 // so it matches corresponding idents in `$Wrapper` types:
170 ($Inner: ident => $( $Wrapper: ty, )*) => {
171 $(
172 /// Forward to the inner type.
173 impl<$Inner: ?Sized + DisplayBytes> DisplayBytes for $Wrapper {
174 #[inline]
175 fn display_bytes(&self, output: &mut dyn io::Write) -> io::Result<()> {
176 (**self).display_bytes(output)
177 }
178 }
179 )*
180 };
181}
182
183impl_through_deref! {
184 Inner =>
185 &'_ Inner,
186 &'_ mut Inner,
187 Box<Inner>,
188 std::rc::Rc<Inner>,
189 std::sync::Arc<Inner>,
190}
191
192/// Forward to the inner type.
193impl<Inner> DisplayBytes for std::borrow::Cow<'_, Inner>
194where
195 Inner: ?Sized + ToOwned + DisplayBytes,
196{
197 #[inline]
198 fn display_bytes(&self, output: &mut dyn io::Write) -> io::Result<()> {
199 (**self).display_bytes(output)
200 }
201}
202
203macro_rules! impl_for_byte_string {
204 ($($Ty: ty),+) => {
205 $(
206 /// Byte strings are "formatted" as-is.
207 impl DisplayBytes for $Ty {
208 #[inline]
209 fn display_bytes(&self, output: &mut dyn io::Write) -> io::Result<()> {
210 output.write_all(self)
211 }
212 }
213 )+
214 };
215}
216
217impl_for_byte_string!([u8], Vec<u8>);
218
219macro_rules! impl_for_arrays {
220 ($( $LEN: expr )+) => {
221 impl_for_byte_string! {
222 $( [u8; $LEN] ),+
223 }
224 };
225}
226
227impl_for_arrays! {
228 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
229 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32
230 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
231 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
232}
233
/// Adaptor for types that implement `std::fmt::Display`. The Unicode output is encoded as UTF-8.
///
/// The wrapper derives the common traits (`Debug`, `Clone`, `Copy`, `PartialEq`, `Eq`)
/// whenever the wrapped type provides them.
///
/// # Example
///
/// ```rust
/// use format_bytes::{format_bytes, Utf8};
///
/// assert_eq!(format_bytes!(b"{}", Utf8("è_é")), b"\xc3\xa8_\xc3\xa9");
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Utf8<Inner>(pub Inner);
244
245impl<Inner: fmt::Display> DisplayBytes for Utf8<Inner> {
246 fn display_bytes(&self, output: &mut dyn io::Write) -> io::Result<()> {
247 // Calling `Display::fmt` requires a `Formatter` which we can’t create directly,
248 // so let’s go through `write!` with an adaptor type.
249 struct Adapter<'a> {
250 output: &'a mut dyn io::Write,
251 result: io::Result<()>,
252 }
253
254 impl fmt::Write for Adapter<'_> {
255 fn write_str(&mut self, s: &str) -> fmt::Result {
256 if self.result.is_err() {
257 return Err(fmt::Error);
258 }
259 let utf8 = s.as_bytes();
260 match self.output.write_all(utf8) {
261 Ok(()) => Ok(()),
262 Err(error) => {
263 // `fmt::Error` cannot carry any data so stash the error
264 self.result = Err(error);
265 Err(fmt::Error)
266 }
267 }
268 }
269 }
270
271 let mut adapter = Adapter {
272 output,
273 result: Ok(()),
274 };
275 {
276 // `write!` requires this import: https://github.com/rust-lang/rust/issues/21826
277 use fmt::Write;
278 write!(adapter, "{}", self.0)
279 // Recover stashed error
280 .map_err(|fmt::Error| adapter.result.unwrap_err())
281 }
282 }
283}
284
285macro_rules! impl_ascii_only {
286 ($( $Ty: ident )*) => {
287 $(
288 /// Format to ASCII bytes with `std::fmt::Display`.
289 ///
290 /// The `Display` impl for this type only emits ASCII characters,
291 /// so it’s less useful than in the general case
292 /// to make users explicitly opt-in to UTF-8 encoding.
293 impl DisplayBytes for $Ty {
294 #[inline]
295 fn display_bytes(&self, output: &mut dyn io::Write) -> io::Result<()> {
296 Utf8(self).display_bytes(output)
297 }
298 }
299 )*
300 };
301}
302
303impl_ascii_only! {
304 u8 u16 u32 u64 u128 usize
305 i8 i16 i32 i64 i128 isize
306 f32 f64
307}
308
309/// Format a sequence of values with the given separator repeated
310/// between any two consecutive values, but not at the start or end of the sequence.
311///
312/// The return value can be formatted with `DisplayBytes` *once*.
313/// Formatting consumes the input iterator. Formatting again will produce an empty output.
314///
315/// # Example
316///
317/// ```
318/// use format_bytes::{format_bytes, join};
319///
320/// let formatted = format_bytes!(b"Got {}.", join(&[4, 3, 2], b" and "));
321/// assert_eq!(formatted, b"Got 4 and 3 and 2.");
322/// ```
323pub fn join(
324 iterable: impl IntoIterator<Item = impl DisplayBytes>,
325 separator: impl DisplayBytes,
326) -> impl DisplayBytes {
327 Join {
328 iter: std::cell::Cell::new(Some(iterable.into_iter())),
329 separator,
330 }
331}
332
333struct Join<I, S> {
334 iter: std::cell::Cell<Option<I>>,
335 separator: S,
336}
337
338impl<I, T, S> DisplayBytes for Join<I, S>
339where
340 I: Iterator<Item = T>,
341 T: DisplayBytes,
342 S: DisplayBytes,
343{
344 fn display_bytes(&self, output: &mut dyn io::Write) -> io::Result<()> {
345 if let Some(mut iter) = self.iter.take() {
346 if let Some(first) = iter.next() {
347 first.display_bytes(output)?;
348 for item in iter {
349 self.separator.display_bytes(output)?;
350 item.display_bytes(output)?;
351 }
352 }
353 }
354 Ok(())
355 }
356}