bp3d_util/
format.rs

1// Copyright (c) 2025, BlockProject 3D
2//
3// All rights reserved.
4//
5// Redistribution and use in source and binary forms, with or without modification,
6// are permitted provided that the following conditions are met:
7//
8//     * Redistributions of source code must retain the above copyright notice,
9//       this list of conditions and the following disclaimer.
10//     * Redistributions in binary form must reproduce the above copyright notice,
11//       this list of conditions and the following disclaimer in the documentation
12//       and/or other materials provided with the distribution.
13//     * Neither the name of BlockProject 3D nor the names of its contributors
14//       may be used to endorse or promote products derived from this software
15//       without specific prior written permission.
16//
17// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
21// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
25// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29//! Formatting utilities.
30
31use std::mem::MaybeUninit;
32
33/// A structure which acts similar to a [FixedBufStr] but borrows from a buffer instead of owning a
34/// stack allocation.
35pub struct MemBufStr<'a> {
36    len: &'a mut usize,
37    buffer: &'a mut [MaybeUninit<u8>],
38}
39
40impl<'a> MemBufStr<'a> {
41    /// Wraps a memory buffer with its length in a new string buffer.
42    ///
43    /// # Safety
44    ///
45    /// It is UB to construct a [MemBufStr] if `len` is not a valid position in the buffer `buffer`.
46    /// It is also UB to construct a [MemBufStr] from a `buffer` which does not contain only UTF-8
47    /// bytes. If `len` points to uninitialized memory in `buffer` constructing [MemBufStr] is UB.
48    pub unsafe fn wrap_uninit(
49        len: &'a mut usize,
50        buffer: &'a mut [MaybeUninit<u8>],
51    ) -> MemBufStr<'a> {
52        MemBufStr { buffer, len }
53    }
54
55    /// Wraps a memory buffer with its length in a new string buffer.
56    ///
57    /// # Safety
58    ///
59    /// It is UB to construct a [MemBufStr] if `len` is not a valid position in the buffer `buffer`.
60    /// It is also UB to construct a [MemBufStr] from a `buffer` which does not contain only UTF-8
61    /// bytes.
62    pub unsafe fn wrap(len: &'a mut usize, buffer: &'a mut [u8]) -> MemBufStr<'a> {
63        MemBufStr {
64            #[allow(clippy::missing_transmute_annotations)]
65            buffer: std::mem::transmute(buffer),
66            len,
67        }
68    }
69
70    /// Extracts the string from this buffer.
71    //type inference works so why should the code look awfully more complex?
72    #[allow(clippy::missing_transmute_annotations)]
73    pub fn str(&self) -> &str {
74        unsafe { std::str::from_utf8_unchecked(std::mem::transmute(&self.buffer[..*self.len])) }
75    }
76
77    /// Appends a raw byte buffer at the end of this string buffer.
78    ///
79    /// Returns the number of bytes written.
80    ///
81    /// # Arguments
82    ///
83    /// * `buf`: the raw byte buffer to append.
84    ///
85    /// returns: usize
86    ///
87    /// # Safety
88    ///
89    /// * [MemBufStr](MemBufStr) contains only valid UTF-8 strings so buf must contain only valid UTF-8
90    ///   bytes.
91    /// * If buf contains invalid UTF-8 bytes, further operations on the log message buffer may
92    ///   result in UB.
93    //type inference works so why should the code look awfully more complex?
94    #[allow(clippy::missing_transmute_annotations)]
95    pub unsafe fn write(&mut self, buf: &[u8]) -> usize {
96        let len = utf8_max(buf, self.buffer.len() - *self.len);
97        unsafe {
98            std::ptr::copy_nonoverlapping(
99                buf.as_ptr(),
100                std::mem::transmute(self.buffer.as_mut_ptr().add(*self.len)),
101                len,
102            );
103        }
104        *self.len += len;
105        len
106    }
107}
108
109impl std::fmt::Write for MemBufStr<'_> {
110    fn write_str(&mut self, value: &str) -> std::fmt::Result {
111        unsafe { self.write(value.as_bytes()) };
112        Ok(())
113    }
114}
115
116/// Fixed length string buffer.
117#[derive(Clone, Debug)]
118pub struct FixedBufStr<const N: usize> {
119    len: usize,
120    buffer: [MaybeUninit<u8>; N],
121}
122
123impl<const N: usize> Default for FixedBufStr<N> {
124    fn default() -> Self {
125        Self::new()
126    }
127}
128
// Returns the largest length `n <= max` (and `n <= buf.len()`) such that `buf[..n]` does not end
// in the middle of a UTF-8 encoded character. `buf` is expected to hold valid UTF-8.
//
// A cut position is a character boundary exactly when the byte located at that position is not a
// continuation byte (`0b10xx_xxxx`). The scan therefore starts at `max` and walks back over
// continuation bytes until it reaches the lead byte of the character straddling the limit; for
// valid UTF-8 this takes at most 3 steps.
//
// The previous implementation compared the number of continuation bytes against the full sequence
// length with overlapping masks, which split 4-byte characters having 3 bytes inside the limit and
// dropped 2-byte characters ending exactly on the limit.
//
// Unchecked indexing and forced inlining are kept on purpose: this function sits in a hot path
// within debug.tracing and was measured to be noticeably slower otherwise.
#[inline(always)]
fn utf8_max(buf: &[u8], max: usize) -> usize {
    let buf_len = buf.len();
    if buf_len <= max {
        // The whole input fits, nothing to cut.
        return buf_len;
    }
    let mut end = max;
    // The `end > 0` guard keeps the scan in bounds even if `buf` is not valid UTF-8.
    while end > 0 {
        // SAFETY: `end <= max` and `max < buf_len` (checked above), so `end` is in bounds.
        let byte = unsafe { *buf.get_unchecked(end) };
        if byte & 0xC0 != 0x80 {
            break;
        }
        end -= 1;
    }
    end
}
162
163impl<const N: usize> FixedBufStr<N> {
164    /// Creates a new fixed length string buffer.
165    pub fn new() -> FixedBufStr<N> {
166        FixedBufStr {
167            buffer: unsafe { MaybeUninit::uninit().assume_init() },
168            len: 0,
169        }
170    }
171
172    /// Extracts the string from this buffer.
173    //type inference works so why should the code look awfully more complex?
174    #[allow(clippy::missing_transmute_annotations)]
175    pub fn str(&self) -> &str {
176        unsafe { std::str::from_utf8_unchecked(std::mem::transmute(&self.buffer[..self.len as _])) }
177    }
178
179    /// Constructs this buffer from an existing string.
180    //type inference works so why should the code look awfully more complex?
181    #[allow(clippy::missing_transmute_annotations)]
182    //I believe this is a false-positive, FromStr returns a Result not Self.
183    #[allow(clippy::should_implement_trait)]
184    pub fn from_str(value: &str) -> Self {
185        let mut buffer = FixedBufStr::new();
186        let len = utf8_max(value.as_bytes(), N);
187        unsafe {
188            std::ptr::copy_nonoverlapping(
189                value.as_ptr(),
190                std::mem::transmute(buffer.buffer.as_mut_ptr()),
191                len,
192            );
193        }
194        buffer.len = len as _;
195        buffer
196    }
197
198    /// Appends a raw byte buffer at the end of this string buffer.
199    ///
200    /// Returns the number of bytes written.
201    ///
202    /// # Arguments
203    ///
204    /// * `buf`: the raw byte buffer to append.
205    ///
206    /// returns: usize
207    ///
208    /// # Safety
209    ///
210    /// * [FixedBufStr](FixedBufStr) contains only valid UTF-8 strings so buf must contain only valid UTF-8
211    ///   bytes.
212    /// * If buf contains invalid UTF-8 bytes, further operations on the log message buffer may
213    ///   result in UB.
214    //type inference works so why should the code look awfully more complex?
215    #[allow(clippy::missing_transmute_annotations)]
216    pub unsafe fn write(&mut self, buf: &[u8]) -> usize {
217        let len = utf8_max(buf, N - self.len);
218        unsafe {
219            std::ptr::copy_nonoverlapping(
220                buf.as_ptr(),
221                std::mem::transmute(self.buffer.as_mut_ptr().add(self.len)),
222                len,
223            );
224        }
225        self.len += len;
226        len
227    }
228}
229
230impl<const N: usize> std::fmt::Write for FixedBufStr<N> {
231    fn write_str(&mut self, value: &str) -> std::fmt::Result {
232        unsafe { self.write(value.as_bytes()) };
233        Ok(())
234    }
235}
236
/// Adapter exposing a fmt [Write](std::fmt::Write) as an io [Write](std::io::Write).
///
/// This may look like a hack but is a requirement for pathological APIs such as presented by the
/// time crate.
pub struct IoToFmt<W: std::fmt::Write>(W);

impl<W: std::fmt::Write> IoToFmt<W> {
    /// Create a new [IoToFmt](IoToFmt) wrapper.
    ///
    /// # Arguments
    ///
    /// * `w`: target fmt [Write](std::fmt::Write) which receives everything written.
    ///
    /// returns: IoToFmt<W>
    pub fn new(w: W) -> Self {
        IoToFmt(w)
    }

    /// Consumes the wrapper and hands back the underlying [Write](std::fmt::Write).
    pub fn into_inner(self) -> W {
        let IoToFmt(inner) = self;
        inner
    }
}

impl<W: std::fmt::Write> std::io::Write for IoToFmt<W> {
    /// Forwards `buf` to the wrapped writer as a single string.
    ///
    /// Returns `buf.len()` on success. Fails with an error of kind
    /// [Other](std::io::ErrorKind::Other) when `buf` is not valid UTF-8 (nothing is forwarded in
    /// that case) or when the wrapped writer reports an error.
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        let text = match std::str::from_utf8(buf) {
            Ok(text) => text,
            Err(e) => return Err(std::io::Error::other(e)),
        };
        match self.0.write_str(text) {
            Ok(()) => Ok(text.len()),
            Err(e) => Err(std::io::Error::other(e)),
        }
    }

    /// Nothing is buffered by this adapter, so flushing always succeeds.
    fn flush(&mut self) -> std::io::Result<()> {
        Ok(())
    }
}
275
#[cfg(test)]
mod tests {
    use crate::format::{FixedBufStr, MemBufStr};
    use std::fmt::Write;
    use std::mem::MaybeUninit;

    /// Pieces written one after the other by the ASCII tests.
    const WORDS: [&str; 4] = ["this", " is", " a", " test"];

    /// Writes every piece into `out`, in order, ignoring the returned results.
    fn feed(out: &mut impl Write, pieces: &[&str]) {
        for piece in pieces {
            let _ = out.write_str(piece);
        }
    }

    /// Feeds `pieces` into a `FixedBufStr<N>` and checks the resulting length and content.
    fn check_fixed<const N: usize>(pieces: &[&str], expected: &str) {
        let mut msg: FixedBufStr<N> = FixedBufStr::new();
        feed(&mut msg, pieces);
        assert_eq!(msg.str().len(), expected.len());
        assert_eq!(msg.str(), expected);
    }

    /// Feeds `pieces` into a `MemBufStr` over `N` zeroed bytes and checks the resulting length
    /// and content.
    fn check_mem<const N: usize>(pieces: &[&str], expected: &str) {
        let mut storage = [0u8; N];
        let mut len = 0;
        // SAFETY: a length of 0 is a valid position and covers no bytes at all.
        let mut msg = unsafe { MemBufStr::wrap(&mut len, &mut storage) };
        feed(&mut msg, pieces);
        assert_eq!(msg.str().len(), expected.len());
        assert_eq!(msg.str(), expected);
    }

    #[test]
    fn basic() {
        let mut msg: FixedBufStr<64> = FixedBufStr::new();
        feed(&mut msg, &WORDS);
        assert_eq!(msg.str(), "this is a test");
    }

    #[test]
    fn basic_mem() {
        let mut storage = [MaybeUninit::<u8>::uninit(); 64];
        let mut len = 0;
        // SAFETY: a length of 0 is a valid position and covers no uninitialized bytes.
        let mut msg = unsafe { MemBufStr::wrap_uninit(&mut len, &mut storage) };
        feed(&mut msg, &WORDS);
        assert_eq!(msg.str(), "this is a test");
    }

    #[test]
    fn truncate_ascii() {
        check_fixed::<4>(&WORDS, "this");
    }

    #[test]
    fn truncate_ascii_mem() {
        check_mem::<4>(&WORDS, "this");
    }

    #[test]
    fn truncate_utf8_exact() {
        check_fixed::<3>(&["我"], "我");
    }

    #[test]
    fn truncate_utf8_exact_mem() {
        check_mem::<3>(&["我"], "我");
    }

    #[test]
    fn truncate_utf8_exact2() {
        check_fixed::<6>(&["我是"], "我是");
    }

    #[test]
    fn truncate_utf8_exact2_mem() {
        check_mem::<6>(&["我是"], "我是");
    }

    #[test]
    fn truncate_utf8_exact3() {
        check_fixed::<6>(&["我abcd"], "我abc");
    }

    #[test]
    fn truncate_utf8_exact3_mem() {
        check_mem::<6>(&["我abcd"], "我abc");
    }

    #[test]
    fn truncate_utf8() {
        check_fixed::<4>(&["我是"], "我");
    }

    #[test]
    fn truncate_utf8_mem() {
        check_mem::<4>(&["我是"], "我");
    }
}