d_major/
null_term_str.rs

1/*
2 * Description: Handle null-terminated strings with explicit length measurement.
3 *
4 * Copyright (C) 2025 d@nny mc² <dmc2@hypnicjerk.ai>
5 * SPDX-License-Identifier: LGPL-3.0-or-later
6 *
7 * This program is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU Lesser General Public License as published
9 * by the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public License
18 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
19 */
20
21//! Handle null-terminated strings with explicit length measurement.
22
23use std::{cmp, ffi, fmt, hash, marker::PhantomData, ptr, slice};
24
25
/// A borrowed, null-terminated C string whose length has not been measured yet.
///
/// This plays the same role as [`ffi::CStr`], except that [`Self::from_ptr()`] never implicitly
/// performs a [`libc::strlen()`].
///
/// The stdlib docs note that a length-free representation is the eventual goal for `CStr`, but
/// here we want to decide *where* the measurement happens, so that it can be deferred until the
/// string data is actually needed.
#[derive(Clone, Copy)]
#[repr(transparent)]
pub struct NullTermStr<'s> {
  /* Address of the first byte of the string. */
  start: *const ffi::c_char,
  /* Ties the raw pointer to the lifetime `'s` without storing a reference. */
  _ph: PhantomData<&'s ffi::c_char>,
}
38
39impl<'s> NullTermStr<'s> {
40  /// Wrap a null-terminated C-style string, *without* performing any length calculation or reading
41  /// any bytes from the pointed-to value.
42  ///
43  /// # Safety
44  /// `start` must be non-null, and point to a contiguous region of memory which ends with
45  /// a null byte. This is the same requirement as for [`ffi::CStr::from_ptr()`].
46  #[inline]
47  pub const unsafe fn from_ptr(start: *const ffi::c_char) -> Self {
48    Self {
49      start,
50      _ph: PhantomData,
51    }
52  }
53
54  /// Return the input that was provided to [`Self::from_ptr()`].
55  #[inline]
56  pub const fn as_ptr(&self) -> *const ffi::c_char { self.start }
57
58  /// Execute [`libc::strlen()`] to calculate the length of the pointed-to string.
59  #[inline]
60  pub fn measure(self) -> MeasuredNullTermStr<'s> {
61    let n = unsafe { libc::strlen(self.start) };
62    unsafe { MeasuredNullTermStr::given_measurement(self, n) }
63  }
64
65  /// Return whether this matches either of the always-present directory entries `"."` or `".."`.
66  ///
67  /// This is performed without any call to `strlen()`, and will not read past the first null byte.
68  #[inline]
69  pub fn match_dir_entries_unmeasured(&self) -> bool {
70    let mut p: *const u8 = self.start.cast();
71    match unsafe { p.read() } {
72      /* This is a zero-length string (should never happen with directory entries). */
73      0 => return false,
74      /* This begins with a '.' character, so continue. */
75      b'.' => (),
76      _ => return false,
77    }
78    /* We know it's not terminated yet, so we can advance the pointer by 1. */
79    p = unsafe { p.add(1) };
80    match unsafe { p.read() } {
81      /* This was the string ".". */
82      0 => return true,
83      /* This is ".." so far. */
84      b'.' => (),
85      _ => return false,
86    }
87    /* Advance a final time. */
88    p = unsafe { p.add(1) };
89    match unsafe { p.read() } {
90      /* This was the string "..". */
91      0 => true,
92      _ => false,
93    }
94  }
95}
96
97impl cmp::PartialEq for NullTermStr<'_> {
98  fn eq(&self, rhs: &Self) -> bool { ptr::eq(self.start, rhs.start) }
99}
100impl cmp::Eq for NullTermStr<'_> {}
101impl hash::Hash for NullTermStr<'_> {
102  fn hash<H>(&self, state: &mut H)
103  where H: hash::Hasher {
104    ptr::hash(self.start, state);
105  }
106}
107
108impl fmt::Debug for NullTermStr<'_> {
109  fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
110    let s: &ffi::CStr = self.measure().into();
111    f.debug_tuple("NullTermStr")
112      .field(&self.start)
113      .field(&s)
114      .finish()
115  }
116}
117
118/// The more direct analog to [`ffi::CStr`] which explicitly knows the length of its string data.
119#[derive(Copy, Clone)]
120pub struct MeasuredNullTermStr<'s> {
121  s: NullTermStr<'s>,
122  n: usize,
123}
124
125impl<'s> MeasuredNullTermStr<'s> {
126  /// Construct a C-style string with a known length.
127  ///
128  /// # Safety
129  /// The length `n` must correctly describe the number of bytes in the string value pointed to by
130  /// `s`, *without* including the trailing null byte. This is to say that `n` may be 0, but the
131  /// string pointed to by `s` must be non-null, and must point to a region of memory exactly 1 byte
132  /// longer than `n`.
133  ///
134  /// If the string's length is not known in advance, use [`NullTermStr::measure()`], which
135  /// internally calls this method.
136  #[inline]
137  pub const unsafe fn given_measurement(s: NullTermStr<'s>, n: usize) -> Self { Self { s, n } }
138
139  /// Translate a reference to a byte slice into a measured null-terminated string.
140  ///
141  /// This method will panic if `s` is empty, if it does not end with a null byte, or if it contains
142  /// any internal null bytes.
143  #[inline]
144  pub fn from_bytes_with_nul(s: &'s [u8]) -> Self {
145    assert!(!s.is_empty(), "expected non-empty slice");
146    assert_eq!(s[s.len() - 1], 0, "slice must end with null byte");
147    assert_eq!(
148      memchr::memchr(0, s),
149      Some(s.len() - 1),
150      "slice must contain no internal null bytes"
151    );
152    unsafe { Self::from_bytes_with_nul_unchecked(s) }
153  }
154
155  /// Translate a reference to a byte slice into a measured null-terminated string.
156  ///
157  /// # Safety
158  /// `s` must be non-empty, must end with a null byte, and may not contain any internal null
159  /// bytes. [`Self::from_bytes_with_nul()`] may be used to validate slices.
160  #[inline]
161  pub const unsafe fn from_bytes_with_nul_unchecked(s: &'s [u8]) -> Self {
162    let n = unsafe { s.len().unchecked_sub(1) };
163    Self {
164      s: unsafe { NullTermStr::from_ptr(s.as_ptr().cast()) },
165      n,
166    }
167  }
168
169  /// Return a slice of the internal string data, *without* trailing null byte.
170  #[inline]
171  pub const fn as_bytes(&self) -> &'s [u8] {
172    unsafe { slice::from_raw_parts(self.as_ptr().cast(), self.len()) }
173  }
174
175  /// Translate this data to a platform-specific string, suitable for translation to
176  /// [`Path`](std::path::Path).
177  #[inline]
178  pub fn as_os_str(&self) -> &'s ffi::OsStr {
179    use std::os::unix::ffi::OsStrExt;
180    ffi::OsStr::from_bytes(self.as_bytes())
181  }
182
183  #[inline]
184  const fn as_ptr(&self) -> *const ffi::c_char { self.as_unmeasured().as_ptr() }
185
186  /// Length of the internal string data, *without* trailing null byte.
187  #[inline]
188  pub const fn len(&self) -> usize { self.n }
189
190  /// Whether the internal string data points to a single null byte.
191  #[inline]
192  pub const fn is_empty(&self) -> bool { self.len() == 0 }
193
194  /// Length of the internal string data, *with* trailing null byte.
195  #[inline]
196  pub const fn len_with_nul(&self) -> usize { self.n.checked_add(1).unwrap() }
197
198  /// Return a slice of the internal string data, *with* trailing null byte.
199  #[inline]
200  pub const fn as_bytes_with_nul(&self) -> &'s [u8] {
201    unsafe { slice::from_raw_parts(self.as_ptr().cast(), self.len_with_nul()) }
202  }
203
204  /// Retrieve the internal string data, which may be converted to a pointer again.
205  #[inline]
206  pub const fn as_unmeasured(&self) -> NullTermStr<'s> { self.s }
207
208  /// Allocate the necessary space in `v` and copy over the internal string data.
209  ///
210  /// `v`'s length will be reset to the length of the internal string data, although it will not
211  /// reallocate the underlying vector.
212  #[inline]
213  pub fn clone_into(&self, v: &mut NullTermString) {
214    let NullTermString(v) = v;
215    v.clear();
216    let src = self.as_bytes_with_nul();
217    v.reserve(src.len());
218    unsafe {
219      cfg_if::cfg_if! {
220        if #[cfg(feature = "nightly")] {
221          v.spare_capacity_mut()[..src.len()].write_copy_of_slice(src);
222        } else {
223          v.as_mut_ptr()
224            .copy_from_nonoverlapping(src.as_ptr(), src.len());
225        }
226      }
227      v.set_len(src.len());
228    }
229  }
230}
231
232impl<'s> From<&'s [u8]> for MeasuredNullTermStr<'s> {
233  fn from(s: &'s [u8]) -> Self { Self::from_bytes_with_nul(s) }
234}
235
236impl<'s> From<MeasuredNullTermStr<'s>> for &'s [u8] {
237  fn from(s: MeasuredNullTermStr<'s>) -> Self { s.as_bytes_with_nul() }
238}
239
240impl<'s> From<&'s ffi::CStr> for MeasuredNullTermStr<'s> {
241  fn from(s: &'s ffi::CStr) -> Self {
242    unsafe { Self::from_bytes_with_nul_unchecked(s.to_bytes_with_nul()) }
243  }
244}
245
246impl<'s> From<MeasuredNullTermStr<'s>> for &'s ffi::CStr {
247  fn from(s: MeasuredNullTermStr<'s>) -> &'s ffi::CStr {
248    unsafe { ffi::CStr::from_bytes_with_nul_unchecked(s.as_bytes_with_nul()) }
249  }
250}
251
252impl cmp::PartialEq for MeasuredNullTermStr<'_> {
253  fn eq(&self, rhs: &Self) -> bool { self.as_bytes().eq(rhs.as_bytes()) }
254}
255impl cmp::Eq for MeasuredNullTermStr<'_> {}
256impl cmp::PartialOrd for MeasuredNullTermStr<'_> {
257  fn partial_cmp(&self, rhs: &Self) -> Option<cmp::Ordering> { Some(self.cmp(rhs)) }
258}
259impl cmp::Ord for MeasuredNullTermStr<'_> {
260  fn cmp(&self, rhs: &Self) -> cmp::Ordering { self.as_bytes().cmp(rhs.as_bytes()) }
261}
262impl hash::Hash for MeasuredNullTermStr<'_> {
263  fn hash<H>(&self, state: &mut H)
264  where H: hash::Hasher {
265    self.as_bytes_with_nul().hash(state);
266  }
267}
268
269impl fmt::Debug for MeasuredNullTermStr<'_> {
270  fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
271    let s: &ffi::CStr = (*self).into();
272    f.debug_tuple("MeasuredNullTermStr")
273      .field(&self.s.start)
274      .field(&self.n)
275      .field(&s)
276      .finish()
277  }
278}
279
/// Owned version of [`MeasuredNullTermStr`].
///
/// Create with [`Self::new()`], then copy over data with [`MeasuredNullTermStr::clone_into()`].
///
/// The wrapped vector is either empty (a string that has never been filled in) or holds the
/// string data followed by a single trailing null byte. A never-filled value is treated as the
/// empty string by the conversions and the `Debug` implementation in this block.
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct NullTermString(Vec<u8>);

impl NullTermString {
  /// Create a new owned string without any data reserved.
  pub const fn new() -> Self { Self(Vec::new()) }

  /// Create a new owned string with at least `n` bytes reserved.
  pub fn with_capacity(n: usize) -> Self { Self(Vec::with_capacity(n)) }

  /* Borrow the contents as a `CStr`, mapping a never-filled buffer to the empty string. */
  fn as_c_str(&self) -> &ffi::CStr {
    match self.0.as_slice() {
      [] => <&ffi::CStr>::default(),
      /* SAFETY: a non-empty buffer holds string data terminated by a single null byte. */
      bytes => unsafe { ffi::CStr::from_bytes_with_nul_unchecked(bytes) },
    }
  }
}

impl Default for NullTermString {
  fn default() -> Self { Self::new() }
}

/// Convert into a [`ffi::CString`], reusing the existing allocation.
///
/// A never-filled string converts to the empty `CString`.
impl From<NullTermString> for ffi::CString {
  fn from(x: NullTermString) -> Self {
    let NullTermString(mut v) = x;
    /* `from_vec_with_nul_unchecked()` requires a trailing null byte, which a string that was
     * created by `new()` and never filled does not have yet. */
    if v.is_empty() {
      v.push(0);
    }
    /* SAFETY: `v` is non-empty here and ends with its only null byte. */
    unsafe { ffi::CString::from_vec_with_nul_unchecked(v) }
  }
}

/// Take over the buffer of a [`ffi::CString`], including its trailing null byte.
impl From<ffi::CString> for NullTermString {
  fn from(x: ffi::CString) -> Self { Self(x.into_bytes_with_nul()) }
}

/// Debug output shows the string contents, formatted like a C string.
impl fmt::Debug for NullTermString {
  fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
    /* Borrow the contents instead of cloning the whole buffer just to print it. */
    f.debug_tuple("NullTermString")
      .field(&self.as_c_str())
      .finish()
  }
}
315
316/// Allow [`NullTermString`] instances to be interpreted as references to [`MeasuredNullTermStr`].
317pub trait AsNullTermStr {
318  /// Similar to [`AsRef`] or [`Borrow`](std::borrow::Borrow).
319  ///
320  /// This is necessary because those traits require the use of DSTs (dynamically-sized types) and
321  /// cannot be made to work with tagged lifetimes like we use in this module.
322  fn as_null_term_str(&self) -> MeasuredNullTermStr<'_>;
323}
324
325impl AsNullTermStr for MeasuredNullTermStr<'_> {
326  fn as_null_term_str(&self) -> MeasuredNullTermStr<'_> { *self }
327}
328
329impl AsNullTermStr for NullTermString {
330  fn as_null_term_str(&self) -> MeasuredNullTermStr<'_> {
331    unsafe { MeasuredNullTermStr::from_bytes_with_nul_unchecked(&self.0) }
332  }
333}
334
335
#[cfg(test)]
mod test {
  use proptest::{prelude::*, string::bytes_regex};

  use super::*;

  /* Both always-present directory entries must be recognized. */
  #[test]
  fn matches_dir_entries() {
    let s = NullTermString(vec![b'.', 0]);
    let s = s.as_null_term_str().as_unmeasured();
    assert!(s.match_dir_entries_unmeasured());

    let s = NullTermString(vec![b'.', b'.', 0]);
    let s = s.as_null_term_str().as_unmeasured();
    assert!(s.match_dir_entries_unmeasured());
  }

  proptest! {
    /* Any null-free string other than "." and ".." must be rejected. */
    #[test]
    fn not_dir_entries(
      mut s in bytes_regex("(?s-u:[^\x00]*)").unwrap()
        .prop_filter("not '.' or '..'",
                     |v| !(&v[..] == b"." || &v[..] == b".."))
    ) {
      s.push(0);
      let s = NullTermString(s);
      let s = s.as_null_term_str().as_unmeasured();
      prop_assert!(!s.match_dir_entries_unmeasured());
    }

    /* Converting to `CString` and back must preserve the buffer. */
    #[test]
    fn cstring_roundtrip(mut s in bytes_regex("(?s-u:[^\x00]*)").unwrap()) {
      /* The conversion to `CString` goes through `from_vec_with_nul_unchecked()`, which requires
       * a single null byte at the very end, so only valid null-terminated input may be used. */
      s.push(0);
      let n1 = NullTermString(s);
      let c1: ffi::CString = n1.clone().into();
      let n2: NullTermString = c1.into();
      prop_assert_eq!(n1, n2);
    }

    /* The owned and the borrowed route to a C string must agree. */
    #[test]
    fn nonnull_roundtrip(mut s in bytes_regex("(?s-u:[^\x00]*)").unwrap()) {
      s.push(0);
      let s = NullTermString(s);
      let c: ffi::CString = s.clone().into();
      let c2: &ffi::CStr = s.as_null_term_str().into();
      prop_assert_eq!(c.as_c_str(), c2);
    }

    /* Converting to a byte slice and back must preserve the string. */
    #[test]
    fn slice_roundtrip(mut s in bytes_regex("(?s-u:[^\x00]*)").unwrap()) {
      s.push(0);
      let s = NullTermString(s);
      let s: MeasuredNullTermStr = s.as_null_term_str();
      let sl: &[u8] = s.into();
      let s2: MeasuredNullTermStr = sl.into();
      prop_assert_eq!(s, s2);
      let sl2: &[u8] = s2.into();
      prop_assert_eq!(sl, sl2);
    }

    /* The byte views with and without the trailing null byte must match the input. */
    #[test]
    fn nonnull_ref(s in bytes_regex("(?s-u:[^\x00]*)").unwrap()) {
      let mut t = s.clone();
      t.push(0);
      let v = NullTermString(t.clone());
      prop_assert_eq!(&t[..], v.as_null_term_str().as_bytes_with_nul());
      prop_assert_eq!(&s[..], v.as_null_term_str().as_bytes());
    }

    /* Re-measuring with `strlen()` must agree with the length derived from the slice. */
    #[test]
    fn nonnull_measure(mut s in bytes_regex("(?s-u:[^\x00]*)").unwrap()) {
      s.push(0);
      let s = NullTermString(s);
      let s = s.as_null_term_str();
      prop_assert_eq!(s, s.as_unmeasured().measure());
    }

    /* Copying into a fresh owned string must reproduce the original exactly. */
    #[test]
    fn nonnull_clone_into(mut s in bytes_regex("(?s-u:[^\x00]*)").unwrap()) {
      s.push(0);
      let v = NullTermString(s);
      let s = v.as_null_term_str();
      let mut v2 = NullTermString(Vec::new());
      s.clone_into(&mut v2);
      let s2 = v2.as_null_term_str();
      prop_assert_eq!(s, s2);
      prop_assert_eq!(v, v2);
    }
  }
}