measure_null_str/
lib.rs

1/*
2 * Description: Handle null-terminated strings with explicit length measurement.
3 *
4 * Copyright (C) 2025 d@nny mc² <dmc2@hypnicjerk.ai>
5 * SPDX-License-Identifier: LGPL-3.0-or-later
6 *
7 * This program is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU Lesser General Public License as published
9 * by the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public License
18 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
19 */
20
21//! Handle null-terminated strings with explicit length measurement.
22
23/* Ensure any doctest warnings fail the doctest! */
24#![doc(test(attr(deny(warnings))))]
25#![deny(missing_docs)]
26#![cfg_attr(feature = "nightly", feature(maybe_uninit_write_slice))]
27
28use std::{borrow, cmp, ffi, fmt, hash, marker::PhantomData, num, ptr, slice};
29
30
/// A borrowed, null-terminated C string whose length has not been computed yet.
///
/// This fills the same role as [`ffi::CStr`], except that [`Self::from_ptr()`] never performs an
/// implicit [`libc::strlen()`].
///
/// The stdlib docs describe a length-free representation as the eventual goal for `CStr`. Here the
/// point is to decide explicitly *where* the measurement happens (see [`Self::measure()`]), so the
/// scan can be postponed until the string data is actually needed.
#[derive(Copy, Clone)]
#[repr(transparent)]
pub struct NullTermStr<'s> {
  /// Address of the first byte of the string.
  start: *const ffi::c_char,
  /// Zero-sized marker tying this wrapper to the lifetime `'s`.
  _ph: PhantomData<&'s ffi::c_char>,
}
43
44impl<'s> NullTermStr<'s> {
45  /// Wrap a null-terminated C-style string, *without* performing any length calculation or reading
46  /// any bytes from the pointed-to value.
47  ///
48  /// # Safety
49  /// `start` must be non-null, and point to a contiguous region of memory which ends with
50  /// a null byte. This is the same requirement as for [`ffi::CStr::from_ptr()`].
51  #[inline]
52  pub const unsafe fn from_ptr(start: *const ffi::c_char) -> Self {
53    Self {
54      start,
55      _ph: PhantomData,
56    }
57  }
58
59  /// Return the input that was provided to [`Self::from_ptr()`].
60  #[inline]
61  pub const fn as_ptr(&self) -> *const ffi::c_char { self.start }
62
63  /// Execute [`libc::strlen()`] to calculate the length of the pointed-to string.
64  #[inline]
65  pub fn measure(self) -> &'s MeasuredNullTermStr {
66    let n = unsafe { libc::strlen(self.start) };
67    unsafe { MeasuredNullTermStr::given_measurement(self, n) }
68  }
69
70  /// Return whether this matches either of the always-present directory entries `"."` or `".."`.
71  ///
72  /// This is performed without any call to `strlen()`, and will not read past the first null byte.
73  #[inline]
74  pub fn match_dir_entries_unmeasured(&self) -> bool {
75    let mut p: *const u8 = self.start.cast();
76    match unsafe { p.read() } {
77      /* This is a zero-length string (should never happen with directory entries). */
78      0 => return false,
79      /* This begins with a '.' character, so continue. */
80      b'.' => (),
81      _ => return false,
82    }
83    /* We know it's not terminated yet, so we can advance the pointer by 1. */
84    p = unsafe { p.add(1) };
85    match unsafe { p.read() } {
86      /* This was the string ".". */
87      0 => return true,
88      /* This is ".." so far. */
89      b'.' => (),
90      _ => return false,
91    }
92    /* Advance a final time. */
93    p = unsafe { p.add(1) };
94    match unsafe { p.read() } {
95      /* This was the string "..". */
96      0 => true,
97      _ => false,
98    }
99  }
100}
101
102impl cmp::PartialEq for NullTermStr<'_> {
103  fn eq(&self, rhs: &Self) -> bool { ptr::eq(self.start, rhs.start) }
104}
105impl cmp::Eq for NullTermStr<'_> {}
106impl hash::Hash for NullTermStr<'_> {
107  fn hash<H>(&self, state: &mut H)
108  where H: hash::Hasher {
109    ptr::hash(self.start, state);
110  }
111}
112
113impl fmt::Debug for NullTermStr<'_> {
114  fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
115    let s: &ffi::CStr = self.measure().into();
116    f.debug_tuple("NullTermStr")
117      .field(&self.start)
118      .field(&s)
119      .finish()
120  }
121}
122
/// A null-terminated string that explicitly knows the length of its string data.
///
/// This is the more direct analog to [`ffi::CStr`]. It is a dynamically-sized type and is only
/// ever handled behind a reference.
#[repr(transparent)]
pub struct MeasuredNullTermStr {
  /// The string bytes, *including* the trailing null byte.
  inner: [ffi::c_char],
}
128
129impl MeasuredNullTermStr {
130  /// Construct a C-style string with a known length.
131  ///
132  /// # Safety
133  /// The length `n` must correctly describe the number of bytes in the string value pointed to by
134  /// `s`, *without* including the trailing null byte. This is to say that `n` may be 0, but the
135  /// string pointed to by `s` must be non-null, and must point to a region of memory exactly 1 byte
136  /// longer than `n`.
137  ///
138  /// If the string's length is not known in advance, use [`NullTermStr::measure()`], which
139  /// internally calls this method.
140  #[inline]
141  pub const unsafe fn given_measurement<'s>(s: NullTermStr<'s>, n: usize) -> &'s Self {
142    let n = unsafe { n.unchecked_add(1) };
143    let bytes: &'s [ffi::c_char] = unsafe { slice::from_raw_parts(s.as_ptr(), n) };
144    unsafe { &*(bytes as *const [ffi::c_char] as *const Self) }
145  }
146
147  /// Translate a reference to a byte slice into a measured null-terminated string.
148  ///
149  /// This method will panic if `s` is empty, if it does not end with a null byte, or if it contains
150  /// any internal null bytes.
151  #[inline]
152  pub fn from_bytes_with_nul<'s>(s: &'s [u8]) -> &'s Self {
153    assert!(!s.is_empty(), "expected non-empty slice");
154    assert_eq!(s[s.len() - 1], 0, "slice must end with null byte");
155    assert_eq!(
156      memchr::memchr(0, s),
157      Some(s.len() - 1),
158      "slice must contain no internal null bytes"
159    );
160    unsafe { Self::from_bytes_with_nul_unchecked(s) }
161  }
162
163  /// Translate a reference to a byte slice into a measured null-terminated string.
164  ///
165  /// # Safety
166  /// `s` must be non-empty, must end with a null byte, and may not contain any internal null
167  /// bytes. [`Self::from_bytes_with_nul()`] may be used to validate slices.
168  #[inline]
169  pub const unsafe fn from_bytes_with_nul_unchecked<'s>(s: &'s [u8]) -> &'s Self {
170    let bytes: &'s [ffi::c_char] = unsafe { slice::from_raw_parts(s.as_ptr().cast(), s.len()) };
171    unsafe { &*(bytes as *const [ffi::c_char] as *const Self) }
172  }
173
174  /// Return a slice of the internal string data, *without* trailing null byte.
175  #[inline]
176  pub const fn as_bytes(&self) -> &[u8] {
177    unsafe { slice::from_raw_parts(self.as_ptr().cast(), self.len()) }
178  }
179
180  /// Translate this data to a platform-specific string, suitable for translation to
181  /// [`Path`](std::path::Path).
182  #[inline]
183  pub fn as_os_str(&self) -> &ffi::OsStr {
184    use std::os::unix::ffi::OsStrExt;
185    ffi::OsStr::from_bytes(self.as_bytes())
186  }
187
188  #[inline]
189  const fn as_ptr(&self) -> *const ffi::c_char { self.as_unmeasured().as_ptr() }
190
191  /// Length of the internal string data, *without* trailing null byte.
192  #[inline]
193  pub const fn len(&self) -> usize { unsafe { self.inner.len().unchecked_sub(1) } }
194
195  /// Whether the internal string data points to a single null byte.
196  #[inline]
197  pub const fn is_empty(&self) -> bool { self.len() == 0 }
198
199  /// Length of the internal string data, *with* trailing null byte.
200  #[inline]
201  pub const fn len_with_nul(&self) -> num::NonZeroUsize {
202    unsafe { num::NonZeroUsize::new_unchecked(self.inner.len()) }
203  }
204
205  /// Return a slice of the internal string data, *with* trailing null byte.
206  #[inline]
207  pub const fn as_bytes_with_nul(&self) -> &[u8] {
208    unsafe { slice::from_raw_parts(self.as_ptr().cast(), self.len_with_nul().get()) }
209  }
210
211  /// Retrieve the internal string data, which may be converted to a pointer again.
212  #[inline]
213  pub const fn as_unmeasured<'s>(&'s self) -> NullTermStr<'s> {
214    unsafe { NullTermStr::from_ptr(self.inner.as_ptr()) }
215  }
216
217  /// Allocate the necessary space in `v` and copy over the internal string data.
218  ///
219  /// `v`'s length will be reset to the length of the internal string data, although it will not
220  /// reallocate the underlying vector.
221  #[inline]
222  pub fn clone_into(&self, v: &mut NullTermString) {
223    let NullTermString(v) = v;
224    v.clear();
225    let src = self.as_bytes_with_nul();
226    v.reserve(src.len());
227    unsafe {
228      cfg_if::cfg_if! {
229        if #[cfg(feature = "nightly")] {
230          v.spare_capacity_mut()[..src.len()].write_copy_of_slice(src);
231        } else {
232          v.as_mut_ptr()
233            .copy_from_nonoverlapping(src.as_ptr(), src.len());
234        }
235      }
236      v.set_len(src.len());
237    }
238  }
239}
240
241impl<'s> From<&'s [u8]> for &'s MeasuredNullTermStr {
242  fn from(s: &'s [u8]) -> Self { MeasuredNullTermStr::from_bytes_with_nul(s) }
243}
244
245impl<'s> From<&'s MeasuredNullTermStr> for &'s [u8] {
246  fn from(s: &'s MeasuredNullTermStr) -> Self { s.as_bytes_with_nul() }
247}
248
249impl<'s> From<&'s ffi::CStr> for &'s MeasuredNullTermStr {
250  fn from(s: &'s ffi::CStr) -> Self {
251    unsafe { MeasuredNullTermStr::from_bytes_with_nul_unchecked(s.to_bytes_with_nul()) }
252  }
253}
254
255impl<'s> From<&'s MeasuredNullTermStr> for &'s ffi::CStr {
256  fn from(s: &'s MeasuredNullTermStr) -> &'s ffi::CStr {
257    unsafe { ffi::CStr::from_bytes_with_nul_unchecked(s.as_bytes_with_nul()) }
258  }
259}
260
261impl cmp::PartialEq for MeasuredNullTermStr {
262  fn eq(&self, rhs: &Self) -> bool { self.as_bytes().eq(rhs.as_bytes()) }
263}
264impl cmp::Eq for MeasuredNullTermStr {}
265impl cmp::PartialOrd for MeasuredNullTermStr {
266  fn partial_cmp(&self, rhs: &Self) -> Option<cmp::Ordering> { Some(self.cmp(rhs)) }
267}
268impl cmp::Ord for MeasuredNullTermStr {
269  fn cmp(&self, rhs: &Self) -> cmp::Ordering { self.as_bytes().cmp(rhs.as_bytes()) }
270}
271impl hash::Hash for MeasuredNullTermStr {
272  fn hash<H>(&self, state: &mut H)
273  where H: hash::Hasher {
274    self.as_bytes_with_nul().hash(state);
275  }
276}
277
278impl fmt::Debug for MeasuredNullTermStr {
279  fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
280    let s: &ffi::CStr = (*self).into();
281    f.debug_tuple("MeasuredNullTermStr")
282      .field(&self.as_ptr())
283      .field(&self.len())
284      .field(&s)
285      .finish()
286  }
287}
288
/// Owned version of [`MeasuredNullTermStr`].
///
/// The usual workflow is to create an instance with [`Self::new()`] and then fill it with
/// [`MeasuredNullTermStr::clone_into()`].
///
/// The derived comparison and hashing traits operate on the raw byte buffer.
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct NullTermString(Vec<u8>);
294
295impl NullTermString {
296  /// Create a new owned string without any data reserved.
297  pub const fn new() -> Self { Self(Vec::new()) }
298
299  /// Create a new owned string with at least `n` bytes reserved.
300  pub fn with_capacity(n: usize) -> Self { Self(Vec::with_capacity(n)) }
301}
302
303impl Default for NullTermString {
304  fn default() -> Self { Self::new() }
305}
306
307impl From<NullTermString> for ffi::CString {
308  fn from(x: NullTermString) -> Self {
309    let NullTermString(v) = x;
310    unsafe { ffi::CString::from_vec_with_nul_unchecked(v) }
311  }
312}
313
314impl From<ffi::CString> for NullTermString {
315  fn from(x: ffi::CString) -> Self { Self(x.into_bytes_with_nul()) }
316}
317
318impl fmt::Debug for NullTermString {
319  fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
320    let s: ffi::CString = self.clone().into();
321    f.debug_tuple("NullTermString").field(&s).finish()
322  }
323}
324
325impl borrow::Borrow<MeasuredNullTermStr> for NullTermString {
326  fn borrow(&self) -> &MeasuredNullTermStr { self.as_null_term_str() }
327}
328
329impl borrow::ToOwned for MeasuredNullTermStr {
330  type Owned = NullTermString;
331
332  fn to_owned(&self) -> Self::Owned {
333    let mut ret = NullTermString::default();
334    Self::clone_into(self, &mut ret);
335    ret
336  }
337
338  fn clone_into(&self, target: &mut Self::Owned) { Self::clone_into(self, target); }
339}
340
341/// Allow [`NullTermString`] instances to be interpreted as references to [`MeasuredNullTermStr`].
342pub trait AsNullTermStr {
343  /// Similar to [`AsRef`] or [`Borrow`](std::borrow::Borrow).
344  ///
345  /// This is necessary because those traits require the use of DSTs (dynamically-sized types) and
346  /// cannot be made to work with tagged lifetimes like we use in this module.
347  fn as_null_term_str(&self) -> &MeasuredNullTermStr;
348}
349
350impl AsNullTermStr for MeasuredNullTermStr {
351  fn as_null_term_str(&self) -> &MeasuredNullTermStr { self }
352}
353
354impl AsNullTermStr for NullTermString {
355  fn as_null_term_str(&self) -> &MeasuredNullTermStr {
356    unsafe { MeasuredNullTermStr::from_bytes_with_nul_unchecked(&self.0) }
357  }
358}
359
360
#[cfg(test)]
mod test {
  use proptest::{prelude::*, string::bytes_regex};

  use super::*;

  /* The two always-present directory entries must be recognized. */
  #[test]
  fn matches_dir_entries() {
    let s = NullTermString(vec![b'.', 0]);
    let s = s.as_null_term_str().as_unmeasured();
    assert!(s.match_dir_entries_unmeasured());

    let s = NullTermString(vec![b'.', b'.', 0]);
    let s = s.as_null_term_str().as_unmeasured();
    assert!(s.match_dir_entries_unmeasured());
  }

  proptest! {
    /* Any other null-free string must be rejected. */
    #[test]
    fn not_dir_entries(
      mut s in bytes_regex("(?s-u:[^\x00]*)").unwrap()
        .prop_filter("not '.' or '..'",
                     |v| !(&v[..] == b"." || &v[..] == b".."))
    ) {
      s.push(0);
      let s = NullTermString(s);
      let s = s.as_null_term_str().as_unmeasured();
      prop_assert!(!s.match_dir_entries_unmeasured());
    }

    /* Converting to a `CString` and back must preserve the buffer.
     *
     * The input is generated null-free and then terminated: the conversion goes through
     * `CString::from_vec_with_nul_unchecked()`, whose safety contract arbitrary byte vectors
     * (empty, unterminated, or containing interior null bytes) would violate. */
    #[test]
    fn cstring_roundtrip(mut s in bytes_regex("(?s-u:[^\x00]*)").unwrap()) {
      s.push(0);
      let n1 = NullTermString(s);
      let c1: ffi::CString = n1.clone().into();
      let n2: NullTermString = c1.into();
      prop_assert_eq!(n1, n2);
    }

    /* The owned and borrowed conversions to C strings must agree. */
    #[test]
    fn nonnull_roundtrip(mut s in bytes_regex("(?s-u:[^\x00]*)").unwrap()) {
      s.push(0);
      let s = NullTermString(s);
      let c: ffi::CString = s.clone().into();
      let c2: &ffi::CStr = s.as_null_term_str().into();
      prop_assert_eq!(c.as_c_str(), c2);
    }

    /* Converting to a byte slice and back must be lossless in both directions. */
    #[test]
    fn slice_roundtrip(mut s in bytes_regex("(?s-u:[^\x00]*)").unwrap()) {
      s.push(0);
      let s = NullTermString(s);
      let s: &MeasuredNullTermStr = s.as_null_term_str();
      let sl: &[u8] = s.into();
      let s2: &MeasuredNullTermStr = sl.into();
      prop_assert_eq!(s, s2);
      let sl2: &[u8] = s2.into();
      prop_assert_eq!(sl, sl2);
    }

    /* The byte views must match the input with and without the terminator. */
    #[test]
    fn nonnull_ref(s in bytes_regex("(?s-u:[^\x00]*)").unwrap()) {
      let mut t = s.clone();
      t.push(0);
      let v = NullTermString(t.clone());
      prop_assert_eq!(&t[..], v.as_null_term_str().as_bytes_with_nul());
      prop_assert_eq!(&s[..], v.as_null_term_str().as_bytes());
    }

    /* Forgetting the length and measuring again must yield an equal string. */
    #[test]
    fn nonnull_measure(mut s in bytes_regex("(?s-u:[^\x00]*)").unwrap()) {
      s.push(0);
      let s = NullTermString(s);
      let s = s.as_null_term_str();
      prop_assert_eq!(s, s.as_unmeasured().measure());
    }

    /* Copying into a fresh owned string must reproduce both the borrowed and owned values. */
    #[test]
    fn nonnull_clone_into(mut s in bytes_regex("(?s-u:[^\x00]*)").unwrap()) {
      s.push(0);
      let v = NullTermString(s);
      let s = v.as_null_term_str();
      let mut v2 = NullTermString(Vec::new());
      s.clone_into(&mut v2);
      let s2 = v2.as_null_term_str();
      prop_assert_eq!(s, s2);
      prop_assert_eq!(v, v2);
    }
  }
}