str_concat/
lib.rs

1#![no_std]
2use core::{mem, slice, str};
3
/// Error that can occur during [`concat`](fn.concat.html) and the other
/// concatenation functions of this crate.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Error {
    /// The passed slices are not adjacent in memory, were passed in the
    /// wrong order, or are slices of a zero-sized type.
    NotAdjacent,
    /// The concatenated result would be too long: its size in bytes would
    /// exceed `isize::MAX`.
    TooLong,
}

// `Display` gives callers a human-readable message without requiring `std`;
// the paths are fully qualified so no additional `use` is needed.
impl core::fmt::Display for Error {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        f.write_str(match *self {
            Error::NotAdjacent => "the slices are not adjacent in memory",
            Error::TooLong => "the concatenated slice would exceed isize::MAX bytes",
        })
    }
}
12
13/// Concatenate two string slices if they are adjacent.
14///
15/// If two strs are adjacent to each other in memory, this function
16/// concatenates both, creating a single str.
17///
18/// # Errors
19///
20/// Returns `Err` if the two slices aren't adjacent, `a` is after `b`, or if
21/// `a` is too long for proper concatenation (longer than `isize::MAX`).
22///
23/// # Safety
24///
25/// The provided slices must come from the same underlying allocation. The adjacency test can not
26/// reliably differentiate between the one-past-the-end pointer of one allocation and the start of
27/// another. However, all slices must be within a single allocation.
28///
29/// # Examples
30///
31/// Correct usage:
32///
33/// ```rust
34/// # use str_concat::concat;
35/// let s = "0123456789";
36/// unsafe {
37///     // SAFETY: slices from the same str originally.
38///     assert_eq!("0123456", concat(&s[..5], &s[5..7]).unwrap());
39/// }
40/// ```
41///
42/// Non-adjacent string slices:
43///
44/// ```rust
45/// # use str_concat::{concat, Error};
46/// let s = "0123456789";
47/// unsafe {
48///     // SAFETY: slices from the same str originally.
49///     assert_eq!(Err(Error::NotAdjacent), concat(&s[..5], &s[6..7]))
50/// }
51/// ```
52pub unsafe fn concat<'a>(a: &'a str, b: &'a str) -> Result<&'a str, Error> {
53    let slice = concat_slice(a.as_bytes(), b.as_bytes())?;
54
55    // * concatenating two valid UTF8 strings will produce a valid UTF8 string
56    // * a BOM in `b` is still valid:
57    //   > It is important to understand that the character U+FEFF appearing at
58    //   > any position other than the beginning of a stream MUST be interpreted
59    //   > with the semantics for the zero-width non-breaking space, and MUST
60    //   > NOT be interpreted as a signature.
61    // * the grapheme *clusters* (and thus potentially the semantics of the string
62    //   might change if the first code point of `b` is a combining character,
63    //   a zero width joiner or similar.
64    //   This does not affect the correctness of UTF-8.
65    Ok(str::from_utf8_unchecked(slice))
66}
67
68/// Concatenate two slices if they are adjacent.
69///
70/// If two slices are adjacent to each other in memory, this function
71/// concatenates both, creating a single longer slice. Note that slices of
72/// zero-sized types (ZST) are never considered adjacent. Otherwise it would be
73/// possible to concatenate a slice to itself.
74///
75/// # Errors
76///
77/// Returns `Err` if the two slices aren't adjacent, `a` is after `b`, or if the
78/// result is too long to be represented as a slice (size in bytes is larger
79/// than `isize::MAX`).
80///
81/// When T is a zero-sized type (ZST) then always returns `Err(NotAdjacent)` otherwise. This is
82/// because ZST-slices are [extra weird][zst-str-concat] and [their safety][zst-unsafe-wg1] is not
83/// yet [fully determined][zst-unsafe-wg2].
84///
85/// [zst-str-concat]: https://github.com/oberien/str-concat/issues/5
86/// [zst-unsafe-wg1]: https://github.com/rust-lang/unsafe-code-guidelines/issues/93
87/// [zst-unsafe-wg2]: https://github.com/rust-lang/unsafe-code-guidelines/issues/168
88///
89/// # Safety
90///
91/// The provided slices must come from the same underlying allocation. The adjacency test can not
92/// reliably differentiate between the one-past-the-end pointer of one allocation and the start of
93/// another. However, all slices must be within a single allocation.
94///
95/// # Examples
96///
97/// Correct usage:
98///
99/// ```rust
100/// # use str_concat::concat_slice;
101/// let s = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
102/// unsafe {
103///     // SAFETY: slices from the same bytes originally.
104///     assert_eq!(
105///         [0, 1, 2, 3, 4, 5, 6], 
106///         concat_slice(&s[..5], &s[5..7]).unwrap());
107/// }
108/// ```
109///
110/// Non-adjacent byte slices:
111///
112/// ```rust
113/// # use str_concat::{concat_slice, Error};
114/// let s = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
115/// unsafe {
116///     // SAFETY: slices from the same bytes originally.
117///     assert_eq!(Err(Error::NotAdjacent), concat_slice(&s[..5], &s[6..7]))
118/// }
119/// ```
120///
121pub unsafe fn concat_slice<'a, T>(a: &'a [T], b: &'a [T]) -> Result<&'a [T], Error> {
122    let a_ptr = a.as_ptr();
123    let b_ptr = b.as_ptr();
124
125    let a_len = a.len();
126    let b_len = b.len();
127
128    if mem::size_of::<T>() == 0 {
129        // NOTE(HeroicKatora)
130        // Never consider ZST slices adjacent through this function. You could
131        // infinitely duplicate a non-zero length slice by concatenating it to
132        // itself as opposed to non-ZST slice types. That would just be weird.
133        //
134        // It is however safe.
135        // See: https://github.com/rust-lang/unsafe-code-guidelines/issues/93
136        // and https://github.com/rust-lang/unsafe-code-guidelines/issues/168
137        // Issue: https://github.com/oberien/str-concat/issues/5
138        return Err(Error::NotAdjacent)
139    }
140
141    // `max_len <= isize::max_value()`
142    let max_len = isize::max_value() as usize / mem::size_of::<T>();
143
144    // These should be guaranteed for the slices.
145    assert!(a_len <= max_len as usize);
146    assert!(b_len <= max_len as usize);
147
148    // https://doc.rust-lang.org/std/primitive.pointer.html#safety-1
149    // * starting pointer in-bounds obviously
150    // * ending pointer one byte past the end of an allocated object
151    // * explicit isize overflow check above
152    // * no wraparound required
153    // why: this is the one byte past the end pointer for the input slice `a`
154    if a_ptr.offset(a_len as isize) != b_ptr {
155        return Err(Error::NotAdjacent);
156    }
157    // UNWRAP: both smaller than isize, can't wrap in usize.
158    // This is because in rust `usize` and `isize` are both guaranteed to have
159    // the same number of bits as a pointer [1]. As `isize` is signed, a `usize`
160    // can always store the sum of two positive `isize`.
161    // [1]: https://doc.rust-lang.org/reference/types/numeric.html#machine-dependent-integer-types
162    let new_len = a_len.checked_add(b_len).unwrap();
163    // Ensure the length is bounded. The bound is strict from the definition of `max_len`
164    // `new_len <= max_len` <=> `new_len * mem::size_of::<T>() <= isize::max_value()`
165    if !(new_len <= max_len) {
166        return Err(Error::TooLong);
167    }
168    // https://doc.rust-lang.org/std/slice/fn.from_raw_parts.html#safety
169    // * slices are adjacent (checked above)
170    // * no double-free / leak because we work on borrowed data
171    // * no use-after-free because `a` and `b` have same lifetime
172    // * the total size is smaller than `isize::MAX` bytes, as max_len is rounded down
173    Ok(slice::from_raw_parts(a_ptr, new_len))
174}
175
176/// Concatenate two adjacent string slices no matter their order.
177///
178/// This is the same as [`concat`] except that it also concatenates
179/// `b` to `a` if `b` is in front of `a` (in which case [`concat`] errors).
180///
181/// # Safety
182///
183/// The provided slices must come from the same underlying allocation. The adjacency test can not
184/// reliably differentiate between the one-past-the-end pointer of one allocation and the start of
185/// another. However, all slices must be within a single allocation.
186///
187/// # Examples
188///
189/// Reversed order:
190///
191/// ```rust
192/// # use str_concat::concat_unordered;
193/// let s = "0123456789";
194/// unsafe {
195///     // SAFETY: slices from the same str originally.
196///     assert_eq!("0123456", concat_unordered(&s[5..7], &s[..5]).unwrap());
197/// }
198/// ```
199///
200/// Normal order:
201///
202/// ```rust
203/// # use str_concat::{concat_unordered, Error};
204/// let s = "0123456789";
205/// unsafe {
206///     // SAFETY: slices from the same str originally.
207///     assert_eq!("0123456", concat_unordered(&s[..5], &s[5..7]).unwrap())
208/// }
209/// ```
210///
211/// [`concat`]: fn.concat.html
212pub unsafe fn concat_unordered<'a>(a: &'a str, b: &'a str) -> Result<&'a str, Error> {
213    // add lengths to handle empty-string cases correctly
214    let a_ptr = a.as_bytes().as_ptr() as usize;
215    let a_end_ptr = a_ptr + a.len();
216    let b_ptr = b.as_bytes().as_ptr() as usize;
217
218    // make the order of `a` and `b` not matter
219    let (a, b) = if a_ptr <= b_ptr && a_end_ptr <= b_ptr {
220        (a, b)
221    } else {
222        (b, a)
223    };
224
225    concat(a, b)
226}
227
228/// Concatenate two adjacent slices no matter their order.
229///
230/// This is the same as [`concat_slice`] except that it also concatenates `b` to
231/// `a` if `b` is in front of `a` (in which case of [`concat_slice`] errors).
232/// Keep in mind that slices of zero-sized types (ZST) will still not be concatenated.
233///
234/// # Safety
235///
236/// The provided slices must come from the same underlying allocation. The adjacency test can not
237/// reliably differentiate between the one-past-the-end pointer of one allocation and the start of
238/// another. However, all slices must be within a single allocation.
239///
240/// # Examples
241///
242/// Reversed order:
243///
244/// ```rust
245/// # use str_concat::concat_slice_unordered;
246/// let s = [0, 1, 2, 3, 4, 5, 6];
247/// unsafe {
248///     // SAFETY: slices from the same bytes originally.
249///     assert_eq!(
250///         [0, 1, 2, 3, 4, 5, 6],
251///         concat_slice_unordered(&s[5..7], &s[..5]).unwrap());
252/// }
253/// ```
254///
255/// Normal order:
256///
257/// ```rust
258/// # use str_concat::{concat_slice_unordered, Error};
259/// let s = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
260/// unsafe {
261///     // SAFETY: slices from the same bytes originally.
262///     assert_eq!(
263///         [0, 1, 2, 3, 4, 5, 6],
264///         concat_slice_unordered(&s[..5], &s[5..7]).unwrap())
265/// }
266/// ```
267///
268/// [`concat_slice`]: fn.concat_slice.html
269pub unsafe fn concat_slice_unordered<'a, T>(a: &'a [T], b: &'a [T]) -> Result<&'a [T], Error> {
270    // add lengths to handle empty cases correctly
271    let a_ptr = a.as_ptr() as usize;
272    let a_end_ptr = a_ptr + a.len() * mem::size_of::<T>();
273    let b_ptr = b.as_ptr() as usize;
274
275    // make the order of `a` and `b` not matter
276    let (a, b) = if a_ptr <= b_ptr && a_end_ptr <= b_ptr {
277        (a, b)
278    } else {
279        (b, a)
280    };
281
282    concat_slice(a, b)
283}
284
#[cfg(test)]
mod tests {
    use super::{concat, concat_slice, concat_slice_unordered, concat_unordered, Error};

    /// Adjacent parts in memory order are joined by both string functions.
    #[test]
    fn simple_success() {
        let s = "0123456789";
        let (head, tail) = (&s[..5], &s[5..7]);
        unsafe {
            assert_eq!(concat(head, tail), Ok("0123456"));
            assert_eq!(concat_unordered(head, tail), Ok("0123456"));
        }
    }

    /// Reversed arguments fail for `concat` but succeed for `concat_unordered`.
    #[test]
    fn unordered() {
        let s = "0123456789";
        let (head, tail) = (&s[..5], &s[5..7]);
        unsafe {
            assert_eq!(concat(tail, head), Err(Error::NotAdjacent));
            assert_eq!(concat_unordered(tail, head), Ok("0123456"));
        }
    }

    /// A one byte gap between the parts is reported as not adjacent.
    #[test]
    fn simple_fail() {
        let s = "0123456789";
        let joined = unsafe { concat(&s[..5], &s[6..7]) };
        assert_eq!(joined, Err(Error::NotAdjacent));
    }

    /// The second part may start with a zero width joiner.
    #[test]
    fn zero_width_joiner() {
        let s = "0\u{200d}1";
        let joined = unsafe { concat(&s[..1], &s[1..5]) };
        assert_eq!(joined, Ok("0\u{200d}1"));
    }

    /// The second part may start with a joiner followed by a combining mark.
    #[test]
    fn zero_width_joiner_combining_grave() {
        let s = "0\u{200d}̀1";
        let joined = unsafe { concat(&s[..1], &s[1..7]) };
        assert_eq!(joined, Ok("0\u{200d}\u{300}1"));
    }

    /// The second part may start with a byte order mark.
    #[test]
    fn bom() {
        let s = "0\u{FEFF}1";
        let joined = unsafe { concat(&s[..1], &s[1..5]) };
        assert_eq!(joined, Ok("0\u{FEFF}1"));
    }

    /// Empty strings at either end of a string are adjacent to it.
    #[test]
    fn empty_str() {
        let s = "0123";
        let (front, back) = (&s[..0], &s[4..]);
        unsafe {
            assert_eq!(concat(front, s), Ok("0123"));
            assert_eq!(concat_unordered(front, s), Ok("0123"));
            assert_eq!(concat_unordered(s, front), Ok("0123"));
            assert_eq!(concat(s, back), Ok("0123"));
            assert_eq!(concat_unordered(s, back), Ok("0123"));
            assert_eq!(concat_unordered(back, s), Ok("0123"));
        }
    }

    /// Slices of a non-byte element type are joined as well.
    #[test]
    fn typed_slices() {
        #[derive(Debug, PartialEq)]
        struct Elem(usize);

        let array = [Elem(0), Elem(1), Elem(2), Elem(3)];
        let all: &[Elem] = &array;
        let (low, high) = (&all[..2], &all[2..]);
        let (front, back) = (&all[..0], &all[4..]);
        unsafe {
            assert_eq!(concat_slice(low, high), Ok(all));
            assert_eq!(concat_slice_unordered(low, high), Ok(all));
            assert_eq!(concat_slice_unordered(high, low), Ok(all));

            // One slice empty
            assert_eq!(concat_slice(front, all), Ok(all));
            assert_eq!(concat_slice_unordered(front, all), Ok(all));
            assert_eq!(concat_slice_unordered(all, front), Ok(all));
            assert_eq!(concat_slice(all, back), Ok(all));
            assert_eq!(concat_slice_unordered(all, back), Ok(all));
            assert_eq!(concat_slice_unordered(back, all), Ok(all));
        }
    }

    /// Gaps and wrong order are rejected for typed slices.
    #[test]
    fn typed_fail() {
        #[derive(Debug, PartialEq)]
        struct Elem(usize);

        let array = [Elem(0), Elem(1), Elem(2), Elem(3)];
        let all: &[Elem] = &array;
        unsafe {
            assert_eq!(concat_slice(&all[..1], &all[2..]), Err(Error::NotAdjacent));
            assert_eq!(concat_slice_unordered(&all[..1], &all[2..]), Err(Error::NotAdjacent));
            assert_eq!(concat_slice(&all[2..], &all[..2]), Err(Error::NotAdjacent));
        }
    }

    /// Slices of zero-sized types are never concatenated.
    #[test]
    fn zst_fail() {
        #[derive(Clone, Copy, Debug, PartialEq)]
        struct Zst;

        let array = [Zst; 4];
        let all: &[Zst] = &array;
        unsafe {
            assert_eq!(concat_slice(&all[..1], &all[1..]), Err(Error::NotAdjacent));
            assert_eq!(concat_slice_unordered(&all[..1], &all[1..]), Err(Error::NotAdjacent));
        }
    }
}