str_concat/lib.rs
1#![no_std]
2use core::{mem, slice, str};
3
/// Error that can occur during [`concat`](fn.concat.html) and the other
/// concatenation functions of this crate.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Error {
    /// The passed slices are not adjacent, i.e. the second one does not start
    /// exactly where the first one ends.
    NotAdjacent,
    /// The slices are adjacent, but the concatenated result would be too long
    /// (its size in bytes would exceed `isize::MAX`).
    TooLong,
}

impl core::fmt::Display for Error {
    /// Writes a short, human-readable description of the error.
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        let description = match *self {
            Error::NotAdjacent => "the slices are not adjacent",
            Error::TooLong => "the concatenated slice would be too long",
        };
        f.write_str(description)
    }
}
12
13/// Concatenate two string slices if they are adjacent.
14///
15/// If two strs are adjacent to each other in memory, this function
16/// concatenates both, creating a single str.
17///
18/// # Errors
19///
20/// Returns `Err` if the two slices aren't adjacent, `a` is after `b`, or if
21/// `a` is too long for proper concatenation (longer than `isize::MAX`).
22///
23/// # Safety
24///
25/// The provided slices must come from the same underlying allocation. The adjacency test can not
26/// reliably differentiate between the one-past-the-end pointer of one allocation and the start of
27/// another. However, all slices must be within a single allocation.
28///
29/// # Examples
30///
31/// Correct usage:
32///
33/// ```rust
34/// # use str_concat::concat;
35/// let s = "0123456789";
36/// unsafe {
37/// // SAFETY: slices from the same str originally.
38/// assert_eq!("0123456", concat(&s[..5], &s[5..7]).unwrap());
39/// }
40/// ```
41///
42/// Non-adjacent string slices:
43///
44/// ```rust
45/// # use str_concat::{concat, Error};
46/// let s = "0123456789";
47/// unsafe {
48/// // SAFETY: slices from the same str originally.
49/// assert_eq!(Err(Error::NotAdjacent), concat(&s[..5], &s[6..7]))
50/// }
51/// ```
52pub unsafe fn concat<'a>(a: &'a str, b: &'a str) -> Result<&'a str, Error> {
53 let slice = concat_slice(a.as_bytes(), b.as_bytes())?;
54
55 // * concatenating two valid UTF8 strings will produce a valid UTF8 string
56 // * a BOM in `b` is still valid:
57 // > It is important to understand that the character U+FEFF appearing at
58 // > any position other than the beginning of a stream MUST be interpreted
59 // > with the semantics for the zero-width non-breaking space, and MUST
60 // > NOT be interpreted as a signature.
61 // * the grapheme *clusters* (and thus potentially the semantics of the string
62 // might change if the first code point of `b` is a combining character,
63 // a zero width joiner or similar.
64 // This does not affect the correctness of UTF-8.
65 Ok(str::from_utf8_unchecked(slice))
66}
67
68/// Concatenate two slices if they are adjacent.
69///
70/// If two slices are adjacent to each other in memory, this function
71/// concatenates both, creating a single longer slice. Note that slices of
72/// zero-sized types (ZST) are never considered adjacent. Otherwise it would be
73/// possible to concatenate a slice to itself.
74///
75/// # Errors
76///
77/// Returns `Err` if the two slices aren't adjacent, `a` is after `b`, or if the
78/// result is too long to be represented as a slice (size in bytes is larger
79/// than `isize::MAX`).
80///
81/// When T is a zero-sized type (ZST) then always returns `Err(NotAdjacent)` otherwise. This is
82/// because ZST-slices are [extra weird][zst-str-concat] and [their safety][zst-unsafe-wg1] is not
83/// yet [fully determined][zst-unsafe-wg2].
84///
85/// [zst-str-concat]: https://github.com/oberien/str-concat/issues/5
86/// [zst-unsafe-wg1]: https://github.com/rust-lang/unsafe-code-guidelines/issues/93
87/// [zst-unsafe-wg2]: https://github.com/rust-lang/unsafe-code-guidelines/issues/168
88///
89/// # Safety
90///
91/// The provided slices must come from the same underlying allocation. The adjacency test can not
92/// reliably differentiate between the one-past-the-end pointer of one allocation and the start of
93/// another. However, all slices must be within a single allocation.
94///
95/// # Examples
96///
97/// Correct usage:
98///
99/// ```rust
100/// # use str_concat::concat_slice;
101/// let s = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
102/// unsafe {
103/// // SAFETY: slices from the same bytes originally.
104/// assert_eq!(
105/// [0, 1, 2, 3, 4, 5, 6],
106/// concat_slice(&s[..5], &s[5..7]).unwrap());
107/// }
108/// ```
109///
110/// Non-adjacent byte slices:
111///
112/// ```rust
113/// # use str_concat::{concat_slice, Error};
114/// let s = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
115/// unsafe {
116/// // SAFETY: slices from the same bytes originally.
117/// assert_eq!(Err(Error::NotAdjacent), concat_slice(&s[..5], &s[6..7]))
118/// }
119/// ```
120///
121pub unsafe fn concat_slice<'a, T>(a: &'a [T], b: &'a [T]) -> Result<&'a [T], Error> {
122 let a_ptr = a.as_ptr();
123 let b_ptr = b.as_ptr();
124
125 let a_len = a.len();
126 let b_len = b.len();
127
128 if mem::size_of::<T>() == 0 {
129 // NOTE(HeroicKatora)
130 // Never consider ZST slices adjacent through this function. You could
131 // infinitely duplicate a non-zero length slice by concatenating it to
132 // itself as opposed to non-ZST slice types. That would just be weird.
133 //
134 // It is however safe.
135 // See: https://github.com/rust-lang/unsafe-code-guidelines/issues/93
136 // and https://github.com/rust-lang/unsafe-code-guidelines/issues/168
137 // Issue: https://github.com/oberien/str-concat/issues/5
138 return Err(Error::NotAdjacent)
139 }
140
141 // `max_len <= isize::max_value()`
142 let max_len = isize::max_value() as usize / mem::size_of::<T>();
143
144 // These should be guaranteed for the slices.
145 assert!(a_len <= max_len as usize);
146 assert!(b_len <= max_len as usize);
147
148 // https://doc.rust-lang.org/std/primitive.pointer.html#safety-1
149 // * starting pointer in-bounds obviously
150 // * ending pointer one byte past the end of an allocated object
151 // * explicit isize overflow check above
152 // * no wraparound required
153 // why: this is the one byte past the end pointer for the input slice `a`
154 if a_ptr.offset(a_len as isize) != b_ptr {
155 return Err(Error::NotAdjacent);
156 }
157 // UNWRAP: both smaller than isize, can't wrap in usize.
158 // This is because in rust `usize` and `isize` are both guaranteed to have
159 // the same number of bits as a pointer [1]. As `isize` is signed, a `usize`
160 // can always store the sum of two positive `isize`.
161 // [1]: https://doc.rust-lang.org/reference/types/numeric.html#machine-dependent-integer-types
162 let new_len = a_len.checked_add(b_len).unwrap();
163 // Ensure the length is bounded. The bound is strict from the definition of `max_len`
164 // `new_len <= max_len` <=> `new_len * mem::size_of::<T>() <= isize::max_value()`
165 if !(new_len <= max_len) {
166 return Err(Error::TooLong);
167 }
168 // https://doc.rust-lang.org/std/slice/fn.from_raw_parts.html#safety
169 // * slices are adjacent (checked above)
170 // * no double-free / leak because we work on borrowed data
171 // * no use-after-free because `a` and `b` have same lifetime
172 // * the total size is smaller than `isize::MAX` bytes, as max_len is rounded down
173 Ok(slice::from_raw_parts(a_ptr, new_len))
174}
175
176/// Concatenate two adjacent string slices no matter their order.
177///
178/// This is the same as [`concat`] except that it also concatenates
179/// `b` to `a` if `b` is in front of `a` (in which case [`concat`] errors).
180///
181/// # Safety
182///
183/// The provided slices must come from the same underlying allocation. The adjacency test can not
184/// reliably differentiate between the one-past-the-end pointer of one allocation and the start of
185/// another. However, all slices must be within a single allocation.
186///
187/// # Examples
188///
189/// Reversed order:
190///
191/// ```rust
192/// # use str_concat::concat_unordered;
193/// let s = "0123456789";
194/// unsafe {
195/// // SAFETY: slices from the same str originally.
196/// assert_eq!("0123456", concat_unordered(&s[5..7], &s[..5]).unwrap());
197/// }
198/// ```
199///
200/// Normal order:
201///
202/// ```rust
203/// # use str_concat::{concat_unordered, Error};
204/// let s = "0123456789";
205/// unsafe {
206/// // SAFETY: slices from the same str originally.
207/// assert_eq!("0123456", concat_unordered(&s[..5], &s[5..7]).unwrap())
208/// }
209/// ```
210///
211/// [`concat`]: fn.concat.html
212pub unsafe fn concat_unordered<'a>(a: &'a str, b: &'a str) -> Result<&'a str, Error> {
213 // add lengths to handle empty-string cases correctly
214 let a_ptr = a.as_bytes().as_ptr() as usize;
215 let a_end_ptr = a_ptr + a.len();
216 let b_ptr = b.as_bytes().as_ptr() as usize;
217
218 // make the order of `a` and `b` not matter
219 let (a, b) = if a_ptr <= b_ptr && a_end_ptr <= b_ptr {
220 (a, b)
221 } else {
222 (b, a)
223 };
224
225 concat(a, b)
226}
227
228/// Concatenate two adjacent slices no matter their order.
229///
230/// This is the same as [`concat_slice`] except that it also concatenates `b` to
231/// `a` if `b` is in front of `a` (in which case of [`concat_slice`] errors).
232/// Keep in mind that slices of zero-sized types (ZST) will still not be concatenated.
233///
234/// # Safety
235///
236/// The provided slices must come from the same underlying allocation. The adjacency test can not
237/// reliably differentiate between the one-past-the-end pointer of one allocation and the start of
238/// another. However, all slices must be within a single allocation.
239///
240/// # Examples
241///
242/// Reversed order:
243///
244/// ```rust
245/// # use str_concat::concat_slice_unordered;
246/// let s = [0, 1, 2, 3, 4, 5, 6];
247/// unsafe {
248/// // SAFETY: slices from the same bytes originally.
249/// assert_eq!(
250/// [0, 1, 2, 3, 4, 5, 6],
251/// concat_slice_unordered(&s[5..7], &s[..5]).unwrap());
252/// }
253/// ```
254///
255/// Normal order:
256///
257/// ```rust
258/// # use str_concat::{concat_slice_unordered, Error};
259/// let s = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
260/// unsafe {
261/// // SAFETY: slices from the same bytes originally.
262/// assert_eq!(
263/// [0, 1, 2, 3, 4, 5, 6],
264/// concat_slice_unordered(&s[..5], &s[5..7]).unwrap())
265/// }
266/// ```
267///
268/// [`concat_slice`]: fn.concat_slice.html
269pub unsafe fn concat_slice_unordered<'a, T>(a: &'a [T], b: &'a [T]) -> Result<&'a [T], Error> {
270 // add lengths to handle empty cases correctly
271 let a_ptr = a.as_ptr() as usize;
272 let a_end_ptr = a_ptr + a.len() * mem::size_of::<T>();
273 let b_ptr = b.as_ptr() as usize;
274
275 // make the order of `a` and `b` not matter
276 let (a, b) = if a_ptr <= b_ptr && a_end_ptr <= b_ptr {
277 (a, b)
278 } else {
279 (b, a)
280 };
281
282 concat_slice(a, b)
283}
284
#[cfg(test)]
mod tests {
    use super::{concat, concat_slice, concat_slice_unordered, concat_unordered, Error};

    /// Adjacent parts passed in memory order are joined by both string variants.
    #[test]
    fn simple_success() {
        let digits = "0123456789";
        let (head, tail) = (&digits[..5], &digits[5..7]);
        unsafe {
            assert_eq!(Ok("0123456"), concat(head, tail));
            assert_eq!(Ok("0123456"), concat_unordered(head, tail));
        }
    }

    /// Parts passed in reverse order are rejected by `concat` but joined by
    /// `concat_unordered`.
    #[test]
    fn unordered() {
        let digits = "0123456789";
        let (head, tail) = (&digits[..5], &digits[5..7]);
        unsafe {
            assert_eq!(Err(Error::NotAdjacent), concat(tail, head));
            assert_eq!(Ok("0123456"), concat_unordered(tail, head));
        }
    }

    /// A gap of one byte between the parts is reported as `NotAdjacent`.
    #[test]
    fn simple_fail() {
        let digits = "0123456789";
        unsafe {
            assert_eq!(Err(Error::NotAdjacent), concat(&digits[..5], &digits[6..7]));
        }
    }

    /// The second part may start with a zero width joiner.
    #[test]
    fn zero_width_joiner() {
        let text = "0\u{200d}1";
        unsafe {
            assert_eq!(Ok("0\u{200d}1"), concat(&text[..1], &text[1..5]));
        }
    }

    /// The second part may start with a zero width joiner followed by a
    /// combining grave accent.
    #[test]
    fn zero_width_joiner_combining_grave() {
        let text = "0\u{200d}\u{300}1";
        unsafe {
            assert_eq!(Ok("0\u{200d}\u{300}1"), concat(&text[..1], &text[1..7]));
        }
    }

    /// The second part may start with a byte order mark.
    #[test]
    fn bom() {
        let text = "0\u{FEFF}1";
        unsafe {
            assert_eq!(Ok("0\u{FEFF}1"), concat(&text[..1], &text[1..5]));
        }
    }

    /// An empty string at the very start or the very end joins with the full string.
    #[test]
    fn empty_str() {
        let text = "0123";
        let empty_front = &text[..0];
        let empty_back = &text[4..];
        unsafe {
            assert_eq!(Ok("0123"), concat(empty_front, text));
            assert_eq!(Ok("0123"), concat_unordered(empty_front, text));
            assert_eq!(Ok("0123"), concat_unordered(text, empty_front));
            assert_eq!(Ok("0123"), concat(text, empty_back));
            assert_eq!(Ok("0123"), concat_unordered(text, empty_back));
            assert_eq!(Ok("0123"), concat_unordered(empty_back, text));
        }
    }

    /// Slices of a non-byte element type are joined as well, including empty operands.
    #[test]
    fn typed_slices() {
        #[derive(Debug, PartialEq)]
        struct Elem(usize);

        let storage = [Elem(0), Elem(1), Elem(2), Elem(3)];
        let all: &[Elem] = &storage;
        let (left, right) = (&all[..2], &all[2..]);
        unsafe {
            assert_eq!(Ok(all), concat_slice(left, right));
            assert_eq!(Ok(all), concat_slice_unordered(left, right));
            assert_eq!(Ok(all), concat_slice_unordered(right, left));

            // One slice empty
            let empty_front = &all[..0];
            let empty_back = &all[4..];
            assert_eq!(Ok(all), concat_slice(empty_front, all));
            assert_eq!(Ok(all), concat_slice_unordered(empty_front, all));
            assert_eq!(Ok(all), concat_slice_unordered(all, empty_front));
            assert_eq!(Ok(all), concat_slice(all, empty_back));
            assert_eq!(Ok(all), concat_slice_unordered(all, empty_back));
            assert_eq!(Ok(all), concat_slice_unordered(empty_back, all));
        }
    }

    /// Gaps and wrong order are reported as `NotAdjacent` for typed slices.
    #[test]
    fn typed_fail() {
        #[derive(Debug, PartialEq)]
        struct Elem(usize);

        let storage = [Elem(0), Elem(1), Elem(2), Elem(3)];
        let all: &[Elem] = &storage;
        unsafe {
            assert_eq!(Err(Error::NotAdjacent), concat_slice(&all[..1], &all[2..]));
            assert_eq!(
                Err(Error::NotAdjacent),
                concat_slice_unordered(&all[..1], &all[2..])
            );
            assert_eq!(Err(Error::NotAdjacent), concat_slice(&all[2..], &all[..2]));
        }
    }

    /// Slices of zero-sized types are never concatenated.
    #[test]
    fn zst_fail() {
        #[derive(Clone, Copy, Debug, PartialEq)]
        struct Zst;

        let all: &[Zst] = &[Zst; 4];
        let (left, right) = (&all[..1], &all[1..]);
        unsafe {
            assert_eq!(Err(Error::NotAdjacent), concat_slice(left, right));
            assert_eq!(Err(Error::NotAdjacent), concat_slice_unordered(left, right));
        }
    }
}