1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153
use std::{slice, str};

/// Error that can occur during [`concat`](fn.concat.html).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Error {
    /// The passed strs are not adjacent.
    NotAdjacent,
    /// The concatenated str would be too long (longer than `isize::MAX` bytes).
    TooLong,
}

impl std::fmt::Display for Error {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let msg = match *self {
            Error::NotAdjacent => "the string slices are not adjacent in memory",
            Error::TooLong => "the concatenated string slice would be too long",
        };
        f.write_str(msg)
    }
}

impl std::error::Error for Error {}

/// Concatenate two string slices if they are adjacent.
///
/// If two strs are adjacent to each other in memory, this function
/// concatenates both, creating a single str.
///
/// Adjacency is decided purely by comparing addresses: the byte one past the
/// end of `a` must be the first byte of `b`. This function cannot verify that
/// both slices were derived from the same underlying buffer, so it should only
/// be used with slices that are known to come from one parent string.
///
/// # Errors
///
/// Returns `Err(Error::NotAdjacent)` if the two slices aren't adjacent or `a`
/// is after `b`, and `Err(Error::TooLong)` if the combined length would not
/// fit into `isize::MAX` bytes.
///
/// # Examples
///
/// Correct usage:
///
/// ```rust
/// # use str_concat::concat;
/// let s = "0123456789";
/// assert_eq!("0123456", concat(&s[..5], &s[5..7]).unwrap());
/// ```
///
/// Non-adjacent string slices:
///
/// ```rust
/// # use str_concat::{concat, Error};
/// let s = "0123456789";
/// assert_eq!(Err(Error::NotAdjacent), concat(&s[..5], &s[6..7]))
/// ```
pub fn concat<'a>(a: &'a str, b: &'a str) -> Result<&'a str, Error> {
    let a_ptr = a.as_ptr();
    let b_ptr = b.as_ptr();

    // `wrapping_add` is a safe operation and never UB by itself. For a valid
    // `str` the result is exactly the one-past-the-end pointer of `a`; it is
    // only compared here, never dereferenced.
    let a_end = a_ptr.wrapping_add(a.len());
    if a_end != b_ptr {
        return Err(Error::NotAdjacent);
    }

    // A slice must never span more than `isize::MAX` bytes, so the *combined*
    // length has to be checked (and the addition itself must not overflow).
    let total_len = match a.len().checked_add(b.len()) {
        Some(len) if len <= isize::max_value() as usize => len,
        _ => return Err(Error::TooLong),
    };

    // SAFETY:
    // * `a` ends exactly where `b` starts (checked above), so the `total_len`
    //   bytes starting at `a_ptr` are precisely the bytes of `a` followed by
    //   the bytes of `b`, all of which are borrowed for `'a`
    // * `total_len` does not exceed `isize::MAX` (checked above)
    // * no double-free / leak because we work on borrowed data
    // * no use-after-free because `a` and `b` have the same lifetime
    // * NOTE: that both slices belong to the same allocated object is assumed,
    //   not checked; see the function documentation
    let bytes = unsafe { slice::from_raw_parts(a_ptr, total_len) };

    // SAFETY:
    // * concatenating two valid UTF8 strings will produce a valid UTF8 string
    // * a BOM in `b` is still valid:
    //   > It is important to understand that the character U+FEFF appearing at
    //   > any position other than the beginning of a stream MUST be interpreted
    //   > with the semantics for the zero-width non-breaking space, and MUST
    //   > NOT be interpreted as a signature.
    // * the grapheme *clusters* (and thus potentially the semantics of the
    //   string) might change if the first code point of `b` is a combining
    //   character, a zero width joiner or similar.
    //   This does not affect the correctness of UTF-8.
    Ok(unsafe { str::from_utf8_unchecked(bytes) })
}

/// Concatenate two adjacent string slices no matter their order.
///
/// This is the same as [`concat`] except that it also concatenates
/// `b` to `a` if `b` is in front of `a` (in which case [`concat`] errors).
///
/// Empty slices are handled as well: an empty slice that starts at the same
/// address as the other slice is treated as being in front of it.
///
/// # Errors
///
/// Returns the same errors as [`concat`] if the slices are not adjacent in
/// either order or the result would be too long.
///
/// # Examples
///
/// Reversed order:
///
/// ```rust
/// # use str_concat::concat_unordered;
/// let s = "0123456789";
/// assert_eq!("0123456", concat_unordered(&s[5..7], &s[..5]).unwrap());
/// ```
///
/// Normal order:
///
/// ```rust
/// # use str_concat::concat_unordered;
/// let s = "0123456789";
/// assert_eq!("0123456", concat_unordered(&s[..5], &s[5..7]).unwrap())
/// ```
///
/// [`concat`]: fn.concat.html
pub fn concat_unordered<'a>(a: &'a str, b: &'a str) -> Result<&'a str, Error> {
    // Compare the *end* of `a` with the start of `b` instead of the two start
    // pointers: comparing only the starts misorders the pair when an empty `a`
    // shares its address with a non-empty `b`.
    let a_end = a.as_ptr().wrapping_add(a.len());
    let b_ptr = b.as_ptr();

    // make the order of `a` and `b` not matter
    let (first, second) = if a_end <= b_ptr { (a, b) } else { (b, a) };
    concat(first, second)
}

#[cfg(test)]
mod tests {
    use super::{concat, concat_unordered, Error};

    #[test]
    fn simple_success() {
        let s = "0123456789";
        assert_eq!(Ok("0123456"), concat(&s[..5], &s[5..7]));
        assert_eq!(Ok("0123456"), concat_unordered(&s[..5], &s[5..7]));
    }

    #[test]
    fn unordered() {
        let s = "0123456789";
        assert_eq!(Err(Error::NotAdjacent), concat(&s[5..7], &s[..5]));
        assert_eq!(Ok("0123456"), concat_unordered(&s[5..7], &s[..5]));
    }

    #[test]
    fn simple_fail() {
        let s = "0123456789";
        assert_eq!(Err(Error::NotAdjacent), concat(&s[..5], &s[6..7]))
    }

    #[test]
    fn empty_slices() {
        let s = "0123456789";
        // empty slice sharing its start address with the other slice
        assert_eq!(Ok("01234"), concat(&s[..0], &s[..5]));
        assert_eq!(Ok("01234"), concat_unordered(&s[..0], &s[..5]));
        assert_eq!(Ok("01234"), concat_unordered(&s[..5], &s[..0]));
        // empty slice directly behind the other slice
        assert_eq!(Ok("01234"), concat_unordered(&s[..5], &s[5..5]));
        assert_eq!(Ok("01234"), concat_unordered(&s[5..5], &s[..5]));
        // two empty slices at the same address
        assert_eq!(Ok(""), concat_unordered(&s[3..3], &s[3..3]));
    }

    #[test]
    fn zero_width_joiner() {
        let s = "0\u{200d}1";
        assert_eq!(Ok("0\u{200d}1"), concat(&s[..1], &s[1..5]));
    }

    #[test]
    fn zero_width_joiner_combining_grave() {
        let s = "0\u{200d}\u{300}1";
        assert_eq!(Ok("0\u{200d}\u{300}1"), concat(&s[..1], &s[1..7]));
    }

    #[test]
    fn bom() {
        let s = "0\u{FEFF}1";
        assert_eq!(Ok("0\u{FEFF}1"), concat(&s[..1], &s[1..5]));
    }
}