1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
use std::{slice, str};

/// Error that can occur during [`concat`](fn.concat.html).
///
/// The type implements [`Display`](https://doc.rust-lang.org/std/fmt/trait.Display.html)
/// and [`std::error::Error`](https://doc.rust-lang.org/std/error/trait.Error.html),
/// so it can be printed for users and propagated with `?` into boxed or
/// wrapping error types.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Error {
    /// The passed strs are not adjacent.
    NotAdjacent,
    /// A length involved in the concatenation is too large
    /// (it exceeds `isize::MAX`).
    TooLong,
}

impl ::std::fmt::Display for Error {
    /// Writes a short, lowercase, human-readable description of the error.
    fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result {
        let message = match *self {
            Error::NotAdjacent => "the string slices are not adjacent in memory",
            Error::TooLong => "the string slices are too long to be concatenated",
        };
        f.write_str(message)
    }
}

// Absolute path on purpose: inside this file the bare name `Error` refers to
// the enum above, not to the standard library trait.
impl ::std::error::Error for Error {}

/// Concatenate two string slices if they are adjacent.
///
/// If two strs are adjacent to each other in memory, this function
/// concatenates both, creating a single str.
///
/// # Errors
///
/// Returns `Err` if the two slices aren't adjacent, `a` is after `b`, or if
/// `a` is too long for proper concatenation (longer than `isize::MAX`).
///
/// # Examples
///
/// Correct usage:
///
/// ```rust
/// # use str_concat::concat;
/// let s = "0123456789";
/// assert_eq!("0123456", concat(&s[..5], &s[5..7]).unwrap());
/// ```
///
/// Non-adjacent string slices:
///
/// ```rust
/// # use str_concat::{concat, Error};
/// let s = "0123456789";
/// assert_eq!(Err(Error::NotAdjacent), concat(&s[..5], &s[6..7]))
/// ```
pub fn concat<'a>(a: &'a str, b: &'a str) -> Result<&'a str, Error> {
    let a_ptr = a.as_bytes().as_ptr();
    let b_ptr = b.as_bytes().as_ptr();
    
    unsafe {
        if a.len() > isize::max_value() as usize {
            return Err(Error::TooLong);
        }
        // https://doc.rust-lang.org/std/primitive.pointer.html#safety-1
        // * starting pointer in-bounds obviously
        // * ending pointer one byte past the end of an allocated object
        // * explicit isize overflow check above
        // * no wraparound required
        if a_ptr.offset(a.len() as isize) != b_ptr {
            return Err(Error::NotAdjacent);
        }
        // * strs are adjacent (checked above)
        // * no double-free / leak because we work on borrowed data
        // * no use-after-free because `a` and `b` have same lifetime
        let slice = slice::from_raw_parts(a_ptr, a.len() + b.len());
        // * concatenating two valid UTF8 strings will produce a valid UTF8 string
        // * a BOM in `b` is still valid:
        //   > It is important to understand that the character U+FEFF appearing at
        //   > any position other than the beginning of a stream MUST be interpreted
        //   > with the semantics for the zero-width non-breaking space, and MUST
        //   > NOT be interpreted as a signature.
        // * the grapheme *clusters* (and thus potentially the semantics of the string
        //   might change if the first code point of `b` is a combining character,
        //   a zero width joiner or similar.
        //   This does not affect the correctness of UTF-8.
        Ok(str::from_utf8_unchecked(slice))
    }
}

/// Concatenate two adjacent string slices no matter their order.
///
/// This is the same as [`concat`] except that it also concatenates
/// `b` to `a` if `b` is in front of `a` (in which case [`concat`] errors).
///
/// The order is chosen by comparing the *end* of `a` with the start of `b`,
/// not just the two start addresses. This matters when one slice is empty:
/// an empty slice can start at the same address as the other slice, and the
/// empty one must then be treated as the front part.
///
/// # Errors
///
/// Forwards the error returned by [`concat`] for the chosen order, e.g.
/// `Error::NotAdjacent` if the slices are adjacent in neither order.
///
/// # Examples
///
/// Reversed order:
///
/// ```rust
/// # use str_concat::concat_unordered;
/// let s = "0123456789";
/// assert_eq!("0123456", concat_unordered(&s[5..7], &s[..5]).unwrap());
/// ```
///
/// Normal order:
///
/// ```rust
/// # use str_concat::concat_unordered;
/// let s = "0123456789";
/// assert_eq!("0123456", concat_unordered(&s[..5], &s[5..7]).unwrap())
/// ```
///
/// Empty slice sharing its start address with the other slice:
///
/// ```rust
/// # use str_concat::concat_unordered;
/// let s = "0123456789";
/// assert_eq!("56789", concat_unordered(&s[5..5], &s[5..]).unwrap());
/// assert_eq!("56789", concat_unordered(&s[5..], &s[5..5]).unwrap());
/// ```
///
/// [`concat`]: fn.concat.html
pub fn concat_unordered<'a>(a: &'a str, b: &'a str) -> Result<&'a str, Error> {
    let a_start = a.as_bytes().as_ptr() as usize;
    // Cannot actually wrap for a valid slice; `wrapping_add` merely avoids a
    // spurious overflow check.
    let a_end = a_start.wrapping_add(a.len());
    let b_start = b.as_bytes().as_ptr() as usize;

    // `a` can only be the front part if it ends at or before the start of
    // `b`. Comparing start addresses alone picks the wrong order when `a` is
    // empty and begins exactly where `b` begins.
    if a_end <= b_start {
        concat(a, b)
    } else {
        concat(b, a)
    }
}

#[cfg(test)]
mod tests {
    use super::{concat, concat_unordered, Error};

    /// Adjacent slices passed in memory order are joined by both functions.
    #[test]
    fn simple_success() {
        let digits = "0123456789";
        let front = &digits[..5];
        let back = &digits[5..7];

        assert_eq!(Ok("0123456"), concat(front, back));
        assert_eq!(Ok("0123456"), concat_unordered(front, back));
    }

    /// Adjacent slices passed back-to-front: only the unordered variant succeeds.
    #[test]
    fn unordered() {
        let digits = "0123456789";
        let front = &digits[..5];
        let back = &digits[5..7];

        assert_eq!(Err(Error::NotAdjacent), concat(back, front));
        assert_eq!(Ok("0123456"), concat_unordered(back, front));
    }

    /// A one-byte gap between the slices is reported as `NotAdjacent`.
    #[test]
    fn simple_fail() {
        let digits = "0123456789";
        let before_gap = &digits[..5];
        let after_gap = &digits[6..7];

        assert_eq!(Err(Error::NotAdjacent), concat(before_gap, after_gap));
    }

    /// The second slice may start with a zero width joiner (U+200D).
    #[test]
    fn zero_width_joiner() {
        let text = "0\u{200d}1";
        // Split after the one-byte '0'; the tail is bytes 1..5.
        let (head, tail) = text.split_at(1);

        assert_eq!(Ok("0\u{200d}1"), concat(head, tail));
    }

    /// The second slice may start with a zero width joiner followed by a
    /// combining grave accent. The accent is written as a raw character in
    /// the literal below, directly after the `\u{200d}` escape.
    #[test]
    fn zero_width_joiner_combining_grave() {
        let text = "0\u{200d}̀1";
        // Split after the one-byte '0'; the tail is bytes 1..7.
        let (head, tail) = text.split_at(1);

        assert_eq!(Ok("0\u{200d}\u{300}1"), concat(head, tail));
    }

    /// A byte order mark (U+FEFF) at the start of the second slice is kept as-is.
    #[test]
    fn bom() {
        let text = "0\u{FEFF}1";
        // Split after the one-byte '0'; the tail is bytes 1..5.
        let (head, tail) = text.split_at(1);

        assert_eq!(Ok("0\u{FEFF}1"), concat(head, tail));
    }
}