// jiff 0.2.23 - Docs.rs

use core::cmp::Ordering;

/// An invalid UTF-8 byte sequence.
///
/// Values of this type are the error half of what `decode` returns. The
/// sequence carried by the error is guaranteed to be 1, 2 or 3 bytes long.
pub(crate) struct Utf8Error {
    // Inline storage for the invalid bytes. Only the first `len` entries
    // are meaningful; the remainder is left as zero.
    bytes: [u8; 3],
    // The number of entries of `bytes` that are in use.
    len: u8,
}

impl Utf8Error {
    /// Builds an error from the bytes that were being decoded and the
    /// error std reported for them.
    ///
    /// The invalid sequence is taken from the *start* of `original_bytes`.
    /// When std does not report an error length (the input ended in the
    /// middle of a sequence), all of `original_bytes` is treated as the
    /// invalid sequence.
    #[cold]
    #[inline(never)]
    fn new(original_bytes: &[u8], err: core::str::Utf8Error) -> Utf8Error {
        let len = match err.error_len() {
            Some(reported) => reported,
            None => original_bytes.len(),
        };
        // An invalid UTF-8 sequence is never longer than 3 bytes, so both
        // the copy and the narrowing conversion below are in range.
        debug_assert!(1 <= len && len <= 3);
        let mut bytes = [0u8; 3];
        bytes[..len].copy_from_slice(&original_bytes[..len]);
        let len = u8::try_from(len).unwrap();
        Utf8Error { bytes, len }
    }

    /// Returns the invalid UTF-8 bytes as a slice.
    ///
    /// The length of the returned slice always equals `Utf8Error::len`.
    pub(crate) fn as_slice(&self) -> &[u8] {
        let used = self.len();
        &self.bytes[..used]
    }

    /// Returns how many bytes make up the invalid UTF-8 sequence.
    ///
    /// The result is guaranteed to be 1, 2 or 3.
    pub(crate) fn len(&self) -> usize {
        self.len.into()
    }
}

impl core::fmt::Display for Utf8Error {
    /// Writes a message that shows the invalid bytes in escaped form and
    /// states that format strings must be valid UTF-8.
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        // Wrap the raw bytes so that their `Debug` output is used in the
        // message below.
        let escaped = crate::util::escape::Bytes(self.as_slice());
        f.write_fmt(format_args!(
            "found invalid UTF-8 byte {errant_bytes:?} in format \
             string (format strings must be valid UTF-8)",
            errant_bytes = escaped,
        ))
    }
}

/// Decodes the UTF-8 encoded codepoint at the start of the given byte slice.
///
/// The possible outcomes are:
///
/// * `None` if and only if `bytes` is empty.
/// * `Some(Ok(ch))` when `bytes` begins with a valid encoding of the
///   Unicode scalar value `ch`.
/// * `Some(Err(err))` when it does not. The error carries 1-3 bytes, which
///   are guaranteed to be a prefix of `bytes`. Those bytes are either a
///   single invalid byte, or a prefix of a valid UTF-8 encoding of a Unicode
///   scalar value that ultimately did not lead to a valid encoding.
///
/// This never panics.
///
/// *WARNING*: This is not designed for performance. If you're looking for
/// a fast UTF-8 decoder, this is not it. If you feel like you need one in
/// this crate, then please file an issue and discuss your use case.
pub(crate) fn decode(bytes: &[u8]) -> Option<Result<char, Utf8Error>> {
    // A single codepoint occupies at most 4 bytes, so only that much of
    // the input ever needs to be validated.
    let window = match bytes.len() {
        0 => return None,
        n => &bytes[..n.min(4)],
    };
    let valid = match core::str::from_utf8(window) {
        Ok(valid) => valid,
        Err(err) => {
            let valid_len = err.valid_up_to();
            if valid_len == 0 {
                // Nothing at the front decodes. Report the 1-3 bytes that
                // make up a prefix of a potentially valid codepoint.
                return Some(Err(Utf8Error::new(bytes, err)));
            }
            // OK because std just told us that this many leading bytes
            // are valid UTF-8.
            core::str::from_utf8(&bytes[..valid_len]).unwrap()
        }
    };
    // OK because every path that reaches this point produced a non-empty
    // string, and so `str::chars` yields at least one Unicode scalar
    // value.
    let ch = valid.chars().next().unwrap();
    Some(Ok(ch))
}

/// Compares two strings, ignoring differences in ASCII case.
///
/// This is like std's `eq_ignore_ascii_case`, except that it reports a
/// full `Ordering` instead of only equality.
#[inline]
pub(crate) fn cmp_ignore_ascii_case(s1: &str, s2: &str) -> Ordering {
    let (lhs, rhs) = (s1.as_bytes(), s2.as_bytes());
    cmp_ignore_ascii_case_bytes(lhs, rhs)
}

/// Compares two byte slices, ignoring differences in ASCII case.
///
/// This is like std's `eq_ignore_ascii_case`, except that it operates on
/// `&[u8]` and reports a full `Ordering`. Bytes are compared position by
/// position after ASCII lowercasing. If one slice is a proper prefix of the
/// other (ignoring ASCII case), the shorter slice orders first.
#[inline]
pub(crate) fn cmp_ignore_ascii_case_bytes(s1: &[u8], s2: &[u8]) -> Ordering {
    // An earlier version of this function lowercased both sides through
    // iterator adapters and compared them with `Iterator::cmp`:
    //
    //     let it1 = s1.iter().map(|&b| b.to_ascii_lowercase());
    //     let it2 = s2.iter().map(|&b| b.to_ascii_lowercase());
    //     it1.cmp(it2)
    //
    // The explicit loop is kept because it seems to do better in
    // microbenchmarks.
    let mut pos = 0;
    loop {
        let (lhs, rhs) = match (s1.get(pos), s2.get(pos)) {
            // Both slices ran out at the same position.
            (None, None) => return Ordering::Equal,
            // Only `s2` ran out, so `s1` is the longer one.
            (Some(_), None) => return Ordering::Greater,
            // Only `s1` ran out, so `s1` is the shorter one.
            (None, Some(_)) => return Ordering::Less,
            (Some(lhs), Some(rhs)) => {
                (lhs.to_ascii_lowercase(), rhs.to_ascii_lowercase())
            }
        };
        if lhs != rhs {
            return lhs.cmp(&rhs);
        }
        pos += 1;
    }
}