gdbstub 0.6.3

An implementation of the GDB Remote Serial Protocol in Rust
Documentation
use num_traits::{CheckedAdd, CheckedMul, FromPrimitive, Zero};

#[derive(Debug)]
pub enum DecodeHexError {
    NotAscii,
    Empty,
    Overflow,
    InvalidOutput,
}

/// Decode a GDB hex string into the specified integer.
///
/// GDB hex strings may include "xx", which represent "missing" data. This
/// method simply treats "xx" as 0x00.
pub fn decode_hex<I>(buf: &[u8]) -> Result<I, DecodeHexError>
where
    I: FromPrimitive + Zero + CheckedAdd + CheckedMul,
{
    use DecodeHexError::*;

    let radix = I::from_u8(16).ok_or(InvalidOutput)?;

    if buf.is_empty() {
        return Err(Empty);
    }

    let mut result = I::zero();

    for &digit in buf {
        let x = I::from_u8(ascii2byte(digit).ok_or(NotAscii)?).ok_or(InvalidOutput)?;
        result = result.checked_mul(&radix).ok_or(Overflow)?;
        result = result.checked_add(&x).ok_or(Overflow)?
    }

    Ok(result)
}

/// Wrapper around a raw hex string. Enables "late" calls to `decode` from
/// outside the `crate::protocol` module.
#[derive(Debug, Clone, Copy)]
pub struct HexString<'a>(pub &'a [u8]);

impl HexString<'_> {
    pub fn decode<I>(&self) -> Result<I, DecodeHexError>
    where
        I: FromPrimitive + Zero + CheckedAdd + CheckedMul,
    {
        decode_hex(self.0)
    }
}

#[derive(Debug)]
pub enum DecodeHexBufError {
    NotAscii,
}

#[inline]
fn ascii2byte(c: u8) -> Option<u8> {
    match c {
        b'0'..=b'9' => Some(c - b'0'),
        b'a'..=b'f' => Some(c - b'a' + 10),
        b'A'..=b'F' => Some(c - b'A' + 10),
        b'x' | b'X' => Some(0),
        _ => None,
    }
}

/// Check if the byte `c` is a valid GDB hex digit `[0-9a-fA-FxX]`
#[inline]
pub fn is_hex(c: u8) -> bool {
    #[allow(clippy::match_like_matches_macro)] // mirror ascii2byte
    match c {
        b'0'..=b'9' => true,
        b'a'..=b'f' => true,
        b'A'..=b'F' => true,
        b'x' | b'X' => true,
        _ => false,
    }
}

/// Decode a GDB hex string into a byte slice _in place_.
///
/// GDB hex strings may include "xx", which represent "missing" data. This
/// method simply treats "xx" as 0x00.
// TODO: maybe don't blindly translate "xx" as 0x00?
#[cfg(not(feature = "paranoid_unsafe"))]
pub fn decode_hex_buf(base_buf: &mut [u8]) -> Result<&mut [u8], DecodeHexBufError> {
    use DecodeHexBufError::*;

    if base_buf.is_empty() {
        return Ok(&mut []);
    }

    let odd_adust = base_buf.len() % 2;
    if odd_adust != 0 {
        base_buf[0] = ascii2byte(base_buf[0]).ok_or(NotAscii)?;
    }

    let buf = &mut base_buf[odd_adust..];

    let decoded_len = buf.len() / 2;
    for i in 0..decoded_len {
        // SAFETY: rustc isn't smart enough to automatically elide these bound checks.
        //
        // If buf.len() == 0 or 1: trivially safe, since the for block is never taken
        // If buf.len() >= 2: the range of values for `i` is 0..(buf.len() / 2 - 1)
        let (hi, lo, b) = unsafe {
            (
                //    (buf.len() / 2 - 1) * 2
                // == (buf.len() - 2)
                // since buf.len() is >2, this is in-bounds
                *buf.get_unchecked(i * 2),
                //    (buf.len() / 2 - 1) * 2 + 1
                // == (buf.len() - 1)
                // since buf.len() is >2, this is in-bounds
                *buf.get_unchecked(i * 2 + 1),
                // since buf.len() is >2, (buf.len() / 2 - 1) is always in-bounds
                buf.get_unchecked_mut(i),
            )
        };

        let hi = ascii2byte(hi).ok_or(NotAscii)?;
        let lo = ascii2byte(lo).ok_or(NotAscii)?;
        *b = hi << 4 | lo;
    }

    // SAFETY: rustc isn't smart enough to automatically elide this bound check.
    //
    // Consider the different values (decoded_len + odd_adust) can take:
    //
    //  buf.len() | (decoded_len + odd_adust)
    // -----------|---------------------------
    //      0     | (0 + 0) == 0
    //      1     | (0 + 1) == 1
    //      2     | (1 + 0) == 1
    //      3     | (1 + 1) == 2
    //      4     | (2 + 0) == 2
    //      5     | (2 + 1) == 3
    //
    // Note that the computed index is always in-bounds.
    //
    // If I were still in undergrad, I could probably have whipped up a proper
    // mathematical proof by induction or whatnot, but hopefully this "proof by
    // example" ought to suffice.
    unsafe { Ok(base_buf.get_unchecked_mut(..decoded_len + odd_adust)) }
}

/// Decode a GDB hex string into a byte slice _in place_.
///
/// GDB hex strings may include "xx", which represent "missing" data. This
/// method simply treats "xx" as 0x00.
// TODO: maybe don't blindly translate "xx" as 0x00?
#[cfg(feature = "paranoid_unsafe")]
pub fn decode_hex_buf(base_buf: &mut [u8]) -> Result<&mut [u8], DecodeHexBufError> {
    use DecodeHexBufError::*;

    let odd_adust = base_buf.len() % 2;
    if odd_adust != 0 {
        base_buf[0] = ascii2byte(base_buf[0]).ok_or(NotAscii)?;
    }
    let buf = &mut base_buf[odd_adust..];

    let decoded_len = buf.len() / 2;
    for i in 0..decoded_len {
        let b = ascii2byte(buf[i * 2]).ok_or(NotAscii)? << 4
            | ascii2byte(buf[i * 2 + 1]).ok_or(NotAscii)?;
        buf[i] = b as u8;
    }

    Ok(&mut base_buf[..decoded_len + odd_adust])
}

#[derive(Debug)]
pub enum DecodeBinBufError {
    UnexpectedEnd,
}

/// Decode GDB escaped binary bytes into origin bytes _in place_.
pub fn decode_bin_buf(buf: &mut [u8]) -> Result<&mut [u8], DecodeBinBufError> {
    use DecodeBinBufError::*;
    let mut i = 0;
    let mut j = 0;
    let len = buf.len();
    while i < len && j < len {
        if buf[i] == b'}' {
            if i + 1 >= len {
                return Err(UnexpectedEnd);
            } else {
                buf[j] = buf[i + 1] ^ 0x20;
                i += 1;
            }
        } else {
            buf[j] = buf[i];
        }
        i += 1;
        j += 1;
    }

    // SAFETY: by inspection, the value of j will never exceed buf.len().
    // Unfortunately, the LLVM optimizer isn't smart enough to see this, so
    // we have to manually elide the bounds check...
    if cfg!(feature = "paranoid_unsafe") {
        Ok(&mut buf[..j])
    } else {
        debug_assert!(j <= len);
        unsafe { Ok(buf.get_unchecked_mut(..j)) }
    }
}

#[derive(Debug)]
pub enum EncodeHexBufError {
    SmallBuffer,
}

/// Encode a GDB hex string into a byte slice _in place_.
///
/// The data to be encoded should be copied into the buffer from
/// `buf[start_idx..]`. The buffer itself must be at least `data.len() * 2`
/// bytes in size, as each byte is expanded into a two byte hex string.
#[allow(dead_code)]
pub fn encode_hex_buf(buf: &mut [u8], start_idx: usize) -> Result<&mut [u8], EncodeHexBufError> {
    use EncodeHexBufError::*;

    let len = buf.len() - start_idx;
    let encoded_len = len * 2;

    if buf.len() < encoded_len {
        return Err(SmallBuffer);
    }

    for i in 0..encoded_len {
        let byte = buf[start_idx + i / 2];
        let nybble = if i % 2 == 0 {
            // high
            (byte & 0xf0) >> 4
        } else {
            // low
            byte & 0x0f
        };

        buf[i] = match nybble {
            0x0..=0x9 => b'0' + nybble,
            0xa..=0xf => b'A' + (nybble - 0xa),
            #[allow(clippy::unreachable)] // will be optimized out
            _ => unreachable!(), // TODO: use unreachable_unchecked?
        };
    }

    Ok(&mut buf[..encoded_len])
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn encode_hex_simple() {
        let payload = [0xde, 0xad, 0xbe, 0xef];
        let mut buf = [0; 16];

        let start_idx = buf.len() - payload.len();

        // copy the payload into the buffer
        buf[start_idx..].copy_from_slice(&payload);
        let out = encode_hex_buf(&mut buf, start_idx).unwrap();

        assert_eq!(out, b"DEADBEEF");
    }

    #[test]
    fn encode_hex_in_chunks() {
        let payload = (0..=255).collect::<Vec<u8>>();
        let mut out = Vec::new();

        let mut buf = [0; 30];

        for c in payload.chunks(15) {
            let start_idx = buf.len() - c.len();

            let data_buf = &mut buf[start_idx..];
            data_buf[..c.len()].copy_from_slice(c);
            out.extend_from_slice(encode_hex_buf(&mut buf, start_idx).unwrap());
        }

        let expect = (0..=255).map(|b| format!("{:02X?}", b)).collect::<String>();

        assert_eq!(out, expect.as_bytes())
    }

    #[test]
    fn decode_hex_buf_odd() {
        let mut payload = b"ffffff4".to_vec();
        let res = decode_hex_buf(&mut payload).unwrap();
        assert_eq!(res, [0xf, 0xff, 0xff, 0xf4]);
    }

    #[test]
    fn decode_hex_buf_2() {
        let mut payload = b"12345".to_vec();
        let res = decode_hex_buf(&mut payload).unwrap();
        assert_eq!(res, [0x1, 0x23, 0x45]);
    }

    #[test]
    fn decode_hex_buf_short() {
        let mut payload = b"1".to_vec();
        let res = decode_hex_buf(&mut payload).unwrap();
        assert_eq!(res, [0x1]);
    }

    #[test]
    fn decode_bin_buf_escaped() {
        let mut payload = b"}\x03}\x04}]}\n".to_vec();
        let res = decode_bin_buf(&mut payload).unwrap();
        assert_eq!(res, [0x23, 0x24, 0x7d, 0x2a]);
    }
}