docopticon 0.1.2

//! Core parts of the `const`-time string parser.

/// Searches `left` for the byte sequence `right`, optionally ignoring the casing of ASCII
/// letters, and returns the index in `left` at which the match starts.
///
/// * `start_index` - number of bytes excluded from the search. When `direction_left` is `true`
///   they are skipped at the front of `left`; otherwise they are skipped at the back.
/// * `direction_left` - `true` scans front to back and yields the first match starting at or
///   after `start_index`; `false` scans back to front and yields the match closest to the end
///   that lies entirely inside `left[..left.len() - start_index]`.
///
/// Returns `None` when `right` is empty, when `start_index` is not a valid index into `left`,
/// or when the searched region contains no match.
#[inline]
pub(crate) const fn find_slice(
    start_index: usize,
    left: &[u8],
    right: &[u8],
    case_insensitive: bool,
    direction_left: bool,
) -> Option<usize> {
    if right.is_empty() || start_index >= left.len() {
        return None;
    }

    // Length of the region that is allowed to contain the match.
    let searchable = left.len() - start_index;
    if right.len() > searchable {
        return None;
    }

    // Every candidate start position is compared in full. Carrying a partial-match counter
    // across positions would skip matches that begin inside a failed partial match
    // (e.g. "ab" inside "aab").
    let candidates = searchable - right.len() + 1;
    let mut step = 0;
    while step < candidates {
        let start = if direction_left {
            start_index + step
        } else {
            // Walk the candidates from the one nearest the end towards the front.
            searchable - right.len() - step
        };

        let mut matched = 0;
        while matched < right.len() {
            let l = left[start + matched];
            let r = right[matched];
            if !(l == r || (case_insensitive && l.eq_ignore_ascii_case(&r))) {
                break;
            }
            matched += 1;
        }
        if matched == right.len() {
            return Some(start);
        }
        step += 1;
    }
    None
}

// /// Finds the first instance of any of the bytes in the list returning a tuple
// /// containing the index and what in the list matched.
// pub(crate) fn find_first_bytes(
//     start_index: usize,
//     s: &[u8],
//     bytes: &[u8],
//     case_insensitive: bool,
// ) -> Option<(u8, usize)> {
//     if start_index >= s.len() {
//         return None;
//     }
//
//     let mut i = start_index;
//     while i < s.len() {
//         let c = s[i];
//         let mut n = 0;
//         while n < bytes.len() {
//             let b = bytes[n];
//             if b == c || (case_insensitive && c.eq_ignore_ascii_case(&b)) {
//                 return Some((c, i));
//             }
//             n += 1
//         }
//         i += 1;
//     }
//     None
// }

/// Searches `left` front to back for `right` and returns the byte index in `left` at which
/// the match begins.
///
/// The first `start_index` bytes of `left` are skipped. The casing of ASCII letters is ignored
/// only when `case_insensitive` is `true`.
///
/// Returns `None` when [`find_slice`] reports no match. Any panic behaviour is inherited from
/// [`find_slice`].
pub(crate) const fn find<'a>(
    start_index: usize,
    left: &'a str,
    right: &'a str,
    case_insensitive: bool,
) -> Option<usize> {
    let haystack = left.as_bytes();
    let needle = right.as_bytes();
    // `true` selects the front-to-back scan.
    find_slice(start_index, haystack, needle, case_insensitive, true)
}

/// Searches `left` back to front for `right` and returns the byte index in `left` at which
/// the match begins.
///
/// The last `start_index` bytes of `left` are excluded from the search. The casing of ASCII
/// letters is ignored only when `case_insensitive` is `true`.
///
/// Returns `None` when [`find_slice`] reports no match. Any panic behaviour is inherited from
/// [`find_slice`].
pub(crate) const fn find_from_end<'a>(
    start_index: usize,
    left: &'a str,
    right: &'a str,
    case_insensitive: bool,
) -> Option<usize> {
    let haystack = left.as_bytes();
    let needle = right.as_bytes();
    // `false` selects the back-to-front scan.
    find_slice(start_index, haystack, needle, case_insensitive, false)
}

/// Strips leading ASCII spaces (`b' '`) from `s` and returns the remaining suffix.
///
/// Only the space character is removed; tabs, newlines and other whitespace are kept. An input
/// that is empty or consists solely of spaces yields `""`.
pub(crate) const fn trim_start<'a>(s: &'a str) -> &'a str {
    let bytes = s.as_bytes();
    let mut start = 0;
    while start < bytes.len() && bytes[start] == b' ' {
        start += 1;
    }

    // Nothing remains after the spaces. Handled here so the result does not depend on how
    // `str_from` treats an index equal to the length of the string.
    if start == bytes.len() {
        return "";
    }

    match str_from(s, start) {
        Some(rest) => rest,
        // `start` is smaller than the length and points at a non-space byte that directly
        // follows ASCII bytes (or the start of the string), so the cut is always valid.
        None => unreachable!(),
    }
}

/// Strips trailing ASCII spaces (`b' '`) from `s` and returns the remaining prefix.
///
/// Only the space character is removed; tabs, newlines and other whitespace are kept. An input
/// that is empty or consists solely of spaces yields `""`, and an input without trailing
/// spaces is returned unchanged.
pub(crate) const fn trim_end<'a>(s: &'a str) -> &'a str {
    let bytes = s.as_bytes();
    // `end` is the exclusive end of the part to keep.
    let mut end = bytes.len();
    while end > 0 && bytes[end - 1] == b' ' {
        end -= 1;
    }

    // Nothing to strip. Handled here so the result does not depend on how `str_up_to` treats
    // an index equal to the length of the string.
    if end == bytes.len() {
        return s;
    }

    match str_up_to(s, end) {
        Some(kept) => kept,
        // `end` is smaller than the length and the byte at `end` is an ASCII space, so the
        // cut is always valid.
        None => unreachable!(),
    }
}

/// Strips both ends of `s` by applying [`trim_start`] followed by [`trim_end`], returning the
/// resulting str.
pub(crate) const fn trim<'a>(s: &'a str) -> &'a str {
    trim_end(trim_start(s))
}

/// Returns the prefix of `s` that ends just before byte `index`, i.e. the first `index` bytes.
///
/// `index` may be anything from `0` (empty prefix) to `s.len()` (the whole string). `None` is
/// returned when `index` is past the end of `s` or falls inside a multi-byte UTF-8 character.
///
/// This is a hack to get subslices of a str to work in const time, since several things are not yet
/// stabilized in `const`: see <https://github.com/rust-lang/rfcs/pull/2632> and <https://github.com/rust-lang/rust/issues/66753>.
pub(crate) const fn str_up_to<'a>(s: &'a str, index: usize) -> Option<&'a str> {
    let bytes = s.as_bytes();
    if index > bytes.len() {
        return None;
    }
    // A cut is only valid between characters: at the very end, or in front of a byte that is
    // not a UTF-8 continuation byte (`0b10xx_xxxx`).
    if index < bytes.len() && (bytes[index] & 0xC0) == 0x80 {
        return None;
    }
    // SAFETY: `index <= bytes.len()`, so the pointer/length pair stays inside the bytes of
    // `s`, and `index` is a char boundary, so the prefix is valid UTF-8.
    unsafe {
        let slice = core::slice::from_raw_parts(bytes.as_ptr(), index);
        Some(core::str::from_utf8_unchecked(slice))
    }
}

/// Returns the suffix of `s` that starts at byte `index`.
///
/// `index` may be anything from `0` (the whole string) to `s.len()` (empty suffix). `None` is
/// returned when `index` is past the end of `s` or falls inside a multi-byte UTF-8 character.
///
/// This is a hack to get subslices of a str to work in const time, since several things are not yet
/// stabilized in `const`: see <https://github.com/rust-lang/rfcs/pull/2632> and <https://github.com/rust-lang/rust/issues/66753>.
pub(crate) const fn str_from<'a>(s: &'a str, index: usize) -> Option<&'a str> {
    let bytes = s.as_bytes();
    if index > bytes.len() {
        return None;
    }
    // A cut is only valid between characters: at the very end, or in front of a byte that is
    // not a UTF-8 continuation byte (`0b10xx_xxxx`).
    if index < bytes.len() && (bytes[index] & 0xC0) == 0x80 {
        return None;
    }
    let remaining = bytes.len() - index;
    // SAFETY: `index <= bytes.len()`, so the offset pointer is at most one past the end and
    // `remaining` bytes starting there lie inside the bytes of `s`; `index` is a char
    // boundary, so the suffix is valid UTF-8.
    unsafe {
        let slice = core::slice::from_raw_parts(bytes.as_ptr().add(index), remaining);
        Some(core::str::from_utf8_unchecked(slice))
    }
}

/// Returns the part of `s` between the byte offsets `start` and `end`.
///
/// The prefix ending at `end` is cut first with [`str_up_to`], then the part starting at
/// `start` is taken from that prefix with [`str_from`]. `None` is returned as soon as either
/// step returns `None`.
///
/// This is a hack to get subslices of a str to work in const time, since several things are not yet
/// stabilized in `const`: see <https://github.com/rust-lang/rfcs/pull/2632> and <https://github.com/rust-lang/rust/issues/66753>.
pub(crate) const fn str_range<'a>(s: &'a str, start: usize, end: usize) -> Option<&'a str> {
    match str_up_to(s, end) {
        Some(prefix) => str_from(prefix, start),
        None => None,
    }
}

/// Parser of help text whose methods are almost all `const fn`, so it can be driven at
/// compile time.
///
/// It pairs the full text with a byte cursor. The type is `Copy`; the reading and skipping
/// methods take the parser by value and hand back an updated copy rather than mutating in
/// place.
///
/// A lot of credit for this goes to [`konst`](https://github.com/rodrimati1992/konst)
/// from which I have lifted a bunch of constant-time hacks to make this feasible.
#[derive(Debug, Copy, Clone)]
pub(crate) struct Parser<'a> {
    /// Help text being parsed; the parser only ever reads from it.
    data: &'a str,
    /// Current byte index into `data`, i.e. where the next read or search starts.
    current_index: usize,
}

impl<'a> Parser<'a> {
    /// Creates a parser over `data` whose cursor starts at byte `start_index`.
    pub(crate) const fn new(data: &'a str, start_index: usize) -> Self {
        Self {
            data,
            current_index: start_index,
        }
    }

    /// Returns the complete string the parser was created with, regardless of the cursor.
    pub(crate) const fn data(&self) -> &'a str {
        self.data
    }

    /// Returns the data from the cursor onwards, as produced by [`str_from`].
    pub(crate) const fn remainder(&self) -> Option<&'a str> {
        str_from(self.data, self.current_index)
    }

    /// Returns the byte under the cursor, or `None` when the cursor is at or past the end.
    pub(crate) const fn first(&self) -> Option<u8> {
        self.get(self.current_index)
    }

    /// Returns the last byte of the data (independent of the cursor), or `None` when the data
    /// is empty.
    pub(crate) const fn last(&self) -> Option<u8> {
        let len = self.data.len();
        // Guard the subtraction below: empty data has no last byte.
        if len == 0 {
            return None;
        }
        self.get(len - 1)
    }

    /// Returns the byte at the given absolute index into the data (if it exists).
    pub(crate) const fn get(&self, index: usize) -> Option<u8> {
        let bytes = self.data.as_bytes();
        if index >= bytes.len() {
            return None;
        }
        Some(bytes[index])
    }

    /// Reports whether `pattern` occurs in the data at byte `offset`.
    ///
    /// A pattern that would run past the end of the data is a mismatch rather than a panic.
    const fn matches_at(&self, offset: usize, pattern: &[u8]) -> bool {
        let bytes = self.data.as_bytes();
        if offset > bytes.len() || pattern.len() > bytes.len() - offset {
            return false;
        }

        let mut i = 0;
        while i < pattern.len() {
            if bytes[offset + i] != pattern[i] {
                return false;
            }
            i += 1;
        }
        true
    }

    /// Returns the text between the cursor and byte `end` together with a parser whose cursor
    /// has been moved to `next_index`.
    ///
    /// Returns `None` (leaving nothing advanced) when [`str_range`] cannot produce the slice.
    const fn take_range(mut self, end: usize, next_index: usize) -> Option<(&'a str, Self)> {
        match str_range(self.data, self.current_index, end) {
            Some(s) => {
                self.current_index = next_index;
                Some((s, self))
            }
            None => None,
        }
    }

    /// Checks if the data at the cursor starts with a given pattern (case-sensitive).
    ///
    /// Returns `false` when the pattern is longer than what is left after the cursor.
    pub(crate) const fn starts_with(&self, pattern: &'a str) -> bool {
        self.matches_at(self.current_index, pattern.as_bytes())
    }

    /// Checks if the data at the cursor starts with `num` back-to-back repetitions of
    /// `pattern` (case-sensitive).
    ///
    /// Returns `false` when the repetitions would run past the end of the data.
    pub(crate) const fn starts_with_num(&self, pattern: &'a str, num: usize) -> bool {
        let pattern = pattern.as_bytes();

        let mut offset = self.current_index;
        let mut count = 0;
        while count < num {
            if !self.matches_at(offset, pattern) {
                return false;
            }
            // A successful match ends inside the data, so this cannot overflow.
            offset += pattern.len();
            count += 1;
        }
        true
    }

    /// Reads `num` bytes starting at the cursor, returning them and the parser advanced past
    /// them.
    ///
    /// Returns `None` when `num` is not smaller than the length of the data or when the
    /// requested range cannot be sliced. Reading zero bytes yields an empty string and leaves
    /// the cursor where it is.
    pub(crate) const fn read(self, num: usize) -> Option<(&'a str, Self)> {
        if num >= self.data.len() {
            return None;
        }
        if num == 0 {
            return Some(("", self));
        }
        let Some(end) = self.current_index.checked_add(num) else {
            return None;
        };
        self.take_range(end, end)
    }

    /// Reads until a given delimiter occurs, returning the text in front of it. The cursor
    /// of the returned parser sits on the first byte of the delimiter.
    ///
    /// Returns `None` when the delimiter is not found or the text cannot be sliced.
    pub(crate) const fn read_until(
        self,
        delimiter: &'a str,
        case_insensitive: bool,
    ) -> Option<(&'a str, Self)> {
        let Some(index) = find(self.current_index, self.data, delimiter, case_insensitive) else {
            return None;
        };
        self.take_range(index, index)
    }

    /// Reads until one of the delimiters occurs, if multiple match the one with the lowest index
    /// will be the one chosen.
    ///
    /// Returns `None` when none of the delimiters is found or the text cannot be sliced.
    pub(crate) const fn read_until_either(
        self,
        delimiters: &[&'a str],
        case_insensitive: bool,
    ) -> Option<(&'a str, Self)> {
        let mut lowest_index: Option<usize> = None;
        let mut i = 0;
        while i < delimiters.len() {
            let found = find(
                self.current_index,
                self.data,
                delimiters[i],
                case_insensitive,
            );
            if let Some(index) = found {
                lowest_index = match lowest_index {
                    Some(lowest) => {
                        if index < lowest {
                            Some(index)
                        } else {
                            Some(lowest)
                        }
                    }
                    None => Some(index),
                };
            }
            i += 1;
        }

        match lowest_index {
            Some(index) => self.take_range(index, index),
            None => None,
        }
    }

    /// Reads from the parser while the bytes are ASCII whitespace (as defined by
    /// `u8::is_ascii_whitespace`), stopping at the first other byte or at the end of the data.
    ///
    /// Returns `None` when the consumed range cannot be sliced.
    pub(crate) const fn read_while_whitespace(self) -> Option<(&'a str, Self)> {
        let bytes = self.data.as_bytes();
        let mut index = self.current_index;
        while index < bytes.len() && bytes[index].is_ascii_whitespace() {
            index += 1;
        }
        self.take_range(index, index)
    }

    /// Reads from the parser while there are ASCII alphanumeric characters (including '-' and
    /// '_'), stopping at the first other byte or at the end of the data.
    ///
    /// Returns `None` when the consumed range cannot be sliced.
    pub(crate) const fn read_while_alphanumeric(self) -> Option<(&'a str, Self)> {
        let bytes = self.data.as_bytes();
        let mut index = self.current_index;
        while index < bytes.len() {
            let b = bytes[index];
            if !b.is_ascii_alphanumeric() && b != b'-' && b != b'_' {
                break;
            }
            index += 1;
        }
        self.take_range(index, index)
    }

    /// Skips the internal parser's current index a given number forward.
    ///
    /// Returns `None` when `num` is not smaller than the length of the data, or when the skip
    /// would overflow or move the cursor past the end of the data.
    pub(crate) const fn skip(mut self, num: usize) -> Option<Self> {
        let len = self.data.len();
        if num >= len {
            return None;
        }
        let Some(next_index) = self.current_index.checked_add(num) else {
            return None;
        };
        if next_index > len {
            return None;
        }
        self.current_index = next_index;
        Some(self)
    }

    /// Skips the internal parser to before a given delimiter if it exists. Returning None
    /// if the delimiter does not exist, thus no skipping.
    pub(crate) const fn skip_until(
        mut self,
        delimiter: &'a str,
        case_insensitive: bool,
    ) -> Option<Self> {
        let Some(index) = find(self.current_index, self.data, delimiter, case_insensitive) else {
            return None;
        };
        self.current_index = index;
        Some(self)
    }

    /// Skips the internal parser to after a given delimiter if it exists. Returning None
    /// if the delimiter does not exist, thus no skipping.
    pub(crate) const fn skip_after(
        mut self,
        delimiter: &'a str,
        case_insensitive: bool,
    ) -> Option<Self> {
        let Some(index) = find(self.current_index, self.data, delimiter, case_insensitive) else {
            return None;
        };
        // The match lies entirely inside the data, so the end of the delimiter is at most
        // `self.data.len()` and needs no extra bounds check.
        self.current_index = index + delimiter.len();
        Some(self)
    }

    /// Reads until the end of a line from the parser, consuming the terminating `"\n"`.
    ///
    /// Returns `None` when no `"\n"` is found after the cursor.
    pub(crate) const fn readline(self) -> Option<(&'a str, Self)> {
        self.split_once_delimiter("\n", false)
    }

    /// Splits the string in the parser at the next occurrence of the delimiter, returning the
    /// text before (and not including) the delimiter together with the parser positioned just
    /// after the delimiter.
    ///
    /// Returns `None` when the delimiter is not found or the text cannot be sliced.
    pub(crate) const fn split_once_delimiter(
        self,
        delimiter: &'a str,
        case_insensitive: bool,
    ) -> Option<(&'a str, Self)> {
        let Some(index) = find(self.current_index, self.data, delimiter, case_insensitive) else {
            return None;
        };
        self.take_range(index, index + delimiter.len())
    }
}

/// Unit tests for the free search/trim helpers and for `Parser`.
#[cfg(test)]
mod tests {
    use super::*;

    // The test functions below share names with the functions under test, so those are
    // called through an explicit `super::` path.

    #[test]
    fn find() {
        assert_eq!(super::find(0, "foo-bar", "foo", false), Some(0));
        assert_eq!(super::find(0, "foo-bar", "BAR", true), Some(4));
        assert_eq!(super::find(0, "foo-foo", "foo", false), Some(0));
        assert_eq!(super::find(0, "foo-bar", "biz", false), None);
        assert_eq!(super::find(0, "fo-oof", "foo", false), None);
    }

    #[test]
    fn find_from_end() {
        assert_eq!(super::find_from_end(0, "foo-bar", "bar", false), Some(4));
        assert_eq!(super::find_from_end(0, "foo-bar", "BAR", true), Some(4));
        assert_eq!(super::find_from_end(0, "foo-bar", "biz", false), None);
        assert_eq!(super::find_from_end(0, "foo-foo", "foo", false), Some(4));
        assert_eq!(super::find_from_end(0, "fo-oof", "foo", false), None);
    }

    #[test]
    fn starts_with() {
        let mut parser = Parser::new("abc def ghi jkl", 0);
        assert!(parser.starts_with("abc"));
        assert!(!parser.starts_with("def"));
        parser = parser.skip(4).unwrap();
        assert!(parser.starts_with("def"));
        assert!(!parser.starts_with("abc"));
        assert!(!parser.starts_with("ghi"));
    }

    // #[test]
    // fn find_first_bytes() {
    //     assert_eq!(
    //         super::find_first_bytes(0, b"abcdefghjkl", b"lkjh", false),
    //         Some((b'h', 7))
    //     );
    //     assert_eq!(
    //         super::find_first_bytes(0, b"abcdefghjkl", b"mno", false),
    //         None,
    //     );
    //     assert_eq!(
    //         super::find_first_bytes(0, b"abcdefghjkl", b"D", true),
    //         Some((b'd', 3)),
    //     );
    //     assert_eq!(
    //         super::find_first_bytes(0, b"abcdefghjkl", b"a", false),
    //         Some((b'a', 0))
    //     );
    // }

    #[test]
    fn split_once_delimiter() {
        let parser = Parser::new("abc def ghi", 0);
        let (s, parser) = parser.split_once_delimiter(" ", false).unwrap();
        assert_eq!(s, "abc");
        let (s, parser) = parser.split_once_delimiter(" ", false).unwrap();
        assert_eq!(s, "def");
        assert_eq!(parser.remainder(), Some("ghi"));
    }

    #[test]
    fn read_until() {
        let parser = Parser::new("abc def ghi jkl", 0);
        let (s, parser) = parser.read_until("d", false).unwrap();
        assert_eq!(s, "abc ");
        assert_eq!(parser.remainder(), Some("def ghi jkl"));
        // does not advance parser index
        let prev_index = parser.current_index;
        let read_opt = parser.read_until("m", false);
        assert!(read_opt.is_none());
        assert_eq!(prev_index, parser.current_index);
    }

    #[test]
    fn read_until_either() {
        let parser = Parser::new("abc def ghi jkl", 0);
        let (s, parser) = parser
            .read_until_either(&["mno", "jkl", "ghi", "def"], false)
            .unwrap();
        assert_eq!(s, "abc ");
        assert_eq!(parser.remainder(), Some("def ghi jkl"));
        // does not advance parser index when none of the delimiters occurs
        let prev_index = parser.current_index;
        let read_opt = parser.read_until_either(&["mno", "xyz"], false);
        assert!(read_opt.is_none());
        assert_eq!(prev_index, parser.current_index);
    }

    #[test]
    fn skip() {
        let mut parser = Parser::new("abc def ghi", 0);
        parser = parser.skip(4).unwrap();
        assert_eq!(parser.remainder(), Some("def ghi"));
    }

    #[test]
    fn skip_until() {
        let mut parser = Parser::new("abc def ghi", 0);
        parser = parser.skip_until("ghi", false).unwrap();
        assert_eq!(parser.remainder(), Some("ghi"));
    }

    #[test]
    fn skip_after() {
        let mut parser = Parser::new("abc def ghi", 0);
        parser = parser.skip_after("def", false).unwrap();
        assert_eq!(parser.remainder(), Some(" ghi"));
    }

    #[test]
    fn readline() {
        let parser = Parser::new("foo\nbar\nbiz", 0);
        let (s, parser) = parser.readline().unwrap();
        assert_eq!(s, "foo");
        let (s, parser) = parser.readline().unwrap();
        assert_eq!(s, "bar");
        assert_eq!(parser.remainder(), Some("biz"));
    }

    #[test]
    fn trim() {
        let s = "   abc   ";
        assert_eq!("abc   ", super::trim_start(s));
        assert_eq!("   abc", super::trim_end(s));
        assert_eq!("abc", super::trim(s));
        let s = "  there is no spoon  ";
        assert_eq!("there is no spoon", super::trim(s));
    }
}