git-bug 0.2.4

A rust library for interfacing with git-bug repositories
Documentation
// git-bug-rs - A rust library for interfacing with git-bug repositories
//
// Copyright (C) 2025 Benedikt Peetz <benedikt.peetz@b-peetz.de>
// SPDX-License-Identifier: GPL-3.0-or-later
//
// This file is part of git-bug-rs/git-gub.
//
// You should have received a copy of the License along with this program.
// If not, see <https://www.gnu.org/licenses/agpl.txt>.

// FIXME(@bpeetz): This should not need to allocate. But to remove the quotes in
// the input string, we need to either allocate, or return something like an
// Option<Option<char>>, which would require the tokenizer api to change.
// <2025-04-13>

use std::mem;

/// Iterator that splits a string on a delimiter character, while keeping
/// sections wrapped in `"` or `'` together as one part.
pub(crate) struct Splitter<'a> {
    /// The part of the source string that has not been consumed yet.
    input: &'a str,
    /// Buffer in which the part currently being read is accumulated.
    output: String,
    /// The character that separates parts outside of quotes.
    delimiter: char,
    /// Tracks whether the read position is currently inside a quote.
    state: SplitState,
}

/// Quote-tracking state of a [`Splitter`].
///
/// At most one of the two flags is set at any time: a quote is only ever opened
/// while no other quote is open, so a quote character of the other kind inside
/// an open quote is plain text and never starts a nested quote.
#[derive(Debug)]
struct SplitState {
    /// Set while the read position is between an opening and closing `"`.
    in_double_quote: bool,
    /// Set while the read position is between an opening and closing `'`.
    in_single_quote: bool,
}

impl SplitState {
    /// Returns `true` while a quote of either kind is open.
    fn in_quote(&self) -> bool {
        self.in_double_quote || self.in_single_quote
    }

    /// Feed `token` into the state and report whether it belongs in the output.
    ///
    /// This is not a pure query: a quote token that opens or closes a quote
    /// flips the matching flag (and is itself dropped from the output).
    ///
    /// # Panics
    ///
    /// Panics if both quote flags are set when a quote token arrives. This
    /// method never produces that state itself.
    fn should_add(&mut self, token: SplitToken) -> bool {
        match (token, self.in_single_quote, self.in_double_quote) {
            // Ordinary characters are always kept.
            (SplitToken::Char(_), _, _) => true,

            // A delimiter is only text while a quote protects it.
            (SplitToken::Delimiter, _, _) => self.in_quote(),

            // Start double quote
            (SplitToken::DoubleQuote, false, false) => {
                self.in_double_quote = true;
                false
            }
            // End double quote
            (SplitToken::DoubleQuote, false, true) => {
                self.in_double_quote = false;
                false
            }

            // Start single quote
            (SplitToken::SingleQuote, false, false) => {
                self.in_single_quote = true;
                false
            }
            // End single quote
            (SplitToken::SingleQuote, true, false) => {
                self.in_single_quote = false;
                false
            }

            // A quote of the other kind inside an open quote is literal text.
            // This has the same body as the `Char` arm, but is kept as its own
            // arm so that the nested-quote case stays explicit.
            #[allow(clippy::match_same_arms)]
            (SplitToken::DoubleQuote, true, false) | (SplitToken::SingleQuote, false, true) => true,

            (SplitToken::DoubleQuote | SplitToken::SingleQuote, true, true) => {
                unreachable!("Never have two quotes active at the same time.")
            }
        }
    }
}

/// A single character of the input, classified by the role it plays while
/// splitting.
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Debug)]
enum SplitToken {
    /// A `"` character.
    DoubleQuote,
    /// A `'` character.
    SingleQuote,
    /// The delimiter character the splitter was configured with.
    Delimiter,
    /// Any other character, carried along verbatim.
    Char(char),
}

impl SplitToken {
    /// Turn the token back into the character it stands for.
    ///
    /// `delimiter` is the character returned for [`SplitToken::Delimiter`], as
    /// the token itself does not store it.
    fn to_char(self, delimiter: char) -> char {
        match self {
            Self::Char(literal) => literal,
            Self::Delimiter => delimiter,
            Self::SingleQuote => '\'',
            Self::DoubleQuote => '"',
        }
    }
}

impl Splitter<'_> {
    /// Read the next part from the remaining input.
    ///
    /// Characters are consumed until an unquoted delimiter or the end of the
    /// input is reached. Quote characters that open or close a quote are
    /// dropped; everything else inside the quote (delimiters included) is kept.
    /// A run of unquoted delimiters counts as a single separator.
    ///
    /// Returns `None` once the input is exhausted.
    fn next_part(&mut self) -> Option<String> {
        if self.input.is_empty() {
            return None;
        }

        while let Some(token) = self.next_token() {
            self.chomp(token.to_char(self.delimiter));

            if self.state.should_add(token) {
                self.add_to_output(token);
            }

            if token == SplitToken::Delimiter && !self.state.in_quote() {
                // The part is complete. Swallow the rest of the delimiter run as
                // well, so that repeated delimiters do not produce empty parts.
                self.input = self.input.trim_start_matches(self.delimiter);
                break;
            }
        }

        // Either a separator was hit or the input ran out; in both cases
        // whatever has been collected so far is the part.
        Some(self.return_output())
    }

    /// Hand out the accumulated part and leave an empty buffer behind.
    fn return_output(&mut self) -> String {
        mem::take(&mut self.output)
    }

    /// Classify the next character of the input without consuming it.
    ///
    /// The delimiter is checked first, so a quote character that is used as
    /// the delimiter is reported as [`SplitToken::Delimiter`].
    fn next_token(&self) -> Option<SplitToken> {
        let token = match self.peek_char()? {
            ch if ch == self.delimiter => SplitToken::Delimiter,
            '"' => SplitToken::DoubleQuote,
            '\'' => SplitToken::SingleQuote,
            ch => SplitToken::Char(ch),
        };

        Some(token)
    }

    /// Look at the next character of the input without consuming it.
    fn peek_char(&self) -> Option<char> {
        self.input.chars().next()
    }

    /// Drop `ch` from the front of the input.
    ///
    /// `ch` must be the character [`Self::peek_char`] currently returns; only
    /// its encoded length is used to advance the input.
    fn chomp(&mut self, ch: char) {
        self.input = &self.input[ch.len_utf8()..];
    }

    /// Append the character represented by `token` to the current part.
    fn add_to_output(&mut self, token: SplitToken) {
        self.output.push(token.to_char(self.delimiter));
    }
}

impl<'a> Splitter<'a> {
    /// Build a splitter over `src` that breaks parts apart at `delimiter`.
    ///
    /// Delimiters at either end of `src` are stripped right away. The splitter
    /// starts outside of any quote and with an empty output buffer.
    pub(crate) fn new(src: &'a str, delimiter: char) -> Self {
        let state = SplitState {
            in_single_quote: false,
            in_double_quote: false,
        };
        let input = src.trim_matches(delimiter);

        Self {
            input,
            output: String::new(),
            delimiter,
            state,
        }
    }
}

impl Iterator for Splitter<'_> {
    type Item = String;

    /// Yield the next part of the input by delegating to `next_part`.
    fn next(&mut self) -> Option<Self::Item> {
        Self::next_part(self)
    }
}

#[cfg(test)]
mod split_unquoted_whitespace_test {
    use super::*;

    /// Assert that splitting a string yields exactly the listed parts, in
    /// order, and nothing after them.
    ///
    /// * `t!(src -> [parts, ...])` splits on a space.
    /// * `t!(@with delimiter src -> [parts, ...])` splits on `delimiter`.
    macro_rules! t {
        ($src:literal -> [$($token:literal),* $(,)?]) => {
            t!(@with ' ' $src -> [$($token),*])
        };

        // The expansion is wrapped in a block, so that it stays a single
        // self-contained statement at the call site.
        (@with $delimiter:literal $src:literal -> [$($token:literal),* $(,)?]) => {{
            let mut split = Splitter::new($src, $delimiter);
            $(
                assert_eq!(split.next(), Some($token.to_owned()));
            )*
            assert_eq!(split.next(), None);
        }};
    }

    #[test]
    fn test_previous_doc() {
        t!(r#"Type "rhit -p blog" or "rhit --path blog""# -> ["Type", "rhit -p blog", "or", "rhit --path blog"]);
    }

    #[test]
    fn test_multiple_delimitors() {
        // Input that consists only of delimiters yields nothing at all.
        t!("" -> []);
        t!("    " -> []);
        // Quoted delimiters are content.
        t!(" \"  \"" -> ["  "]);

        t!(@with ',' "" -> []);
        t!(@with ',' ",,,," -> []);
    }

    #[test]
    fn test_quote_trimming() {
        t!("1234" -> ["1234"]);
        // An unterminated quote is dropped like a terminated one.
        t!("1234\"" -> ["1234"]);
        t!(r#"""# -> [""]);
        t!(r#""a""# -> ["a"]);
        t!(r#" " "# -> [""]);
    }

    #[test]
    fn test_complex() {
        t!(r#" a  "2 * 试" x"x "z "# -> ["a", "2 * 试", "xx z"]);
    }

    #[test]
    fn test_many_quotes() {
        // TODO(@bpeetz): This should probably just return None instead of Some("").
        // <2025-05-03>
        t!(r#"""""# -> [""]);
        t!(r#""""""# -> [""]);
    }

    #[test]
    fn test_utf8_bytes() {
        t!("e^iπ^ = 1" -> ["e^iπ^", "=", "1"]);
    }

    #[test]
    fn test_multi_space_infix() {
        t!(" a    试bc d  " -> ["a", "试bc", "d"]);
        t!(r#"a  "deux mots" b"# -> ["a", "deux mots", "b"]);
    }

    #[test]
    fn test_commas() {
        t!(@with ',' "1,2,3,4" -> ["1", "2", "3", "4"]);
    }

    #[test]
    fn test_quote_delimitor() {
        // A quote character used as the delimiter only acts as a delimiter.
        t!(@with '"' "one\" two\"three" -> ["one", " two", "three"]);
        t!(@with '\'' "one' two'three" -> ["one", " two", "three"]);
    }

    #[test]
    fn test_single_quotes() {
        // Single quotes group parts in the same way double quotes do.
        t!("a 'b c' d" -> ["a", "b c", "d"]);
        t!("'a'" -> ["a"]);
    }

    #[test]
    fn test_nested_quotes() {
        // A quote of the other kind inside an open quote is kept as text.
        t!(r#"'a "b" c' d"# -> [r#"a "b" c"#, "d"]);
        t!(r#""it's" ok"# -> ["it's", "ok"]);
    }
}