// rstring 0.1.0 - Docs.rs

//! Character and substring removal utilities.
//!
//! This module provides functions for removing prefixes, suffixes, characters,
//! substrings, and whitespace from strings.
//!
//! # Usage
//!
//! Import the [`StringRemove`] trait to use methods directly on strings:
//!
//! ```
//! use rstring::StringRemove;
//!
//! assert_eq!("www.domain.com".remove_start("www."), "domain.com");
//! assert_eq!("www.domain.com".remove_end(".com"), "www.domain");
//! assert_eq!("queued".remove_occurrence("ue"), "qd");
//! assert_eq!("  ab  c  ".delete_whitespace(), "abc");
//! ```

use std::borrow::Cow;

use crate::shared::{
    ends_with_ignore_ascii_case, ends_with_ignore_case, starts_with_ignore_ascii_case,
    starts_with_ignore_case,
};

/// Extension trait for string removal methods.
///
/// This trait is implemented for `str`, allowing you to call removal
/// methods directly on `&str`, `String`, and other string types that
/// dereference to `str` (for example `Box<str>`).
///
/// Methods that can always return a sub-slice of the input return `&str`;
/// methods that may need to build a new string return [`Cow`], borrowing
/// the input when nothing had to be removed.
///
/// # Examples
///
/// ```
/// use rstring::StringRemove;
///
/// // Prefix/suffix removal
/// assert_eq!("/path/to/file".remove_start("/"), "path/to/file");
/// assert_eq!("file.txt".remove_end(".txt"), "file");
///
/// // Substring removal
/// assert_eq!("queued".remove_occurrence("ue"), "qd");
/// assert_eq!("quEUed".remove_ignore_case("UE"), "qd");
///
/// // Whitespace removal
/// assert_eq!("  ab  c  ".delete_whitespace(), "abc");
/// ```
pub trait StringRemove {
    /// Removes the prefix from the start of the string if present.
    ///
    /// The comparison is case-sensitive and at most one occurrence of the
    /// prefix is removed.
    ///
    /// Returns the string unchanged if the prefix is empty or not found.
    ///
    /// # Examples
    ///
    /// ```
    /// use rstring::StringRemove;
    ///
    /// assert_eq!("".remove_start(""), "");
    /// assert_eq!("www.domain.com".remove_start("www."), "domain.com");
    /// assert_eq!("domain.com".remove_start("www."), "domain.com");
    /// assert_eq!("www.domain.com".remove_start("WWW."), "www.domain.com");
    /// ```
    #[must_use]
    fn remove_start(&self, prefix: &str) -> &str;

    /// Removes the prefix from the start of the string if present
    /// (ASCII case-insensitive comparison).
    ///
    /// Only handles A-Z/a-z case folding. For full Unicode support, use
    /// [`remove_start_ignore_case`](StringRemove::remove_start_ignore_case).
    ///
    /// Returns the string unchanged if the prefix is empty or not found.
    ///
    /// # Examples
    ///
    /// ```
    /// use rstring::StringRemove;
    ///
    /// assert_eq!("".remove_start_ignore_ascii_case(""), "");
    /// assert_eq!("www.domain.com".remove_start_ignore_ascii_case("www."), "domain.com");
    /// assert_eq!("www.domain.com".remove_start_ignore_ascii_case("WWW."), "domain.com");
    /// assert_eq!("domain.com".remove_start_ignore_ascii_case("www."), "domain.com");
    /// ```
    #[must_use]
    fn remove_start_ignore_ascii_case(&self, prefix: &str) -> &str;

    /// Removes the prefix from the start of the string if present
    /// (Unicode case-insensitive comparison).
    ///
    /// Handles full Unicode case folding (e.g., é/É, ñ/Ñ) but requires allocation
    /// for the comparison. For ASCII-only strings, prefer
    /// [`remove_start_ignore_ascii_case`](StringRemove::remove_start_ignore_ascii_case).
    ///
    /// Returns the string unchanged if the prefix is empty or not found.
    ///
    /// # Examples
    ///
    /// ```
    /// use rstring::StringRemove;
    ///
    /// assert_eq!("".remove_start_ignore_case(""), "");
    /// assert_eq!("www.domain.com".remove_start_ignore_case("www."), "domain.com");
    /// assert_eq!("www.domain.com".remove_start_ignore_case("WWW."), "domain.com");
    /// assert_eq!("domain.com".remove_start_ignore_case("www."), "domain.com");
    /// assert_eq!("Éclair".remove_start_ignore_case("é"), "clair");
    /// ```
    #[must_use]
    fn remove_start_ignore_case(&self, prefix: &str) -> &str;

    /// Removes the first character from the string if it matches the given character.
    ///
    /// Only a single leading occurrence is removed; the comparison is exact.
    ///
    /// Returns the string unchanged if it does not start with the character.
    ///
    /// # Examples
    ///
    /// ```
    /// use rstring::StringRemove;
    ///
    /// assert_eq!("".remove_start_char('/'), "");
    /// assert_eq!("/path".remove_start_char('/'), "path");
    /// assert_eq!("path".remove_start_char('/'), "path");
    /// ```
    #[must_use]
    fn remove_start_char(&self, c: char) -> &str;

    /// Removes the suffix from the end of the string if present.
    ///
    /// The comparison is case-sensitive and at most one occurrence of the
    /// suffix is removed.
    ///
    /// Returns the string unchanged if the suffix is empty or not found.
    ///
    /// # Examples
    ///
    /// ```
    /// use rstring::StringRemove;
    ///
    /// assert_eq!("".remove_end(""), "");
    /// assert_eq!("www.domain.com".remove_end(".com"), "www.domain");
    /// assert_eq!("www.domain.com.".remove_end(".com"), "www.domain.com.");
    /// assert_eq!("www.domain.com".remove_end(".COM"), "www.domain.com");
    /// ```
    #[must_use]
    fn remove_end(&self, suffix: &str) -> &str;

    /// Removes the suffix from the end of the string if present
    /// (ASCII case-insensitive comparison).
    ///
    /// Only handles A-Z/a-z case folding. For full Unicode support, use
    /// [`remove_end_ignore_case`](StringRemove::remove_end_ignore_case).
    ///
    /// Returns the string unchanged if the suffix is empty or not found.
    ///
    /// # Examples
    ///
    /// ```
    /// use rstring::StringRemove;
    ///
    /// assert_eq!("".remove_end_ignore_ascii_case(""), "");
    /// assert_eq!("www.domain.com".remove_end_ignore_ascii_case(".com"), "www.domain");
    /// assert_eq!("www.domain.COM".remove_end_ignore_ascii_case(".com"), "www.domain");
    /// assert_eq!("www.domain.com.".remove_end_ignore_ascii_case(".com"), "www.domain.com.");
    /// ```
    #[must_use]
    fn remove_end_ignore_ascii_case(&self, suffix: &str) -> &str;

    /// Removes the suffix from the end of the string if present
    /// (Unicode case-insensitive comparison).
    ///
    /// Handles full Unicode case folding (e.g., é/É, ñ/Ñ) but requires allocation
    /// for the comparison. For ASCII-only strings, prefer
    /// [`remove_end_ignore_ascii_case`](StringRemove::remove_end_ignore_ascii_case).
    ///
    /// Returns the string unchanged if the suffix is empty or not found.
    ///
    /// # Examples
    ///
    /// ```
    /// use rstring::StringRemove;
    ///
    /// assert_eq!("".remove_end_ignore_case(""), "");
    /// assert_eq!("www.domain.com".remove_end_ignore_case(".com"), "www.domain");
    /// assert_eq!("www.domain.COM".remove_end_ignore_case(".com"), "www.domain");
    /// assert_eq!("www.domain.com.".remove_end_ignore_case(".com"), "www.domain.com.");
    /// assert_eq!("caféÉ".remove_end_ignore_case("É"), "café");
    /// ```
    #[must_use]
    fn remove_end_ignore_case(&self, suffix: &str) -> &str;

    /// Removes the last character from the string if it matches the given character.
    ///
    /// Only a single trailing occurrence is removed; the comparison is exact.
    ///
    /// Returns the string unchanged if it does not end with the character.
    ///
    /// # Examples
    ///
    /// ```
    /// use rstring::StringRemove;
    ///
    /// assert_eq!("".remove_end_char('/'), "");
    /// assert_eq!("path/".remove_end_char('/'), "path");
    /// assert_eq!("path".remove_end_char('/'), "path");
    /// ```
    #[must_use]
    fn remove_end_char(&self, c: char) -> &str;

    /// Removes all occurrences of a character from the string.
    ///
    /// Returns `Cow::Borrowed` if the character is not found,
    /// `Cow::Owned` otherwise.
    ///
    /// # Examples
    ///
    /// ```
    /// use rstring::StringRemove;
    ///
    /// assert_eq!("".remove_char('u'), "");
    /// assert_eq!("queued".remove_char('u'), "qeed");
    /// assert_eq!("queued".remove_char('z'), "queued");
    /// ```
    #[must_use]
    fn remove_char(&self, c: char) -> Cow<'_, str>;

    /// Removes all occurrences of a substring from the string.
    ///
    /// The comparison is case-sensitive. Occurrences are removed in a single
    /// left-to-right pass, so text that only becomes a match after an earlier
    /// removal is left in place.
    ///
    /// Returns `Cow::Borrowed` if the substring is empty or not found,
    /// `Cow::Owned` otherwise.
    ///
    /// # Examples
    ///
    /// ```
    /// use rstring::StringRemove;
    ///
    /// assert_eq!("".remove_occurrence("a"), "");
    /// assert_eq!("queued".remove_occurrence("ue"), "qd");
    /// assert_eq!("queued".remove_occurrence("zz"), "queued");
    /// assert_eq!("a".remove_occurrence(""), "a");
    /// ```
    #[must_use]
    fn remove_occurrence(&self, remove: &str) -> Cow<'_, str>;

    /// Removes all occurrences of a substring from the string (case-insensitive).
    ///
    /// The `str` implementation compares the Unicode lowercase forms
    /// ([`str::to_lowercase`]) of `remove` and of each candidate window of the
    /// input. A window spans as many `char`s as `remove` contains, so text whose
    /// lowercase form has a different number of characters than `remove` is
    /// generally not treated as a match.
    ///
    /// Returns `Cow::Borrowed` if the substring is empty or not found,
    /// `Cow::Owned` otherwise.
    ///
    /// # Examples
    ///
    /// ```
    /// use rstring::StringRemove;
    ///
    /// assert_eq!("".remove_ignore_case("a"), "");
    /// assert_eq!("queued".remove_ignore_case("ue"), "qd");
    /// assert_eq!("quEUed".remove_ignore_case("UE"), "qd");
    /// assert_eq!("queued".remove_ignore_case("zz"), "queued");
    /// assert_eq!("\u{0130}x".remove_ignore_case("x"), "\u{0130}");
    /// ```
    #[must_use]
    fn remove_ignore_case(&self, remove: &str) -> Cow<'_, str>;

    /// Deletes all whitespace characters from the string as defined by
    /// [`char::is_whitespace`].
    ///
    /// Characters that [`char::is_whitespace`] does not classify as whitespace
    /// are kept even if they are control characters; for example U+001F
    /// (unit separator) is preserved.
    ///
    /// Returns `Cow::Borrowed` if no whitespace was found,
    /// `Cow::Owned` otherwise.
    ///
    /// # Examples
    ///
    /// ```
    /// use rstring::StringRemove;
    ///
    /// assert_eq!("".delete_whitespace(), "");
    /// assert_eq!("abc".delete_whitespace(), "abc");
    /// assert_eq!("   ab  c  ".delete_whitespace(), "abc");
    /// assert_eq!("\u{000B}t  \t\n\u{0009}e\rs\n\n   \tt".delete_whitespace(), "test");
    /// ```
    #[must_use]
    fn delete_whitespace(&self) -> Cow<'_, str>;
}

// Generates a `fn $fn_name(&self, prefix: &str) -> &str` method (for use inside an
// `impl ... for str` block) that strips `prefix` from the front of `self` when
// `$starts_with_check(self, prefix)` reports a case-insensitive match.
//
// `$starts_with_check` must be callable as `fn(&str, &str) -> bool` and behave like a
// "starts with" test (if a leading slice of `self` passes, so does `self`).
//
// The matched text in `self` is not assumed to have the same UTF-8 length as `prefix`:
// Unicode case mapping can change byte length (e.g. U+212A KELVIN SIGN is three bytes
// but lowercases to the one-byte `k`), so slicing at `prefix.len()` could land inside a
// character (panic) or remove the wrong amount of text.
macro_rules! impl_remove_start_ignore_case {
    ($fn_name:ident, $starts_with_check:expr) => {
        fn $fn_name(&self, prefix: &str) -> &str {
            if prefix.is_empty() || !$starts_with_check(self, prefix) {
                return self;
            }

            // Fast path: the leading bytes equal `prefix` up to ASCII case, so the
            // match is exactly `prefix.len()` bytes long and ends on a char boundary
            // (non-ASCII bytes had to match exactly). This covers every ASCII-only match
            // without further calls to the check.
            let same_width = self
                .as_bytes()
                .get(..prefix.len())
                .map_or(false, |head| head.eq_ignore_ascii_case(prefix.as_bytes()));
            if same_width {
                return &self[prefix.len()..];
            }

            // Slow path: the shortest leading slice (ending on a char boundary) that
            // still satisfies the check is the text that actually matched.
            let cut = self
                .char_indices()
                .map(|(idx, ch)| idx + ch.len_utf8())
                .find(|&end| $starts_with_check(&self[..end], prefix))
                .unwrap_or(self.len());
            &self[cut..]
        }
    };
}

// Generates a `fn $fn_name(&self, suffix: &str) -> &str` method (for use inside an
// `impl ... for str` block) that strips `suffix` from the end of `self` when
// `$ends_with_check(self, suffix)` reports a case-insensitive match.
//
// `$ends_with_check` must be callable as `fn(&str, &str) -> bool` and behave like an
// "ends with" test (if a trailing slice of `self` passes, so does `self`).
//
// The matched text in `self` is not assumed to have the same UTF-8 length as `suffix`:
// Unicode case mapping can change byte length, so `self.len() - suffix.len()` could
// underflow, land inside a character (panic), or remove the wrong amount of text.
macro_rules! impl_remove_end_ignore_case {
    ($fn_name:ident, $ends_with_check:expr) => {
        fn $fn_name(&self, suffix: &str) -> &str {
            if suffix.is_empty() || !$ends_with_check(self, suffix) {
                return self;
            }

            // Fast path: the trailing bytes equal `suffix` up to ASCII case, so the
            // match is exactly `suffix.len()` bytes long and starts on a char boundary
            // (non-ASCII bytes had to match exactly).
            if let Some(start) = self.len().checked_sub(suffix.len()) {
                if self.as_bytes()[start..].eq_ignore_ascii_case(suffix.as_bytes()) {
                    return &self[..start];
                }
            }

            // Slow path: the shortest trailing slice (starting on a char boundary)
            // that still satisfies the check is the text that actually matched.
            let keep = self
                .char_indices()
                .rev()
                .map(|(idx, _)| idx)
                .find(|&start| $ends_with_check(&self[start..], suffix))
                .unwrap_or(0);
            &self[..keep]
        }
    };
}

/// Locates the first case-insensitive occurrence of `search` in `s` at or after the
/// byte offset `start`.
///
/// On success returns `(byte_offset, byte_length)` of the matching slice of `s`,
/// where `byte_offset` is relative to the beginning of `s`; returns `None` when no
/// window matches.
///
/// Every char boundary from `start` onwards is tried in order. The candidate window
/// at a boundary covers as many `char`s as `search` has (or the rest of the string
/// if fewer remain), and it matches when its `to_lowercase()` equals the lowercase
/// of `search`. Offsets are always taken from `s` itself, never from a lowercased
/// copy, so characters whose lowercase form has a different length (e.g. Turkish
/// İ → i̇) cannot produce out-of-bounds or mid-character indices.
///
/// `start` must lie on a char boundary of `s`; slicing panics otherwise.
fn find_ignore_case(s: &str, search: &str, start: usize) -> Option<(usize, usize)> {
    let needle = search.to_lowercase();
    let needle_chars = search.chars().count();
    let haystack = &s[start..];

    haystack.char_indices().find_map(|(offset, _)| {
        let tail = &haystack[offset..];
        // Byte width of the first `needle_chars` chars of `tail`, clamped to its end.
        let width = match tail.char_indices().nth(needle_chars) {
            Some((end, _)) => end,
            None => tail.len(),
        };
        if tail[..width].to_lowercase() == needle {
            Some((start + offset, width))
        } else {
            None
        }
    })
}

impl StringRemove for str {
    // Case-sensitive prefix removal; an absent (or empty) prefix leaves `self` as is.
    fn remove_start(&self, prefix: &str) -> &str {
        self.strip_prefix(prefix).unwrap_or(self)
    }

    // Case-insensitive prefix removal, generated from the shared macro with the
    // ASCII-only and Unicode-aware `starts_with` helpers respectively.
    impl_remove_start_ignore_case!(
        remove_start_ignore_ascii_case,
        starts_with_ignore_ascii_case
    );
    impl_remove_start_ignore_case!(remove_start_ignore_case, starts_with_ignore_case);

    // Drops a single leading `c`, if present.
    fn remove_start_char(&self, c: char) -> &str {
        self.strip_prefix(c).unwrap_or(self)
    }

    // Case-sensitive suffix removal; an absent (or empty) suffix leaves `self` as is.
    fn remove_end(&self, suffix: &str) -> &str {
        self.strip_suffix(suffix).unwrap_or(self)
    }

    // Case-insensitive suffix removal, generated from the shared macro with the
    // ASCII-only and Unicode-aware `ends_with` helpers respectively.
    impl_remove_end_ignore_case!(remove_end_ignore_ascii_case, ends_with_ignore_ascii_case);
    impl_remove_end_ignore_case!(remove_end_ignore_case, ends_with_ignore_case);

    // Drops a single trailing `c`, if present.
    fn remove_end_char(&self, c: char) -> &str {
        self.strip_suffix(c).unwrap_or(self)
    }

    // Removes every `c`. An empty string never contains `c`, so no separate
    // emptiness check is needed to take the borrowed path.
    fn remove_char(&self, c: char) -> Cow<'_, str> {
        if !self.contains(c) {
            return Cow::Borrowed(self);
        }
        Cow::Owned(self.chars().filter(|&ch| ch != c).collect())
    }

    // Removes every case-sensitive occurrence of `remove`. The empty pattern is
    // rejected explicitly because `contains("")` is always true.
    fn remove_occurrence(&self, remove: &str) -> Cow<'_, str> {
        if remove.is_empty() || !self.contains(remove) {
            return Cow::Borrowed(self);
        }
        Cow::Owned(self.replace(remove, ""))
    }

    // Removes every case-insensitive occurrence of `remove`, scanning left to right
    // with `find_ignore_case` and copying the text between matches.
    fn remove_ignore_case(&self, remove: &str) -> Cow<'_, str> {
        if self.is_empty() || remove.is_empty() {
            return Cow::Borrowed(self);
        }

        // Look for the first match before allocating, so the no-match case returns
        // the borrowed input without building a buffer.
        let (first_pos, first_len) = match find_ignore_case(self, remove, 0) {
            Some(hit) => hit,
            None => return Cow::Borrowed(self),
        };

        let mut result = String::with_capacity(self.len() - first_len);
        result.push_str(&self[..first_pos]);
        let mut pos = first_pos + first_len;

        // `remove` is non-empty, so every match is at least one char wide and `pos`
        // strictly advances.
        while pos < self.len() {
            match find_ignore_case(self, remove, pos) {
                Some((match_pos, match_len)) => {
                    result.push_str(&self[pos..match_pos]);
                    pos = match_pos + match_len;
                }
                None => break,
            }
        }
        // Whatever follows the last match (possibly nothing) is kept verbatim.
        result.push_str(&self[pos..]);
        Cow::Owned(result)
    }

    // Removes every character for which `char::is_whitespace` is true.
    fn delete_whitespace(&self) -> Cow<'_, str> {
        if !self.contains(char::is_whitespace) {
            return Cow::Borrowed(self);
        }
        Cow::Owned(self.chars().filter(|c| !c.is_whitespace()).collect())
    }
}

// Unit tests for the `StringRemove` implementation on `str`, grouped one
// sub-module per trait method, plus checks that the methods are reachable
// through `String` / `Box<str>` and that the documented `Cow` variants hold.
#[cfg(test)]
mod tests {
    use super::*;

    mod remove_start {
        use super::*;

        #[test]
        fn empty_string() {
            assert_eq!("".remove_start(""), "");
        }

        #[test]
        fn empty_prefix() {
            assert_eq!("abc".remove_start(""), "abc");
        }

        #[test]
        fn prefix_present() {
            assert_eq!("www.domain.com".remove_start("www."), "domain.com");
        }

        #[test]
        fn prefix_not_present() {
            assert_eq!("domain.com".remove_start("www."), "domain.com");
        }

        #[test]
        fn prefix_different_case() {
            assert_eq!("www.domain.com".remove_start("WWW."), "www.domain.com");
        }

        #[test]
        fn prefix_partial_match() {
            assert_eq!("wwwdomain.com".remove_start("www."), "wwwdomain.com");
        }
    }

    mod remove_start_ignore_ascii_case {
        use super::*;

        #[test]
        fn empty_string() {
            assert_eq!("".remove_start_ignore_ascii_case(""), "");
        }

        #[test]
        fn empty_prefix() {
            assert_eq!("abc".remove_start_ignore_ascii_case(""), "abc");
        }

        #[test]
        fn prefix_present() {
            assert_eq!(
                "www.domain.com".remove_start_ignore_ascii_case("www."),
                "domain.com"
            );
        }

        #[test]
        fn prefix_different_case() {
            assert_eq!(
                "www.domain.com".remove_start_ignore_ascii_case("WWW."),
                "domain.com"
            );
        }

        #[test]
        fn prefix_not_present() {
            assert_eq!(
                "domain.com".remove_start_ignore_ascii_case("www."),
                "domain.com"
            );
        }

        #[test]
        fn prefix_mixed_case() {
            assert_eq!(
                "WwW.domain.com".remove_start_ignore_ascii_case("www."),
                "domain.com"
            );
        }
    }

    mod remove_start_ignore_case {
        use super::*;

        #[test]
        fn empty_string() {
            assert_eq!("".remove_start_ignore_case(""), "");
        }

        #[test]
        fn empty_prefix() {
            assert_eq!("abc".remove_start_ignore_case(""), "abc");
        }

        #[test]
        fn prefix_present() {
            assert_eq!(
                "www.domain.com".remove_start_ignore_case("www."),
                "domain.com"
            );
        }

        #[test]
        fn prefix_different_case() {
            assert_eq!(
                "www.domain.com".remove_start_ignore_case("WWW."),
                "domain.com"
            );
        }

        #[test]
        fn prefix_not_present() {
            assert_eq!("domain.com".remove_start_ignore_case("www."), "domain.com");
        }

        #[test]
        fn prefix_mixed_case() {
            assert_eq!(
                "WwW.domain.com".remove_start_ignore_case("www."),
                "domain.com"
            );
        }

        #[test]
        fn unicode_prefix_different_case() {
            assert_eq!("Éclair".remove_start_ignore_case("é"), "clair");
        }
    }

    mod remove_start_char {
        use super::*;

        #[test]
        fn empty_string() {
            assert_eq!("".remove_start_char('/'), "");
        }

        #[test]
        fn char_present() {
            assert_eq!("/path".remove_start_char('/'), "path");
        }

        #[test]
        fn char_not_present() {
            assert_eq!("path".remove_start_char('/'), "path");
        }

        #[test]
        fn unicode_char() {
            assert_eq!("\u{00e9}abc".remove_start_char('\u{00e9}'), "abc");
        }

        #[test]
        fn only_char() {
            assert_eq!("/".remove_start_char('/'), "");
        }

        #[test]
        fn removes_only_one_occurrence() {
            assert_eq!("//path".remove_start_char('/'), "/path");
        }
    }

    mod remove_end {
        use super::*;

        #[test]
        fn empty_string() {
            assert_eq!("".remove_end(""), "");
        }

        #[test]
        fn empty_suffix() {
            assert_eq!("abc".remove_end(""), "abc");
        }

        #[test]
        fn suffix_present() {
            assert_eq!("www.domain.com".remove_end(".com"), "www.domain");
        }

        #[test]
        fn suffix_not_present() {
            assert_eq!("www.domain.com.".remove_end(".com"), "www.domain.com.");
        }

        #[test]
        fn suffix_different_case() {
            assert_eq!("www.domain.com".remove_end(".COM"), "www.domain.com");
        }

        #[test]
        fn suffix_partial_match() {
            assert_eq!("www.domaincom".remove_end(".com"), "www.domaincom");
        }
    }

    mod remove_end_ignore_ascii_case {
        use super::*;

        #[test]
        fn empty_string() {
            assert_eq!("".remove_end_ignore_ascii_case(""), "");
        }

        #[test]
        fn empty_suffix() {
            assert_eq!("abc".remove_end_ignore_ascii_case(""), "abc");
        }

        #[test]
        fn suffix_present() {
            assert_eq!(
                "www.domain.com".remove_end_ignore_ascii_case(".com"),
                "www.domain"
            );
        }

        #[test]
        fn suffix_different_case() {
            assert_eq!(
                "www.domain.COM".remove_end_ignore_ascii_case(".com"),
                "www.domain"
            );
        }

        #[test]
        fn suffix_not_present() {
            assert_eq!(
                "www.domain.com.".remove_end_ignore_ascii_case(".com"),
                "www.domain.com."
            );
        }

        #[test]
        fn suffix_mixed_case() {
            assert_eq!(
                "www.domain.CoM".remove_end_ignore_ascii_case(".com"),
                "www.domain"
            );
        }
    }

    mod remove_end_ignore_case {
        use super::*;

        #[test]
        fn empty_string() {
            assert_eq!("".remove_end_ignore_case(""), "");
        }

        #[test]
        fn empty_suffix() {
            assert_eq!("abc".remove_end_ignore_case(""), "abc");
        }

        #[test]
        fn suffix_present() {
            assert_eq!(
                "www.domain.com".remove_end_ignore_case(".com"),
                "www.domain"
            );
        }

        #[test]
        fn suffix_different_case() {
            assert_eq!(
                "www.domain.COM".remove_end_ignore_case(".com"),
                "www.domain"
            );
        }

        #[test]
        fn suffix_not_present() {
            assert_eq!(
                "www.domain.com.".remove_end_ignore_case(".com"),
                "www.domain.com."
            );
        }

        #[test]
        fn suffix_mixed_case() {
            assert_eq!(
                "www.domain.CoM".remove_end_ignore_case(".com"),
                "www.domain"
            );
        }

        #[test]
        fn unicode_suffix_same_case() {
            assert_eq!("caféÉ".remove_end_ignore_case("É"), "café");
        }
    }

    mod remove_end_char {
        use super::*;

        #[test]
        fn empty_string() {
            assert_eq!("".remove_end_char('/'), "");
        }

        #[test]
        fn char_present() {
            assert_eq!("path/".remove_end_char('/'), "path");
        }

        #[test]
        fn char_not_present() {
            assert_eq!("path".remove_end_char('/'), "path");
        }

        #[test]
        fn unicode_char() {
            assert_eq!("abc\u{00e9}".remove_end_char('\u{00e9}'), "abc");
        }

        #[test]
        fn only_char() {
            assert_eq!("/".remove_end_char('/'), "");
        }

        #[test]
        fn removes_only_one_occurrence() {
            assert_eq!("path//".remove_end_char('/'), "path/");
        }
    }

    mod remove_char {
        use super::*;

        #[test]
        fn empty_string() {
            assert_eq!("".remove_char('a'), "");
        }

        #[test]
        fn char_present() {
            assert_eq!("queued".remove_char('u'), "qeed");
        }

        #[test]
        fn char_not_present() {
            assert_eq!("queued".remove_char('z'), "queued");
        }

        #[test]
        fn all_chars_match() {
            assert_eq!("aaa".remove_char('a'), "");
        }

        #[test]
        fn unicode_char() {
            assert_eq!("caféé".remove_char('é'), "caf");
        }
    }

    mod remove_occurrence {
        use super::*;

        #[test]
        fn empty_string() {
            assert_eq!("".remove_occurrence("a"), "");
        }

        #[test]
        fn empty_remove() {
            assert_eq!("a".remove_occurrence(""), "a");
        }

        #[test]
        fn substring_present() {
            assert_eq!("queued".remove_occurrence("ue"), "qd");
        }

        #[test]
        fn substring_not_present() {
            assert_eq!("queued".remove_occurrence("zz"), "queued");
        }

        #[test]
        fn multiple_occurrences() {
            assert_eq!("ababa".remove_occurrence("ab"), "a");
        }
    }

    mod remove_ignore_case {
        use super::*;

        #[test]
        fn empty_string() {
            assert_eq!("".remove_ignore_case("a"), "");
        }

        #[test]
        fn empty_remove() {
            assert_eq!("a".remove_ignore_case(""), "a");
        }

        #[test]
        fn exact_case_match() {
            assert_eq!("queued".remove_ignore_case("ue"), "qd");
        }

        #[test]
        fn different_case() {
            assert_eq!("quEUed".remove_ignore_case("UE"), "qd");
        }

        #[test]
        fn no_match() {
            assert_eq!("queued".remove_ignore_case("zz"), "queued");
        }

        #[test]
        fn no_match_different_case() {
            assert_eq!("queued".remove_ignore_case("zZ"), "queued");
        }

        #[test]
        fn match_at_both_ends() {
            assert_eq!("ABxab".remove_ignore_case("ab"), "x");
        }

        #[test]
        fn unicode_turkish_i() {
            assert_eq!("\u{0130}x".remove_ignore_case("x"), "\u{0130}");
        }

        #[test]
        fn unicode_turkish_i_no_crash() {
            // LANG-1453: should not panic
            let _ = "İa".remove_ignore_case("a");
        }
    }

    mod delete_whitespace {
        use super::*;

        #[test]
        fn empty_string() {
            assert_eq!("".delete_whitespace(), "");
        }

        #[test]
        fn no_whitespace() {
            assert_eq!("abc".delete_whitespace(), "abc");
        }

        #[test]
        fn all_whitespace() {
            // U+001F (unit separator) is not whitespace according to
            // `char::is_whitespace`, so it is the one character that survives.
            assert_eq!(
                "  \u{000C}  \t\t\u{001F}\n\n \u{000B}  ".delete_whitespace(),
                "\u{001F}"
            );
        }

        #[test]
        fn mixed_whitespace_and_text() {
            assert_eq!("   ab  c  ".delete_whitespace(), "abc");
        }

        #[test]
        fn extract_test_from_whitespace() {
            assert_eq!(
                "\u{000B}t  \t\n\u{0009}e\rs\n\n   \tt".delete_whitespace(),
                "test"
            );
        }

        #[test]
        fn only_spaces() {
            assert_eq!("   ".delete_whitespace(), "");
        }

        #[test]
        fn tabs_and_newlines() {
            assert_eq!("\t\n\r".delete_whitespace(), "");
        }
    }

    // The trait documents which `Cow` variant each allocating method returns;
    // these tests pin that contract rather than just the string contents.
    mod cow_variants {
        use super::*;

        #[test]
        fn remove_char_borrows_when_unchanged() {
            assert!(matches!("queued".remove_char('z'), Cow::Borrowed(_)));
            assert!(matches!("queued".remove_char('u'), Cow::Owned(_)));
        }

        #[test]
        fn remove_occurrence_borrows_when_unchanged() {
            assert!(matches!("queued".remove_occurrence("zz"), Cow::Borrowed(_)));
            assert!(matches!("queued".remove_occurrence(""), Cow::Borrowed(_)));
            assert!(matches!("queued".remove_occurrence("ue"), Cow::Owned(_)));
        }

        #[test]
        fn remove_ignore_case_borrows_when_unchanged() {
            assert!(matches!("queued".remove_ignore_case("zz"), Cow::Borrowed(_)));
            assert!(matches!("queued".remove_ignore_case(""), Cow::Borrowed(_)));
            assert!(matches!("queued".remove_ignore_case("UE"), Cow::Owned(_)));
        }

        #[test]
        fn delete_whitespace_borrows_when_unchanged() {
            assert!(matches!("abc".delete_whitespace(), Cow::Borrowed(_)));
            assert!(matches!("a b".delete_whitespace(), Cow::Owned(_)));
            assert!(matches!("   ".delete_whitespace(), Cow::Owned(_)));
        }
    }

    mod string_types {
        use super::*;

        #[test]
        fn string_ref_remove_start() {
            let s = String::from("www.domain.com");
            assert_eq!(s.remove_start("www."), "domain.com");
        }

        #[test]
        fn boxed_str_remove_end() {
            let s: Box<str> = "file.txt".into();
            assert_eq!(s.remove_end(".txt"), "file");
        }

        #[test]
        fn string_type_remove_char() {
            assert_eq!(String::from("queued").remove_char('u'), "qeed");
        }

        #[test]
        fn string_type_delete_whitespace() {
            assert_eq!(String::from("  ab c  ").delete_whitespace(), "abc");
        }
    }
}