use crate::shared::char_offset_to_byte_index;
/// Extension methods for extracting substrings by character count or by delimiter.
///
/// The behavior described on each method is that of the `str` implementation in
/// this file. Results are returned as `&str` slices rather than owned `String`s.
pub trait StringSubstring {
/// Returns the leading `len` characters of the string.
///
/// Yields `""` when the string is empty or `len` is zero. Lengths are counted
/// in `char`s, not bytes. The tests in this file expect the whole string back
/// when `len` exceeds the character count; that relies on
/// `char_offset_to_byte_index` clamping to the end of the string (helper is
/// defined elsewhere - confirm there).
#[must_use]
fn left(&self, len: usize) -> &str;
/// Returns the trailing `len` characters of the string.
///
/// Yields `""` when the string is empty or `len` is zero, and the whole string
/// when `len` is at least the number of characters it contains.
#[must_use]
fn right(&self, len: usize) -> &str;
/// Returns up to `len` characters starting at the zero-based character offset `pos`.
///
/// Yields `""` when the string is empty, `len` is zero, or `pos` is at or past
/// the character count. The end of the range is clamped to the end of the
/// string, so an oversized `len` returns everything from `pos` onwards.
#[must_use]
fn mid(&self, pos: usize, len: usize) -> &str;
/// Returns the text preceding the first occurrence of `separator`.
///
/// `None` when the string or `separator` is empty, or when `separator` does
/// not occur. A match at the very start yields `Some("")`.
#[must_use]
fn substring_before(&self, separator: &str) -> Option<&str>;
/// Returns the text preceding the first occurrence of the character `separator`.
///
/// `None` when the string is empty or `separator` does not occur.
#[must_use]
fn substring_before_char(&self, separator: char) -> Option<&str>;
/// Returns the text following the first occurrence of `separator`.
///
/// `None` when the string or `separator` is empty, or when `separator` does
/// not occur. A match at the very end yields `Some("")`.
#[must_use]
fn substring_after(&self, separator: &str) -> Option<&str>;
/// Returns the text following the first occurrence of the character `separator`.
///
/// `None` when the string is empty or `separator` does not occur.
#[must_use]
fn substring_after_char(&self, separator: char) -> Option<&str>;
/// Returns the text preceding the last occurrence of `separator`.
///
/// `None` when the string or `separator` is empty, or when `separator` does
/// not occur.
#[must_use]
fn substring_before_last(&self, separator: &str) -> Option<&str>;
/// Returns the text preceding the last occurrence of the character `separator`.
///
/// `None` when the string is empty or `separator` does not occur.
#[must_use]
fn substring_before_last_char(&self, separator: char) -> Option<&str>;
/// Returns the text following the last occurrence of `separator`.
///
/// `None` when the string or `separator` is empty, or when `separator` does
/// not occur.
#[must_use]
fn substring_after_last(&self, separator: &str) -> Option<&str>;
/// Returns the text following the last occurrence of the character `separator`.
///
/// `None` when the string is empty or `separator` does not occur.
#[must_use]
fn substring_after_last_char(&self, separator: char) -> Option<&str>;
/// Returns the text enclosed between the first two occurrences of `tag`.
///
/// The second occurrence is searched for after the end of the first, so the
/// two never overlap. `None` when the string or `tag` is empty, or when `tag`
/// occurs fewer than two times.
#[must_use]
fn substring_between(&self, tag: &str) -> Option<&str>;
/// Returns the text between the first `open` and the first `close` that follows it.
///
/// `None` when the string or either delimiter is empty, when `open` does not
/// occur, or when no `close` follows the first `open`.
#[must_use]
fn substring_between_with(&self, open: &str, close: &str) -> Option<&str>;
/// Collects every section enclosed by an `open` ... `close` pair, scanning left to right.
///
/// The search resumes right after each closing delimiter, so matches never
/// overlap. Scanning stops at the first `open` that has no `close` after it.
/// Returns an empty vector when the string or either delimiter is empty.
#[must_use]
fn substrings_between(&self, open: &str, close: &str) -> Vec<&str>;
}
/// [`StringSubstring`] for string slices.
///
/// The character-count methods (`left`, `right`, `mid`) turn character offsets
/// into byte offsets through `char_offset_to_byte_index`; the delimiter methods
/// slice at the byte positions reported by `str::find` / `str::rfind`.
impl StringSubstring for str {
    // Leading `len` characters; nothing to take from an empty string or for `len == 0`.
    fn left(&self, len: usize) -> &str {
        if len == 0 || self.is_empty() {
            ""
        } else {
            let end = char_offset_to_byte_index(self, len);
            &self[..end]
        }
    }

    // Trailing `len` characters, or the whole string when it is not longer than `len`.
    fn right(&self, len: usize) -> &str {
        if len == 0 || self.is_empty() {
            return "";
        }
        let total = self.chars().count();
        match total.checked_sub(len) {
            // `len >= total`: every character is requested.
            None | Some(0) => self,
            // Skip the first `total - len` characters.
            Some(skip) => &self[char_offset_to_byte_index(self, skip)..],
        }
    }

    // Up to `len` characters starting at character offset `pos`.
    fn mid(&self, pos: usize, len: usize) -> &str {
        if len == 0 || self.is_empty() {
            return "";
        }
        let total = self.chars().count();
        if pos >= total {
            return "";
        }
        // Saturating add guards against overflow for huge `len`; the end is clamped
        // to the character count.
        let stop = total.min(pos.saturating_add(len));
        let from = char_offset_to_byte_index(self, pos);
        let to = char_offset_to_byte_index(self, stop);
        &self[from..to]
    }

    // Text before the first `separator`.
    fn substring_before(&self, separator: &str) -> Option<&str> {
        if separator.is_empty() || self.is_empty() {
            return None;
        }
        let cut = self.find(separator)?;
        Some(&self[..cut])
    }

    // Text before the first `separator` character.
    fn substring_before_char(&self, separator: char) -> Option<&str> {
        if self.is_empty() {
            return None;
        }
        let cut = self.find(separator)?;
        Some(&self[..cut])
    }

    // Text after the first `separator`.
    fn substring_after(&self, separator: &str) -> Option<&str> {
        if separator.is_empty() || self.is_empty() {
            return None;
        }
        let hit = self.find(separator)?;
        let resume = hit + separator.len();
        Some(&self[resume..])
    }

    // Text after the first `separator` character.
    fn substring_after_char(&self, separator: char) -> Option<&str> {
        if self.is_empty() {
            return None;
        }
        let hit = self.find(separator)?;
        // Step over the separator's full UTF-8 encoding, not just one byte.
        let resume = hit + separator.len_utf8();
        Some(&self[resume..])
    }

    // Text before the last `separator`.
    fn substring_before_last(&self, separator: &str) -> Option<&str> {
        if separator.is_empty() || self.is_empty() {
            return None;
        }
        let cut = self.rfind(separator)?;
        Some(&self[..cut])
    }

    // Text before the last `separator` character.
    fn substring_before_last_char(&self, separator: char) -> Option<&str> {
        if self.is_empty() {
            return None;
        }
        let cut = self.rfind(separator)?;
        Some(&self[..cut])
    }

    // Text after the last `separator`.
    fn substring_after_last(&self, separator: &str) -> Option<&str> {
        if separator.is_empty() || self.is_empty() {
            return None;
        }
        let hit = self.rfind(separator)?;
        let resume = hit + separator.len();
        Some(&self[resume..])
    }

    // Text after the last `separator` character.
    fn substring_after_last_char(&self, separator: char) -> Option<&str> {
        if self.is_empty() {
            return None;
        }
        let hit = self.rfind(separator)?;
        let resume = hit + separator.len_utf8();
        Some(&self[resume..])
    }

    // Text between the first two occurrences of `tag`: the same search as
    // `substring_between_with` with `tag` acting as both delimiters.
    fn substring_between(&self, tag: &str) -> Option<&str> {
        self.substring_between_with(tag, tag)
    }

    // Text between the first `open` and the first `close` after it.
    //
    // `substring_after` rejects an empty receiver / empty `open` and reports a
    // missing `open`; `substring_before` rejects an empty `close` and reports a
    // missing `close`, so no additional guards are required here.
    fn substring_between_with(&self, open: &str, close: &str) -> Option<&str> {
        self.substring_after(open)
            .and_then(|tail| tail.substring_before(close))
    }

    // Every `open` ... `close` section, left to right, without overlaps.
    fn substrings_between(&self, open: &str, close: &str) -> Vec<&str> {
        let mut found = Vec::new();
        if self.is_empty() || open.is_empty() || close.is_empty() {
            return found;
        }
        // From this byte offset on there is no room left for a non-empty `open`
        // followed by `close`, so scanning can stop.
        let limit = self.len().saturating_sub(close.len());
        let mut cursor = 0;
        while cursor < limit {
            // Byte range of the next enclosed section, if a complete pair remains.
            let section = self[cursor..].find(open).and_then(|open_at| {
                let begin = cursor + open_at + open.len();
                self[begin..]
                    .find(close)
                    .map(|close_at| (begin, begin + close_at))
            });
            if let Some((begin, end)) = section {
                found.push(&self[begin..end]);
                // Resume right after the closing delimiter.
                cursor = end + close.len();
            } else {
                break;
            }
        }
        found
    }
}
/// Unit tests for [`StringSubstring`], grouped into one submodule per trait method.
#[cfg(test)]
mod tests {
    use super::*;

    // Shared fixtures: `FOOBAR` is `FOO` followed by `BAR`.
    const FOO: &str = "foo";
    const BAR: &str = "bar";
    const FOOBAR: &str = "foobar";

    // `left`: leading characters, counted in chars rather than bytes.
    mod left {
        use super::*;

        #[test]
        fn empty_string_zero_len() {
            assert_eq!("".left(0), "");
        }

        #[test]
        fn empty_string_positive_len() {
            assert_eq!("".left(2), "");
        }

        #[test]
        fn foobar_zero_len() {
            assert_eq!(FOOBAR.left(0), "");
        }

        #[test]
        fn foobar_three() {
            assert_eq!(FOOBAR.left(3), FOO);
        }

        #[test]
        fn foobar_exceeds_length() {
            assert_eq!(FOOBAR.left(80), FOOBAR);
        }

        #[test]
        fn unicode() {
            assert_eq!("日本語".left(2), "日本");
        }

        #[test]
        fn emoji() {
            assert_eq!("🦊🐱🐶".left(2), "🦊🐱");
        }
    }

    // `right`: trailing characters, counted in chars rather than bytes.
    mod right {
        use super::*;

        #[test]
        fn empty_string_zero_len() {
            assert_eq!("".right(0), "");
        }

        #[test]
        fn empty_string_positive_len() {
            assert_eq!("".right(2), "");
        }

        #[test]
        fn foobar_zero_len() {
            assert_eq!(FOOBAR.right(0), "");
        }

        #[test]
        fn foobar_three() {
            assert_eq!(FOOBAR.right(3), BAR);
        }

        #[test]
        fn foobar_exceeds_length() {
            assert_eq!(FOOBAR.right(80), FOOBAR);
        }

        #[test]
        fn unicode() {
            assert_eq!("日本語".right(2), "本語");
        }

        #[test]
        fn emoji() {
            assert_eq!("🦊🐱🐶".right(2), "🐱🐶");
        }
    }

    // `mid`: a character range given by start offset and length.
    mod mid {
        use super::*;

        #[test]
        fn empty_string_zero_len() {
            assert_eq!("".mid(0, 0), "");
        }

        #[test]
        fn empty_string_positive_len() {
            assert_eq!("".mid(0, 2), "");
        }

        #[test]
        fn foobar_pos3_len0() {
            assert_eq!(FOOBAR.mid(3, 0), "");
        }

        #[test]
        fn foobar_pos3_len1() {
            assert_eq!(FOOBAR.mid(3, 1), "b");
        }

        #[test]
        fn foobar_pos0_len3() {
            assert_eq!(FOOBAR.mid(0, 3), FOO);
        }

        #[test]
        fn foobar_pos3_len3() {
            assert_eq!(FOOBAR.mid(3, 3), BAR);
        }

        #[test]
        fn foobar_pos0_len80() {
            assert_eq!(FOOBAR.mid(0, 80), FOOBAR);
        }

        #[test]
        fn foobar_pos3_len80() {
            assert_eq!(FOOBAR.mid(3, 80), BAR);
        }

        #[test]
        fn foobar_pos9_len3() {
            assert_eq!(FOOBAR.mid(9, 3), "");
        }

        #[test]
        fn unicode() {
            assert_eq!("日本語".mid(1, 1), "本");
        }

        #[test]
        fn emoji() {
            assert_eq!("🦊🐱🐶".mid(1, 1), "🐱");
        }
    }

    // `substring_before`: text before the first string separator.
    mod substring_before {
        use super::*;

        #[test]
        fn multi_char_separator() {
            assert_eq!("fooXXbarXXbaz".substring_before("XX"), Some("foo"));
        }

        #[test]
        fn empty_string_empty_sep() {
            assert_eq!("".substring_before(""), None);
        }

        #[test]
        fn empty_string_xx_sep() {
            assert_eq!("".substring_before("XX"), None);
        }

        #[test]
        fn foo_separator_not_found() {
            assert_eq!("foo".substring_before("b"), None);
        }

        #[test]
        fn foot_o() {
            assert_eq!("foot".substring_before("o"), Some("f"));
        }

        #[test]
        fn abc_a() {
            assert_eq!("abc".substring_before("a"), Some(""));
        }

        #[test]
        fn abcba_b() {
            assert_eq!("abcba".substring_before("b"), Some("a"));
        }

        #[test]
        fn abc_c() {
            assert_eq!("abc".substring_before("c"), Some("ab"));
        }

        #[test]
        fn empty_separator() {
            assert_eq!("abc".substring_before(""), None);
        }

        #[test]
        fn abc_x() {
            assert_eq!("abc".substring_before("X"), None);
        }

        #[test]
        fn unicode() {
            assert_eq!("日本語です".substring_before("語"), Some("日本"));
        }
    }

    // `substring_before_char`: text before the first char separator.
    mod substring_before_char {
        use super::*;

        #[test]
        fn fooxxbarxxbaz_x() {
            assert_eq!("fooXXbarXXbaz".substring_before_char('X'), Some("foo"));
        }

        #[test]
        fn empty_string() {
            assert_eq!("".substring_before_char('X'), None);
        }

        #[test]
        fn foo_not_found() {
            assert_eq!("foo".substring_before_char('b'), None);
        }

        #[test]
        fn foot_o() {
            assert_eq!("foot".substring_before_char('o'), Some("f"));
        }

        #[test]
        fn abc_a() {
            assert_eq!("abc".substring_before_char('a'), Some(""));
        }

        #[test]
        fn abcba_b() {
            assert_eq!("abcba".substring_before_char('b'), Some("a"));
        }

        #[test]
        fn abc_c() {
            assert_eq!("abc".substring_before_char('c'), Some("ab"));
        }

        #[test]
        fn unicode() {
            assert_eq!("日本語".substring_before_char('本'), Some("日"));
        }
    }

    // `substring_after`: text after the first string separator.
    mod substring_after {
        use super::*;

        #[test]
        fn multi_char_separator() {
            assert_eq!("fooXXbarXXbaz".substring_after("XX"), Some("barXXbaz"));
        }

        #[test]
        fn empty_string_empty_sep() {
            assert_eq!("".substring_after(""), None);
        }

        #[test]
        fn empty_string_xx_sep() {
            assert_eq!("".substring_after("XX"), None);
        }

        #[test]
        fn foot_o() {
            assert_eq!("foot".substring_after("o"), Some("ot"));
        }

        #[test]
        fn abc_a() {
            assert_eq!("abc".substring_after("a"), Some("bc"));
        }

        #[test]
        fn abcba_b() {
            assert_eq!("abcba".substring_after("b"), Some("cba"));
        }

        #[test]
        fn abc_c() {
            assert_eq!("abc".substring_after("c"), Some(""));
        }

        #[test]
        fn empty_separator() {
            assert_eq!("abc".substring_after(""), None);
        }

        #[test]
        fn abc_d() {
            assert_eq!("abc".substring_after("d"), None);
        }

        #[test]
        fn unicode() {
            assert_eq!("日本語です".substring_after("語"), Some("です"));
        }
    }

    // `substring_after_char`: text after the first char separator.
    mod substring_after_char {
        use super::*;

        #[test]
        fn empty_string() {
            assert_eq!("".substring_after_char('X'), None);
        }

        #[test]
        fn foot_o() {
            assert_eq!("foot".substring_after_char('o'), Some("ot"));
        }

        #[test]
        fn abc_a() {
            assert_eq!("abc".substring_after_char('a'), Some("bc"));
        }

        #[test]
        fn abcba_b() {
            assert_eq!("abcba".substring_after_char('b'), Some("cba"));
        }

        #[test]
        fn abc_c() {
            assert_eq!("abc".substring_after_char('c'), Some(""));
        }

        #[test]
        fn abc_d() {
            assert_eq!("abc".substring_after_char('d'), None);
        }

        #[test]
        fn unicode() {
            assert_eq!("日本語".substring_after_char('本'), Some("語"));
        }
    }

    // `substring_before_last`: text before the last string separator.
    mod substring_before_last {
        use super::*;

        #[test]
        fn multi_char_separator() {
            assert_eq!(
                "fooXXbarXXbaz".substring_before_last("XX"),
                Some("fooXXbar")
            );
        }

        #[test]
        fn empty_string_empty_sep() {
            assert_eq!("".substring_before_last(""), None);
        }

        #[test]
        fn empty_string_xx_sep() {
            assert_eq!("".substring_before_last("XX"), None);
        }

        #[test]
        fn foo_not_found() {
            assert_eq!("foo".substring_before_last("b"), None);
        }

        #[test]
        fn foo_o() {
            assert_eq!("foo".substring_before_last("o"), Some("fo"));
        }

        #[test]
        fn abc_newline_d() {
            assert_eq!("abc\r\n".substring_before_last("d"), None);
        }

        #[test]
        fn abcdabc_d() {
            assert_eq!("abcdabc".substring_before_last("d"), Some("abc"));
        }

        #[test]
        fn abcdabcd_d() {
            assert_eq!("abcdabcd".substring_before_last("d"), Some("abcdabc"));
        }

        #[test]
        fn abc_b() {
            assert_eq!("abc".substring_before_last("b"), Some("a"));
        }

        #[test]
        fn abc_space_newline() {
            assert_eq!("abc \n".substring_before_last("\n"), Some("abc "));
        }

        #[test]
        fn a_empty() {
            assert_eq!("a".substring_before_last(""), None);
        }

        #[test]
        fn a_a() {
            assert_eq!("a".substring_before_last("a"), Some(""));
        }

        #[test]
        fn unicode() {
            assert_eq!("日本語日本".substring_before_last("本"), Some("日本語日"));
        }
    }

    // `substring_before_last_char`: text before the last char separator.
    mod substring_before_last_char {
        use super::*;

        #[test]
        fn empty_string() {
            assert_eq!("".substring_before_last_char('a'), None);
        }

        #[test]
        fn foo_o() {
            assert_eq!("foo".substring_before_last_char('o'), Some("fo"));
        }

        #[test]
        fn abcba_b() {
            assert_eq!("abcba".substring_before_last_char('b'), Some("abc"));
        }

        #[test]
        fn separator_not_found() {
            assert_eq!("abc".substring_before_last_char('d'), None);
        }

        #[test]
        fn unicode() {
            assert_eq!(
                "日本語日本".substring_before_last_char('本'),
                Some("日本語日")
            );
        }
    }

    // `substring_after_last`: text after the last string separator.
    mod substring_after_last {
        use super::*;

        #[test]
        fn multi_char_separator() {
            assert_eq!("fooXXbarXXbaz".substring_after_last("XX"), Some("baz"));
        }

        #[test]
        fn empty_string_empty_sep() {
            assert_eq!("".substring_after_last(""), None);
        }

        #[test]
        fn empty_string_a_sep() {
            assert_eq!("".substring_after_last("a"), None);
        }

        #[test]
        fn foo_not_found() {
            assert_eq!("foo".substring_after_last("b"), None);
        }

        #[test]
        fn foot_o() {
            assert_eq!("foot".substring_after_last("o"), Some("t"));
        }

        #[test]
        fn abc_a() {
            assert_eq!("abc".substring_after_last("a"), Some("bc"));
        }

        #[test]
        fn abcba_b() {
            assert_eq!("abcba".substring_after_last("b"), Some("a"));
        }

        #[test]
        fn abc_c() {
            assert_eq!("abc".substring_after_last("c"), Some(""));
        }

        #[test]
        fn empty_string_d() {
            assert_eq!("".substring_after_last("d"), None);
        }

        #[test]
        fn abc_empty() {
            assert_eq!("abc".substring_after_last(""), None);
        }

        #[test]
        fn unicode() {
            assert_eq!("日本語日本".substring_after_last("本"), Some(""));
        }
    }

    // `substring_after_last_char`: text after the last char separator.
    mod substring_after_last_char {
        use super::*;

        #[test]
        fn empty_string() {
            assert_eq!("".substring_after_last_char('a'), None);
        }

        #[test]
        fn foo_not_found() {
            assert_eq!("foo".substring_after_last_char('b'), None);
        }

        #[test]
        fn foot_o() {
            assert_eq!("foot".substring_after_last_char('o'), Some("t"));
        }

        #[test]
        fn abc_a() {
            assert_eq!("abc".substring_after_last_char('a'), Some("bc"));
        }

        #[test]
        fn abcba_b() {
            assert_eq!("abcba".substring_after_last_char('b'), Some("a"));
        }

        #[test]
        fn abc_c() {
            assert_eq!("abc".substring_after_last_char('c'), Some(""));
        }

        #[test]
        fn unicode() {
            assert_eq!("日本語".substring_after_last_char('本'), Some("語"));
        }
    }

    // `substring_between`: text between the first two occurrences of one tag.
    mod substring_between {
        use super::*;

        #[test]
        fn empty_string_empty_tag() {
            assert_eq!("".substring_between(""), None);
        }

        #[test]
        fn empty_string_abc_tag() {
            assert_eq!("".substring_between("abc"), None);
        }

        #[test]
        fn spaces_space_tag() {
            // Several spaces: the first two act as opening and closing tag with
            // nothing in between.
            assert_eq!("    ".substring_between(" "), Some(""));
        }

        #[test]
        fn single_space_space_tag() {
            // A lone space is consumed as the opening tag, leaving no closing one.
            assert_eq!(" ".substring_between(" "), None);
        }

        #[test]
        fn abc_empty_tag() {
            assert_eq!("abc".substring_between(""), None);
        }

        #[test]
        fn abc_single_a_tag() {
            assert_eq!("abc".substring_between("a"), None);
        }

        #[test]
        fn abca_a_tag() {
            assert_eq!("abca".substring_between("a"), Some("bc"));
        }

        #[test]
        fn abcabca_a_tag() {
            assert_eq!("abcabca".substring_between("a"), Some("bc"));
        }

        #[test]
        fn newline_bar_newline() {
            assert_eq!("\nbar\n".substring_between("\n"), Some("bar"));
        }

        #[test]
        fn tagabctag_tag() {
            assert_eq!("tagabctag".substring_between("tag"), Some("abc"));
        }

        #[test]
        fn unicode() {
            assert_eq!("★abc★".substring_between("★"), Some("abc"));
        }
    }

    // `substring_between_with`: text between distinct opening and closing delimiters.
    mod substring_between_with {
        use super::*;

        #[test]
        fn empty_string_empty_delimiters() {
            assert_eq!("".substring_between_with("", ""), None);
        }

        #[test]
        fn foo_empty_delimiters() {
            assert_eq!("foo".substring_between_with("", ""), None);
        }

        #[test]
        fn foo_empty_open_bracket_close() {
            assert_eq!("foo".substring_between_with("", "]"), None);
        }

        #[test]
        fn foo_bracket_open_bracket_close() {
            assert_eq!("foo".substring_between_with("[", "]"), None);
        }

        #[test]
        fn spaces_with_space_delimiters() {
            // Several spaces: the first two act as opening and closing delimiter
            // with nothing in between.
            assert_eq!("    ".substring_between_with(" ", " "), Some(""));
        }

        #[test]
        fn single_space_with_space_delimiters() {
            // A lone space is consumed as the opening delimiter, leaving no closing one.
            assert_eq!(" ".substring_between_with(" ", " "), None);
        }

        #[test]
        fn foo_bar_html_tags() {
            assert_eq!(
                "<foo>bar</foo>".substring_between_with("<foo>", "</foo>"),
                Some("bar")
            );
        }

        #[test]
        fn yabczyabcz_yz() {
            assert_eq!("yabczyabcz".substring_between_with("y", "z"), Some("abc"));
        }

        #[test]
        fn wx_b_yz() {
            assert_eq!("wx[b]yz".substring_between_with("[", "]"), Some("b"));
        }

        #[test]
        fn unicode() {
            assert_eq!("【abc】".substring_between_with("【", "】"), Some("abc"));
        }
    }

    // `substrings_between`: every delimited section, in order of appearance.
    mod substrings_between {
        use super::*;

        #[test]
        fn three_items() {
            let results = "[one], [two], [three]".substrings_between("[", "]");
            assert_eq!(results.len(), 3);
            assert_eq!(results[0], "one");
            assert_eq!(results[1], "two");
            assert_eq!(results[2], "three");
        }

        #[test]
        fn two_items_third_unclosed() {
            let results = "[one], [two], three".substrings_between("[", "]");
            assert_eq!(results.len(), 2);
            assert_eq!(results[0], "one");
            assert_eq!(results[1], "two");
        }

        #[test]
        fn two_items_third_close_only() {
            let results = "[one], [two], three]".substrings_between("[", "]");
            assert_eq!(results.len(), 2);
            assert_eq!(results[0], "one");
            assert_eq!(results[1], "two");
        }

        #[test]
        fn one_item_others_close_only() {
            let results = "[one], two], three]".substrings_between("[", "]");
            assert_eq!(results.len(), 1);
            assert_eq!(results[0], "one");
        }

        #[test]
        fn one_item_first_two_close_only() {
            let results = "one], two], [three]".substrings_between("[", "]");
            assert_eq!(results.len(), 1);
            assert_eq!(results[0], "three");
        }

        #[test]
        fn shared_delimiter_char() {
            let results = "aabhellobabnonba".substrings_between("ab", "ba");
            assert_eq!(results.len(), 1);
            assert_eq!(results[0], "hello");
        }

        #[test]
        fn no_matches() {
            let results: Vec<&str> = "one, two, three".substrings_between("[", "]");
            assert!(results.is_empty());
        }

        #[test]
        fn open_only() {
            let results: Vec<&str> = "[one, two, three".substrings_between("[", "]");
            assert!(results.is_empty());
        }

        #[test]
        fn close_only() {
            let results: Vec<&str> = "one, two, three]".substrings_between("[", "]");
            assert!(results.is_empty());
        }

        #[test]
        fn empty_delimiters() {
            let results: Vec<&str> = "[one], [two], [three]".substrings_between("", "");
            assert!(results.is_empty());
        }

        #[test]
        fn empty_string() {
            let results: Vec<&str> = "".substrings_between("[", "]");
            assert!(results.is_empty());
        }

        #[test]
        fn adjacent_delimiters() {
            assert_eq!("aabaa".substrings_between("a", "a"), vec!["", ""]);
        }

        #[test]
        fn same_delimiter_with_content() {
            assert_eq!("aXaYa".substrings_between("a", "a"), vec!["X"]);
        }

        #[test]
        fn unicode() {
            assert_eq!("【a】【b】".substrings_between("【", "】"), vec!["a", "b"]);
        }
    }

    // The trait is implemented for `str`, so owned and boxed strings reach it
    // through deref.
    mod string_types {
        use super::*;

        #[test]
        fn string_type() {
            assert_eq!(String::from("hello world").left(5), "hello");
        }

        #[test]
        fn string_ref() {
            let s = String::from("hello world");
            assert_eq!(s.right(5), "world");
        }

        #[test]
        fn boxed_str() {
            let s: Box<str> = "hello world".into();
            assert_eq!(s.mid(6, 5), "world");
        }
    }
}