use std::fmt;
use std::ops::Deref;
use regex::Regex;
use serde::{Deserialize, Serialize};
use crate::error::{Error, Result};
use crate::utils::{clean_whitespace, flatten};
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
#[serde(transparent)]
pub struct TextHandler(String);
impl TextHandler {
pub fn new(s: impl Into<String>) -> Self {
Self(s.into())
}
pub fn into_inner(self) -> String {
self.0
}
pub fn re(
&self,
regex: &str,
replace_entities: bool,
clean_match: bool,
case_sensitive: bool,
) -> Result<TextHandlers> {
let pattern = compile_regex(regex, case_sensitive)?;
let input = if clean_match {
self.clean(false)
} else {
self.clone()
};
let handlers = pattern
.captures_iter(&input)
.flat_map(|caps| {
if caps.len() > 1 {
caps.iter()
.skip(1)
.flatten()
.map(|m| m.as_str().to_owned())
.collect::<Vec<_>>()
} else {
vec![caps[0].to_owned()]
}
})
.map(|s| {
if replace_entities {
TextHandler::new(htmlize::unescape(&s).into_owned())
} else {
TextHandler::new(s)
}
})
.collect();
Ok(TextHandlers::new(handlers))
}
pub fn re_matches(&self, regex: &str, case_sensitive: bool) -> Result<bool> {
let pattern = compile_regex(regex, case_sensitive)?;
let input: &str = self;
Ok(pattern.is_match(input))
}
pub fn re_first(
&self,
regex: &str,
default: Option<TextHandler>,
replace_entities: bool,
clean_match: bool,
case_sensitive: bool,
) -> Result<Option<TextHandler>> {
let results = self.re(regex, replace_entities, clean_match, case_sensitive)?;
Ok(results.first().cloned().or(default))
}
pub fn clean(&self, remove_entities: bool) -> TextHandler {
let data = if remove_entities {
htmlize::unescape(self.as_ref()).into_owned()
} else {
self.0.clone()
};
TextHandler::new(clean_whitespace(&data).trim().to_owned())
}
pub fn json<T: serde::de::DeserializeOwned>(&self) -> Result<T> {
serde_json::from_str(self).map_err(Error::from)
}
pub fn to_uppercase_text(&self) -> TextHandler {
TextHandler::new(self.0.to_uppercase())
}
pub fn to_lowercase_text(&self) -> TextHandler {
TextHandler::new(self.0.to_lowercase())
}
pub fn replace_text(&self, from: &str, to: &str) -> TextHandler {
TextHandler::new(self.0.replace(from, to))
}
pub fn trim_text(&self) -> TextHandler {
TextHandler::new(self.0.trim().to_owned())
}
pub fn trim_start_text(&self) -> TextHandler {
TextHandler::new(self.0.trim_start().to_owned())
}
pub fn trim_end_text(&self) -> TextHandler {
TextHandler::new(self.0.trim_end().to_owned())
}
pub fn split_text(&self, sep: &str) -> TextHandlers {
TextHandlers::new(self.0.split(sep).map(TextHandler::new).collect())
}
pub fn sort_chars(&self, reverse: bool) -> TextHandler {
let mut chars: Vec<char> = self.0.chars().collect();
match reverse {
true => chars.sort_by(|a, b| b.cmp(a)),
false => chars.sort(),
}
TextHandler::new(chars.into_iter().collect::<String>())
}
pub fn get(&self) -> &TextHandler {
self
}
pub fn getall(&self) -> &TextHandler {
self
}
}
/// Lets a [`TextHandler`] be used wherever a `&str` is expected.
impl Deref for TextHandler {
    type Target = str;

    /// Borrows the wrapped string as a `&str`.
    fn deref(&self) -> &str {
        self.0.as_str()
    }
}
impl AsRef<str> for TextHandler {
fn as_ref(&self) -> &str {
&self.0
}
}
/// Formats the handler exactly like the string it wraps.
impl fmt::Display for TextHandler {
    /// Writes the wrapped text, honouring width, fill, alignment and precision.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // `pad` applies the caller's format spec (e.g. `{:>10}` or `{:.3}`),
        // which a bare `write_str` would silently ignore. With a plain `{}`
        // the output is the text itself, unchanged.
        f.pad(&self.0)
    }
}
impl From<String> for TextHandler {
fn from(s: String) -> Self {
Self(s)
}
}
/// Builds a handler from a borrowed string slice by copying it.
impl From<&str> for TextHandler {
    /// Copies `s` into a new handler.
    fn from(s: &str) -> Self {
        Self(String::from(s))
    }
}
impl From<TextHandler> for String {
fn from(t: TextHandler) -> Self {
t.0
}
}
/// An ordered list of [`TextHandler`] values.
///
/// The type is `#[serde(transparent)]`, so it serializes and deserializes
/// exactly like the inner `Vec<TextHandler>`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(transparent)]
pub struct TextHandlers(Vec<TextHandler>);

impl TextHandlers {
    /// Wraps an existing vector of handlers.
    pub fn new(items: Vec<TextHandler>) -> Self {
        TextHandlers(items)
    }

    /// Consumes the collection and returns the inner vector.
    pub fn into_inner(self) -> Vec<TextHandler> {
        let Self(items) = self;
        items
    }

    /// Runs [`TextHandler::re`] on every element and combines the per-element
    /// results, in order, through the `flatten` helper.
    ///
    /// # Errors
    ///
    /// Stops at, and returns, the first error produced by an element.
    pub fn re(
        &self,
        regex: &str,
        replace_entities: bool,
        clean_match: bool,
        case_sensitive: bool,
    ) -> Result<TextHandlers> {
        let mut per_handler = Vec::with_capacity(self.0.len());
        for handler in &self.0 {
            let found = handler.re(regex, replace_entities, clean_match, case_sensitive)?;
            per_handler.push(found.into_inner());
        }
        Ok(TextHandlers::new(flatten(per_handler)))
    }

    /// Returns the first match produced by the earliest element that has one,
    /// or `default` when no element matches.
    ///
    /// # Errors
    ///
    /// Returns the first error produced by [`TextHandler::re`] on an element
    /// visited before a match is found.
    pub fn re_first(
        &self,
        regex: &str,
        default: Option<TextHandler>,
        replace_entities: bool,
        clean_match: bool,
        case_sensitive: bool,
    ) -> Result<Option<TextHandler>> {
        for handler in &self.0 {
            let matches = handler.re(regex, replace_entities, clean_match, case_sensitive)?;
            // Later elements are not consulted once a hit is found.
            if let Some(hit) = matches.into_iter().next() {
                return Ok(Some(hit));
            }
        }
        Ok(default)
    }

    /// Returns the first element, or `None` when the collection is empty.
    pub fn get(&self) -> Option<&TextHandler> {
        self.getall().first()
    }

    /// Returns all elements as a slice.
    pub fn getall(&self) -> &[TextHandler] {
        self.0.as_slice()
    }
}
impl Deref for TextHandlers {
type Target = Vec<TextHandler>;
fn deref(&self) -> &Vec<TextHandler> {
&self.0
}
}
impl IntoIterator for TextHandlers {
type Item = TextHandler;
type IntoIter = std::vec::IntoIter<TextHandler>;
fn into_iter(self) -> Self::IntoIter {
self.0.into_iter()
}
}
/// Borrowing iteration, so `for item in &handlers` works.
impl<'a> IntoIterator for &'a TextHandlers {
    type Item = &'a TextHandler;
    type IntoIter = std::slice::Iter<'a, TextHandler>;

    /// Iterates over references to the elements, in order.
    fn into_iter(self) -> Self::IntoIter {
        self.0.as_slice().iter()
    }
}
impl FromIterator<TextHandler> for TextHandlers {
fn from_iter<I: IntoIterator<Item = TextHandler>>(iter: I) -> Self {
Self(iter.into_iter().collect())
}
}
/// Renders the collection as a bracketed, comma-separated list of quoted
/// items, e.g. `["a", "b"]`.
impl fmt::Display for TextHandlers {
    /// Writes `[`, each element wrapped in double quotes and separated by
    /// `", "`, then `]`. Quotes inside an element are written as-is.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("[")?;
        for (position, item) in self.0.iter().enumerate() {
            if position > 0 {
                f.write_str(", ")?;
            }
            write!(f, "\"{item}\"")?;
        }
        f.write_str("]")
    }
}
fn compile_regex(pattern: &str, case_sensitive: bool) -> Result<Regex> {
match case_sensitive {
true => Regex::new(pattern).map_err(Into::into),
false => Regex::new(&format!("(?i){pattern}")).map_err(Into::into),
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Borrows every handler in `items` as a plain `&str` for compact comparisons.
    fn texts(items: &TextHandlers) -> Vec<&str> {
        items.iter().map(|item| &**item).collect()
    }

    /// `str` methods are reachable through `Deref`.
    #[test]
    fn text_handler_deref() {
        let handler = TextHandler::new("hello");
        assert_eq!(handler.len(), 5);
        assert!(handler.starts_with("hel"));
    }

    /// Whitespace runs collapse and the ends are trimmed.
    #[test]
    fn text_handler_clean() {
        let raw = TextHandler::new(" hello\t\tworld\n\nfoo ");
        assert_eq!(&*raw.clean(false), "hello world foo");
    }

    /// Cleaning an already-clean handler changes nothing.
    #[test]
    fn text_handler_clean_idempotent() {
        let raw = TextHandler::new(" a\t\tb\nc ");
        let first_pass = raw.clean(false);
        let second_pass = first_pass.clean(false);
        assert_eq!(first_pass, second_pass);
    }

    /// With a capture group, only the group text is returned.
    #[test]
    fn text_handler_re_basic() {
        let source = TextHandler::new("price: $42.99 and $10.50");
        let found = source.re(r"\$(\d+\.\d+)", false, false, true).unwrap();
        assert_eq!(texts(&found), ["42.99", "10.50"]);
    }

    /// Without capture groups, the whole match is returned.
    #[test]
    fn text_handler_re_no_groups() {
        let source = TextHandler::new("abc 123 def 456");
        let found = source.re(r"\d+", false, false, true).unwrap();
        assert_eq!(texts(&found), ["123", "456"]);
    }

    /// `case_sensitive = false` lets a lowercase pattern match mixed case.
    #[test]
    fn text_handler_re_case_insensitive() {
        let source = TextHandler::new("Hello WORLD");
        let found = source.re(r"hello", false, false, false).unwrap();
        assert_eq!(found.len(), 1);
    }

    /// `re_first` yields the earliest match.
    #[test]
    fn text_handler_re_first() {
        let source = TextHandler::new("foo 123 bar 456");
        let hit = source.re_first(r"\d+", None, false, false, true).unwrap();
        assert_eq!(hit.as_deref(), Some("123"));
    }

    /// `re_first` falls back to the supplied default when nothing matches.
    #[test]
    fn text_handler_re_first_default() {
        let source = TextHandler::new("no numbers here");
        let fallback = TextHandler::new("N/A");
        let hit = source
            .re_first(r"\d+", Some(fallback), false, false, true)
            .unwrap();
        assert_eq!(hit.as_deref(), Some("N/A"));
    }

    /// JSON text decodes into a `serde_json::Value`.
    #[test]
    fn text_handler_json() {
        let source = TextHandler::new(r#"{"name": "test", "value": 42}"#);
        let decoded: serde_json::Value = source.json().unwrap();
        assert_eq!(decoded["name"], "test");
        assert_eq!(decoded["value"], 42);
    }

    /// Case conversion and replacement return transformed copies.
    #[test]
    fn text_handler_transforms() {
        let source = TextHandler::new("Hello World");
        assert_eq!(&*source.to_uppercase_text(), "HELLO WORLD");
        assert_eq!(&*source.to_lowercase_text(), "hello world");
        assert_eq!(&*source.replace_text("World", "Rust"), "Hello Rust");
    }

    /// Splitting on a separator yields one handler per piece.
    #[test]
    fn text_handler_split() {
        let source = TextHandler::new("a,b,c");
        let pieces = source.split_text(",");
        assert_eq!(pieces.len(), 3);
        assert_eq!(&*pieces[0], "a");
    }

    /// Characters sort ascending, or descending when `reverse` is set.
    #[test]
    fn text_handler_sort_chars() {
        let source = TextHandler::new("cba");
        assert_eq!(&*source.sort_chars(false), "abc");
        assert_eq!(&*source.sort_chars(true), "cba");
    }

    /// Matches from every element are combined into one collection.
    #[test]
    fn text_handlers_re() {
        let group = TextHandlers::new(vec![
            TextHandler::new("foo 1"),
            TextHandler::new("bar 2 baz 3"),
        ]);
        let found = group.re(r"\d+", false, false, true).unwrap();
        assert_eq!(found.len(), 3);
    }

    /// Elements without a match are skipped when looking for the first hit.
    #[test]
    fn text_handlers_re_first() {
        let group = TextHandlers::new(vec![
            TextHandler::new("no match"),
            TextHandler::new("has 42"),
        ]);
        let hit = group.re_first(r"\d+", None, false, false, true).unwrap();
        assert_eq!(hit.as_deref(), Some("42"));
    }

    /// `get` returns the first element, or `None` for an empty collection.
    #[test]
    fn text_handlers_get() {
        let group =
            TextHandlers::new(vec![TextHandler::new("first"), TextHandler::new("second")]);
        assert_eq!(group.get().map(|item| &**item), Some("first"));
        let nothing = TextHandlers::new(vec![]);
        assert!(nothing.get().is_none());
    }
}