/// Cleans up character sequences by normalising line breaks, expanding tabs
/// and dropping characters that should not be kept.
///
/// See [`Sanitizer::sanitize`] for the exact rules.
#[derive(Debug, Clone)]
pub struct Sanitizer {
    /// Characters emitted in place of each line break (`\n`, lone `\r`, or a `\r\n` pair).
    replace_newline: Vec<char>,
    /// Characters emitted in place of each `\t`.
    replace_tab: Vec<char>,
}

impl Default for Sanitizer {
    /// Equivalent to [`Sanitizer::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl Sanitizer {
    /// Creates a sanitizer with the default replacements: every line break
    /// becomes `\n` and every tab becomes four spaces.
    #[must_use]
    pub fn new() -> Self {
        Self {
            replace_newline: vec!['\n'],
            replace_tab: vec![' '; 4],
        }
    }

    /// Returns a [`SanitizerBuilder`] for configuring a sanitizer step by step.
    #[must_use]
    pub fn builder() -> SanitizerBuilder {
        SanitizerBuilder::new()
    }

    /// Returns `self` with the tab replacement set to the characters of `replacement`.
    ///
    /// An empty `replacement` makes tabs disappear from the output.
    #[must_use]
    pub fn with_tab_replacement(mut self, replacement: &str) -> Self {
        self.replace_tab = replacement.chars().collect();
        self
    }

    /// Returns `self` with the newline replacement set to the characters of `replacement`.
    ///
    /// An empty `replacement` makes line breaks disappear from the output.
    #[must_use]
    pub fn with_newline_replacement(mut self, replacement: &str) -> Self {
        self.replace_newline = replacement.chars().collect();
        self
    }

    /// Returns a sanitized copy of `runes`.
    ///
    /// Each input character is handled by the first rule that matches:
    ///
    /// - `U+FFFD` (the Unicode replacement character) is dropped.
    /// - `\r` directly followed by `\n` is dropped, so a `\r\n` pair produces
    ///   the newline replacement exactly once.
    /// - Any other `\r`, and every `\n`, is replaced by the newline replacement.
    /// - `\t` is replaced by the tab replacement.
    /// - Any other character for which [`char::is_control`] returns `true` is dropped.
    /// - Everything else is copied unchanged.
    #[must_use]
    pub fn sanitize(&self, runes: &[char]) -> Vec<char> {
        let mut result = Vec::with_capacity(runes.len());
        self.sanitize_into(runes.iter().copied(), &mut result);
        result
    }

    /// Applies the same rules as [`Sanitizer::sanitize`] to a string slice and
    /// returns the result as a `String`.
    #[must_use]
    pub fn sanitize_string(&self, s: &str) -> String {
        // Written directly into the output string; no intermediate `Vec<char>`
        // is built for either the input or the result.
        let mut result = String::with_capacity(s.len());
        self.sanitize_into(s.chars(), &mut result);
        result
    }

    /// Runs the sanitizing rules over `input`, appending the kept and
    /// replacement characters to `output`.
    fn sanitize_into<I, O>(&self, input: I, output: &mut O)
    where
        I: Iterator<Item = char>,
        O: Extend<char>,
    {
        let mut chars = input.peekable();
        while let Some(current) = chars.next() {
            match current {
                '\u{FFFD}' => {}
                // The `\r` of a CRLF pair emits nothing; the `\n` that follows
                // is handled on the next iteration and emits the replacement.
                '\r' if chars.peek() == Some(&'\n') => {}
                '\r' | '\n' => output.extend(self.replace_newline.iter().copied()),
                '\t' => output.extend(self.replace_tab.iter().copied()),
                c if c.is_control() => {}
                // `Option<char>` is a one-item iterator: this appends a single char.
                c => output.extend(Some(c)),
            }
        }
    }
}
/// Step-by-step configuration for a [`Sanitizer`].
///
/// A replacement that is never set falls back to a default when
/// [`SanitizerBuilder::build`] runs.
#[derive(Debug, Clone, Default)]
pub struct SanitizerBuilder {
    /// Newline replacement chosen by the caller, if any.
    replace_newline: Option<Vec<char>>,
    /// Tab replacement chosen by the caller, if any.
    replace_tab: Option<Vec<char>>,
}

impl SanitizerBuilder {
    /// Creates a builder with neither replacement configured.
    #[must_use]
    pub fn new() -> Self {
        Self {
            replace_newline: None,
            replace_tab: None,
        }
    }

    /// Sets the characters of `replacement` as the tab replacement.
    #[must_use]
    pub fn replace_tabs(self, replacement: &str) -> Self {
        Self {
            replace_tab: Some(replacement.chars().collect()),
            ..self
        }
    }

    /// Sets the characters of `replacement` as the newline replacement.
    #[must_use]
    pub fn replace_newlines(self, replacement: &str) -> Self {
        Self {
            replace_newline: Some(replacement.chars().collect()),
            ..self
        }
    }

    /// Consumes the builder and produces the configured [`Sanitizer`].
    ///
    /// Unset replacements default to `\n` for newlines and four spaces for tabs.
    #[must_use]
    pub fn build(self) -> Sanitizer {
        let Self {
            replace_newline,
            replace_tab,
        } = self;
        Sanitizer {
            replace_newline: replace_newline.unwrap_or_else(|| vec!['\n']),
            replace_tab: replace_tab.unwrap_or_else(|| vec![' '; 4]),
        }
    }
}
/// Unit tests for [`Sanitizer`] and [`SanitizerBuilder`].
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_sanitize_basic() {
        let sanitizer = Sanitizer::new();
        let input: Vec<char> = "hello world".chars().collect();
        let output = sanitizer.sanitize(&input);
        assert_eq!(output.iter().collect::<String>(), "hello world");
    }

    #[test]
    fn test_sanitize_tabs() {
        let sanitizer = Sanitizer::new();
        let input: Vec<char> = "hello\tworld".chars().collect();
        let output = sanitizer.sanitize(&input);
        // The default tab replacement is four spaces. The expected value is
        // built with `repeat` so the count is explicit in the source.
        let expected = format!("hello{}world", " ".repeat(4));
        assert_eq!(output.iter().collect::<String>(), expected);
    }

    #[test]
    fn test_sanitize_custom_tabs() {
        let sanitizer = Sanitizer::new().with_tab_replacement(" ");
        let input: Vec<char> = "a\tb".chars().collect();
        let output = sanitizer.sanitize(&input);
        assert_eq!(output.iter().collect::<String>(), "a b");
    }

    #[test]
    fn test_sanitize_newlines() {
        let sanitizer = Sanitizer::new();
        let input: Vec<char> = "hello\nworld".chars().collect();
        let output = sanitizer.sanitize(&input);
        assert_eq!(output.iter().collect::<String>(), "hello\nworld");
    }

    #[test]
    fn test_sanitize_custom_newlines() {
        let sanitizer = Sanitizer::new().with_newline_replacement(" ");
        let input: Vec<char> = "hello\nworld".chars().collect();
        let output = sanitizer.sanitize(&input);
        assert_eq!(output.iter().collect::<String>(), "hello world");
    }

    #[test]
    fn test_sanitize_carriage_return() {
        let sanitizer = Sanitizer::new().with_newline_replacement("");
        let input: Vec<char> = "hello\r\nworld".chars().collect();
        let output = sanitizer.sanitize(&input);
        assert_eq!(output.iter().collect::<String>(), "helloworld");
    }

    #[test]
    fn test_sanitize_crlf_is_single_newline() {
        // With a non-empty replacement this distinguishes "one line break"
        // from "two line breaks" for a `\r\n` pair.
        let sanitizer = Sanitizer::new();
        assert_eq!(sanitizer.sanitize_string("hello\r\nworld"), "hello\nworld");
    }

    #[test]
    fn test_sanitize_lone_carriage_return() {
        let sanitizer = Sanitizer::new();
        assert_eq!(sanitizer.sanitize_string("hello\rworld"), "hello\nworld");
        assert_eq!(sanitizer.sanitize_string("end\r"), "end\n");
    }

    #[test]
    fn test_sanitize_control_chars() {
        let sanitizer = Sanitizer::new();
        let input: Vec<char> = "hello\x01\x02world".chars().collect();
        let output = sanitizer.sanitize(&input);
        assert_eq!(output.iter().collect::<String>(), "helloworld");
    }

    #[test]
    fn test_sanitize_unicode_replacement() {
        let sanitizer = Sanitizer::new();
        let input: Vec<char> = "hello\u{FFFD}world".chars().collect();
        let output = sanitizer.sanitize(&input);
        assert_eq!(output.iter().collect::<String>(), "helloworld");
    }

    #[test]
    fn test_sanitize_string() {
        let sanitizer = Sanitizer::new().with_tab_replacement("--");
        let output = sanitizer.sanitize_string("a\tb");
        assert_eq!(output, "a--b");
    }

    #[test]
    fn test_builder() {
        let sanitizer = Sanitizer::builder()
            .replace_tabs(" ")
            .replace_newlines("")
            .build();
        let output = sanitizer.sanitize_string("a\tb\nc");
        assert_eq!(output, "a bc");
    }

    #[test]
    fn test_unicode_preserved() {
        let sanitizer = Sanitizer::new();
        // Two CJK characters (U+4E16, U+754C) and an emoji outside the BMP
        // (U+1F30D), written as escapes so the text cannot be damaged by a
        // re-encoding of this file.
        let text = "hello \u{4e16}\u{754c} \u{1f30d}";
        let input: Vec<char> = text.chars().collect();
        let output = sanitizer.sanitize(&input);
        assert_eq!(output.iter().collect::<String>(), text);
    }
}