use crate::ranges::ENABLED_RANGES;
/// Rewrites `s` so that it no longer contains characters outside `ENABLED_RANGES`.
///
/// Returns `None` when every character of `s` lies in one of the enabled
/// ranges (nothing is allocated in that case). Otherwise returns `Some` with
/// a rewritten copy:
///
/// * exactly one rejected character: with the `verbose` feature it is
///   replaced by `�`; without it, the character is simply removed;
/// * more than one rejected character: everything from the first rejected
///   character through the last one is cut out — including any permitted
///   characters that sit between them — and, with the `verbose` feature,
///   `[N BYTES SANITIZED]` is inserted in its place.
///
/// The byte offsets of the rejected characters are recorded while scanning
/// instead of searching the output for `�` afterwards, so a `�` that is
/// legitimately part of the input (should the enabled ranges permit it) is
/// never mistaken for a marker.
pub fn sanitize(s: &str) -> Option<String> {
    /// Placeholder written in place of each rejected character.
    const MARKER: char = '�';

    // Allocated lazily: stays `None` until the first rejected character.
    let mut rewritten: Option<String> = None;
    // Byte offsets (within `rewritten`) of the first and last marker written.
    // Only meaningful once `rewritten` is `Some`.
    let mut first_invalid = 0;
    let mut last_invalid = 0;

    for (i, c) in s.char_indices() {
        let permitted = ENABLED_RANGES
            .iter()
            .any(|range| range.contains(&(c as u32)));

        if permitted {
            // Permitted characters are copied only once a buffer exists;
            // before that, the untouched prefix still lives in `s`.
            if let Some(buf) = rewritten.as_mut() {
                buf.push(c);
            }
            continue;
        }

        match rewritten.as_mut() {
            Some(buf) => {
                last_invalid = buf.len();
                buf.push(MARKER);
            }
            None => {
                // First rejected character: the prefix `s[..i]` is copied
                // verbatim, so the marker lands at offset `i` in the buffer.
                let mut buf = s[..i].to_string();
                buf.push(MARKER);
                first_invalid = i;
                last_invalid = i;
                rewritten = Some(buf);
            }
        }
    }

    // No rejected character was seen: signal "unchanged" to the caller.
    let rewritten = rewritten?;
    // First byte after the last marker.
    let tail_start = last_invalid + MARKER.len_utf8();

    #[cfg(feature = "verbose")]
    let sanitized = if first_invalid == last_invalid {
        // A single rejected character keeps its visible marker.
        rewritten
    } else {
        // NOTE(review): the reported figure is derived from marker offsets in
        // the rewritten buffer, not from the byte lengths of the original
        // characters. Kept unchanged because the exact text is observable
        // output.
        let n_invalid_bytes = last_invalid - first_invalid + 6;
        format!(
            "{}[{} BYTES SANITIZED]{}",
            &rewritten[..first_invalid],
            n_invalid_bytes,
            &rewritten[tail_start..]
        )
    };

    // Without `verbose`, both the single-marker and the multi-marker case
    // reduce to "drop the span from the first marker through the last one".
    #[cfg(not(feature = "verbose"))]
    let sanitized = format!(
        "{}{}",
        &rewritten[..first_invalid],
        &rewritten[tail_start..]
    );

    Some(sanitized)
}
#[cfg(test)]
mod tests {
use super::*;
// Exercises `sanitize` under the different feature combinations; each
// assertion is compiled only for the configuration named in its `cfg`.
#[test]
fn test_sanitize() {
// Input expected to be returned as "unchanged" (`None`).
assert_eq!(sanitize("hello \t\n"), None);
// With `latin-1-supplement` enabled, "Ā" is expected to be accepted.
// NOTE(review): "Ā" is U+0100, which Unicode places in Latin Extended-A
// rather than Latin-1 Supplement — confirm the feature's range covers it.
#[cfg(feature = "latin-1-supplement")]
assert_eq!(sanitize("Ā"), None);
// Without that feature, a single rejected character becomes the marker
// in verbose builds...
#[cfg(all(not(feature = "latin-1-supplement"), feature = "verbose"))]
assert_eq!(sanitize("Ā"), Some("�".to_string()));
// ...and is removed entirely in non-verbose builds.
#[cfg(all(not(feature = "latin-1-supplement"), not(feature = "verbose")))]
assert_eq!(sanitize("Ā"), Some("".to_string()));
// NOTE(review): for `sanitize` to return `Some` below, the input literal
// must contain characters outside the enabled ranges; presumably invisible
// Unicode tag characters follow the trailing slash (the assertions are
// gated on the `tags` feature being off). Verify they survive any
// editing or copying of this file.
#[cfg(all(not(feature = "tags"), feature = "verbose"))]
assert_eq!(
sanitize("https://wuzzi.net/copirate/"),
Some("https://wuzzi.net/copirate/[120 BYTES SANITIZED]".to_string())
);
// Non-verbose builds are expected to drop the rejected span silently.
#[cfg(all(not(feature = "tags"), not(feature = "verbose")))]
assert_eq!(
sanitize("https://wuzzi.net/copirate/"),
Some("https://wuzzi.net/copirate/".to_string())
);
}
}