/// Removal of a leading UTF-8 byte order mark (BOM) from a value.
///
/// This file implements the trait for `&str` and `&[u8]`.
pub trait StripBomFromUTF8 {
/// Consumes `self` and returns it with any leading BOM removed.
///
/// The implementations in this file return input that does not start with a
/// BOM unchanged. The result is marked `#[must_use]`, so ignoring it triggers
/// a compiler warning.
#[must_use]
fn strip_bom(self) -> Self;
}
/// The byte order mark as it appears in UTF-8 encoded data: `EF BB BF`.
static BOM_MARKER_BYTES: &[u8] = b"\xEF\xBB\xBF";
/// The byte order mark as a Unicode scalar value, U+FEFF.
static BOM_MARKER: char = '\u{FEFF}';
/// Strips a single leading byte order mark (U+FEFF) from a string slice.
///
/// Only the first character is examined. U+FEFF acts as a byte order mark
/// solely at the very start of a text; any further occurrence is ordinary
/// content and is preserved. This matches the `&[u8]` implementation, which
/// also removes at most one BOM.
impl StripBomFromUTF8 for &str {
    /// Returns the slice without its leading BOM, or the slice unchanged when
    /// it does not start with one. No allocation takes place; the result
    /// borrows from the input.
    fn strip_bom(self) -> Self {
        // `strip_prefix` removes at most one occurrence, unlike
        // `trim_start_matches`, which would also eat repeated leading U+FEFF
        // characters that belong to the payload.
        self.strip_prefix(BOM_MARKER).unwrap_or(self)
    }
}
/// Strips the UTF-8 encoded byte order mark from the front of a byte slice.
impl StripBomFromUTF8 for &[u8] {
    /// Returns the bytes following a leading BOM, or the slice unchanged when
    /// it does not begin with the full BOM sequence. At most one BOM is
    /// removed, and the result borrows from the input.
    fn strip_bom(self) -> Self {
        if self.starts_with(BOM_MARKER_BYTES) {
            // The prefix check above guarantees the slice is long enough.
            let bom_len = BOM_MARKER_BYTES.len();
            &self[bom_len..]
        } else {
            self
        }
    }
}
#[cfg(test)]
mod test {
    use super::*;

    /// Builds the shared fixture: the UTF-8 BOM followed by the bytes of `{}`.
    fn bom_prefixed_bytes() -> Vec<u8> {
        let mut bytes = BOM_MARKER_BYTES.to_vec();
        bytes.extend_from_slice(b"{}");
        bytes
    }

    /// A `&str` decoded from BOM-prefixed bytes loses the BOM.
    #[test]
    fn test_strip_bom_str_from_bytes() {
        let bytes = bom_prefixed_bytes();
        let raw: &str =
            std::str::from_utf8(&bytes).expect("BOM followed by ASCII is valid UTF-8");
        assert_eq!(raw.len(), 5);
        assert_eq!(raw, format!("{BOM_MARKER}{{}}"));

        let stripped = raw.strip_bom();
        assert_eq!(stripped.len(), 2);
        assert_eq!(stripped, "{}");
    }

    /// A `&str` built from the `char` constant loses the BOM.
    #[test]
    fn test_strip_bom_str_with_utf8_escape() {
        let owned = format!("{BOM_MARKER}{{}}");
        let raw: &str = owned.as_str();
        assert_eq!(raw.len(), 5);

        let stripped = raw.strip_bom();
        assert_eq!(stripped.len(), 2);
        assert_eq!(stripped, "{}");
    }

    /// A byte slice starting with the encoded BOM loses those three bytes.
    #[test]
    fn test_strip_bom_u8_slice() {
        let bytes = bom_prefixed_bytes();
        assert_eq!(bytes.len(), 5);

        let stripped = bytes.as_slice().strip_bom();
        assert_eq!(stripped.len(), 2);
        assert_eq!(stripped, &b"{}"[..]);
    }

    /// Strings that do not start with a BOM come back untouched.
    #[test]
    fn test_strip_bom_str_without_bom() {
        assert_eq!("{}".strip_bom(), "{}");
        assert_eq!("".strip_bom(), "");

        // U+FEFF anywhere but the front is content, not a BOM.
        let trailing = format!("{{}}{BOM_MARKER}");
        assert_eq!(trailing.as_str().strip_bom(), trailing.as_str());
    }

    /// Byte slices that do not start with the full BOM come back untouched.
    #[test]
    fn test_strip_bom_u8_slice_without_bom() {
        let plain: &[u8] = b"{}";
        assert_eq!(plain.strip_bom(), plain);

        let empty: &[u8] = &[];
        assert_eq!(empty.strip_bom(), empty);

        // Two of the three BOM bytes are not a BOM.
        let truncated: &[u8] = &BOM_MARKER_BYTES[..2];
        assert_eq!(truncated.strip_bom(), truncated);
    }

    /// Input consisting of nothing but a BOM becomes empty.
    #[test]
    fn test_strip_bom_only_bom() {
        let text = format!("{BOM_MARKER}");
        assert_eq!(text.as_str().strip_bom(), "");
        assert!(BOM_MARKER_BYTES.strip_bom().is_empty());
    }
}