use content_inspector::{ContentType, inspect};
/// Verdict produced by the text gate for a raw byte buffer.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum TextGateResult {
    /// The buffer is treated as text.
    Text {
        /// Number of leading bytes occupied by a recognised byte-order mark;
        /// `0` when no mark was found. Slice the input from this index to
        /// skip the mark.
        bom_stripped_offset: usize,
    },
    /// The buffer was classified as binary content.
    Binary,
    /// The buffer contained no bytes at all.
    Empty,
}
/// Classifies `bytes` as text, binary, or empty.
///
/// A leading byte-order mark (UTF-8, UTF-16 LE/BE, UTF-32 LE/BE) is located
/// first; its length is reported through `bom_stripped_offset` so callers can
/// skip it.
///
/// # Returns
///
/// * [`TextGateResult::Empty`] when `bytes` has length zero.
/// * [`TextGateResult::Text`] when the input consists of a BOM only, when it
///   starts with a UTF-16/UTF-32 BOM, or when the (BOM-stripped) payload is
///   not classified as binary by `content_inspector`.
/// * [`TextGateResult::Binary`] otherwise.
pub fn inspect_text(bytes: &[u8]) -> TextGateResult {
    if bytes.is_empty() {
        return TextGateResult::Empty;
    }

    let offset = detect_bom_offset(bytes);
    let text = TextGateResult::Text {
        bom_stripped_offset: offset,
    };

    let payload = &bytes[offset..];
    if payload.is_empty() {
        // Nothing but a BOM: an empty document in a text encoding.
        return text;
    }

    // Classify the untouched input first. `content_inspector` recognises wide
    // encodings by their BOM; once the mark is stripped, all it has left is
    // its NUL-byte heuristic, and UTF-16/UTF-32 code units for ordinary text
    // are full of 0x00 bytes (e.g. "h" is 68 00 in UTF-16 LE), so BOM-marked
    // text would be misreported as binary.
    match inspect(bytes) {
        ContentType::UTF_16LE
        | ContentType::UTF_16BE
        | ContentType::UTF_32LE
        | ContentType::UTF_32BE => text,
        // A UTF-8 BOM says nothing about the bytes that follow it, so the
        // payload still has to pass the binary check on its own.
        ContentType::UTF_8_BOM => match inspect(payload) {
            ContentType::BINARY => TextGateResult::Binary,
            _ => text,
        },
        ContentType::BINARY => TextGateResult::Binary,
        _ => text,
    }
}

/// Returns the length in bytes of the byte-order mark at the start of
/// `bytes`, or `0` when none is present.
///
/// Recognised marks: UTF-8 (`EF BB BF`), UTF-32 BE (`00 00 FE FF`),
/// UTF-32 LE (`FF FE 00 00`), UTF-16 BE (`FE FF`) and UTF-16 LE (`FF FE`).
fn detect_bom_offset(bytes: &[u8]) -> usize {
    // Order matters: the UTF-32 LE mark begins with the UTF-16 LE mark, so the
    // four-byte marks must be tried before their two-byte prefixes. A UTF-16 LE
    // stream whose first character is U+0000 is indistinguishable from a
    // UTF-32 LE mark; the longer match wins.
    const BOMS: [&[u8]; 5] = [
        &[0xEF, 0xBB, 0xBF],
        &[0x00, 0x00, 0xFE, 0xFF],
        &[0xFF, 0xFE, 0x00, 0x00],
        &[0xFE, 0xFF],
        &[0xFF, 0xFE],
    ];

    BOMS.iter()
        .copied()
        .find(|bom| bytes.starts_with(bom))
        .map_or(0, |bom| bom.len())
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the `Text` verdict expected when a BOM of `bom_len` bytes was found.
    fn text_after_bom(bom_len: usize) -> TextGateResult {
        TextGateResult::Text {
            bom_stripped_offset: bom_len,
        }
    }

    /// Reports whether `input` is accepted as text, whatever the BOM offset.
    fn accepted_as_text(input: &[u8]) -> bool {
        matches!(inspect_text(input), TextGateResult::Text { .. })
    }

    #[test]
    fn plain_ascii_is_text() {
        assert!(accepted_as_text(b"hello world"));
    }

    #[test]
    fn utf8_bom_stripped() {
        assert_eq!(inspect_text(b"\xEF\xBB\xBFhello"), text_after_bom(3));
    }

    #[test]
    fn utf16_le_bom_stripped() {
        assert_eq!(inspect_text(b"\xFF\xFEhello"), text_after_bom(2));
    }

    #[test]
    fn utf16_be_bom_stripped() {
        assert_eq!(inspect_text(b"\xFE\xFFhello"), text_after_bom(2));
    }

    #[test]
    fn binary_nul_bytes_detected() {
        // No BOM at the front, and NUL bytes inside the buffer.
        let blob: [u8; 8] = [0x00, 0x01, 0x02, 0xFF, 0xFE, 0x00, 0xAB, 0xCD];
        assert_eq!(inspect_text(&blob), TextGateResult::Binary);
    }

    #[test]
    fn empty_input() {
        let nothing: &[u8] = &[];
        assert_eq!(inspect_text(nothing), TextGateResult::Empty);
    }

    #[test]
    fn empty_after_bom_is_text() {
        // A lone UTF-8 BOM is not "empty": it is a text document with no content.
        assert_eq!(inspect_text(b"\xEF\xBB\xBF"), text_after_bom(3));
    }

    #[test]
    fn markdown_is_text() {
        assert!(accepted_as_text(b"# Heading\n\nparagraph"));
    }

    #[test]
    fn json_is_text() {
        assert!(accepted_as_text(b"{\"k\": 1}"));
    }

    #[test]
    fn code_is_text() {
        assert!(accepted_as_text(b"const x = 1;\nfunction f() {}"));
    }
}