use crate::{Error, abi, util::string_from_utf16};
/// Decodes `data` into a `String` according to the encoding label `charset`.
///
/// The label is normalized with `normalize_label` and then dispatched:
///
/// * empty `data` yields an empty string regardless of the label;
/// * UTF-8 labels are decoded lossily (invalid sequences become U+FFFD) after
///   a leading UTF-8 BOM is removed;
/// * labels not known to `whatwg_label_to_codepage` log a warning and are
///   decoded exactly like UTF-8 (including BOM removal);
/// * `x-user-defined`, the replacement group, UTF-16LE/BE and ISO-8859-16 are
///   decoded by functions in this module;
/// * code pages that `codepage_to_icu_name` knows are handed to
///   `abi::icu_decode`;
/// * everything else goes to `abi::multi_byte_to_string`.
///
/// # Errors
///
/// Propagates the error returned by the UTF-16 decoders (odd byte count or
/// invalid UTF-16) and by the `abi` conversion calls.
pub(crate) fn decode_body(data: &[u8], charset: &str) -> Result<String, Error> {
    if data.is_empty() {
        return Ok(String::new());
    }
    let label = normalize_label(charset);
    if is_utf8_label(&label) {
        trace!(label = charset, "charset: UTF-8 fast path");
        return Ok(decode_utf8_lossy(data));
    }
    let Some(codepage) = whatwg_label_to_codepage(&label) else {
        warn!(label = charset, "unknown charset label, falling back to UTF-8");
        // Same decoder as the fast path so a BOM never leaks into the text as U+FEFF.
        return Ok(decode_utf8_lossy(data));
    };
    // Encodings handled in this module instead of by a platform conversion.
    match codepage {
        CP_X_USER_DEFINED => return Ok(decode_x_user_defined(data)),
        // Any non-empty input decodes to a single replacement character.
        CP_REPLACEMENT => return Ok(String::from('\u{FFFD}')),
        CP_UTF16_LE => return decode_utf16le(data),
        CP_UTF16_BE => return decode_utf16be(data),
        CP_ISO_8859_16 => return Ok(decode_iso_8859_16(data)),
        _ => {}
    }
    if let Some(icu_name) = codepage_to_icu_name(codepage) {
        trace!(label = charset, codepage, icu = icu_name, "charset: ICU fallback decode");
        return abi::icu_decode(icu_name, data);
    }
    trace!(label = charset, codepage, "charset: Win32 codepage decode");
    abi::multi_byte_to_string(codepage, data)
}

/// Decodes `data` as UTF-8, dropping one leading BOM (`EF BB BF`) and replacing
/// every invalid sequence with U+FFFD.
fn decode_utf8_lossy(data: &[u8]) -> String {
    let data = data.strip_prefix(&[0xEF, 0xBB, 0xBF]).unwrap_or(data);
    String::from_utf8_lossy(data).into_owned()
}
// Code page identifiers that `decode_body` decodes inside this module rather
// than passing on to the `abi` conversion functions.

/// UTF-16, little-endian.
const CP_UTF16_LE: u32 = 1200;
/// UTF-16, big-endian.
const CP_UTF16_BE: u32 = 1201;
/// ISO-8859-16, decoded through the `ISO_8859_16_HIGH` table.
const CP_ISO_8859_16: u32 = 28606;

// Sentinels: internal markers for label groups, not values handed to `abi`.

/// Marker for the `x-user-defined` label.
const CP_X_USER_DEFINED: u32 = 0;
/// Marker for labels that decode to a single U+FFFD ("replacement").
const CP_REPLACEMENT: u32 = u32::MAX;
/// Returns the ICU converter name for code pages that `decode_body` routes to
/// `abi::icu_decode`, or `None` when the code page has no such entry.
fn codepage_to_icu_name(codepage: u32) -> Option<&'static str> {
    // (code page, ICU converter name)
    const ICU_ROUTED: [(u32, &str); 3] = [
        (28600, "ISO-8859-10"),
        (28604, "ISO-8859-14"),
        (51932, "EUC-JP"),
    ];
    ICU_ROUTED
        .iter()
        .find(|&&(known, _)| known == codepage)
        .map(|&(_, icu_name)| icu_name)
}
/// Produces the canonical form of an encoding label used for table lookups:
/// leading/trailing ASCII whitespace removed and ASCII letters lowercased.
///
/// Non-ASCII characters and interior whitespace are left untouched.
fn normalize_label(label: &str) -> String {
    let stripped = label.trim_matches(|ch: char| ch.is_ascii_whitespace());
    let mut normalized = String::from(stripped);
    normalized.make_ascii_lowercase();
    normalized
}
/// Reports whether `label` is one of the labels treated as UTF-8.
///
/// The comparison is exact, so `label` must already be trimmed and lowercased.
fn is_utf8_label(label: &str) -> bool {
    const UTF8_LABELS: [&str; 6] = [
        "utf-8",
        "utf8",
        "unicode-1-1-utf-8",
        "unicode11utf8",
        "unicode20utf8",
        "x-unicode20utf8",
    ];
    UTF8_LABELS.iter().any(|&known| known == label)
}
/// Maps an encoding label to a numeric code page identifier.
///
/// Matching is an exact string comparison, so `label` must already be trimmed
/// and lowercased (`decode_body` passes the output of `normalize_label`).
/// Returns `None` when the label is not in the table.
///
/// Most arms return a numeric code page. A few return this module's constants
/// instead: `CP_UTF16_LE`, `CP_UTF16_BE`, `CP_X_USER_DEFINED`, and
/// `CP_REPLACEMENT`.
///
/// NOTE(review): as the function name suggests, the label groups appear to
/// follow the WHATWG Encoding Standard label table — re-check against the
/// spec when adding or changing labels.
fn whatwg_label_to_codepage(label: &str) -> Option<u32> {
Some(match label {
// UTF-8 (decode_body intercepts these labels earlier via is_utf8_label).
"unicode-1-1-utf-8" | "unicode11utf8" | "unicode20utf8" | "utf-8" | "utf8"
| "x-unicode20utf8" => 65001,
"866" | "cp866" | "csibm866" | "ibm866" => 866,
"csisolatin2" | "iso-8859-2" | "iso-ir-101" | "iso8859-2" | "iso88592" | "iso_8859-2"
| "iso_8859-2:1987" | "l2" | "latin2" => 28592,
"csisolatin3" | "iso-8859-3" | "iso-ir-109" | "iso8859-3" | "iso88593" | "iso_8859-3"
| "iso_8859-3:1988" | "l3" | "latin3" => 28593,
"csisolatin4" | "iso-8859-4" | "iso-ir-110" | "iso8859-4" | "iso88594" | "iso_8859-4"
| "iso_8859-4:1988" | "l4" | "latin4" => 28594,
"csisolatincyrillic" | "cyrillic" | "iso-8859-5" | "iso-ir-144" | "iso8859-5"
| "iso88595" | "iso_8859-5" | "iso_8859-5:1988" => 28595,
"arabic" | "asmo-708" | "csiso88596e" | "csiso88596i" | "csisolatinarabic" | "ecma-114"
| "iso-8859-6" | "iso-8859-6-e" | "iso-8859-6-i" | "iso-ir-127" | "iso8859-6"
| "iso88596" | "iso_8859-6" | "iso_8859-6:1987" => 28596,
"csisolatingreek" | "ecma-118" | "elot_928" | "greek" | "greek8" | "iso-8859-7"
| "iso-ir-126" | "iso8859-7" | "iso88597" | "iso_8859-7" | "iso_8859-7:1987"
| "sun_eu_greek" => 28597,
"csiso88598e" | "csisolatinhebrew" | "hebrew" | "iso-8859-8" | "iso-8859-8-e"
| "iso-ir-138" | "iso8859-8" | "iso88598" | "iso_8859-8" | "iso_8859-8:1988" | "visual" => {
28598
}
"csiso88598i" | "iso-8859-8-i" | "logical" => 38598,
// 28600 (ISO-8859-10) is one of the code pages codepage_to_icu_name maps to ICU.
"csisolatin6" | "iso-8859-10" | "iso-ir-157" | "iso8859-10" | "iso885910" | "l6"
| "latin6" => 28600,
"iso-8859-13" | "iso8859-13" | "iso885913" => 28603,
// 28604 (ISO-8859-14) is one of the code pages codepage_to_icu_name maps to ICU.
"iso-8859-14" | "iso8859-14" | "iso885914" => 28604,
"csisolatin9" | "iso-8859-15" | "iso8859-15" | "iso885915" | "iso_8859-15" | "l9" => 28605,
// Same value as CP_ISO_8859_16; decode_body decodes it with a local table.
"iso-8859-16" => 28606,
"cskoi8r" | "koi" | "koi8" | "koi8-r" | "koi8_r" => 20866,
"koi8-ru" | "koi8-u" => 21866,
"csmacintosh" | "mac" | "macintosh" | "x-mac-roman" => 10000,
"dos-874" | "iso-8859-11" | "iso8859-11" | "iso885911" | "tis-620" | "windows-874" => 874,
"cp1250" | "windows-1250" | "x-cp1250" => 1250,
"cp1251" | "windows-1251" | "x-cp1251" => 1251,
// ASCII and ISO-8859-1 labels are deliberately grouped with windows-1252.
"ansi_x3.4-1968" | "ascii" | "cp1252" | "cp819" | "csisolatin1" | "ibm819"
| "iso-8859-1" | "iso-ir-100" | "iso8859-1" | "iso88591" | "iso_8859-1"
| "iso_8859-1:1987" | "l1" | "latin1" | "us-ascii" | "windows-1252" | "x-cp1252" => 1252,
"cp1253" | "windows-1253" | "x-cp1253" => 1253,
// ISO-8859-9 labels are grouped with windows-1254.
"cp1254" | "csisolatin5" | "iso-8859-9" | "iso-ir-148" | "iso8859-9" | "iso88599"
| "iso_8859-9" | "iso_8859-9:1989" | "l5" | "latin5" | "windows-1254" | "x-cp1254" => 1254,
"cp1255" | "windows-1255" | "x-cp1255" => 1255,
"cp1256" | "windows-1256" | "x-cp1256" => 1256,
"cp1257" | "windows-1257" | "x-cp1257" => 1257,
"cp1258" | "windows-1258" | "x-cp1258" => 1258,
"x-mac-cyrillic" | "x-mac-ukrainian" => 10017,
"chinese" | "csgb2312" | "csiso58gb231280" | "gb2312" | "gb_2312" | "gb_2312-80"
| "gbk" | "iso-ir-58" | "x-gbk" => 936,
"gb18030" => 54936,
"big5" | "big5-hkscs" | "cn-big5" | "csbig5" | "x-x-big5" => 950,
// 51932 (EUC-JP) is one of the code pages codepage_to_icu_name maps to ICU.
"cseucpkdfmtjapanese" | "euc-jp" | "x-euc-jp" => 51932,
"csiso2022jp" | "iso-2022-jp" => 50220,
"csshiftjis" | "ms932" | "ms_kanji" | "shift-jis" | "shift_jis" | "sjis"
| "windows-31j" | "x-sjis" => 932,
"cseuckr" | "csksc56011987" | "euc-kr" | "iso-ir-149" | "korean" | "ks_c_5601-1987"
| "ks_c_5601-1989" | "ksc5601" | "ksc_5601" | "windows-949" => 51949,
"unicodefffe" | "utf-16be" => CP_UTF16_BE,
// Unqualified "utf-16" / "unicode" / "ucs-2" labels are treated as little-endian.
"csunicode" | "iso-10646-ucs-2" | "ucs-2" | "unicode" | "unicodefeff" | "utf-16"
| "utf-16le" => CP_UTF16_LE,
"x-user-defined" => CP_X_USER_DEFINED,
// These labels all resolve to the replacement sentinel (decoded as one U+FFFD).
"csiso2022kr" | "hz-gb-2312" | "iso-2022-cn" | "iso-2022-cn-ext" | "iso-2022-kr"
| "replacement" => CP_REPLACEMENT,
_ => return None,
})
}
/// Decodes `x-user-defined` bytes: ASCII bytes map to themselves and bytes
/// `0x80..=0xFF` map to the code points `U+F780..=U+F7FF`.
fn decode_x_user_defined(data: &[u8]) -> String {
    let mut text = String::with_capacity(data.len());
    for &byte in data {
        let ch = match byte {
            0x00..=0x7F => char::from(byte),
            // Offset of the byte above 0x80, rebased onto U+F780.
            high => char::from_u32(0xF780 + u32::from(high - 0x80)).unwrap_or('\u{FFFD}'),
        };
        text.push(ch);
    }
    text
}
/// Unicode code points for the upper half of ISO-8859-16.
///
/// Entry `i` is the code point for byte `0x80 + i`; `decode_iso_8859_16`
/// indexes it with `byte - 0x80`. Each row below covers eight consecutive
/// bytes, so the first four rows (bytes `0x80..=0x9F`) are the same-valued
/// code points `U+0080..=U+009F`, and the fifth row starts at byte `0xA0`.
#[rustfmt::skip]
static ISO_8859_16_HIGH: [u16; 128] = [
0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
0x00A0, 0x0104, 0x0105, 0x0141, 0x20AC, 0x201E, 0x0160, 0x00A7,
0x0161, 0x00A9, 0x0218, 0x00AB, 0x0179, 0x00AD, 0x017A, 0x017B,
0x00B0, 0x00B1, 0x010C, 0x0142, 0x017D, 0x201D, 0x00B6, 0x00B7,
0x017E, 0x010D, 0x0219, 0x00BB, 0x0152, 0x0153, 0x0178, 0x017C,
0x00C0, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0106, 0x00C6, 0x00C7,
0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
0x0110, 0x0143, 0x00D2, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x015A,
0x0170, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0118, 0x021A, 0x00DF,
0x00E0, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x0107, 0x00E6, 0x00E7,
0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
0x0111, 0x0144, 0x00F2, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x015B,
0x0171, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0119, 0x021B, 0x00FF,
];
/// Decodes ISO-8859-16 bytes: ASCII bytes map to themselves and bytes
/// `0x80..=0xFF` are looked up in `ISO_8859_16_HIGH`.
fn decode_iso_8859_16(data: &[u8]) -> String {
    let mut text = String::with_capacity(data.len());
    for &byte in data {
        let ch = match byte {
            0x00..=0x7F => char::from(byte),
            high => {
                // The table starts at byte 0x80, hence the rebased index.
                let code_point = ISO_8859_16_HIGH[usize::from(high - 0x80)];
                char::from_u32(u32::from(code_point)).unwrap_or('\u{FFFD}')
            }
        };
        text.push(ch);
    }
    text
}
fn decode_utf16le(data: &[u8]) -> Result<String, Error> {
let data = data.strip_prefix(&[0xFF, 0xFE]).unwrap_or(data);
if !data.len().is_multiple_of(2) {
return Err(Error::decode("invalid UTF-16LE: odd byte count"));
}
let words: Vec<u16> = data
.chunks_exact(2)
.map(|c| u16::from_le_bytes([c[0], c[1]]))
.collect();
string_from_utf16(&words, "invalid UTF-16LE")
}
fn decode_utf16be(data: &[u8]) -> Result<String, Error> {
let data = data.strip_prefix(&[0xFE, 0xFF]).unwrap_or(data);
if !data.len().is_multiple_of(2) {
return Err(Error::decode("invalid UTF-16BE: odd byte count"));
}
let words: Vec<u16> = data
.chunks_exact(2)
.map(|c| u16::from_be_bytes([c[0], c[1]]))
.collect();
string_from_utf16(&words, "invalid UTF-16BE")
}
/// Extracts the value of the `charset` parameter from the `Content-Type`
/// header.
///
/// Parameters are the `;`-separated segments after the media type. The
/// parameter name is matched case-insensitively, whitespace around the name
/// and before the value is ignored, and surrounding double quotes are dropped.
/// The value is returned as written (not lowercased).
///
/// Returns `None` when the header is missing, is not valid header text, has no
/// `charset` parameter, or the first `charset` parameter has an empty value.
pub(crate) fn extract_charset_from_content_type(headers: &http::HeaderMap) -> Option<String> {
    let ct = headers.get(http::header::CONTENT_TYPE)?;
    let ct_str = ct.to_str().ok()?;
    // The first segment is the media type itself; parameters follow.
    for param in ct_str.split(';').skip(1) {
        let trimmed = param.trim();
        // Segments without '=' (e.g. a bare `boundary`) are not key/value parameters.
        let Some((key, value)) = trimmed.split_once('=') else {
            continue;
        };
        if !key.trim().eq_ignore_ascii_case("charset") {
            continue;
        }
        // The name is trimmed above, so `charset = utf-8` reaches this point;
        // trim the value side too, otherwise the scan below stops on the
        // leading space and the charset is reported as missing.
        let value = value.trim_start().trim_start_matches('"');
        let charset: String = value
            .chars()
            .take_while(|&c| c != '"' && c != ';' && !c.is_ascii_whitespace())
            .collect();
        // Only the first `charset` parameter is considered, even if it is empty.
        if charset.is_empty() {
            return None;
        }
        return Some(charset);
    }
    None
}
// Unit tests for label dispatch, the in-module decoders, and Content-Type
// charset extraction.
#[cfg(test)]
mod tests {
use super::*;
// One sample per encoding, decoded through `decode_body`.
// Each case is (encoding name, label, input bytes, expected text).
#[test]
fn all_39_whatwg_encodings() {
let cases: &[(&str, &str, &[u8], &str)] = &[
("UTF-8", "utf-8", &[0x63, 0x61, 0x66, 0xC3, 0xA9], "caf\u{00E9}"),
("UTF-16BE", "utf-16be", &[0x00, 0x48, 0x00, 0x69], "Hi"),
("UTF-16LE", "utf-16le", &[0x48, 0x00, 0x69, 0x00], "Hi"),
("x-user-defined", "x-user-defined", &[0x48, 0x80, 0xFF], "H\u{F780}\u{F7FF}"),
(
"IBM866",
"ibm866",
&[0x8F, 0xE0, 0xA8, 0xA2, 0xA5, 0xE2],
"\u{041F}\u{0440}\u{0438}\u{0432}\u{0435}\u{0442}",
),
("ISO-8859-2", "iso-8859-2", &[0xA3, 0xF3, 0x64, 0xBC], "\u{0141}\u{00F3}d\u{017A}"),
("ISO-8859-3", "iso-8859-3", &[0xA1, 0xB1], "\u{0126}\u{0127}"),
("ISO-8859-4", "iso-8859-4", &[0xBD, 0xBF], "\u{014A}\u{014B}"),
("ISO-8859-5", "iso-8859-5", &[0xBC, 0xD8, 0xE0], "\u{041C}\u{0438}\u{0440}"),
("ISO-8859-6", "iso-8859-6", &[0xC7, 0xC8], "\u{0627}\u{0628}"),
("ISO-8859-7", "iso-8859-7", &[0xC1, 0xF9], "\u{0391}\u{03C9}"),
("ISO-8859-8", "iso-8859-8", &[0xE0, 0xE1], "\u{05D0}\u{05D1}"),
("ISO-8859-8-I", "iso-8859-8-i", &[0xF9, 0xFA], "\u{05E9}\u{05EA}"),
("ISO-8859-13", "iso-8859-13", &[0xD9, 0xF9], "\u{0141}\u{0142}"),
("ISO-8859-15", "iso-8859-15", &[0xA4, 0xBC], "\u{20AC}\u{0152}"),
("KOI8-R", "koi8-r", &[0xF2, 0xD5, 0xD3, 0xD8], "\u{0420}\u{0443}\u{0441}\u{044C}"),
("KOI8-U", "koi8-u", &[0xB4, 0xA4], "\u{0404}\u{0454}"),
(
"macintosh",
"macintosh",
&[0xC7, 0xC8, 0xD2, 0xD3],
"\u{00AB}\u{00BB}\u{201C}\u{201D}",
),
("windows-874", "windows-874", &[0xA1, 0xA2], "\u{0E01}\u{0E02}"),
("windows-1250", "windows-1250", &[0x8A, 0x9A], "\u{0160}\u{0161}"),
(
"windows-1251",
"windows-1251",
&[0xCC, 0xEE, 0xF1, 0xEA, 0xE2, 0xE0],
"\u{041C}\u{043E}\u{0441}\u{043A}\u{0432}\u{0430}",
),
("windows-1252", "windows-1252", &[0x80, 0x93, 0x94], "\u{20AC}\u{201C}\u{201D}"),
("windows-1253", "windows-1253", &[0xC1, 0xF9], "\u{0391}\u{03C9}"),
("windows-1254", "windows-1254", &[0xD0, 0xF0], "\u{011E}\u{011F}"),
("windows-1255", "windows-1255", &[0x80, 0xE0], "\u{20AC}\u{05D0}"),
("windows-1256", "windows-1256", &[0xC7, 0xC8], "\u{0627}\u{0628}"),
("windows-1257", "windows-1257", &[0xC0, 0xE0], "\u{0104}\u{0105}"),
("windows-1258", "windows-1258", &[0xD0, 0xF0], "\u{0110}\u{0111}"),
("x-mac-cyrillic", "x-mac-cyrillic", &[0x80, 0x81], "\u{0410}\u{0411}"),
("GBK", "gbk", &[0xC4, 0xE3, 0xBA, 0xC3], "\u{4F60}\u{597D}"),
("gb18030", "gb18030", &[0xC4, 0xE3, 0xBA, 0xC3], "\u{4F60}\u{597D}"),
("Big5", "big5", &[0xA4, 0xA4], "\u{4E2D}"),
("ISO-2022-JP", "iso-2022-jp", &[0x1B, 0x24, 0x42, 0x46, 0x7C], "\u{65E5}"),
("Shift_JIS", "shift_jis", &[0x82, 0xB1], "\u{3053}"),
("EUC-KR", "euc-kr", &[0xC7, 0xD1], "\u{D55C}"),
("ISO-8859-10", "iso-8859-10", &[0xA1, 0xA2], "\u{0104}\u{0112}"),
("ISO-8859-14", "iso-8859-14", &[0xA1, 0xD0], "\u{1E02}\u{0174}"),
("EUC-JP", "euc-jp", &[0xC6, 0xFC, 0xCB, 0xDC, 0xB8, 0xEC], "\u{65E5}\u{672C}\u{8A9E}"),
("ISO-8859-16", "iso-8859-16", &[0xAA, 0xBA], "\u{0218}\u{0219}"),
];
// Guards against a case being dropped from the table by accident.
assert_eq!(cases.len(), 39, "must cover all 39 WHATWG encodings");
for &(name, label, data, expected) in cases {
let result =
decode_body(data, label).unwrap_or_else(|e| panic!("{name} ({label}): {e}"));
assert_eq!(result, expected, "{name} ({label})");
}
}
// Label handling and special cases of `decode_body`: aliases, case and
// whitespace normalization, BOM stripping, lossy UTF-8, empty input, the
// unknown-label fallback, and the replacement group.
// Each case is (input bytes, label, expected text, description).
#[test]
fn decode_body_table() {
let cases: &[(&[u8], &str, &str, &str)] = &[
(b"ok", "utf8", "ok", "utf8 alias"),
(b"ok", "UTF-8", "ok", "uppercase label"),
(b"ok", " utf-8 ", "ok", "whitespace-trimmed label"),
(b"ok", "unicode-1-1-utf-8", "ok", "utf-8 alias long"),
(b"ok", "x-unicode20utf8", "ok", "utf-8 alias x-"),
(&[0xEF, 0xBB, 0xBF, b'h', b'i'], "utf-8", "hi", "UTF-8 BOM stripped"),
(&[0xEF, 0xBB, 0xBF], "utf-8", "", "UTF-8 BOM only"),
(b"hi\xFFlo", "utf-8", "hi\u{FFFD}lo", "UTF-8 invalid byte → U+FFFD"),
(b"", "windows-1252", "", "empty data"),
(b"", "utf-8", "", "empty UTF-8"),
(b"hello", "totally-bogus", "hello", "unknown label → UTF-8"),
(b"hi\xFFlo", "totally-bogus", "hi\u{FFFD}lo", "unknown label → UTF-8 lossy"),
(&[0x80], "ascii", "\u{20AC}", "ascii → windows-1252"),
(&[0x80], "iso-8859-1", "\u{20AC}", "iso-8859-1 → windows-1252"),
(&[0x80], "latin1", "\u{20AC}", "latin1 → windows-1252"),
(&[0x80], "us-ascii", "\u{20AC}", "us-ascii → windows-1252"),
(&[0x93], "WINDOWS-1252", "\u{201C}", "case: UPPER"),
(&[0x93], "Windows-1252", "\u{201C}", "case: Mixed"),
(b"anything", "replacement", "\u{FFFD}", "replacement encoding"),
(b"anything", "hz-gb-2312", "\u{FFFD}", "hz-gb-2312 → replacement"),
(b"anything", "iso-2022-kr", "\u{FFFD}", "iso-2022-kr → replacement"),
(&[0x80, 0xFF], "x-user-defined", "\u{F780}\u{F7FF}", "x-user-defined high bytes"),
(&[0xA1], "latin6", "\u{0104}", "latin6 → ISO-8859-10"),
(&[0xA4, 0xA2], "cseucpkdfmtjapanese", "\u{3042}", "cseucpkdfmtjapanese → EUC-JP"),
];
for &(data, label, expected, desc) in cases {
let result = decode_body(data, label).unwrap_or_else(|e| panic!("{desc}: {e}"));
assert_eq!(result, expected, "{desc}");
}
}
// Successful UTF-16 decoding in both byte orders, with and without a BOM.
// Each case is (label, input bytes, expected text, description).
#[test]
fn utf16_decode_table() {
let cases: &[(&str, &[u8], &str, &str)] = &[
("utf-16le", &[0x41, 0x00, 0x42, 0x00], "AB", "LE basic"),
("utf-16be", &[0x00, 0x41, 0x00, 0x42], "AB", "BE basic"),
("utf-16le", &[0xFF, 0xFE, 0x41, 0x00], "A", "LE BOM stripped"),
("utf-16be", &[0xFE, 0xFF, 0x00, 0x41], "A", "BE BOM stripped"),
];
for &(charset, data, expected, desc) in cases {
let result = decode_body(data, charset).unwrap_or_else(|e| panic!("{desc}: {e}"));
assert_eq!(result, expected, "{desc}");
}
}
// Malformed UTF-16 input must produce an error: lone surrogates and odd
// byte counts (also after a BOM has been removed).
#[test]
fn utf16_errors_table() {
let cases: &[(&str, &[u8], &str)] = &[
("utf-16le", &[0x00, 0xD8], "LE lone high surrogate"),
("utf-16be", &[0xD8, 0x00], "BE lone high surrogate"),
("utf-16le", &[0x41, 0x00, 0x42], "LE odd byte count"),
("utf-16be", &[0x00, 0x41, 0x42], "BE odd byte count"),
("utf-16le", &[0xFF, 0xFE, 0x42], "LE BOM + odd remainder"),
("utf-16be", &[0xFE, 0xFF, 0x00], "BE BOM + odd remainder"),
];
for &(charset, data, desc) in cases {
assert!(decode_body(data, charset).is_err(), "{desc}: should fail");
}
}
// Spot checks of single high bytes against the ISO-8859-16 table.
// Each case is (input byte, expected character).
#[test]
fn iso_8859_16_spot_checks() {
let cases: &[(u8, char)] = &[
(0xA1, '\u{0104}'), (0xA2, '\u{0105}'), (0xA3, '\u{0141}'), (0xA4, '\u{20AC}'), (0xA5, '\u{201E}'), (0xA6, '\u{0160}'), (0xA8, '\u{0161}'), (0xAC, '\u{0179}'), (0xAF, '\u{017B}'), (0xB2, '\u{010C}'), (0xB3, '\u{0142}'), (0xB4, '\u{017D}'), (0xB5, '\u{201D}'), (0xB8, '\u{017E}'), (0xB9, '\u{010D}'), (0xBC, '\u{0152}'), (0xBD, '\u{0153}'), (0xBE, '\u{0178}'), (0xBF, '\u{017C}'), (0xC3, '\u{0102}'), (0xC5, '\u{0106}'), (0xD0, '\u{0110}'), (0xD1, '\u{0143}'), (0xD5, '\u{0150}'), (0xD7, '\u{015A}'), (0xD8, '\u{0170}'), (0xDD, '\u{0118}'), (0xE3, '\u{0103}'), (0xE5, '\u{0107}'), (0xF0, '\u{0111}'), (0xF1, '\u{0144}'), (0xF5, '\u{0151}'), (0xF7, '\u{015B}'), (0xF8, '\u{0171}'), (0xFD, '\u{0119}'), (0xFE, '\u{021B}'), ];
for &(byte, expected) in cases {
let result =
decode_body(&[byte], "iso-8859-16").unwrap_or_else(|e| panic!("0x{byte:02X}: {e}"));
assert_eq!(result.chars().next().unwrap(), expected, "byte 0x{byte:02X}");
}
}
// Every byte value decodes to exactly one character under ISO-8859-16.
#[test]
fn iso_8859_16_full_range() {
let all_bytes: Vec<u8> = (0u8..=255).collect();
let result = decode_body(&all_bytes, "iso-8859-16").expect("full range");
assert_eq!(result.chars().count(), 256);
}
// Content-Type parsing: quoting, empty values, case-insensitive parameter
// names, and parameters that merely contain the word "charset".
// Each case is (header value, expected charset, description).
#[test]
fn extract_charset_table() {
let cases: &[(&str, Option<&str>, &str)] = &[
("text/html; charset=utf-8", Some("utf-8"), "plain charset"),
("text/html; charset=\"UTF-8\"", Some("UTF-8"), "quoted charset"),
("application/json", None, "no charset param"),
("text/html; charset=", None, "empty charset value"),
("text/html; charset=\"\"", None, "empty quoted charset value"),
(
"text/html; x=charset=wrong; charset=right",
Some("right"),
"charset substring in other param value",
),
("text/html; notcharset=oops", None, "charset prefix in param name"),
("text/html; Charset=Latin1", Some("Latin1"), "uppercase Charset"),
("text/html; CHARSET=Big5", Some("Big5"), "all-caps CHARSET"),
("text/html; boundary; charset=UTF-8", Some("UTF-8"), "param without equals sign"),
];
for &(content_type, expected, desc) in cases {
let mut headers = http::HeaderMap::new();
headers.insert(http::header::CONTENT_TYPE, content_type.parse().expect("valid"));
assert_eq!(extract_charset_from_content_type(&headers).as_deref(), expected, "{desc}");
}
}
// A header map without Content-Type yields no charset.
#[test]
fn extract_charset_no_content_type() {
let headers = http::HeaderMap::new();
assert_eq!(extract_charset_from_content_type(&headers), None);
}
}