grit_lib/
commit_encoding.rs1use encoding_rs::Encoding;
7
/// Returns `true` when `label` names ISO-8859-1 (Latin-1).
///
/// Surrounding whitespace is ignored and the comparison is ASCII
/// case-insensitive. Only the spellings `iso-8859-1`, `iso8859-1`, `latin1`
/// and `latin-1` are recognized.
fn is_iso_8859_1(label: &str) -> bool {
    const LATIN1_NAMES: [&str; 4] = ["iso-8859-1", "iso8859-1", "latin1", "latin-1"];
    let candidate = label.trim();
    LATIN1_NAMES
        .iter()
        .any(|name| candidate.eq_ignore_ascii_case(name))
}
14
/// Decodes ISO-8859-1 bytes into a `String`.
///
/// Each byte maps directly to the Unicode scalar value with the same number
/// (U+0000..=U+00FF), so the conversion is infallible and no replacement
/// character is ever needed.
fn decode_latin1(bytes: &[u8]) -> String {
    bytes.iter().map(|&byte| char::from(byte)).collect()
}
22
/// Encodes `unicode` as ISO-8859-1, one output byte per input character.
///
/// Characters in U+0000..=U+00FF become the byte with the same value; any
/// character outside that range is replaced with `b'?'`.
fn encode_latin1_lossy(unicode: &str) -> Vec<u8> {
    // The byte length of the UTF-8 input is an upper bound on the char count.
    let mut encoded = Vec::with_capacity(unicode.len());
    for ch in unicode.chars() {
        let byte = match ch {
            // In this range the cast is exact: the code point fits in a byte.
            '\u{0}'..='\u{FF}' => ch as u8,
            _ => b'?',
        };
        encoded.push(byte);
    }
    encoded
}
36
/// Returns `bytes` with a single `\n` appended when it is non-empty and does
/// not already end with a newline.
///
/// Empty input is returned unchanged.
#[must_use]
pub fn ensure_body_trailing_newline(mut bytes: Vec<u8>) -> Vec<u8> {
    // `last()` is `None` for an empty buffer, which must stay empty.
    let needs_newline = matches!(bytes.last(), Some(&last) if last != b'\n');
    if needs_newline {
        bytes.push(b'\n');
    }
    bytes
}
45
46#[must_use]
51pub fn resolve(label: &str) -> Option<&'static Encoding> {
52 let t = label.trim();
53 if t.is_empty() || is_iso_8859_1(t) {
54 return None;
55 }
56 let normalized = t.replace('_', "-");
57 let lower = normalized.to_ascii_lowercase();
58 let mapped = match lower.as_str() {
59 "eucjp" => "euc-jp",
60 "cp932" | "mskanji" | "sjis" => "shift_jis",
61 _ => normalized.as_str(),
62 };
63 Encoding::for_label(mapped.as_bytes()).or_else(|| Encoding::for_label(t.as_bytes()))
64}
65
66#[must_use]
68pub fn encode_unicode(label: &str, unicode: &str) -> Option<Vec<u8>> {
69 let t = label.trim();
70 let raw = if is_iso_8859_1(t) {
71 encode_latin1_lossy(unicode)
72 } else {
73 let enc = resolve(t)?;
74 let (cow, _, _) = enc.encode(unicode);
75 cow.into_owned()
76 };
77 Some(ensure_body_trailing_newline(raw))
78}
79
80#[must_use]
82pub fn encode_header_text(label: &str, unicode: &str) -> Option<Vec<u8>> {
83 let t = label.trim();
84 if is_iso_8859_1(t) {
85 return Some(encode_latin1_lossy(unicode));
86 }
87 let enc = resolve(t)?;
88 let (cow, _, _) = enc.encode(unicode);
89 Some(cow.into_owned())
90}
91
92#[must_use]
94pub fn decode_bytes(label: Option<&str>, bytes: &[u8]) -> String {
95 if let Some(l) = label {
96 if is_iso_8859_1(l) {
97 return decode_latin1(bytes);
98 }
99 if let Some(enc) = resolve(l) {
100 let (cow, _) = enc.decode_without_bom_handling(bytes);
101 return cow.into_owned();
102 }
103 }
104 String::from_utf8_lossy(bytes).into_owned()
105}
106
107#[must_use]
109pub fn reencode_utf8_to_label(output_label: &str, unicode: &str) -> Option<Vec<u8>> {
110 encode_header_text(output_label, unicode)
111}