grit_lib/
commit_encoding.rs1use encoding_rs::Encoding;
7
/// Returns `true` when `label` names ISO-8859-1 (Latin-1).
///
/// Surrounding whitespace is ignored and the comparison is ASCII case-insensitive.
/// Only the hyphenated / compact spellings listed below are recognized.
fn is_iso_8859_1(label: &str) -> bool {
    const ALIASES: [&str; 4] = ["iso-8859-1", "iso8859-1", "latin1", "latin-1"];
    let name = label.trim();
    ALIASES.iter().any(|alias| name.eq_ignore_ascii_case(alias))
}
14
/// Decodes Latin-1 bytes into a `String`.
///
/// Each byte maps directly to the Unicode code point with the same numeric value,
/// so the conversion cannot fail.
fn decode_latin1(bytes: &[u8]) -> String {
    bytes.iter().copied().map(char::from).collect()
}
22
/// Encodes `unicode` as Latin-1, one byte per character.
///
/// Characters above U+00FF have no Latin-1 representation and are replaced with `?`.
fn encode_latin1_lossy(unicode: &str) -> Vec<u8> {
    let mut encoded = Vec::with_capacity(unicode.len());
    for ch in unicode.chars() {
        encoded.push(match u32::from(ch) {
            // The range pattern guarantees the value fits in a byte, so the cast is lossless.
            cp @ 0..=0xFF => cp as u8,
            _ => b'?',
        });
    }
    encoded
}
36
/// Appends `\n` to `bytes` unless the buffer is empty or already ends with `\n`.
///
/// The buffer is taken by value and returned, so no copy is made.
#[must_use]
pub fn ensure_body_trailing_newline(mut bytes: Vec<u8>) -> Vec<u8> {
    let needs_newline = matches!(bytes.last(), Some(&last) if last != b'\n');
    if needs_newline {
        bytes.push(b'\n');
    }
    bytes
}
45
46#[must_use]
51pub fn resolve(label: &str) -> Option<&'static Encoding> {
52 let t = label.trim();
53 if t.is_empty() || is_iso_8859_1(t) {
54 return None;
55 }
56 let normalized = t.replace('_', "-");
57 let lower = normalized.to_ascii_lowercase();
58 let mapped = match lower.as_str() {
59 "eucjp" => "euc-jp",
60 "cp932" | "mskanji" | "sjis" => "shift_jis",
61 _ => normalized.as_str(),
62 };
63 Encoding::for_label(mapped.as_bytes()).or_else(|| Encoding::for_label(t.as_bytes()))
64}
65
66#[must_use]
68pub fn encode_unicode(label: &str, unicode: &str) -> Option<Vec<u8>> {
69 let t = label.trim();
70 let raw = if is_iso_8859_1(t) {
71 encode_latin1_lossy(unicode)
72 } else {
73 let enc = resolve(t)?;
74 let (cow, _, _) = enc.encode(unicode);
75 cow.into_owned()
76 };
77 Some(ensure_body_trailing_newline(raw))
78}
79
80#[must_use]
82pub fn encode_header_text(label: &str, unicode: &str) -> Option<Vec<u8>> {
83 let t = label.trim();
84 if is_iso_8859_1(t) {
85 return Some(encode_latin1_lossy(unicode));
86 }
87 let enc = resolve(t)?;
88 let (cow, _, _) = enc.encode(unicode);
89 Some(cow.into_owned())
90}
91
92#[must_use]
94pub fn decode_bytes(label: Option<&str>, bytes: &[u8]) -> String {
95 if let Some(l) = label {
96 if is_iso_8859_1(l) {
97 return decode_latin1(bytes);
98 }
99 if let Some(enc) = resolve(l) {
100 let (cow, _) = enc.decode_without_bom_handling(bytes);
101 return cow.into_owned();
102 }
103 }
104 String::from_utf8_lossy(bytes).into_owned()
105}
106
107#[must_use]
109pub fn reencode_utf8_to_label(output_label: &str, unicode: &str) -> Option<Vec<u8>> {
110 encode_header_text(output_label, unicode)
111}
112
113#[must_use]
119pub fn finalize_stored_commit_message(
120 message: String,
121 commit_encoding: Option<&str>,
122) -> (String, Option<String>, Option<Vec<u8>>) {
123 let is_utf8 = match commit_encoding {
124 None => true,
125 Some(e) => e.eq_ignore_ascii_case("utf-8") || e.eq_ignore_ascii_case("utf8"),
126 };
127 if is_utf8 {
128 return (message, None, None);
129 }
130 let Some(label) = commit_encoding.filter(|s| !s.trim().is_empty()) else {
131 return (message, None, None);
132 };
133 let Some(raw) = encode_unicode(label, &message) else {
134 return (message, None, None);
135 };
136 (message, Some(label.to_owned()), Some(raw))
137}
138
139#[must_use]
144pub fn decode_rfc2047_mailbox_from_line(from: &str) -> String {
145 let from = from.trim();
146 let Some(lt) = from.find('<') else {
147 return decode_rfc2047_encoded_words(from);
148 };
149 let name = from[..lt].trim();
150 let tail = &from[lt..];
151 let decoded = decode_rfc2047_encoded_words(name);
152 if decoded.is_empty() {
153 tail.trim_start().to_string()
154 } else {
155 format!("{decoded} {tail}")
156 }
157}
158
159fn decode_rfc2047_encoded_words(s: &str) -> String {
160 let mut out = String::new();
161 let mut rest = s;
162 while let Some(start) = rest.find("=?") {
163 out.push_str(&rest[..start]);
164 rest = &rest[start + 2..];
165 let Some(d1) = rest.find('?') else {
166 out.push_str("=?");
167 out.push_str(rest);
168 return out;
169 };
170 let charset = &rest[..d1];
171 let after_cs = &rest[d1 + 1..];
172 let Some(d2) = after_cs.find('?') else {
173 out.push_str("=?");
174 out.push_str(rest);
175 return out;
176 };
177 let encoding = after_cs[..d2].to_ascii_lowercase();
178 let after_enc = &after_cs[d2 + 1..];
179 let Some(end) = after_enc.find("?=") else {
180 out.push_str("=?");
181 out.push_str(rest);
182 return out;
183 };
184 let payload = &after_enc[..end];
185 rest = &after_enc[end + 2..];
186 if encoding == "q" {
187 let bytes = decode_quoted_printable_soft(payload);
188 out.push_str(&decode_bytes(Some(charset), &bytes));
189 } else if encoding == "b" {
190 if let Some(bytes) = base64_decode_rfc2047(payload) {
191 out.push_str(&decode_bytes(Some(charset), &bytes));
192 }
193 }
194 }
195 out.push_str(rest);
196 out
197}
198
199fn decode_quoted_printable_soft(payload: &str) -> Vec<u8> {
200 let mut out = Vec::new();
201 let mut it = payload.as_bytes().iter().copied().peekable();
202 while let Some(b) = it.next() {
203 if b == b'_' {
204 out.push(b' ');
205 } else if b == b'=' {
206 let h1 = it.next();
207 let h2 = it.next();
208 if let (Some(a), Some(c)) = (h1, h2) {
209 if let (Some(hi), Some(lo)) = (hex_nibble(a), hex_nibble(c)) {
210 out.push((hi << 4) | lo);
211 continue;
212 }
213 }
214 out.push(b'=');
215 } else {
216 out.push(b);
217 }
218 }
219 out
220}
221
/// Converts one ASCII hex digit (`0-9`, `a-f`, `A-F`) to its value `0..=15`.
///
/// Returns `None` for any other byte.
fn hex_nibble(b: u8) -> Option<u8> {
    // `to_digit(16)` yields values below 16, so narrowing to `u8` cannot truncate.
    char::from(b).to_digit(16).map(|digit| digit as u8)
}
230
/// Decodes a standard-alphabet (`A-Z a-z 0-9 + /`) base64 payload.
///
/// ASCII whitespace is skipped and decoding stops at the first `=`. Any other byte
/// outside the alphabet makes the function return `None`. Leftover bits that do not
/// form a complete byte are discarded.
fn base64_decode_rfc2047(input: &str) -> Option<Vec<u8>> {
    let mut decoded = Vec::new();
    let mut acc: u32 = 0;
    let mut pending_bits: u32 = 0;
    for byte in input.bytes() {
        let sextet = match byte {
            b'=' => break,
            ws if ws.is_ascii_whitespace() => continue,
            b'A'..=b'Z' => byte - b'A',
            b'a'..=b'z' => byte - b'a' + 26,
            b'0'..=b'9' => byte - b'0' + 52,
            b'+' => 62,
            b'/' => 63,
            _ => return None,
        };
        acc = (acc << 6) | u32::from(sextet);
        pending_bits += 6;
        if pending_bits >= 8 {
            pending_bits -= 8;
            // After the shift only the eight completed bits remain above the leftovers.
            decoded.push((acc >> pending_bits) as u8);
            acc &= (1 << pending_bits) - 1;
        }
    }
    Some(decoded)
}
254
255#[must_use]
262pub fn identity_raw_for_serialized_commit(
263 encoding: &Option<String>,
264 author: &str,
265 committer: &str,
266) -> (Vec<u8>, Vec<u8>) {
267 let is_utf8 = match encoding.as_deref() {
268 None => true,
269 Some(e) => e.eq_ignore_ascii_case("utf-8") || e.eq_ignore_ascii_case("utf8"),
270 };
271 if is_utf8 {
272 return (Vec::new(), Vec::new());
273 }
274 let Some(label) = encoding.as_deref() else {
275 return (Vec::new(), Vec::new());
276 };
277 let author_raw = encode_header_text(label, author).unwrap_or_default();
278 let committer_raw = encode_header_text(label, committer).unwrap_or_default();
279 (author_raw, committer_raw)
280}
281
282#[must_use]
286pub fn commit_message_unicode_for_display(
287 encoding: Option<&str>,
288 message: &str,
289 raw_message: Option<&[u8]>,
290) -> String {
291 if let Some(raw) = raw_message {
292 decode_bytes(encoding, raw)
293 } else {
294 message.to_owned()
295 }
296}