Skip to main content

exiftool_rs/
encoding.rs

1//! Text encoding utilities for metadata decoding.
2//!
3//! Many file formats store text metadata in Latin-1 (ISO 8859-1) or other
4//! non-UTF-8 encodings. These helpers provide correct decoding instead of
//! the lossy `String::from_utf8_lossy()`, which silently replaces every
//! invalid UTF-8 sequence (e.g. a lone Latin-1 byte >= 0x80) with U+FFFD.
7
/// Converts an ISO 8859-1 (Latin-1) byte slice into an owned `String`.
///
/// Latin-1 byte values coincide with the first 256 Unicode code points
/// (U+0000–U+00FF), so each byte is widened to the `char` with the same
/// numeric value. Every input is accepted; there is no error case.
pub fn decode_latin1(bytes: &[u8]) -> String {
    // One output char per input byte; bytes >= 0x80 take two UTF-8 bytes,
    // so the String may still grow past this initial capacity.
    let mut decoded = String::with_capacity(bytes.len());
    for &byte in bytes {
        decoded.push(char::from(byte));
    }
    decoded
}
15
16/// Try decoding as UTF-8 first; fall back to Latin-1 if invalid.
17///
18/// This matches Perl ExifTool's behavior for fields that are historically
19/// Latin-1 but may contain valid UTF-8 in modern files.
20pub fn decode_utf8_or_latin1(bytes: &[u8]) -> String {
21    match std::str::from_utf8(bytes) {
22        Ok(s) => s.to_string(),
23        Err(_) => decode_latin1(bytes),
24    }
25}
26
#[cfg(test)]
mod tests {
    //! Unit tests for the Latin-1 and UTF-8-with-fallback decoders.

    use super::*;

    /// Pure ASCII is a subset of Latin-1 and must pass through unchanged.
    #[test]
    fn test_decode_latin1_ascii() {
        assert_eq!(decode_latin1(b"hello"), "hello");
    }

    /// Bytes >= 0x80 decode to the accented letters Latin-1 assigns them.
    #[test]
    fn test_decode_latin1_high_bytes() {
        // 0xE9 = é, 0xFC = ü, 0xF1 = ñ
        assert_eq!(decode_latin1(&[0xE9, 0xFC, 0xF1]), "éüñ");
    }

    /// Every one of the 256 byte values maps to the code point of the same
    /// numeric value.
    #[test]
    fn test_decode_latin1_full_range() {
        // Spot check: 0xA9 = ©, 0xAE = ®, 0xF6 = ö
        assert_eq!(decode_latin1(&[0xA9, 0xAE, 0xF6]), "©®ö");

        // Exhaustive check over 0x00..=0xFF.
        let all_bytes: Vec<u8> = (0..=u8::MAX).collect();
        let decoded = decode_latin1(&all_bytes);
        assert_eq!(decoded.chars().count(), all_bytes.len());
        for (&byte, ch) in all_bytes.iter().zip(decoded.chars()) {
            assert_eq!(u32::from(ch), u32::from(byte));
        }
    }

    /// Valid UTF-8 input is returned as-is, multi-byte sequences included.
    #[test]
    fn test_decode_utf8_or_latin1_valid_utf8() {
        assert_eq!(decode_utf8_or_latin1("café".as_bytes()), "café");
    }

    /// Invalid UTF-8 triggers the Latin-1 interpretation.
    #[test]
    fn test_decode_utf8_or_latin1_latin1_fallback() {
        // 0xE9 alone is invalid UTF-8 but valid Latin-1 for 'é'
        assert_eq!(decode_utf8_or_latin1(&[0x63, 0x61, 0x66, 0xE9]), "café");
    }

    /// The fallback applies to the whole input, not just the invalid bytes.
    #[test]
    fn test_decode_utf8_or_latin1_mixed_falls_back_entirely() {
        // 0xC3 0xA9 is valid UTF-8 for 'é', but the trailing 0xFF is not
        // valid UTF-8, so all three bytes are read as Latin-1: Ã, ©, ÿ.
        assert_eq!(
            decode_utf8_or_latin1(&[0xC3, 0xA9, 0xFF]),
            "\u{c3}\u{a9}\u{ff}"
        );
    }

    /// ASCII is valid UTF-8 and takes the UTF-8 path unchanged.
    #[test]
    fn test_decode_utf8_or_latin1_pure_ascii() {
        assert_eq!(decode_utf8_or_latin1(b"hello"), "hello");
    }

    /// Empty input yields an empty string from both decoders.
    #[test]
    fn test_decode_utf8_or_latin1_empty() {
        assert_eq!(decode_utf8_or_latin1(b""), "");
        assert_eq!(decode_latin1(b""), "");
    }
}