csv_lib/decoders/
latin2.rs

1use std::borrow::Cow;
2
3#[cfg(target_arch = "x86_64")]
4#[allow(unused)]
5use std::arch::x86_64::*;
6#[cfg(target_arch = "aarch64")]
7#[allow(unused)]
8use std::arch::aarch64::*;
9#[cfg(target_arch = "aarch64")]
10use std::arch::is_aarch64_feature_detected;
11
/// ISO-8859-2 (Latin-2) to Unicode mapping for the bytes `0xA0..=0xFF`.
///
/// Entry `i` holds the character encoded by byte `0xA0 + i`. Bytes below `0xA0`
/// are not listed: the decoder maps them to the code point with the same value.
const LATIN2_TABLE: [char; 96] = [
    // 0xA0..=0xA7
    '\u{00A0}', '\u{0104}', '\u{02D8}', '\u{0141}', '\u{00A4}', '\u{013D}', '\u{015A}', '\u{00A7}',
    // 0xA8..=0xAF
    '\u{00A8}', '\u{0160}', '\u{015E}', '\u{0164}', '\u{0179}', '\u{00AD}', '\u{017D}', '\u{017B}',
    // 0xB0..=0xB7
    '\u{00B0}', '\u{0105}', '\u{02DB}', '\u{0142}', '\u{00B4}', '\u{013E}', '\u{015B}', '\u{02C7}',
    // 0xB8..=0xBF
    '\u{00B8}', '\u{0161}', '\u{015F}', '\u{0165}', '\u{017A}', '\u{02DD}', '\u{017E}', '\u{017C}',
    // 0xC0..=0xC7
    '\u{0154}', '\u{00C1}', '\u{00C2}', '\u{0102}', '\u{00C4}', '\u{0139}', '\u{0106}', '\u{00C7}',
    // 0xC8..=0xCF
    '\u{010C}', '\u{00C9}', '\u{0118}', '\u{00CB}', '\u{011A}', '\u{00CD}', '\u{00CE}', '\u{010E}',
    // 0xD0..=0xD7
    '\u{0110}', '\u{0143}', '\u{0147}', '\u{00D3}', '\u{00D4}', '\u{0150}', '\u{00D6}', '\u{00D7}',
    // 0xD8..=0xDF
    '\u{0158}', '\u{016E}', '\u{00DA}', '\u{0170}', '\u{00DC}', '\u{00DD}', '\u{0162}', '\u{00DF}',
    // 0xE0..=0xE7
    '\u{0155}', '\u{00E1}', '\u{00E2}', '\u{0103}', '\u{00E4}', '\u{013A}', '\u{0107}', '\u{00E7}',
    // 0xE8..=0xEF
    '\u{010D}', '\u{00E9}', '\u{0119}', '\u{00EB}', '\u{011B}', '\u{00ED}', '\u{00EE}', '\u{010F}',
    // 0xF0..=0xF7
    '\u{0111}', '\u{0144}', '\u{0148}', '\u{00F3}', '\u{00F4}', '\u{0151}', '\u{00F6}', '\u{00F7}',
    // 0xF8..=0xFF
    '\u{0159}', '\u{016F}', '\u{00FA}', '\u{0171}', '\u{00FC}', '\u{00FD}', '\u{0163}', '\u{02D9}',
];

/// Decodes ISO-8859-2 (Latin-2) bytes into UTF-8 text, one byte at a time.
///
/// Every byte value has a mapping, so decoding cannot fail. Bytes below `0xA0`
/// become the code point with the same numeric value; bytes in `0xA0..=0xFF`
/// are translated through `LATIN2_TABLE`.
///
/// Returns `Cow::Borrowed` when `input` is pure ASCII (including empty input),
/// because those bytes already are the UTF-8 encoding of the result. Any other
/// input yields a freshly allocated `Cow::Owned`.
#[allow(dead_code)]
pub(crate) fn decode_latin2_scalar(input: &[u8]) -> Cow<'_, str> {
    // Fast path: ASCII is always valid UTF-8, so `from_utf8_lossy` hands back a
    // borrowed view of `input` without copying or replacing anything.
    if input.is_ascii() {
        return String::from_utf8_lossy(input);
    }

    let decoded: String = input
        .iter()
        .map(|&byte| match byte.checked_sub(0xA0) {
            // `offset` is in 0..=0x5F here, which is always inside the 96-entry table.
            Some(offset) => LATIN2_TABLE[usize::from(offset)],
            // Below 0xA0 the byte value and the Unicode code point coincide.
            None => char::from(byte),
        })
        .collect();
    Cow::Owned(decoded)
}
42
43#[allow(dead_code)]
44pub(crate) fn decode_latin2_avx2(input: &[u8]) -> Cow<'_, str> {
45    decode_latin2_scalar(input)
46}
47
48#[allow(dead_code)]
49pub(crate) fn decode_latin2_neon(input: &[u8]) -> Cow<'_, str> {
50    decode_latin2_scalar(input)
51}
52
53#[allow(dead_code)]
54pub fn decode_latin2(input: &[u8]) -> Cow<'_, str> {
55    #[cfg(target_arch = "x86_64")]
56    {
57        if std::is_x86_feature_detected!("avx2") {
58            return decode_latin2_avx2(input);
59        }
60    }
61    #[cfg(target_arch = "aarch64")]
62    {
63        if is_aarch64_feature_detected!("neon") {
64            return decode_latin2_neon(input);
65        }
66    }
67    decode_latin2_scalar(input)
68}
69
#[cfg(test)]
mod tests {
    use super::*;

    /// Pure-ASCII sample: every byte is below `0xA0` and decodes to itself.
    const VALID_LATIN2: &[u8] = b"Hello, world!";
    /// Bytes from the table-translated range (`0xA0..=0xFF`).
    const EXTENDED_LATIN2: &[u8] = &[0xA1, 0xA2, 0xA3, 0xA4];

    #[test]
    fn test_decode_latin2_scalar_valid() {
        let decoded = decode_latin2_scalar(VALID_LATIN2);
        assert_eq!(decoded.as_ref(), "Hello, world!");
    }

    #[test]
    fn test_decode_latin2_scalar_extended() {
        let decoded = decode_latin2_scalar(EXTENDED_LATIN2);
        assert_eq!(decoded.as_ref(), "\u{0104}\u{02D8}\u{0141}\u{00A4}");
    }

    #[test]
    fn test_decode_latin2_scalar_empty() {
        let decoded = decode_latin2_scalar(&[]);
        assert_eq!(decoded.as_ref(), "");
    }

    #[test]
    fn test_decode_latin2_scalar_boundaries() {
        // 0x9F is the last identity-mapped byte; 0xA0 and 0xFF are the first
        // and last bytes translated through the lookup table.
        let decoded = decode_latin2_scalar(&[0x7F, 0x80, 0x9F, 0xA0, 0xFF]);
        assert_eq!(
            decoded.as_ref(),
            "\u{007F}\u{0080}\u{009F}\u{00A0}\u{02D9}"
        );
    }

    #[test]
    fn test_decode_latin2_avx2_or_neon_valid() {
        // Exactly one of these bindings survives cfg-stripping on any target, so
        // the test also builds on architectures without an accelerated variant.
        #[cfg(target_arch = "x86_64")]
        let decoded = if std::is_x86_feature_detected!("avx2") {
            decode_latin2_avx2(VALID_LATIN2)
        } else {
            decode_latin2_scalar(VALID_LATIN2)
        };
        #[cfg(target_arch = "aarch64")]
        let decoded = if is_aarch64_feature_detected!("neon") {
            decode_latin2_neon(VALID_LATIN2)
        } else {
            decode_latin2_scalar(VALID_LATIN2)
        };
        #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
        let decoded = decode_latin2_scalar(VALID_LATIN2);

        assert_eq!(decoded.as_ref(), "Hello, world!");
    }

    #[test]
    fn test_decode_latin2_public() {
        let decoded = decode_latin2(VALID_LATIN2);
        assert_eq!(decoded.as_ref(), "Hello, world!");
    }

    #[test]
    fn test_decode_latin2_public_matches_scalar() {
        // Whichever variant the dispatcher picks must agree with the scalar
        // decoder on every possible byte value.
        let all_bytes: Vec<u8> = (0..=u8::MAX).collect();
        assert_eq!(decode_latin2(&all_bytes), decode_latin2_scalar(&all_bytes));
    }
}