csv_lib/decoders/
windows1252.rs

1use std::borrow::Cow;
2
3#[cfg(target_arch = "x86_64")]
4#[allow(unused)]
5use std::arch::x86_64::*;
6#[cfg(target_arch = "aarch64")]
7#[allow(unused)]
8use std::arch::aarch64::*;
9#[cfg(target_arch = "aarch64")]
10use std::arch::is_aarch64_feature_detected;
11
/// Unicode replacements for the Windows-1252 bytes `0x80..=0x9F`.
///
/// Entry `i` is the character for byte `0x80 + i`. The five bytes that
/// Windows-1252 leaves unassigned (`0x81`, `0x8D`, `0x8F`, `0x90`, `0x9D`)
/// are represented by U+FFFD REPLACEMENT CHARACTER. Every byte outside this
/// range decodes to the code point with the same numeric value.
const WINDOWS1252_TABLE: [char; 32] = [
    '\u{20AC}', '\u{FFFD}', '\u{201A}', '\u{0192}', '\u{201E}', '\u{2026}', '\u{2020}', '\u{2021}',
    '\u{02C6}', '\u{2030}', '\u{0160}', '\u{2039}', '\u{0152}', '\u{FFFD}', '\u{017D}', '\u{FFFD}',
    '\u{FFFD}', '\u{2018}', '\u{2019}', '\u{201C}', '\u{201D}', '\u{2022}', '\u{2013}', '\u{2014}',
    '\u{02DC}', '\u{2122}', '\u{0161}', '\u{203A}', '\u{0153}', '\u{FFFD}', '\u{017E}', '\u{0178}',
];

/// Scalar Windows-1252 decoder.
///
/// Decodes `input` byte by byte:
/// * `0x00..=0x7F` and `0xA0..=0xFF` become the code point of the same value;
/// * `0x80..=0x9F` are looked up in [`WINDOWS1252_TABLE`].
///
/// Decoding never fails. When `input` is pure ASCII (including empty input)
/// the bytes are already valid UTF-8 with the same meaning, so the result is
/// `Cow::Borrowed` and nothing is allocated; otherwise a new `String` is
/// built and returned as `Cow::Owned`.
// Kept callable even in configurations where no caller is compiled in.
#[allow(dead_code)]
pub(crate) fn decode_windows1252_scalar(input: &[u8]) -> Cow<'_, str> {
    if input.is_ascii() {
        // ASCII bytes mean the same thing in Windows-1252 and UTF-8, so the
        // input can be reinterpreted in place instead of being copied.
        return Cow::Borrowed(
            std::str::from_utf8(input).expect("ASCII-only input is always valid UTF-8"),
        );
    }

    let decoded: String = input
        .iter()
        .map(|&byte| match byte {
            // The only range where Windows-1252 and Latin-1 disagree.
            0x80..=0x9F => WINDOWS1252_TABLE[usize::from(byte - 0x80)],
            // ASCII and 0xA0..=0xFF share their numeric value with Unicode.
            _ => char::from(byte),
        })
        .collect();
    Cow::Owned(decoded)
}
36
37/// AVX2 accelerated Windows-1252 decoding.
38/// (For now uses scalar fallback, future optimization).
39#[allow(dead_code)]
40pub(crate) fn decode_windows1252_avx2(input: &[u8]) -> Cow<'_, str> {
41    decode_windows1252_scalar(input)
42}
43
44/// NEON accelerated Windows-1252 decoding.
45/// (For now uses scalar fallback, future optimization).
46#[allow(dead_code)]
47pub(crate) fn decode_windows1252_neon(input: &[u8]) -> Cow<'_, str> {
48    decode_windows1252_scalar(input)
49}
50
51/// Public caller for Windows-1252 decoding.
52/// Selects best method depending on CPU features.
53#[allow(dead_code)]
54pub fn decode_windows1252(input: &[u8]) -> Cow<'_, str> {
55    #[cfg(target_arch = "x86_64")]
56    {
57        if std::is_x86_feature_detected!("avx2") {
58            return decode_windows1252_avx2(input);
59        }
60    }
61    #[cfg(target_arch = "aarch64")]
62    {
63        if is_aarch64_feature_detected!("neon") {
64            return decode_windows1252_neon(input);
65        }
66    }
67    decode_windows1252_scalar(input)
68}
69
#[cfg(test)]
mod tests {
    use super::*;

    /// Pure-ASCII sample; decodes to itself.
    const VALID_WIN1252: &[u8] = b"Hello, world!";
    /// Bytes from the remapped 0x80..=0x9F range: €, ‚, ƒ, „
    const EXTENDED_WIN1252: &[u8] = &[0x80, 0x82, 0x83, 0x84];
    /// Expected decoding of `EXTENDED_WIN1252`.
    const EXTENDED_DECODED: &str = "\u{20AC}\u{201A}\u{0192}\u{201E}";

    /// Calls the accelerated decoder available on the current target.
    ///
    /// Falls back to the scalar decoder when the CPU feature is missing or the
    /// architecture has no accelerated entry point, so the tests compile and
    /// run on every target rather than only on x86_64 and aarch64.
    fn decode_accelerated(input: &[u8]) -> Cow<'_, str> {
        #[cfg(target_arch = "x86_64")]
        {
            if std::is_x86_feature_detected!("avx2") {
                return decode_windows1252_avx2(input);
            }
        }
        #[cfg(target_arch = "aarch64")]
        {
            if is_aarch64_feature_detected!("neon") {
                return decode_windows1252_neon(input);
            }
        }
        decode_windows1252_scalar(input)
    }

    #[test]
    fn test_decode_windows1252_scalar_valid() {
        let decoded = decode_windows1252_scalar(VALID_WIN1252);
        assert_eq!(decoded.as_ref(), "Hello, world!");
    }

    #[test]
    fn test_decode_windows1252_scalar_extended() {
        let decoded = decode_windows1252_scalar(EXTENDED_WIN1252);
        assert_eq!(decoded.as_ref(), EXTENDED_DECODED);
    }

    #[test]
    fn test_decode_windows1252_scalar_empty() {
        let decoded = decode_windows1252_scalar(&[]);
        assert_eq!(decoded.as_ref(), "");
    }

    #[test]
    fn test_decode_windows1252_scalar_high_latin() {
        // 0xA0..=0xFF keep their numeric value as a Unicode code point.
        let decoded = decode_windows1252_scalar(&[0xA0, 0xE9, 0xFF]);
        assert_eq!(decoded.as_ref(), "\u{A0}\u{E9}\u{FF}");
    }

    #[test]
    fn test_decode_windows1252_scalar_unassigned() {
        // The five unassigned bytes are mapped to U+FFFD by the table.
        let decoded = decode_windows1252_scalar(&[0x81, 0x8D, 0x8F, 0x90, 0x9D]);
        assert_eq!(decoded.as_ref(), "\u{FFFD}\u{FFFD}\u{FFFD}\u{FFFD}\u{FFFD}");
    }

    #[test]
    fn test_decode_windows1252_avx2_or_neon_valid() {
        let decoded = decode_accelerated(VALID_WIN1252);
        assert_eq!(decoded.as_ref(), "Hello, world!");
    }

    #[test]
    fn test_decode_windows1252_avx2_or_neon_extended() {
        let decoded = decode_accelerated(EXTENDED_WIN1252);
        assert_eq!(decoded.as_ref(), EXTENDED_DECODED);
    }

    #[test]
    fn test_decode_windows1252_public() {
        let decoded = decode_windows1252(VALID_WIN1252);
        assert_eq!(decoded.as_ref(), "Hello, world!");
    }

    #[test]
    fn test_decode_windows1252_public_extended() {
        let decoded = decode_windows1252(EXTENDED_WIN1252);
        assert_eq!(decoded.as_ref(), EXTENDED_DECODED);
    }
}