// csv_lib/decoders/windows1251.rs

1use std::borrow::Cow;
2
3#[cfg(target_arch = "x86_64")]
4#[allow(unused)]
5use std::arch::x86_64::*;
6#[cfg(target_arch = "aarch64")]
7#[allow(unused)]
8use std::arch::aarch64::*;
9#[cfg(target_arch = "aarch64")]
10use std::arch::is_aarch64_feature_detected;
11
/// Unicode scalar values for the Windows-1251 bytes `0x80..=0xFF`.
///
/// Entry `i` holds the decoding of byte `0x80 + i`. Byte `0x98` is mapped to
/// U+FFFD (REPLACEMENT CHARACTER) because Windows-1251 leaves it unassigned.
const WINDOWS1251_TABLE: [char; 128] = [
    // 0x80..=0x8F
    '\u{0402}', '\u{0403}', '\u{201A}', '\u{0453}', '\u{201E}', '\u{2026}', '\u{2020}', '\u{2021}',
    '\u{20AC}', '\u{2030}', '\u{0409}', '\u{2039}', '\u{040A}', '\u{040C}', '\u{040B}', '\u{040F}',
    // 0x90..=0x9F
    '\u{0452}', '\u{2018}', '\u{2019}', '\u{201C}', '\u{201D}', '\u{2022}', '\u{2013}', '\u{2014}',
    '\u{FFFD}', '\u{2122}', '\u{0459}', '\u{203A}', '\u{045A}', '\u{045C}', '\u{045B}', '\u{045F}',
    // 0xA0..=0xAF
    '\u{00A0}', '\u{040E}', '\u{045E}', '\u{0408}', '\u{00A4}', '\u{0490}', '\u{00A6}', '\u{00A7}',
    '\u{0401}', '\u{00A9}', '\u{0404}', '\u{00AB}', '\u{00AC}', '\u{00AD}', '\u{00AE}', '\u{0407}',
    // 0xB0..=0xBF
    '\u{00B0}', '\u{00B1}', '\u{0406}', '\u{0456}', '\u{0491}', '\u{00B5}', '\u{00B6}', '\u{00B7}',
    '\u{0451}', '\u{2116}', '\u{0454}', '\u{00BB}', '\u{0458}', '\u{0405}', '\u{0455}', '\u{0457}',
    // 0xC0..=0xCF
    '\u{0410}', '\u{0411}', '\u{0412}', '\u{0413}', '\u{0414}', '\u{0415}', '\u{0416}', '\u{0417}',
    '\u{0418}', '\u{0419}', '\u{041A}', '\u{041B}', '\u{041C}', '\u{041D}', '\u{041E}', '\u{041F}',
    // 0xD0..=0xDF
    '\u{0420}', '\u{0421}', '\u{0422}', '\u{0423}', '\u{0424}', '\u{0425}', '\u{0426}', '\u{0427}',
    '\u{0428}', '\u{0429}', '\u{042A}', '\u{042B}', '\u{042C}', '\u{042D}', '\u{042E}', '\u{042F}',
    // 0xE0..=0xEF
    '\u{0430}', '\u{0431}', '\u{0432}', '\u{0433}', '\u{0434}', '\u{0435}', '\u{0436}', '\u{0437}',
    '\u{0438}', '\u{0439}', '\u{043A}', '\u{043B}', '\u{043C}', '\u{043D}', '\u{043E}', '\u{043F}',
    // 0xF0..=0xFF
    '\u{0440}', '\u{0441}', '\u{0442}', '\u{0443}', '\u{0444}', '\u{0445}', '\u{0446}', '\u{0447}',
    '\u{0448}', '\u{0449}', '\u{044A}', '\u{044B}', '\u{044C}', '\u{044D}', '\u{044E}', '\u{044F}',
];

/// Decodes Windows-1251 bytes into UTF-8 text, one byte at a time.
///
/// Bytes below `0x80` are ASCII and map to the same code point; every other
/// byte is looked up in [`WINDOWS1251_TABLE`]. Decoding never fails.
///
/// # Returns
///
/// * `Cow::Borrowed` when `input` is pure ASCII (including empty input): the
///   bytes are already valid UTF-8, so no allocation is made.
/// * `Cow::Owned` when at least one byte is `>= 0x80`.
// `dead_code` is allowed because this function may only be reached through
// architecture-specific dispatch paths.
#[allow(dead_code)]
pub(crate) fn decode_windows1251_scalar(input: &[u8]) -> Cow<'_, str> {
    // Fast path: ASCII is a common subset of Windows-1251 and UTF-8, so the
    // input can be handed back as-is without copying.
    if input.is_ascii() {
        let text = std::str::from_utf8(input).expect("ASCII bytes are always valid UTF-8");
        return Cow::Borrowed(text);
    }

    // Each input byte yields at least one output byte; reserve that lower
    // bound up front and let the string grow for multi-byte characters.
    let mut decoded = String::with_capacity(input.len());
    decoded.extend(input.iter().map(|&byte| {
        if byte.is_ascii() {
            char::from(byte)
        } else {
            // `byte >= 0x80` here, so the index is always within 0..128.
            WINDOWS1251_TABLE[usize::from(byte - 0x80)]
        }
    }));
    Cow::Owned(decoded)
}
44
45/// AVX2 accelerated Windows-1251 decoding. (Placeholder)
46#[allow(dead_code)]
47pub(crate) fn decode_windows1251_avx2(input: &[u8]) -> Cow<'_, str> {
48    decode_windows1251_scalar(input)
49}
50
51/// NEON accelerated Windows-1251 decoding. (Placeholder)
52#[allow(dead_code)]
53pub(crate) fn decode_windows1251_neon(input: &[u8]) -> Cow<'_, str> {
54    decode_windows1251_scalar(input)
55}
56
57/// Public caller for Windows-1251 decoding.
58#[allow(dead_code)]
59pub fn decode_windows1251(input: &[u8]) -> Cow<'_, str> {
60    #[cfg(target_arch = "x86_64")]
61    {
62        if std::is_x86_feature_detected!("avx2") {
63            return decode_windows1251_avx2(input);
64        }
65    }
66    #[cfg(target_arch = "aarch64")]
67    {
68        if is_aarch64_feature_detected!("neon") {
69            return decode_windows1251_neon(input);
70        }
71    }
72    decode_windows1251_scalar(input)
73}
74
#[cfg(test)]
mod tests {
    use super::*;

    /// Pure-ASCII sample; it decodes to itself.
    const VALID_WIN1251: &[u8] = b"Hello, world!";
    /// Windows-1251 bytes for the first four Cyrillic capital letters.
    const EXTENDED_WIN1251: &[u8] = &[0xC0, 0xC1, 0xC2, 0xC3];
    /// Expected decoding of `EXTENDED_WIN1251` (U+0410..=U+0413).
    const EXTENDED_UNICODE: &str = "\u{0410}\u{0411}\u{0412}\u{0413}";

    /// Runs the accelerated decoder available on the current CPU.
    ///
    /// Falls back to the scalar decoder when the feature is missing or the
    /// target is neither x86_64 nor aarch64, so the tests compile and run on
    /// every architecture.
    fn decode_accelerated(input: &[u8]) -> std::borrow::Cow<'_, str> {
        #[cfg(target_arch = "x86_64")]
        {
            if std::is_x86_feature_detected!("avx2") {
                return decode_windows1251_avx2(input);
            }
        }
        #[cfg(target_arch = "aarch64")]
        {
            if std::arch::is_aarch64_feature_detected!("neon") {
                return decode_windows1251_neon(input);
            }
        }
        decode_windows1251_scalar(input)
    }

    #[test]
    fn test_decode_windows1251_scalar_valid() {
        let decoded = decode_windows1251_scalar(VALID_WIN1251);
        assert_eq!(decoded.as_ref(), "Hello, world!");
    }

    #[test]
    fn test_decode_windows1251_scalar_extended() {
        let decoded = decode_windows1251_scalar(EXTENDED_WIN1251);
        assert_eq!(decoded.as_ref(), EXTENDED_UNICODE);
    }

    #[test]
    fn test_decode_windows1251_scalar_undefined_byte() {
        // 0x98 maps to the replacement character in the table.
        let decoded = decode_windows1251_scalar(&[b'a', 0x98, b'b']);
        assert_eq!(decoded.as_ref(), "a\u{FFFD}b");
    }

    #[test]
    fn test_decode_windows1251_avx2_or_neon_valid() {
        let decoded = decode_accelerated(VALID_WIN1251);
        assert_eq!(decoded.as_ref(), "Hello, world!");
    }

    #[test]
    fn test_decode_windows1251_avx2_or_neon_extended() {
        let decoded = decode_accelerated(EXTENDED_WIN1251);
        assert_eq!(decoded.as_ref(), EXTENDED_UNICODE);
    }

    #[test]
    fn test_decode_windows1251_public() {
        let decoded = decode_windows1251(VALID_WIN1251);
        assert_eq!(decoded.as_ref(), "Hello, world!");
    }

    #[test]
    fn test_decode_windows1251_public_extended() {
        let decoded = decode_windows1251(EXTENDED_WIN1251);
        assert_eq!(decoded.as_ref(), EXTENDED_UNICODE);
    }
}