csv_lib/decoders/
shiftjis.rs

1use std::borrow::Cow;
2
3#[cfg(target_arch = "x86_64")]
4#[allow(unused)]
5use std::arch::x86_64::*;
6#[cfg(target_arch = "aarch64")]
7#[allow(unused)]
8use std::arch::aarch64::*;
9#[cfg(target_arch = "aarch64")]
10use std::arch::is_aarch64_feature_detected;
11
/// Scalar Shift_JIS decoder (basic version).
///
/// Decoding rules, applied byte by byte:
///
/// - `0x00..=0x7F` is copied through as the ASCII character with the same
///   code (so `0x5C` and `0x7E` decode as `\` and `~`).
/// - `0xA1..=0xDF` is mapped to the half-width katakana block
///   `U+FF61..=U+FF9F`.
/// - `0x81..=0x9F` and `0xE0..=0xEF` are lead bytes of a double-byte
///   sequence. Double-byte characters are not mapped yet, so each sequence
///   produces a single U+FFFD placeholder.
/// - Every other byte produces U+FFFD.
///
/// A lead byte only consumes the byte after it when that byte could be a
/// trail byte. An ASCII byte outside `0x40..=0x7E` (for example `,`, `"` or a
/// line break) is never part of a double-byte sequence, so it is left in the
/// stream and decoded on its own instead of being swallowed.
///
/// Returns `Cow::Borrowed` when `input` is pure ASCII (no allocation) and
/// `Cow::Owned` otherwise.
#[allow(dead_code)]
pub(crate) fn decode_shiftjis_scalar(input: &[u8]) -> Cow<'_, str> {
    const REPLACEMENT: char = '\u{FFFD}';

    // Fast path: ASCII bytes decode to themselves, and ASCII is valid UTF-8,
    // so the input can be handed back without copying.
    if input.is_ascii() {
        if let Ok(text) = std::str::from_utf8(input) {
            return Cow::Borrowed(text);
        }
    }

    let mut out = String::with_capacity(input.len());
    let mut pos = 0;
    while let Some(&byte) = input.get(pos) {
        match byte {
            0x00..=0x7F => {
                out.push(char::from(byte));
                pos += 1;
            }
            // Half-width katakana: a contiguous run starting at U+FF61.
            0xA1..=0xDF => {
                let code_point = 0xFF61 + (u32::from(byte) - 0xA1);
                out.push(std::char::from_u32(code_point).unwrap_or(REPLACEMENT));
                pos += 1;
            }
            // Lead byte of a double-byte sequence (kanji, kana, etc.).
            0x81..=0x9F | 0xE0..=0xEF => {
                // Placeholder until the double-byte table is implemented.
                out.push(REPLACEMENT);
                pos += match input.get(pos + 1) {
                    // Truncated sequence at the end of the input.
                    None => 1,
                    // ASCII byte that cannot be a trail byte: keep it in the
                    // stream so delimiters and line breaks survive.
                    Some(&next) if next.is_ascii() && !matches!(next, 0x40..=0x7E) => 1,
                    // Anything else is consumed together with the lead byte.
                    Some(_) => 2,
                };
            }
            _ => {
                out.push(REPLACEMENT);
                pos += 1;
            }
        }
    }
    Cow::Owned(out)
}
49
50#[allow(dead_code)]
51pub(crate) fn decode_shiftjis_avx2(input: &[u8]) -> Cow<'_, str> {
52    decode_shiftjis_scalar(input)
53}
54
55#[allow(dead_code)]
56pub(crate) fn decode_shiftjis_neon(input: &[u8]) -> Cow<'_, str> {
57    decode_shiftjis_scalar(input)
58}
59
60#[allow(dead_code)]
61pub fn decode_shiftjis(input: &[u8]) -> Cow<'_, str> {
62    #[cfg(target_arch = "x86_64")]
63    {
64        if std::is_x86_feature_detected!("avx2") {
65            return decode_shiftjis_avx2(input);
66        }
67    }
68    #[cfg(target_arch = "aarch64")]
69    {
70        if is_aarch64_feature_detected!("neon") {
71            return decode_shiftjis_neon(input);
72        }
73    }
74    decode_shiftjis_scalar(input)
75}
76
#[cfg(test)]
mod tests {
    use super::*;

    const VALID_ASCII: &[u8] = b"Hello, world!";
    /// Half-width katakana bytes: Wo (0xA6), small A (0xA7), small I (0xA8).
    const VALID_KATAKANA: &[u8] = &[0xA6, 0xA7, 0xA8];

    #[test]
    fn test_decode_shiftjis_scalar_ascii() {
        let decoded = decode_shiftjis_scalar(VALID_ASCII);
        assert_eq!(decoded.as_ref(), "Hello, world!");
    }

    #[test]
    fn test_decode_shiftjis_scalar_katakana() {
        let decoded = decode_shiftjis_scalar(VALID_KATAKANA);
        // Half-width katakana Wo, small A, small I.
        assert_eq!(decoded.as_ref(), "\u{FF66}\u{FF67}\u{FF68}");
    }

    #[test]
    fn test_decode_shiftjis_scalar_placeholder_and_invalid_bytes() {
        // A double-byte sequence is not mapped yet and yields one placeholder.
        let pair = decode_shiftjis_scalar(&[0x82, 0xA0]);
        assert_eq!(pair.as_ref(), "\u{FFFD}");

        // A lead byte at the very end of the input has no trail byte.
        let truncated = decode_shiftjis_scalar(&[b'a', 0x82]);
        assert_eq!(truncated.as_ref(), "a\u{FFFD}");

        // 0x80 is neither ASCII, katakana, nor a lead byte.
        let invalid = decode_shiftjis_scalar(&[0x80]);
        assert_eq!(invalid.as_ref(), "\u{FFFD}");
    }

    #[test]
    fn test_decode_shiftjis_avx2_or_neon_ascii() {
        #[cfg(target_arch = "x86_64")]
        let decoded = if std::is_x86_feature_detected!("avx2") {
            decode_shiftjis_avx2(VALID_ASCII)
        } else {
            decode_shiftjis_scalar(VALID_ASCII)
        };

        #[cfg(target_arch = "aarch64")]
        let decoded = if is_aarch64_feature_detected!("neon") {
            decode_shiftjis_neon(VALID_ASCII)
        } else {
            decode_shiftjis_scalar(VALID_ASCII)
        };

        // Without this arm the test would not compile on any other target,
        // because `decoded` would never be bound to a decoded string.
        #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
        let decoded = decode_shiftjis_scalar(VALID_ASCII);

        assert_eq!(decoded.as_ref(), "Hello, world!");
    }

    #[test]
    fn test_decode_shiftjis_public() {
        let decoded = decode_shiftjis(VALID_ASCII);
        assert_eq!(decoded.as_ref(), "Hello, world!");
    }
}