Skip to main content

pasta_lua/encoding/
mod.rs

1//! Encoding conversion module for Windows ANSI code page support.
2//!
3//! This module provides utilities for converting between UTF-8 strings and
4//! Windows ANSI code page encodings. This is necessary because Lua on Windows
5//! uses ANSI APIs for file system access, requiring path strings to be
6//! converted from UTF-8 to the system code page (e.g., Shift-JIS/CP932 for Japanese).
7//!
8//! On non-Windows systems, this module provides passthrough implementations
9//! that simply return the original UTF-8 strings.
10
11#[cfg(windows)]
12mod windows;
13
14#[cfg(windows)]
15#[allow(unused_imports)]
16pub use self::windows::*;
17
18#[cfg(not(windows))]
19mod unix;
20
21#[cfg(not(windows))]
22#[allow(unused_imports)]
23pub use self::unix::*;
24
25use std::io::Result;
26
/// Two-way conversion between UTF-8 Rust strings and the system multibyte encoding.
///
/// Both directions are fallible and report failures as `std::io::Error`.
pub trait Encoder {
    /// Decodes `data`, a byte sequence in the system encoding, into a UTF-8 `String`.
    fn to_string(&self, data: &[u8]) -> Result<String>;

    /// Encodes the UTF-8 string `data` into a byte vector in the system encoding.
    fn to_bytes(&self, data: &str) -> Result<Vec<u8>>;
}
35
/// Selects which system code page a text conversion targets.
///
/// The type is a plain, field-less enum: it is `Copy`, comparable, and hashable,
/// so it can be passed by value and used as a map or set key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Encoding {
    /// ANSI code page (CP_ACP on Windows, UTF-8 on other systems).
    /// Use for file system operations, GUI text, registry, etc.
    ANSI,
    /// OEM code page (CP_OEMCP on Windows, UTF-8 on other systems).
    /// Use for console output only.
    OEM,
}
46
/// Encodes a UTF-8 string into the system ANSI code page (Windows build).
///
/// The conversion is delegated to the `to_bytes` method of [`Encoding::ANSI`],
/// so the output depends on the active ANSI code page (e.g., Shift-JIS/CP932
/// on a Japanese locale). Non-Windows builds use a separate passthrough
/// definition of this function.
///
/// # Arguments
/// * `s` - UTF-8 text to encode
///
/// # Returns
/// * `Ok(Vec<u8>)` - the encoded bytes
/// * `Err(std::io::Error)` - the underlying conversion failed
///
/// # Example
/// ```rust,ignore
/// use pasta_lua::encoding::to_ansi_bytes;
///
/// let bytes = to_ansi_bytes("hello").unwrap();
/// assert_eq!(bytes, b"hello");
/// ```
#[cfg(windows)]
pub fn to_ansi_bytes(s: &str) -> Result<Vec<u8>> {
    let ansi = Encoding::ANSI;
    ansi.to_bytes(s)
}
70
/// Encodes a UTF-8 string into "ANSI" bytes (non-Windows build).
///
/// Outside Windows the system encoding is treated as UTF-8, so this is a
/// passthrough: the result is an owned copy of the string's UTF-8 bytes.
///
/// # Arguments
/// * `s` - UTF-8 text to encode
///
/// # Returns
/// * `Ok(Vec<u8>)` - the UTF-8 bytes of `s`; this build never returns `Err`
#[cfg(not(windows))]
pub fn to_ansi_bytes(s: &str) -> Result<Vec<u8>> {
    let utf8 = Vec::from(s);
    Ok(utf8)
}
84
/// Converts a path string received from Lua (system encoding) into UTF-8.
///
/// * On Windows, the bytes of `path` are decoded through the `to_string`
///   conversion of `Encoding::ANSI`, and any error from it is returned.
/// * On other systems, `path` is copied unchanged and the result is always `Ok`.
///
/// NOTE(review): `path` is a `&str`, i.e. already valid UTF-8. Confirm that
/// callers really hand over ANSI-encoded bytes through this parameter on Windows.
pub fn path_from_lua(path: &str) -> Result<String> {
    // Exactly one of the two blocks below survives conditional compilation
    // and becomes the function's tail expression.
    #[cfg(not(windows))]
    {
        Ok(String::from(path))
    }

    #[cfg(windows)]
    {
        Encoding::ANSI.to_string(path.as_bytes())
    }
}
101
#[cfg(test)]
mod tests {
    use super::*;

    /// ASCII-only input is expected to come back byte-for-byte on every platform.
    #[test]
    fn test_to_ansi_bytes_ascii() {
        let encoded = to_ansi_bytes("test/path/file.lua").unwrap();
        assert_eq!(encoded, b"test/path/file.lua");
    }

    /// An empty string encodes to an empty byte vector.
    #[test]
    fn test_to_ansi_bytes_empty() {
        let encoded = to_ansi_bytes("").unwrap();
        assert!(encoded.is_empty());
    }

    /// The two encoding kinds are distinct values.
    #[test]
    fn test_encoding_enum() {
        assert_ne!(Encoding::ANSI, Encoding::OEM);
    }

    /// Japanese text must encode to something non-empty that differs from its
    /// UTF-8 representation.
    ///
    /// NOTE(review): this presumably assumes a Japanese (Shift-JIS/CP932) ANSI
    /// code page; confirm how it behaves on other Windows locales.
    #[cfg(windows)]
    #[test]
    fn test_to_ansi_bytes_japanese() {
        let text = "日本語パス";
        let encoded = to_ansi_bytes(text).unwrap();

        // The conversion must produce output...
        assert!(!encoded.is_empty());
        // ...and that output must not simply be the UTF-8 bytes.
        assert_ne!(encoded, text.as_bytes());
    }

    /// UTF-8 -> ANSI -> UTF-8 must reproduce the original text.
    ///
    /// NOTE(review): a lossless round trip presumably requires an ANSI code
    /// page that can represent Japanese characters; verify on CI locales.
    #[cfg(windows)]
    #[test]
    fn test_to_ansi_bytes_roundtrip() {
        let original = "日本語テスト";
        let restored = to_ansi_bytes(original)
            .and_then(|ansi_bytes| Encoding::ANSI.to_string(&ansi_bytes))
            .unwrap();
        assert_eq!(restored, original);
    }
}