pasta_lua/encoding/mod.rs
1//! Encoding conversion module for Windows ANSI code page support.
2//!
3//! This module provides utilities for converting between UTF-8 strings and
4//! Windows ANSI code page encodings. This is necessary because Lua on Windows
5//! uses ANSI APIs for file system access, requiring path strings to be
6//! converted from UTF-8 to the system code page (e.g., Shift-JIS/CP932 for Japanese).
7//!
8//! On non-Windows systems, this module provides passthrough implementations
9//! that simply return the original UTF-8 strings.
10
11#[cfg(windows)]
12mod windows;
13
14#[cfg(windows)]
15#[allow(unused_imports)]
16pub use self::windows::*;
17
18#[cfg(not(windows))]
19mod unix;
20
21#[cfg(not(windows))]
22#[allow(unused_imports)]
23pub use self::unix::*;
24
25use std::io::Result;
26
/// Two-way converter between UTF-8 Rust strings and byte sequences expressed
/// in the system multibyte encoding.
pub trait Encoder {
    /// Decode `data` (bytes in the system encoding) into an owned UTF-8 `String`.
    ///
    /// # Errors
    /// Failure is reported as a `std::io::Error`; the exact failure conditions
    /// are defined by the implementor.
    fn to_string(&self, data: &[u8]) -> Result<String>;

    /// Encode the UTF-8 string `data` into bytes in the system encoding.
    ///
    /// # Errors
    /// Failure is reported as a `std::io::Error`; the exact failure conditions
    /// are defined by the implementor.
    fn to_bytes(&self, data: &str) -> Result<Vec<u8>>;
}
35
/// Selects which system code page a text conversion targets.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Encoding {
    /// The ANSI code page: `CP_ACP` on Windows, UTF-8 everywhere else.
    /// Intended for file system operations, GUI text, registry access, etc.
    ANSI,
    /// The OEM code page: `CP_OEMCP` on Windows, UTF-8 everywhere else.
    /// Intended for console output only.
    OEM,
}
46
/// Encode a UTF-8 string as bytes in the system ANSI code page.
///
/// This is the Windows build of the function: it hands the string to
/// `Encoding::ANSI` for conversion (for example Shift-JIS/CP932 under a
/// Japanese locale). Builds for other systems return the UTF-8 bytes as-is.
///
/// # Arguments
/// * `s` - UTF-8 string to encode
///
/// # Returns
/// * `Ok(Vec<u8>)` - the encoded bytes
/// * `Err(std::io::Error)` - the conversion was rejected by the encoder
///
/// # Example
/// ```rust,ignore
/// use pasta_lua::encoding::to_ansi_bytes;
///
/// let bytes = to_ansi_bytes("hello").unwrap();
/// assert_eq!(bytes, b"hello");
/// ```
#[cfg(windows)]
pub fn to_ansi_bytes(s: &str) -> Result<Vec<u8>> {
    let ansi = Encoding::ANSI;
    ansi.to_bytes(s)
}
70
/// Encode a UTF-8 string as "ANSI" bytes (system encoding).
///
/// This is the non-Windows build of the function: no conversion takes place,
/// the string's UTF-8 bytes are copied into a fresh vector.
///
/// # Arguments
/// * `s` - UTF-8 string to encode
///
/// # Returns
/// * `Ok(Vec<u8>)` - a copy of the UTF-8 bytes of `s`; this build never
///   returns `Err`
#[cfg(not(windows))]
pub fn to_ansi_bytes(s: &str) -> Result<Vec<u8>> {
    let utf8 = Vec::from(s.as_bytes());
    Ok(utf8)
}
84
/// Translate a path string received from Lua (system encoding) into UTF-8.
///
/// * Windows builds decode the bytes of `path` through `Encoding::ANSI`.
/// * All other builds return an owned copy of `path` unchanged.
///
/// # Errors
/// Only the Windows branch can fail; it forwards whatever error the ANSI
/// decoder reports.
///
/// NOTE(review): `path` is a `&str`, so its bytes are already valid UTF-8 when
/// they reach the Windows branch, yet they are decoded as ANSI. Presumably
/// callers only pass text that really is ANSI-encoded — confirm at call sites.
pub fn path_from_lua(path: &str) -> Result<String> {
    // Exactly one of the two bindings below survives conditional compilation.
    #[cfg(windows)]
    let converted: Result<String> = Encoding::ANSI.to_string(path.as_bytes());

    #[cfg(not(windows))]
    let converted: Result<String> = Ok(path.to_string());

    converted
}
101
#[cfg(test)]
mod tests {
    use super::*;

    /// Pure ASCII input is expected to come back byte-for-byte on every platform.
    #[test]
    fn test_to_ansi_bytes_ascii() {
        let encoded = to_ansi_bytes("test/path/file.lua").unwrap();
        assert_eq!(encoded, b"test/path/file.lua");
    }

    /// An empty string encodes to an empty byte vector.
    #[test]
    fn test_to_ansi_bytes_empty() {
        assert!(to_ansi_bytes("").unwrap().is_empty());
    }

    /// The two encoding kinds are distinct values.
    #[test]
    fn test_encoding_enum() {
        assert_ne!(Encoding::ANSI, Encoding::OEM);
    }

    /// Japanese text is converted to the ANSI code page (Shift-JIS/CP932).
    ///
    /// NOTE(review): written with a Japanese Windows locale in mind; behaviour
    /// under other ANSI code pages is not established here — confirm.
    #[cfg(windows)]
    #[test]
    fn test_to_ansi_bytes_japanese() {
        let text = "日本語パス";
        let encoded = to_ansi_bytes(text).unwrap();
        // The text must be convertible to something non-empty...
        assert!(!encoded.is_empty());
        // ...and the ANSI form must not coincide with the UTF-8 bytes.
        assert_ne!(encoded, text.as_bytes());
    }

    /// Round trip UTF-8 -> ANSI -> UTF-8 yields the original text.
    ///
    /// NOTE(review): relies on the active ANSI code page being able to
    /// represent Japanese text — confirm for CI environments.
    #[cfg(windows)]
    #[test]
    fn test_to_ansi_bytes_roundtrip() {
        let original = "日本語テスト";
        let restored = to_ansi_bytes(original)
            .and_then(|ansi| Encoding::ANSI.to_string(&ansi))
            .unwrap();
        assert_eq!(restored, original);
    }
}
147}