local_encoding_ng/
lib.rs

//! Rust library for encoding/decoding strings with the local charset. It is useful for working
//! with ANSI strings on Windows.
3//!
//! Unfortunately, Windows widely uses 8-bit character encodings instead of UTF-8.
//! This causes a lot of pain.
6//!
//! For example, in the Russian version of Windows:
//!
//!  * CP-1251 (ANSI codepage) is used for 8-bit files;
//!  * CP-866 (OEM codepage) is used for console output.
11//!
//! To convert between 8-bit and Unicode strings, Windows provides the functions
//! `MultiByteToWideChar` and `WideCharToMultiByte`.
14//!
//! This library provides simple functions to convert between 8-bit and Unicode strings on Windows.
16//!
//! UTF-8 is used as the 8-bit codepage on non-Windows systems.
18
19#![warn(missing_docs)]
20#[cfg(windows)]
21pub mod windows;
22pub mod posix;
23use std::io::Result;
24
/// Two-way conversion between Rust strings and a multibyte byte encoding.
///
/// Both directions are fallible and report failures as `std::io::Result`.
pub trait Encoder {
    /// Decodes `data`, interpreted as bytes in this encoder's charset, into a `String`.
    ///
    /// # Errors
    ///
    /// Returns an I/O error when the implementation cannot convert the bytes.
    fn to_string(&self, data: &[u8]) -> Result<String>;

    /// Encodes the string `data` into bytes in this encoder's charset.
    ///
    /// # Errors
    ///
    /// Returns an I/O error when the implementation cannot convert the string.
    fn to_bytes(&self, data: &str) -> Result<Vec<u8>>;
}
33
/// Text conversion encoding.
///
/// Selects which local 8-bit code page is used when converting to and from
/// Unicode strings. The variants carry no data, so the type is cheap to copy
/// and compare.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Encoding {
    /// Use CP_ACP codepage on Windows and UTF-8 on other systems.
    ANSI,
    /// Use CP_OEMCP codepage on Windows and UTF-8 on other systems.
    OEM,
}
41
/// Crate-private mapping from a value to a numeric Windows code page identifier.
#[cfg(windows)]
trait CodePage {
    /// Returns the code page identifier selected by `self`.
    fn codepage(&self) -> u32;
}
46
#[cfg(windows)]
impl CodePage for Encoding {
    /// Maps each variant to its code page constant from `winapi::um::winnls`:
    /// `ANSI` selects `CP_ACP`, `OEM` selects `CP_OEMCP`.
    fn codepage(&self) -> u32 {
        use winapi::um::winnls::{CP_ACP, CP_OEMCP};

        match *self {
            Encoding::ANSI => CP_ACP,
            Encoding::OEM => CP_OEMCP,
        }
    }
}
57
#[cfg(windows)]
impl Encoder for Encoding {
    /// Decodes `data` into a `String` via `windows::EncoderCodePage`, using the
    /// code page this variant selects.
    fn to_string(&self, data: &[u8]) -> Result<String> {
        let converter = windows::EncoderCodePage(self.codepage());
        converter.to_string(data)
    }

    /// Encodes `data` into bytes via `windows::EncoderCodePage`, using the
    /// code page this variant selects.
    fn to_bytes(&self, data: &str) -> Result<Vec<u8>> {
        let converter = windows::EncoderCodePage(self.codepage());
        converter.to_bytes(data)
    }
}
70
71#[cfg(not(windows))]
72impl Encoder for Encoding {
73    /// Convert from bytes to string.
74    fn to_string(self: &Self, data: &[u8]) -> Result<String> {
75        posix::EncoderUtf8.to_string(data)
76
77    }
78    /// Convert from bytes to string.
79    fn to_bytes(self: &Self, data: &str) -> Result<Vec<u8>> {
80        posix::EncoderUtf8.to_bytes(data)
81    }
82}
83
#[cfg(test)]
mod tests {
    use super::*;

    /// ASCII-only sample used by every test in this module.
    const SAMPLE: &str = "Test";

    /// Decodes the sample bytes with `encoding` and expects the same text back.
    fn check_decode(encoding: Encoding) {
        let decoded = encoding.to_string(SAMPLE.as_bytes()).unwrap();
        assert_eq!(decoded, SAMPLE);
    }

    /// Encodes the sample text with `encoding` and expects the same bytes back.
    fn check_encode(encoding: Encoding) {
        let encoded = encoding.to_bytes(SAMPLE).unwrap();
        assert_eq!(encoded, SAMPLE.as_bytes());
    }

    #[test]
    fn oem_to_string_test() {
        check_decode(Encoding::OEM);
    }

    #[test]
    fn ansi_to_string_test() {
        check_decode(Encoding::ANSI);
    }

    #[test]
    fn string_to_oem_test() {
        check_encode(Encoding::OEM);
    }

    #[test]
    fn string_to_ansi_test() {
        check_encode(Encoding::ANSI);
    }
}