local_encoding/
lib.rs

//! Rust library for encoding/decoding strings with the local charset. It is useful for working
//! with ANSI strings on Windows.
3//!
//! Unfortunately, Windows widely uses 8-bit character encodings instead of UTF-8.
//! This causes a lot of pain.
6//!
//! For example, in the Russian version of Windows:
//!
//!  * CP-1251 (ANSI codepage) is used for 8-bit files;
//!  * CP-866 (OEM codepage) is used for console output.
11//!
//! To convert between 8-bit encodings and Unicode, Windows provides the functions
//! `MultiByteToWideChar` and `WideCharToMultiByte`.
14//!
//! This library provides simple functions to convert between 8-bit and Unicode strings on Windows.
16//!
//! On non-Windows systems, UTF-8 is used as the 8-bit codepage.
18
19#![warn(missing_docs)]
20#[cfg(windows)]
21pub mod windows;
22pub mod posix;
23use std::io::Result;
24
/// Converter between Unicode strings and a multibyte (8-bit) encoding.
pub trait Encoder {
    /// Decode `data` from the multibyte encoding into a `String`.
    ///
    /// Failures are reported through `std::io::Result`; the exact error
    /// conditions are defined by the implementation.
    fn to_string(&self, data: &[u8]) -> Result<String>;

    /// Encode `data` into the bytes of the multibyte encoding.
    ///
    /// Failures are reported through `std::io::Result`; the exact error
    /// conditions are defined by the implementation.
    fn to_bytes(&self, data: &str) -> Result<Vec<u8>>;
}
33
/// Text conversion encoding.
///
/// Selects which local 8-bit codepage is used when converting to and from
/// Unicode strings.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Encoding {
    /// Use the `CP_ACP` codepage on Windows and UTF-8 on other systems.
    ANSI,
    /// Use the `CP_OEMCP` codepage on Windows and UTF-8 on other systems.
    OEM,
}
41
/// Maps a value to a numeric codepage identifier.
///
/// Only compiled when targeting Windows.
#[cfg(windows)]
trait CodePage {
    /// Return the codepage identifier for this value.
    fn codepage(&self) -> u32;
}
46
/// Codepage selection for each `Encoding` variant (Windows builds only).
#[cfg(windows)]
impl CodePage for Encoding {
    /// Return `winapi::CP_ACP` for `Encoding::ANSI` and `winapi::CP_OEMCP`
    /// for `Encoding::OEM`.
    fn codepage(&self) -> u32 {
        // Declared at function scope, inside a `cfg(windows)` impl, so the
        // crate is only referenced when building for Windows.
        extern crate winapi;

        match *self {
            Encoding::ANSI => winapi::CP_ACP,
            Encoding::OEM => winapi::CP_OEMCP,
        }
    }
}
58
/// Windows implementation: conversions go through the codepage selected by
/// `CodePage::codepage`.
#[cfg(windows)]
impl Encoder for Encoding {
    /// Convert from bytes to string.
    ///
    /// Delegates to `windows::EncoderCodePage` constructed with this
    /// encoding's codepage and returns its result unchanged.
    fn to_string(&self, data: &[u8]) -> Result<String> {
        windows::EncoderCodePage(self.codepage()).to_string(data)
    }

    /// Convert from string to bytes.
    ///
    /// Delegates to `windows::EncoderCodePage` constructed with this
    /// encoding's codepage and returns its result unchanged.
    fn to_bytes(&self, data: &str) -> Result<Vec<u8>> {
        windows::EncoderCodePage(self.codepage()).to_bytes(data)
    }
}
71
72#[cfg(not(windows))]
73impl Encoder for Encoding {
74    /// Convert from bytes to string.
75    fn to_string(self: &Self, data: &[u8]) -> Result<String> {
76        posix::EncoderUtf8.to_string(data)
77
78    }
79    /// Convert from bytes to string.
80    fn to_bytes(self: &Self, data: &str) -> Result<Vec<u8>> {
81        posix::EncoderUtf8.to_bytes(data)
82    }
83}
84
#[cfg(test)]
mod tests {
    use super::*;

    /// Decoding the ASCII bytes `Test` with `encoding` must yield `"Test"`.
    fn to_string_test(encoding: Encoding) {
        let decoded = encoding.to_string(b"Test").unwrap();
        assert_eq!(decoded, "Test");
    }

    /// Encoding the ASCII string `"Test"` with `encoding` must yield the
    /// bytes `Test`.
    fn from_string_test(encoding: Encoding) {
        let encoded = encoding.to_bytes("Test").unwrap();
        assert_eq!(encoded, b"Test");
    }

    // Bytes -> string, one test per encoding.

    #[test]
    fn oem_to_string_test() {
        to_string_test(Encoding::OEM);
    }

    #[test]
    fn ansi_to_string_test() {
        to_string_test(Encoding::ANSI);
    }

    // String -> bytes, one test per encoding.

    #[test]
    fn string_to_oem_test() {
        from_string_test(Encoding::OEM);
    }

    #[test]
    fn string_to_ansi_test() {
        from_string_test(Encoding::ANSI);
    }
}