Skip to main content

nwnrs_encoding/
encoding.rs

1use std::{cell::Cell, env};
2
3use encoding_rs::{Encoding, WINDOWS_1252};
4use tracing::instrument;
5
6use crate::{EncodingConversionError, NativeEncodingError, UnknownEncodingError};
7
thread_local! {
    // Encoding used for NWN text on the current thread. Starts out as
    // Windows-1252 and is replaced by `set_nwnrs_encoding`.
    static NWNRS_ENCODING: Cell<&'static Encoding> = Cell::new(WINDOWS_1252);
    // Native system encoding cached for the current thread. `None` means it
    // has not been detected or configured yet (or the cache was cleared).
    static NATIVE_ENCODING: Cell<Option<&'static Encoding>> = const { Cell::new(None) };
}
12
13/// Returns the encoding currently used for NWN text data.
14pub fn get_nwnrs_encoding() -> &'static Encoding {
15    NWNRS_ENCODING.with(Cell::get)
16}
17
18/// Returns the canonical label for the current NWN text encoding.
19#[must_use]
20pub fn get_nwnrs_encoding_name() -> &'static str {
21    get_nwnrs_encoding().name()
22}
23
24/// Sets the encoding used for NWN text data.
25///
26/// # Errors
27///
28/// Returns [`UnknownEncodingError`] if `label` does not map to a known
29/// encoding.
30#[instrument(level = "debug", skip_all, err, fields(label = %label))]
31pub fn set_nwnrs_encoding(label: &str) -> Result<(), UnknownEncodingError> {
32    let encoding =
33        Encoding::for_label(label.as_bytes()).ok_or_else(|| UnknownEncodingError::new(label))?;
34    NWNRS_ENCODING.with(|slot| slot.set(encoding));
35    Ok(())
36}
37
38/// Returns the configured or detected native system encoding.
39///
40/// # Errors
41///
42/// Returns [`NativeEncodingError`] if the system encoding cannot be detected.
43#[instrument(level = "debug", err)]
44pub fn get_native_encoding() -> Result<&'static Encoding, NativeEncodingError> {
45    if let Some(encoding) = NATIVE_ENCODING.with(Cell::get) {
46        return Ok(encoding);
47    }
48
49    let encoding = detect_system_native_encoding()?;
50    NATIVE_ENCODING.with(|slot| slot.set(Some(encoding)));
51    Ok(encoding)
52}
53
54/// Returns the canonical label for the native system encoding.
55///
56/// # Errors
57///
58/// Returns [`NativeEncodingError`] if the system encoding cannot be detected.
59#[instrument(level = "debug", err)]
60pub fn get_native_encoding_name() -> Result<&'static str, NativeEncodingError> {
61    Ok(get_native_encoding()?.name())
62}
63
64/// Overrides the detected native system encoding.
65///
66/// # Errors
67///
68/// Returns [`UnknownEncodingError`] if `label` does not map to a known
69/// encoding.
70#[instrument(level = "debug", skip_all, err, fields(label = %label))]
71pub fn set_native_encoding(label: &str) -> Result<(), UnknownEncodingError> {
72    let encoding =
73        Encoding::for_label(label.as_bytes()).ok_or_else(|| UnknownEncodingError::new(label))?;
74    NATIVE_ENCODING.with(|slot| slot.set(Some(encoding)));
75    Ok(())
76}
77
78/// Clears any cached native encoding so it will be detected again on demand.
79pub fn clear_native_encoding() {
80    NATIVE_ENCODING.with(|slot| slot.set(None));
81}
82
83/// Detects the process-native text encoding for the current platform.
84///
85/// # Errors
86///
87/// Returns [`NativeEncodingError`] if the system encoding cannot be determined.
88#[instrument(level = "debug", err)]
89pub fn detect_system_native_encoding() -> Result<&'static Encoding, NativeEncodingError> {
90    #[cfg(windows)]
91    {
92        detect_windows_native_encoding()
93    }
94
95    #[cfg(not(windows))]
96    {
97        detect_unix_native_encoding()
98    }
99}
100
101/// Encodes a string using the current NWN encoding.
102///
103/// # Errors
104///
105/// Returns [`EncodingConversionError`] if the string cannot be represented in
106/// the current NWN encoding.
107#[instrument(level = "debug", skip_all, err, fields(input_len = value.len()))]
108pub fn to_nwnrs_encoding(value: &str) -> Result<Vec<u8>, EncodingConversionError> {
109    encode_with(get_nwnrs_encoding(), value, "encode text for NWN")
110}
111
112/// Decodes bytes using the current NWN encoding.
113///
114/// # Errors
115///
116/// Returns [`EncodingConversionError`] if the bytes cannot be decoded with the
117/// current NWN encoding.
118#[instrument(level = "debug", skip_all, err, fields(input_len = bytes.len()))]
119pub fn from_nwnrs_encoding(bytes: &[u8]) -> Result<String, EncodingConversionError> {
120    decode_with(get_nwnrs_encoding(), bytes, "decode text from NWN")
121}
122
123/// Encodes a string using the current native system encoding.
124///
125/// # Errors
126///
127/// Returns [`EncodingConversionError`] if the system encoding cannot be
128/// detected or the string cannot be represented in it.
129#[instrument(level = "debug", skip_all, err, fields(input_len = value.len()))]
130pub fn to_native_encoding(value: &str) -> Result<Vec<u8>, EncodingConversionError> {
131    let encoding = get_native_encoding().map_err(|error| {
132        EncodingConversionError::new(error.to_string(), "encode text for native output")
133    })?;
134    encode_with(encoding, value, "encode text for native output")
135}
136
137/// Decodes bytes using the current native system encoding.
138///
139/// # Errors
140///
141/// Returns [`EncodingConversionError`] if the system encoding cannot be
142/// detected or the bytes cannot be decoded with it.
143#[instrument(level = "debug", skip_all, err, fields(input_len = bytes.len()))]
144pub fn from_native_encoding(bytes: &[u8]) -> Result<String, EncodingConversionError> {
145    let encoding = get_native_encoding().map_err(|error| {
146        EncodingConversionError::new(error.to_string(), "decode text from native input")
147    })?;
148    decode_with(encoding, bytes, "decode text from native input")
149}
150
151pub(crate) fn encode_with(
152    encoding: &'static Encoding,
153    value: &str,
154    operation: &'static str,
155) -> Result<Vec<u8>, EncodingConversionError> {
156    let (encoded, _, had_errors) = encoding.encode(value);
157    if had_errors {
158        Err(EncodingConversionError::new(encoding.name(), operation))
159    } else {
160        Ok(encoded.into_owned())
161    }
162}
163
164pub(crate) fn decode_with(
165    encoding: &'static Encoding,
166    bytes: &[u8],
167    operation: &'static str,
168) -> Result<String, EncodingConversionError> {
169    let (decoded, _, had_errors) = encoding.decode(bytes);
170    if had_errors {
171        Err(EncodingConversionError::new(encoding.name(), operation))
172    } else {
173        Ok(decoded.into_owned())
174    }
175}
176
177#[cfg(not(windows))]
178fn detect_unix_native_encoding() -> Result<&'static Encoding, NativeEncodingError> {
179    for key in ["LC_ALL", "LC_CTYPE", "LANG"] {
180        if let Ok(value) = env::var(key)
181            && let Some(encoding) = parse_locale_encoding(&value)
182        {
183            return Ok(encoding);
184        }
185    }
186
187    Err(NativeEncodingError::new(
188        "unable to determine native encoding from LC_ALL, LC_CTYPE, or LANG",
189    ))
190}
191
/// Determines the native encoding on Windows by asking `chcp` for the active
/// code page.
///
/// The code page number is taken from the command's output regardless of the
/// display language, then mapped to an encoding. If the command cannot be
/// run, prints no usable number, or reports a code page with no known
/// encoding, code page 1252 is used instead.
///
/// NOTE(review): `chcp` reports the console code page, which may differ from
/// the system ANSI code page. Confirm this is the intended source.
///
/// # Errors
///
/// Returns [`NativeEncodingError`] only if even the 1252 fallback cannot be
/// mapped to an encoding.
#[cfg(windows)]
fn detect_windows_native_encoding() -> Result<&'static Encoding, NativeEncodingError> {
    let detected = std::process::Command::new("chcp")
        .output()
        .ok()
        .and_then(|output| {
            // Localized output may be in a legacy code page rather than UTF-8;
            // a lossy conversion keeps the ASCII digits we need intact.
            let text = String::from_utf8_lossy(&output.stdout);
            parse_chcp_code_page(&text)
        })
        .and_then(codepage::to_encoding);

    detected
        .or_else(|| codepage::to_encoding(1252))
        .ok_or_else(|| NativeEncodingError::new("unable to determine Windows native encoding"))
}

/// Extracts the code page number from `chcp` output.
///
/// Returns the last run of ASCII digits on the first line where that run
/// parses as a `u16`, e.g. `437` from `"Active code page: 437"` or `850` from
/// `"Aktive Codepage: 850."`. This avoids matching on localized label text.
#[cfg(windows)]
fn parse_chcp_code_page(output: &str) -> Option<u16> {
    output.lines().find_map(|line| {
        line.rsplit(|c: char| !c.is_ascii_digit())
            .find(|run| !run.is_empty())?
            .parse()
            .ok()
    })
}
219
220pub(crate) fn parse_locale_encoding(locale: &str) -> Option<&'static Encoding> {
221    let trimmed = locale.trim();
222    if trimmed.is_empty() {
223        return None;
224    }
225
226    let without_modifier = trimmed.split('@').next().unwrap_or(trimmed);
227    let candidate = without_modifier
228        .split_once('.')
229        .map_or(without_modifier, |(_, encoding)| encoding);
230
231    Encoding::for_label(candidate.trim().as_bytes())
232}