cross_path/
unicode.rs

1use crate::{PathError, PathResult};
2use encoding_rs::{UTF_8, UTF_16LE, WINDOWS_1252};
3
/// Unicode encoding handler for path strings.
///
/// This is a field-less unit struct: it carries no state and serves as a
/// namespace for the associated functions defined in its `impl` block
/// (encoding detection, conversion and path-string normalization), which are
/// all called as `UnicodeHandler::function(..)` without an instance.
#[derive(Debug, Clone, Copy)]
pub struct UnicodeHandler;
7
8impl UnicodeHandler {
9    /// Detect string encoding
10    #[must_use]
11    pub fn detect_encoding(bytes: &[u8]) -> &'static encoding_rs::Encoding {
12        // Simple UTF-8 detection
13        if String::from_utf8(bytes.to_vec()).is_ok() {
14            return UTF_8;
15        }
16
17        // Try to detect UTF-16 LE (BOM)
18        if bytes.len() >= 2 && bytes[0] == 0xFF && bytes[1] == 0xFE {
19            return UTF_16LE;
20        }
21
22        // Default to Windows-1252 (common Windows encoding)
23        WINDOWS_1252
24    }
25
26    /// Convert bytes to UTF-8 string
27    ///
28    /// # Errors
29    ///
30    /// Returns `PathError` if encoding conversion fails.
31    pub fn convert_to_utf8(bytes: &[u8]) -> PathResult<String> {
32        let encoding = Self::detect_encoding(bytes);
33        let (decoded, _, had_errors) = encoding.decode(bytes);
34
35        if had_errors {
36            return Err(PathError::encoding_error(
37                "Encoding conversion encountered errors",
38            ));
39        }
40
41        Ok(decoded.into_owned())
42    }
43
44    /// Convert UTF-8 string to target encoding bytes
45    ///
46    /// # Errors
47    ///
48    /// Returns `PathError` if encoding conversion fails.
49    pub fn convert_from_utf8(
50        text: &str,
51        target_encoding: &'static encoding_rs::Encoding,
52    ) -> PathResult<Vec<u8>> {
53        let (encoded, _, had_errors) = target_encoding.encode(text);
54
55        if had_errors {
56            return Err(PathError::encoding_error(
57                "Encoding conversion encountered errors",
58            ));
59        }
60
61        Ok(encoded.into_owned())
62    }
63
64    /// Normalize Windows path by removing invalid characters
65    #[must_use]
66    pub fn normalize_windows_path(path: &str) -> String {
67        let mut result = path.to_string();
68
69        // Replace Windows-disallowed characters
70        let invalid_chars = ['<', '>', ':', '"', '|', '?', '*'];
71        for c in invalid_chars {
72            result = result.replace(c, "_");
73        }
74
75        // Remove control characters
76        result = result.chars().filter(|c| !c.is_control()).collect();
77
78        result
79    }
80
81    /// Normalize Unix path by removing invalid characters
82    #[must_use]
83    pub fn normalize_unix_path(path: &str) -> String {
84        let mut result = path.to_string();
85
86        // Unix paths disallow null characters
87        result = result.replace('\0', "");
88
89        // Remove control characters
90        result = result.chars().filter(|c| !c.is_control()).collect();
91
92        result
93    }
94
95    /// Convert path encoding (mainly for Windows non-UTF-8 encodings)
96    ///
97    /// # Errors
98    ///
99    /// Returns `PathError` if encoding conversion fails.
100    pub fn convert_path_encoding(
101        path: &str,
102        from: &'static encoding_rs::Encoding,
103        to: &'static encoding_rs::Encoding,
104    ) -> PathResult<String> {
105        if from == to {
106            return Ok(path.to_string());
107        }
108
109        // Encode then decode
110        let bytes = from.encode(path).0;
111        let (decoded, _, had_errors) = to.decode(&bytes);
112
113        if had_errors {
114            return Err(PathError::encoding_error("Path encoding conversion failed"));
115        }
116
117        Ok(decoded.into_owned())
118    }
119}