xtask_todo_lib/devshell/
host_text.rs1use std::io;
10use std::path::Path;
11
/// Returns `bytes` without a leading UTF-8 byte-order mark (`EF BB BF`).
///
/// Input that does not start with those three bytes (including input shorter
/// than three bytes) is returned unchanged. The result always borrows from
/// `bytes`; nothing is copied.
#[must_use]
pub fn strip_utf8_bom(bytes: &[u8]) -> &[u8] {
    match bytes {
        // Only a BOM at the very start is dropped.
        [0xEF, 0xBB, 0xBF, after_bom @ ..] => after_bom,
        _ => bytes,
    }
}
21
22pub fn decode_host_text_bytes(bytes: &[u8]) -> Result<String, io::Error> {
27 use io::ErrorKind::InvalidData;
28
29 if bytes.starts_with(&[0xFF, 0xFE]) {
30 let rest = &bytes[2..];
31 if !rest.len().is_multiple_of(2) {
32 return Err(io::Error::new(
33 InvalidData,
34 "invalid UTF-16LE: odd byte length after BOM",
35 ));
36 }
37 let u16s: Vec<u16> = rest
38 .chunks_exact(2)
39 .map(|c| u16::from_le_bytes([c[0], c[1]]))
40 .collect();
41 return String::from_utf16(&u16s).map_err(|e| io::Error::new(InvalidData, e));
42 }
43
44 if bytes.starts_with(&[0xFE, 0xFF]) {
45 let rest = &bytes[2..];
46 if !rest.len().is_multiple_of(2) {
47 return Err(io::Error::new(
48 InvalidData,
49 "invalid UTF-16BE: odd byte length after BOM",
50 ));
51 }
52 let u16s: Vec<u16> = rest
53 .chunks_exact(2)
54 .map(|c| u16::from_be_bytes([c[0], c[1]]))
55 .collect();
56 return String::from_utf16(&u16s).map_err(|e| io::Error::new(InvalidData, e));
57 }
58
59 let b = strip_utf8_bom(bytes);
60 String::from_utf8(b.to_vec()).map_err(|e| io::Error::new(InvalidData, e))
61}
62
63pub fn read_host_text(path: &Path) -> io::Result<String> {
68 let bytes = std::fs::read(path)?;
69 decode_host_text_bytes(&bytes)
70}
71
72#[must_use]
74pub fn script_text_from_vfs_bytes(bytes: &[u8]) -> Option<String> {
75 decode_host_text_bytes(bytes).ok()
76}
77
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that decoding `bytes` fails with `InvalidData` and returns the
    /// error's display text so callers can check the message.
    fn invalid_data_message(bytes: &[u8]) -> String {
        let err = decode_host_text_bytes(bytes).expect_err("input should be rejected");
        assert_eq!(err.kind(), std::io::ErrorKind::InvalidData);
        err.to_string()
    }

    #[test]
    fn utf8_plain() {
        assert_eq!(decode_host_text_bytes(b"hello").unwrap(), "hello");
    }

    #[test]
    fn utf8_empty() {
        assert_eq!(decode_host_text_bytes(b"").unwrap(), "");
    }

    #[test]
    fn utf8_bom_stripped() {
        let mut v = vec![0xEF, 0xBB, 0xBF];
        v.extend_from_slice(b"echo ok");
        assert_eq!(decode_host_text_bytes(&v).unwrap(), "echo ok");
    }

    #[test]
    fn utf8_invalid_rejected() {
        // 0xC3 starts a two-byte sequence, but 0x28 is not a continuation byte.
        invalid_data_message(&[0xC3, 0x28]);
    }

    #[test]
    fn utf16le_bom_hello() {
        let bytes: Vec<u8> = vec![0xFF, 0xFE, 0x48, 0x00, 0x69, 0x00];
        assert_eq!(decode_host_text_bytes(&bytes).unwrap(), "Hi");
    }

    #[test]
    fn utf16be_bom_hi() {
        let bytes: Vec<u8> = vec![0xFE, 0xFF, 0x00, 0x48, 0x00, 0x69];
        assert_eq!(decode_host_text_bytes(&bytes).unwrap(), "Hi");
    }

    #[test]
    fn utf16_bom_only_is_empty() {
        assert_eq!(decode_host_text_bytes(&[0xFF, 0xFE]).unwrap(), "");
        assert_eq!(decode_host_text_bytes(&[0xFE, 0xFF]).unwrap(), "");
    }

    #[test]
    fn utf16_surrogate_pair() {
        // U+1F600 is encoded as the surrogate pair D83D DE00.
        assert_eq!(
            decode_host_text_bytes(&[0xFF, 0xFE, 0x3D, 0xD8, 0x00, 0xDE]).unwrap(),
            "\u{1F600}"
        );
        assert_eq!(
            decode_host_text_bytes(&[0xFE, 0xFF, 0xD8, 0x3D, 0xDE, 0x00]).unwrap(),
            "\u{1F600}"
        );
    }

    #[test]
    fn utf16_odd_length_rejected() {
        assert_eq!(
            invalid_data_message(&[0xFF, 0xFE, 0x48]),
            "invalid UTF-16LE: odd byte length after BOM"
        );
        assert_eq!(
            invalid_data_message(&[0xFE, 0xFF, 0x00]),
            "invalid UTF-16BE: odd byte length after BOM"
        );
    }

    #[test]
    fn utf16_unpaired_surrogate_rejected() {
        // A lone high surrogate (D800) is not valid UTF-16.
        invalid_data_message(&[0xFF, 0xFE, 0x00, 0xD8]);
        invalid_data_message(&[0xFE, 0xFF, 0xD8, 0x00]);
    }

    #[test]
    fn strip_utf8_bom_only() {
        assert_eq!(strip_utf8_bom(b"a"), b"a");
        assert_eq!(strip_utf8_bom(&[0xEF, 0xBB, 0xBF]), b"");
    }

    #[test]
    fn strip_utf8_bom_edge_cases() {
        assert_eq!(strip_utf8_bom(b""), b"");
        // A truncated BOM is left untouched.
        assert_eq!(strip_utf8_bom(b"\xEF\xBB"), b"\xEF\xBB");
        // Only a BOM at the very start is removed.
        assert_eq!(strip_utf8_bom(b"a\xEF\xBB\xBF"), b"a\xEF\xBB\xBF");
        assert_eq!(strip_utf8_bom(b"\xEF\xBB\xBFecho ok"), b"echo ok");
    }

    #[test]
    fn script_text_none_on_invalid_bytes() {
        assert_eq!(
            script_text_from_vfs_bytes(b"echo ok"),
            Some(String::from("echo ok"))
        );
        assert_eq!(script_text_from_vfs_bytes(&[0xC3, 0x28]), None);
        assert_eq!(script_text_from_vfs_bytes(&[0xFF, 0xFE, 0x48]), None);
    }
}