1use std::io;
2use std::io::{Error, ErrorKind, Read};
3use std::mem::size_of;
4
5pub trait StringRead : private::Sealed {
11
12 fn read_string_u16_le_len_utf8(&mut self) -> io::Result<String>;
17
18 fn read_string_u16_be_len_utf8(&mut self) -> io::Result<String>;
23
24 fn read_string_u32_le_len_utf8(&mut self) -> io::Result<String>;
29
30 fn read_string_u32_be_len_utf8(&mut self) -> io::Result<String>;
35
36 fn read_string_zero_terminated_utf8(&mut self) -> io::Result<String>;
40
41 fn read_string_utf8(&mut self, size: usize) -> io::Result<String>;
45
46 fn read_string_utf16_be(&mut self, size_in_characters: usize) -> io::Result<String>;
50
51 fn read_string_utf16_le(&mut self, size_in_characters: usize) -> io::Result<String>;
55
56 fn read_string_utf32_be(&mut self, size_in_characters: usize) -> io::Result<String>;
60
61 fn read_string_utf32_le(&mut self, size_in_characters: usize) -> io::Result<String>;
65
66 fn read_java_data_input_utf(&mut self) -> io::Result<String>;
75}
76
77impl <T> StringRead for T where T: Read {
78 fn read_string_u16_le_len_utf8(&mut self) -> io::Result<String> {
79 let mut len_bytes = [0u8; 2];
80 self.read_exact(len_bytes.as_mut_slice())?;
81 let len = u16::from_le_bytes(len_bytes);
82 return self.read_string_utf8(len as usize);
83 }
84
85 fn read_string_u16_be_len_utf8(&mut self) -> io::Result<String> {
86 let mut len_bytes = [0u8; 2];
87 self.read_exact(len_bytes.as_mut_slice())?;
88 let len = u16::from_be_bytes(len_bytes);
89 return self.read_string_utf8(len as usize);
90 }
91
92 fn read_string_u32_le_len_utf8(&mut self) -> io::Result<String> {
93 let mut len_bytes = [0u8; 4];
94 self.read_exact(len_bytes.as_mut_slice())?;
95 let len = u32::from_le_bytes(len_bytes);
96 return self.read_string_utf8(len as usize);
97 }
98
99 fn read_string_u32_be_len_utf8(&mut self) -> io::Result<String> {
100 let mut len_bytes = [0u8; 4];
101 self.read_exact(len_bytes.as_mut_slice())?;
102 let len = u32::from_be_bytes(len_bytes);
103 return self.read_string_utf8(len as usize);
104 }
105
106 fn read_string_zero_terminated_utf8(&mut self) -> io::Result<String> {
107 let mut data = Vec::with_capacity(64);
108 let mut buf = [0u8];
109 let sl = buf.as_mut_slice();
110 loop {
111 self.read_exact(sl)?;
112 if sl[0] == 0 {
113 break;
114 }
115
116 data.push(sl[0]);
117 }
118
119 return String::from_utf8(data).map_err(|_e| Error::new(ErrorKind::InvalidData, "invalid utf-8 data"));
120 }
121
122 fn read_string_utf8(&mut self, size: usize) -> io::Result<String> {
123 let mut data = vec![0u8; size];
124 self.read_exact(data.as_mut_slice())?;
125 return String::from_utf8(data).map_err(|_e| Error::new(ErrorKind::InvalidData, "invalid utf-8 data"));
126 }
127
128 #[cfg(target_endian = "little")]
129 fn read_string_utf16_be(&mut self, size_in_characters: usize) -> io::Result<String> {
130 if size_in_characters == 0 {
131 return Ok("".to_string());
132 }
133
134 let mut data = vec![0u8; size_in_characters<<1];
135 self.read_exact(data.as_mut_slice())?;
136
137 let sl :&mut [u16] = unsafe { std::slice::from_raw_parts_mut(data.as_mut_ptr().cast(), size_in_characters) };
138 for i in 0 .. sl.len() {
139 sl[i] = sl[i].to_be();
140 }
141
142 if sl[0] == 0xFFFE {
143 return Err(Error::new(ErrorKind::InvalidData, "Encountered byte order mark 0xFFFE. This indicates a wrong byte order.".to_string()));
144 }
145
146 return String::from_utf16(sl).map_err(|_e| Error::new(ErrorKind::InvalidData, "invalid utf-16 data"));
147 }
148
149 #[cfg(target_endian = "big")]
150 fn read_string_utf16_be(&mut self, size_in_characters: usize) -> io::Result<String> {
151 if size_in_characters == 0 {
152 return Ok("".to_string());
153 }
154
155 let mut data = vec![0u8; size_in_characters<<1];
156 self.read_exact(data.as_mut_slice())?;
157
158 let sl :&[u16] = unsafe { std::slice::from_raw_parts_mut(data.as_mut_ptr().cast(), size_in_characters) };
159
160 if sl[0] == 0xFFFE {
161 return Err(Error::new(ErrorKind::InvalidData, "Encountered byte order mark 0xFFFE. This indicates a wrong byte order.".to_string()));
162 }
163
164 return String::from_utf16(sl).map_err(|_e| Error::new(ErrorKind::InvalidData, "invalid utf-16 data"));
165 }
166
167 #[cfg(target_endian = "little")]
168 fn read_string_utf16_le(&mut self, size_in_characters: usize) -> io::Result<String> {
169 if size_in_characters == 0 {
170 return Ok("".to_string());
171 }
172
173 let mut data = vec![0u8; size_in_characters<<1];
174 self.read_exact(data.as_mut_slice())?;
175
176 let sl :&[u16] = unsafe { std::slice::from_raw_parts_mut(data.as_mut_ptr().cast(), size_in_characters) };
177
178 if sl[0] == 0xFFFE {
179 return Err(Error::new(ErrorKind::InvalidData, "Encountered byte order mark 0xFFFE. This indicates a wrong byte order.".to_string()));
180 }
181
182 return String::from_utf16(sl).map_err(|_e| Error::new(ErrorKind::InvalidData, "invalid utf-16 data"));
183 }
184
185 #[cfg(target_endian = "big")]
186 fn read_string_utf16_le(&mut self, size_in_characters: usize) -> io::Result<String> {
187 if size_in_characters == 0 {
188 return Ok("".to_string());
189 }
190
191 let mut data = vec![0u8; size_in_characters<<1];
192 self.read_exact(data.as_mut_slice())?;
193
194 let sl :&mut [u16] = unsafe { std::slice::from_raw_parts_mut(data.as_mut_ptr().cast(), size_in_characters) };
195 for i in 0 .. sl.len() {
196 sl[i] = sl[i].to_le();
197 }
198
199 if sl[0] == 0xFFFE {
200 return Err(Error::new(ErrorKind::InvalidData, "Encountered byte order mark 0xFFFE. This indicates a wrong byte order.".to_string()));
201 }
202
203 return String::from_utf16(sl).map_err(|_e| Error::new(ErrorKind::InvalidData, "invalid utf-16 data"));
204 }
205
206 #[cfg(target_endian = "big")]
207 fn read_string_utf32_be(&mut self, size_in_characters: usize) -> io::Result<String> {
208 if size_in_characters == 0 {
209 return Ok("".to_string());
210 }
211 let mut data = vec![0u32; size_in_characters];
212 let sl : &mut [u8] = unsafe { std::slice::from_raw_parts_mut(data.as_mut_ptr().cast(), size_in_characters * size_of::<char>()) };
213 self.read_exact(sl)?;
214
215 if data[0] == 0xFFFE0000u32 {
216 return Err(Error::new(ErrorKind::InvalidData, "Encountered byte order mark 0xFFFE. This indicates a wrong byte order.".to_string()));
217 }
218
219 for i in 0 .. data.len() {
220 let cur = data[i];
221
222 if char::from_u32(cur).is_none() {
223 return Err(Error::new(ErrorKind::InvalidData, format!("{} is not a valid unicode codepoint.", cur)));
224 }
225 }
226
227 let sl : &mut [char] = unsafe { std::slice::from_raw_parts_mut(data.as_mut_ptr().cast(), size_in_characters) };
228 let str : String = sl.iter().collect();
229 return Ok(str);
230 }
231
232 #[cfg(target_endian = "big")]
233 fn read_string_utf32_le(&mut self, size_in_characters: usize) -> io::Result<String> {
234 if size_in_characters == 0 {
235 return Ok("".to_string());
236 }
237 let mut data = vec![0u32; size_in_characters];
238 let sl : &mut [u8] = unsafe { std::slice::from_raw_parts_mut(data.as_mut_ptr().cast(), size_in_characters * 4) };
239 self.read_exact(sl)?;
240
241 if data[0].to_le() == 0xFFFE0000u32 {
242 return Err(Error::new(ErrorKind::InvalidData, "Encountered byte order mark 0xFFFE. This indicates a wrong byte order.".to_string()));
243 }
244
245 for i in 0 .. data.len() {
246 let cur = data[i].to_le();
247 data[i] = cur;
248
249 if char::from_u32(cur).is_none() {
250 return Err(Error::new(ErrorKind::InvalidData, format!("{} is not a valid unicode codepoint.", cur)));
251 }
252 }
253
254 let sl : &mut [char] = unsafe { std::slice::from_raw_parts_mut(data.as_mut_ptr().cast(), size_in_characters) };
255 let str : String = sl.iter().collect();
256 return Ok(str);
257 }
258
259 #[cfg(target_endian = "little")]
260 fn read_string_utf32_be(&mut self, size_in_characters: usize) -> io::Result<String> {
261 if size_in_characters == 0 {
262 return Ok("".to_string());
263 }
264 let mut data = vec![0u32; size_in_characters];
265 let sl : &mut [u8] = unsafe { std::slice::from_raw_parts_mut(data.as_mut_ptr().cast(), size_in_characters * 4) };
266 self.read_exact(sl)?;
267
268 if data[0].to_be() == 0xFFFE0000u32 {
269 return Err(Error::new(ErrorKind::InvalidData, "Encountered byte order mark 0xFFFE. This indicates a wrong byte order.".to_string()));
270 }
271
272 for i in 0 .. data.len() {
273 let cur = data[i].to_be();
274 data[i] = cur;
275
276 if char::from_u32(cur).is_none() {
277 return Err(Error::new(ErrorKind::InvalidData, format!("{} is not a valid unicode codepoint.", cur)));
278 }
279 }
280
281 let sl : &mut [char] = unsafe { std::slice::from_raw_parts_mut(data.as_mut_ptr().cast(), size_in_characters) };
282 let str : String = sl.iter().collect();
283 return Ok(str);
284 }
285
286 #[cfg(target_endian = "little")]
287 fn read_string_utf32_le(&mut self, size_in_characters: usize) -> io::Result<String> {
288 if size_in_characters == 0 {
289 return Ok("".to_string());
290 }
291 let mut data = vec![0u32; size_in_characters];
292 let sl : &mut [u8] = unsafe { std::slice::from_raw_parts_mut(data.as_mut_ptr().cast(), size_in_characters * size_of::<char>()) };
293 self.read_exact(sl)?;
294
295 if data[0] == 0xFFFE0000u32 {
296 return Err(Error::new(ErrorKind::InvalidData, "Encountered byte order mark 0xFFFE. This indicates a wrong byte order.".to_string()));
297 }
298
299 for i in 0 .. data.len() {
300 let cur = data[i];
301
302 if char::from_u32(cur).is_none() {
303 return Err(Error::new(ErrorKind::InvalidData, format!("{} is not a valid unicode codepoint.", cur)));
304 }
305 }
306
307 let sl : &mut [char] = unsafe { std::slice::from_raw_parts_mut(data.as_mut_ptr().cast(), size_in_characters) };
308 let str : String = sl.iter().collect();
309 return Ok(str);
310 }
311
312 fn read_java_data_input_utf(&mut self) -> io::Result<String> {
313 let mut buf = [0u8; 2];
314 self.read_exact(buf.as_mut_slice())?;
315 let byte_count = (buf[0] as u16 >> 8 | buf[1] as u16) as usize;
317
318 let mut buf = vec![0u8; byte_count];
319 self.read_exact(buf.as_mut_slice())?;
320
321 let mut characters: Vec<u16> = Vec::with_capacity(byte_count);
323
324 let mut index = 0usize;
325 while index < buf.len() {
326 let c = buf[index] as u32;
327
328 match c >> 4 {
329 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 => {
330 characters.push(c as u16);
331 index += 1;
332 }
333 12 | 13 => {
334 if index + 2 > buf.len() {
335 return Err(Error::new(ErrorKind::InvalidData, "Invalid input"));
336 }
337 let c2 = buf[index + 1] as u32;
338 index += 2;
339 if (c2 & 0xC0) != 0x80 {
340 return Err(Error::new(ErrorKind::InvalidData, "Invalid input"));
341 }
342
343 let v = ((c & 0x1F) << 6) | (c2 & 0x3F);
344 characters.push(v as u16)
345 }
346 14 => {
347 if index + 3 > buf.len() {
348 return Err(Error::new(ErrorKind::InvalidData, "Invalid input"));
349 }
350 let c2 = buf[index + 1] as u32;
351 let c3 = buf[index + 2] as u32;
352 index += 3;
353 if ((c2 & 0xC0) != 0x80) || ((c3 & 0xC0) != 0x80) {
354 return Err(Error::new(ErrorKind::InvalidData, "Invalid input"));
355 }
356 let v = ((c & 0x0F) << 12) | ((c2 & 0x3F) << 6) | ((c3 & 0x3F) << 0);
357 characters.push(v as u16)
358 }
359 _ => {
360 return Err(Error::new(ErrorKind::InvalidData, "Invalid input"));
361 }
362 }
363 }
364
365 let result = String::from_utf16(&characters).map_err(|_| Error::new(ErrorKind::InvalidData, "Invalid input"))?;
366 Ok(result)
367 }
368}
369
/// Holds the marker trait used to seal `StringRead`.
mod private {
    use std::io::Read;

    /// Marker supertrait of `StringRead`. It is public but lives in a private
    /// module, so code outside this file's module tree cannot name it.
    pub trait Sealed {}

    /// Every reader carries the marker.
    impl<R: Read> Sealed for R {}
}