1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
// Crate root: besides the two submodules declared below, this file defines
// `partial_from_utf8` and `partial_from_utf8_mut`, which decode a byte buffer
// whose final code point may be cut short.

// Warn when a trait object type is written without the `dyn` keyword.
#![warn(bare_trait_objects)]
// Refuse to compile if a public type does not implement `Debug`.
#![deny(missing_debug_implementations)]
// Warn about public items that carry no documentation.
#![warn(missing_docs)]
// NOTE(review): the contents of `read` and `write` are not visible from this
// file; presumably they hold the reading and writing halves of the crate —
// confirm against the module sources before documenting them further.
pub mod read;
pub mod write;
use std::str::{from_utf8, from_utf8_mut, from_utf8_unchecked, from_utf8_unchecked_mut, Utf8Error};
/// Decodes `buf` as UTF-8 while tolerating a code point that is cut off at the
/// very end of the buffer.
///
/// On success the returned tuple holds:
///
/// * the longest prefix of `buf` that is valid UTF-8, as a `&str`;
/// * the remaining bytes, which are the beginning of a code point whose
///   continuation bytes have not arrived yet (empty when all of `buf` decoded).
///
/// # Errors
///
/// Returns the [`Utf8Error`] produced by validation when `buf` contains a byte
/// sequence that is actually invalid, i.e. one that no additional input could
/// turn into valid UTF-8.
pub fn partial_from_utf8(buf: &[u8]) -> Result<(&str, &[u8]), Utf8Error> {
    let err = match from_utf8(buf) {
        // Everything decoded, so nothing is left over.
        Ok(text) => return Ok((text, &[])),
        Err(err) => err,
    };

    // `error_len()` is `None` only when the input ended in the middle of a
    // code point; any `Some(_)` means a genuinely malformed sequence.
    if err.error_len().is_some() {
        return Err(err);
    }

    let (head, tail) = buf.split_at(err.valid_up_to());
    // SAFETY: `from_utf8` just reported that the first `valid_up_to()` bytes of
    // `buf` are valid UTF-8, and `head` is exactly those bytes.
    let text = unsafe { from_utf8_unchecked(head) };
    Ok((text, tail))
}
/// Mutable counterpart of `partial_from_utf8`: decodes `buf` as UTF-8 while
/// tolerating a code point that is cut off at the very end of the buffer.
///
/// On success the returned tuple holds:
///
/// * the longest prefix of `buf` that is valid UTF-8, as a `&mut str`;
/// * the remaining bytes, which are the beginning of a code point whose
///   continuation bytes have not arrived yet (empty when all of `buf` decoded).
///
/// # Errors
///
/// Returns the [`Utf8Error`] produced by validation when `buf` contains a byte
/// sequence that is actually invalid, i.e. one that no additional input could
/// turn into valid UTF-8.
pub fn partial_from_utf8_mut(buf: &mut [u8]) -> Result<(&mut str, &mut [u8]), Utf8Error> {
    // Keep only the validation outcome, not the decoded string, so that the
    // mutable borrow of `buf` taken by `from_utf8_mut` ends on this line and
    // `buf` can be borrowed again below.
    let failure = from_utf8_mut(buf).err();

    let err = match failure {
        None => {
            // SAFETY: `from_utf8_mut` just accepted the whole of `buf`.
            let text = unsafe { from_utf8_unchecked_mut(buf) };
            return Ok((text, &mut []));
        }
        Some(err) => err,
    };

    // `error_len()` is `None` only when the input ended in the middle of a
    // code point; any `Some(_)` means a genuinely malformed sequence.
    if err.error_len().is_some() {
        return Err(err);
    }

    let (head, tail) = buf.split_at_mut(err.valid_up_to());
    // SAFETY: validation reported that the first `valid_up_to()` bytes of
    // `buf` are valid UTF-8, and `head` is exactly those bytes.
    let text = unsafe { from_utf8_unchecked_mut(head) };
    Ok((text, tail))
}
#[cfg(test)]
mod tests {
    /// Unit tests for `partial_from_utf8`.
    mod test_partial_from_utf8 {
        use crate::partial_from_utf8;

        /// An empty buffer decodes to an empty string with nothing left over.
        #[test]
        fn test_empty() {
            assert_eq!(partial_from_utf8(b""), Ok(("", b"" as &[u8])));
        }

        /// A buffer made only of complete code points decodes in full.
        #[test]
        fn test_simple_string() {
            assert_eq!(
                partial_from_utf8(&[0x61, 0xC3, 0xA9]),
                Ok(("aé", b"" as &[u8]))
            );
        }

        /// A four-byte code point missing its last byte is handed back as the
        /// undecoded remainder rather than reported as an error.
        #[test]
        fn test_partial_string() {
            assert_eq!(
                partial_from_utf8(&[0xF0, 0x9F, 0x98, 0x80, 0xF0, 0x9F, 0x98]),
                Ok(("😀", &[0xF0u8, 0x9Fu8, 0x98u8] as &[u8]))
            );
        }

        /// A byte that is not valid in UTF-8 is an error, even as the last byte.
        #[test]
        fn test_not_unicode() {
            // `expect_err` prints the unexpectedly successful value on failure,
            // which a bare `assert!(false)` would not.
            let err = partial_from_utf8(&[0x61, 0xFF])
                .expect_err("0xFF is not valid UTF-8, so decoding must fail");
            assert_eq!(err.valid_up_to(), 1);
            assert!(err.error_len().is_some());
        }

        /// A multi-byte sequence interrupted by a non-continuation byte is an
        /// error, not a "partial" code point.
        #[test]
        fn test_bad_unicode() {
            let err = partial_from_utf8(&[0x61, 0xF0, 0x9F, 0xF0])
                .expect_err("a lead byte inside an unfinished sequence must fail");
            assert_eq!(err.valid_up_to(), 1);
            assert!(err.error_len().is_some());
        }
    }
}