/// Reports whether `byte_idx` is a valid place to split `text`.
///
/// The start and the end of `text` always count as breaks.  Any other
/// index is a break unless the byte at `byte_idx` has the bit pattern
/// `10xxxxxx` (a UTF-8 continuation byte), or the index falls between a
/// CR (`0x0D`) and the LF (`0x0A`) that immediately follows it.
///
/// # Panics
///
/// Debug builds assert that `byte_idx <= text.len()`.
#[inline]
pub fn is_break(byte_idx: usize, text: &[u8]) -> bool {
    debug_assert!(byte_idx <= text.len());

    // Both ends of the text are unconditionally boundaries.
    if byte_idx == 0 || byte_idx == text.len() {
        return true;
    }

    let here = text[byte_idx];
    let before = text[byte_idx - 1];

    // Top two bits `10` mark a byte in the middle of a multi-byte sequence.
    let on_continuation_byte = (here & 0b1100_0000) == 0b1000_0000;
    // Splitting here would separate a CR from its LF.
    let inside_crlf = before == b'\r' && here == b'\n';

    !(on_continuation_byte || inside_crlf)
}
15
/// Reports whether the seam between `left` and `right` is a valid break,
/// i.e. whether the two slices may stay separate pieces.
///
/// The seam is *not* a break when the first byte of `right` has the bit
/// pattern `10xxxxxx` (a UTF-8 continuation byte), or when `left` ends
/// with CR (`0x0D`) and `right` starts with LF (`0x0A`).
///
/// # Panics
///
/// Debug builds assert that both slices are non-empty.
#[inline]
pub fn seam_is_break(left: &[u8], right: &[u8]) -> bool {
    debug_assert!(!left.is_empty() && !right.is_empty());

    let head = right[0];
    if (head & 0b1100_0000) == 0b1000_0000 {
        // `right` begins in the middle of a multi-byte sequence.
        return false;
    }

    // Only inspect `left` once the continuation-byte test has passed.
    let tail = left[left.len() - 1];
    !(tail == b'\r' && head == b'\n')
}
25
26#[inline]
32pub fn prev_break(byte_idx: usize, text: &[u8]) -> usize {
33 debug_assert!(byte_idx <= text.len());
35
36 if byte_idx == 0 {
37 0
38 } else {
39 let mut boundary_idx = byte_idx - 1;
40 while !is_break(boundary_idx, text) {
41 boundary_idx -= 1;
42 }
43 boundary_idx
44 }
45}
46
47#[inline]
53pub fn next_break(byte_idx: usize, text: &[u8]) -> usize {
54 debug_assert!(byte_idx <= text.len());
56
57 if byte_idx == text.len() {
58 text.len()
59 } else {
60 let mut boundary_idx = byte_idx + 1;
61 while !is_break(boundary_idx, text) {
62 boundary_idx += 1;
63 }
64 boundary_idx
65 }
66}
67
68#[inline]
75pub fn nearest_internal_break(byte_idx: usize, text: &[u8]) -> usize {
76 debug_assert!(byte_idx <= text.len());
78
79 let left = if is_break(byte_idx, text) && byte_idx != text.len() {
81 byte_idx
82 } else {
83 prev_break(byte_idx, text)
84 };
85 let right = next_break(byte_idx, text);
86
87 if left == 0 || (right != text.len() && (byte_idx - left) >= (right - byte_idx)) {
90 return right;
91 } else {
92 return left;
93 }
94}
95
96#[inline]
97pub fn find_good_split(byte_idx: usize, text: &[u8], bias_left: bool) -> usize {
98 debug_assert!(byte_idx <= text.len());
100
101 if is_break(byte_idx, text) {
102 byte_idx
103 } else {
104 let prev = prev_break(byte_idx, text);
105 let next = next_break(byte_idx, text);
106 if bias_left {
107 if prev > 0 {
108 prev
109 } else {
110 next
111 }
112 } else {
113 if next < text.len() {
114 next
115 } else {
116 prev
117 }
118 }
119 }
120}
121
// Unit tests for the break-finding helpers.  Besides CR/LF handling they
// also cover `prev_break`, `next_break`, `find_good_split` and the UTF-8
// continuation-byte rule.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn crlf_segmenter_01() {
        let text = b"Hello world!\r\nHow's it going?";

        assert!(is_break(0, b""));
        assert!(is_break(0, text));
        assert!(is_break(12, text));
        assert!(!is_break(13, text));
        assert!(is_break(14, text));
        assert!(is_break(19, text));
    }

    #[test]
    fn crlf_segmenter_02() {
        let l = b"Hello world!\r";
        let r = b"\nHow's it going?";

        assert!(!seam_is_break(l, r));
        assert!(!seam_is_break(l, b"\n"));
        assert!(!seam_is_break(b"\r", r));
        assert!(!seam_is_break(b"\r", b"\n"));
        assert!(seam_is_break(r, l));
        assert!(seam_is_break(b"\n", b"\r"));
    }

    #[test]
    fn nearest_internal_break_01() {
        let text = b"Hello world!";
        assert_eq!(1, nearest_internal_break(0, text));
        assert_eq!(6, nearest_internal_break(6, text));
        assert_eq!(11, nearest_internal_break(12, text));
    }

    #[test]
    fn nearest_internal_break_02() {
        let text = b"Hello\r\n world!";
        assert_eq!(5, nearest_internal_break(5, text));
        assert_eq!(7, nearest_internal_break(6, text));
        assert_eq!(7, nearest_internal_break(7, text));
    }

    #[test]
    fn nearest_internal_break_03() {
        let text = b"\r\nHello world!\r\n";
        assert_eq!(2, nearest_internal_break(0, text));
        assert_eq!(2, nearest_internal_break(1, text));
        assert_eq!(2, nearest_internal_break(2, text));
        assert_eq!(14, nearest_internal_break(14, text));
        assert_eq!(14, nearest_internal_break(15, text));
        assert_eq!(14, nearest_internal_break(16, text));
    }

    #[test]
    fn nearest_internal_break_04() {
        let text = b"\r\n";
        assert_eq!(2, nearest_internal_break(0, text));
        assert_eq!(2, nearest_internal_break(1, text));
        assert_eq!(2, nearest_internal_break(2, text));
    }

    #[test]
    fn is_break_01() {
        let text = b"\n\r\n\r\n\r\n\r\n\r\n\r";

        assert!(is_break(0, text));
        assert!(is_break(12, text));
        assert!(is_break(3, text));
        assert!(!is_break(6, text));
    }

    // An index that lands on a UTF-8 continuation byte is never a break.
    #[test]
    fn is_break_utf8() {
        // 'a', then the two-byte sequence C3 A9, then 'b'.
        let text = b"a\xC3\xA9b";

        assert!(is_break(0, text));
        assert!(is_break(1, text));
        assert!(!is_break(2, text));
        assert!(is_break(3, text));
        assert!(is_break(4, text));
    }

    #[test]
    fn seam_is_break_01() {
        let text1 = b"\r\n\r\n\r\n";
        let text2 = b"\r\n\r\n";

        assert!(seam_is_break(text1, text2));
    }

    #[test]
    fn seam_is_break_02() {
        let text1 = b"\r\n\r\n\r";
        let text2 = b"\n\r\n\r\n";

        assert!(!seam_is_break(text1, text2));
    }

    // A seam that would cut a multi-byte sequence in two is not a break.
    #[test]
    fn seam_is_break_utf8() {
        assert!(!seam_is_break(b"a\xC3", b"\xA9b"));
        assert!(seam_is_break(b"a", b"\xC3\xA9b"));
    }

    #[test]
    fn prev_break_01() {
        let text = b"Hello\r\n world!";
        assert_eq!(0, prev_break(0, text));
        assert_eq!(0, prev_break(1, text));
        assert_eq!(4, prev_break(5, text));
        assert_eq!(5, prev_break(6, text));
        // Index 6 sits between CR and LF, so it is skipped.
        assert_eq!(5, prev_break(7, text));
        assert_eq!(7, prev_break(8, text));
        assert_eq!(13, prev_break(14, text));
    }

    #[test]
    fn next_break_01() {
        let text = b"Hello\r\n world!";
        assert_eq!(1, next_break(0, text));
        assert_eq!(5, next_break(4, text));
        // Index 6 sits between CR and LF, so it is skipped.
        assert_eq!(7, next_break(5, text));
        assert_eq!(7, next_break(6, text));
        assert_eq!(8, next_break(7, text));
        assert_eq!(14, next_break(13, text));
        assert_eq!(14, next_break(14, text));
    }

    #[test]
    fn prev_next_break_utf8() {
        let text = b"a\xC3\xA9b";
        assert_eq!(1, prev_break(2, text));
        assert_eq!(1, prev_break(3, text));
        assert_eq!(3, prev_break(4, text));
        assert_eq!(1, next_break(0, text));
        assert_eq!(3, next_break(1, text));
        assert_eq!(3, next_break(2, text));
    }

    #[test]
    fn find_good_split_01() {
        let text = b"a\r\nb";

        // Indices that are already breaks come back unchanged.
        assert_eq!(0, find_good_split(0, text, true));
        assert_eq!(1, find_good_split(1, text, false));
        assert_eq!(4, find_good_split(4, text, true));

        // Inside the CRLF pair the bias picks the side.
        assert_eq!(1, find_good_split(2, text, true));
        assert_eq!(3, find_good_split(2, text, false));
    }

    #[test]
    fn find_good_split_02() {
        // Left bias falls back to the next break when the previous one is 0.
        assert_eq!(2, find_good_split(1, b"\r\nb", true));
        assert_eq!(2, find_good_split(1, b"\r\nb", false));

        // Right bias falls back to the previous break when the next one is
        // the end of the text.
        assert_eq!(1, find_good_split(2, b"a\r\n", true));
        assert_eq!(1, find_good_split(2, b"a\r\n", false));

        // With only the two edges available, each bias ends up at the
        // opposite edge.
        assert_eq!(2, find_good_split(1, b"\r\n", true));
        assert_eq!(0, find_good_split(1, b"\r\n", false));
    }
}