1use alloc::borrow::Cow; use core::result::Result;
18use alloc::string::String;
19
20
/// Removes the backslash in front of every escaped `delimiter` found in `input`.
///
/// The input is scanned left to right. Each backslash that is immediately
/// followed by the full `delimiter` byte sequence is dropped and the delimiter
/// itself is kept. A backslash that is not followed by the delimiter is copied
/// through unchanged.
///
/// # Returns
///
/// * `Cow::Borrowed` pointing into `input` when no escaped delimiter was found,
///   so no copy is made.
/// * `Cow::Owned` holding the rewritten text otherwise.
///
/// # Errors
///
/// Returns a message starting with
/// ``Err in `unescape_from_bytes` execution:`` followed by the UTF-8 decoding
/// error when the resulting bytes are not valid UTF-8.
pub fn unescape_from_bytes<'a>(input: &'a [u8], delimiter: &[u8]) -> Result<Cow<'a, str>, String> {
    // Only turned into a `String` on the failure path, so successful calls
    // (in particular the borrowed ones) perform no extra allocation.
    const ERR_PREFIX: &str = "Err in `unescape_from_bytes` execution:";

    let length = input.len();
    let chunksize = delimiter.len();
    // Stays `None` until the first escaped delimiter shows up.
    let mut owned: Option<Vec<u8>> = None;
    let mut i = 0;
    while i < length {
        let thisbyte = input[i];
        // `i < length` guarantees `i + 1 <= length`, so the tail slice is always valid;
        // `starts_with` also covers the "not enough bytes left" case.
        if thisbyte == b'\\' && input[i + 1..].starts_with(delimiter) {
            owned
                // First hit: seed the buffer with everything seen so far.
                .get_or_insert_with(|| input[..i].to_vec())
                .extend_from_slice(delimiter);
            // Skip the delimiter; the backslash itself is skipped by the `i += 1` below.
            i += chunksize;
        } else if let Some(text) = owned.as_mut() {
            // Once a copy exists every ordinary byte (lone backslashes included) goes into it.
            text.push(thisbyte);
        }
        i += 1;
    }

    match owned {
        Some(bytes) => String::from_utf8(bytes)
            .map(Cow::Owned)
            .map_err(|e| format!("{ERR_PREFIX}{e}")),
        None => core::str::from_utf8(input)
            .map(Cow::Borrowed)
            .map_err(|e| format!("{ERR_PREFIX}{e}")),
    }
}
78
79
80
81pub fn unescape<'a>(input:&'a str,delimiter:&str) -> Result<Cow<'a, str>,String> {
87 unescape_from_bytes(input.as_bytes(),delimiter.as_bytes())
88}
89
90
91pub fn undelimit(input:&str, delimiter:&str) -> Result<String,String> {
94 let delim_len = delimiter.len();
95 let input_len = input.len();
96 if input_len > 2 * delim_len {
97 let start_chunk = &input[0..delim_len];
98 let r_delim_index = input_len - delim_len;let end_chunk = &input[r_delim_index..input_len];
100 if start_chunk == delimiter && end_chunk == delimiter {
101 match unescape(&input[delim_len..r_delim_index],delimiter) {
102 Ok(undelimited_cow) => Ok(undelimited_cow.into_owned()) ,
103 Err(msg) => Err(msg),
104 }
105 }
106 else {Err(format!("The input is not delimitable by the delimiter {delimiter}."))}
107 } else {
108 Err(format!("The input length is shorter than the length of a pair of `{delimiter}`."))
109 }
110}
111
/// Puts a backslash in front of every occurrence of `delimiter_str` in `input`.
///
/// Occurrences are located from the right-hand end of the input, so with a
/// self-overlapping delimiter the right-most non-overlapping matches are the
/// ones that get escaped (`"ababa"` with `"aba"` becomes `r"ab\aba"`).
///
/// Returns `Cow::Borrowed(input)` when the delimiter does not occur at all and
/// `Cow::Owned` with the escaped copy otherwise.
pub fn escape<'a>(input: &'a str, delimiter_str: &str) -> Cow<'a, str> {
    // One search pass; offsets arrive in descending order.
    let hits: Vec<usize> = input
        .rmatch_indices(delimiter_str)
        .map(|(at, _)| at)
        .collect();
    if hits.is_empty() {
        return Cow::Borrowed(input);
    }

    // Build the result front to back instead of repeatedly inserting into a copy,
    // which would shift the tail once per match.
    let mut escaped = String::with_capacity(input.len() + hits.len());
    let mut copied = 0;
    for &at in hits.iter().rev() {
        escaped.push_str(&input[copied..at]);
        escaped.push('\\');
        copied = at;
    }
    escaped.push_str(&input[copied..]);
    Cow::Owned(escaped)
}
124
125pub fn escape_into_bytes<'a>(input:&'a str,delimiter_str:&str) -> Cow<'a, [u8]> {
127 let text = escape(input,delimiter_str);
128 if let Cow::Owned(escaped_string) = text {
129 Cow::Owned(escaped_string.into_bytes()) }else {Cow::Borrowed(input.as_bytes())}
131}
132
133pub fn delimit(input:&str,delimiter_str:&str) -> String {
135 let mut output = String::from(delimiter_str);
136 let escaped = escape(input,delimiter_str);
137 output.push_str(escaped.as_ref());
138 output.push_str(delimiter_str);
139 output
140}
141
142pub fn delimit_many(inputs:&[&str],delimiter_str:&str) -> Vec<String> {
144 let v:Vec<String> = inputs.iter().map(|x| delimit(x,delimiter_str)).collect();
145 v
146}
147
/// Returns the index of the first byte at or after `start` that does not begin
/// a whitespace character encoded as UTF-8.
///
/// The whitespace set is the byte sequences listed in the three tables below
/// (one-, two- and three-byte encodings). Scanning stops at the first
/// non-whitespace sequence, at a continuation byte, at a four-byte lead byte,
/// or at the end of the slice.
///
/// If `start` is at or beyond the end of `reliteral`, `start` is returned
/// unchanged. A multi-byte lead byte that is cut off by the end of the slice is
/// treated as non-whitespace instead of causing an out-of-range panic.
pub fn offset_ws(reliteral: &[u8], start: usize) -> usize {
    const WS_ONE_BYTE: [u8; 6] = [0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x20];
    const WS_TWO_BYTES: [[u8; 2]; 2] = [[0xc2, 0x85], [0xc2, 0xa0]];
    const WS_THREE_BYTES: [[u8; 3]; 17] = [
        [0xe1, 0x9a, 0x80], [0xe2, 0x80, 0x80], [0xe2, 0x80, 0x81], [0xe2, 0x80, 0x82],
        [0xe2, 0x80, 0x83], [0xe2, 0x80, 0x84], [0xe2, 0x80, 0x85], [0xe2, 0x80, 0x86],
        [0xe2, 0x80, 0x87], [0xe2, 0x80, 0x88], [0xe2, 0x80, 0x89], [0xe2, 0x80, 0x8a],
        [0xe2, 0x80, 0xa8], [0xe2, 0x80, 0xa9], [0xe2, 0x80, 0xaf], [0xe2, 0x81, 0x9f],
        [0xe3, 0x80, 0x80],
    ];

    let mut pos = start;
    while pos < reliteral.len() {
        let rest = &reliteral[pos..];
        let lead = rest[0];
        // The high nibble of the lead byte selects which table to consult.
        // `starts_with` is false when fewer bytes remain than the sequence needs,
        // so a truncated sequence simply ends the scan.
        let width = match lead >> 4 {
            0b0000..=0b0111 if WS_ONE_BYTE.contains(&lead) => 1,
            0b1100 | 0b1101 if WS_TWO_BYTES.iter().any(|seq| rest.starts_with(seq)) => 2,
            0b1110 if WS_THREE_BYTES.iter().any(|seq| rest.starts_with(seq)) => 3,
            _ => break,
        };
        pos += width;
    }
    pos
}
207
208
/// Guesses how many bytes a UTF-8 character occupies from its first byte.
///
/// Only the high nibble of `byte` is inspected:
///
/// * `0x0`–`0x7` → 1
/// * `0xC`, `0xD` → 2
/// * `0xE` → 3
/// * `0xF` → 4
/// * anything else (`0x8`–`0xB`) → 0
pub fn infer_char_size(byte: u8) -> u8 {
    match byte >> 4 {
        0b0000..=0b0111 => 1,
        0b1100 | 0b1101 => 2,
        0b1110 => 3,
        0b1111 => 4,
        _ => 0,
    }
}
223
224
225
226
227
#[cfg(test)]
mod tests {
    use super::*;

    /// First bytes of one-, two- and three-byte characters map to 1, 2 and 3.
    #[test]
    fn test_infer_char_size() {
        let one_byte = '~'.to_string();
        let two_bytes = 'ß'.to_string();
        let three_bytes = '中'.to_string();
        assert_eq!(infer_char_size(one_byte.as_bytes()[0]), 1);
        assert_eq!(infer_char_size(two_bytes.as_bytes()[0]), 2);
        assert_eq!(infer_char_size(three_bytes.as_bytes()[0]), 3);
    }

    /// Borrowed vs. owned results, multi-byte delimiters and invalid UTF-8.
    #[test]
    fn test_unescape_from_bytes() {
        // Nothing to unescape: the result must borrow from the input.
        let date_dashes = br"/\d{4}-\d{2}-\d{2}/";
        let plain = unescape_from_bytes(&date_dashes[1..18], b"/").unwrap();
        assert!(matches!(plain, Cow::Borrowed(_)));
        assert_eq!(plain.into_owned(), String::from(r"\d{4}-\d{2}-\d{2}"));

        // Escaped delimiters present: the result must be an owned copy.
        let date_slashes = b"/\\d{2}\\/\\d{2}\\/\\d{4}/";
        let rewritten = unescape_from_bytes(&date_slashes[1..20], b"/").unwrap();
        assert!(matches!(rewritten, Cow::Owned(_)));
        assert_eq!(rewritten.into_owned(), String::from(r#"\d{2}/\d{2}/\d{4}"#));

        // Two-byte delimiter without any escape.
        let hashed = b"##(?i)ab+c##";
        let hashed_out = unescape_from_bytes(&hashed[2..10], b"##");
        assert_eq!(hashed_out.unwrap().into_owned(), String::from("(?i)ab+c"));

        // Two-byte delimiter with one escaped occurrence.
        let hashed_escaped = b"##(?i)\\##ab+c##";
        let hashed_escaped_out = unescape_from_bytes(&hashed_escaped[2..13], b"##");
        assert_eq!(hashed_escaped_out.unwrap().into_owned(), String::from("(?i)##ab+c"));

        // Bytes that are not valid UTF-8 are reported as an error.
        let not_utf8 = vec![0, 159];
        assert!(unescape_from_bytes(&not_utf8[..], b"/").is_err());
    }

    /// The `&str` wrapper yields an owned result when something was unescaped.
    #[test]
    fn test_unescape() {
        let delimited = "/\\d{2}\\/\\d{2}\\/\\d{4}/";
        let rewritten = unescape(&delimited[1..20], "/").unwrap();
        assert!(matches!(rewritten, Cow::Owned(_)));
        assert_eq!(rewritten.into_owned(), String::from(r#"\d{2}/\d{2}/\d{4}"#));
    }

    /// Delimiters are stripped and the inner text unescaped.
    #[test]
    fn test_undelimit() {
        let delimited = "/\\d{2}\\/\\d{2}\\/\\d{4}/";
        let inner = undelimit(delimited, "/");
        assert_eq!(inner.unwrap(), String::from(r#"\d{2}/\d{2}/\d{4}"#));
    }

    /// Text without the delimiter is unchanged; delimiters gain a backslash.
    #[test]
    fn test_escape() {
        let slash = "/";

        let untouched = escape(r"\d{4}-\d{2}-\d{2}", slash);
        assert_eq!(untouched.as_ref(), r"\d{4}-\d{2}-\d{2}");

        let escaped = escape(r"\d{2}/\d{2}/\d{4}", slash);
        assert_eq!(escaped.as_ref(), r"\d{2}\/\d{2}\/\d{4}");
    }

    /// Byte output matches the escaped text.
    #[test]
    fn test_escape_into_bytes() {
        let pattern = r"\d{2}/\d{2}/\d{4}";
        let escaped = escape_into_bytes(pattern, "/");
        assert_eq!(escaped.as_ref(), br"\d{2}\/\d{2}\/\d{4}");
    }

    /// Wrapping escapes only the delimiter actually used.
    #[test]
    fn test_delimit() {
        let pattern = r"\d{2}/\d{2}/\d{4}";

        let with_slash = delimit(pattern, "/");
        assert_eq!(with_slash, String::from(r"/\d{2}\/\d{2}\/\d{4}/"));

        let with_hash = delimit(pattern, "#");
        assert_eq!(with_hash, String::from(r"#\d{2}/\d{2}/\d{4}#"));

        let tag_pattern = r"(?-u:#[\w+-\.]+)";
        let tag_with_hash = delimit(tag_pattern, "#");
        assert_eq!(tag_with_hash, String::from(r"#(?-u:\#[\w+-\.]+)#"));
    }

    /// Multi-byte characters around escaped delimiters survive unescaping.
    #[test]
    fn test_unescape_from_bytes_chinese() {
        let bytes: [u8; 16] = [
            0x5C, 0x2F, 0xE5, 0xA4, 0xA9, 0xE4, 0xB8, 0x8B,
            0x5C, 0x2F, 0xE4, 0xB8, 0x80, 0xE5, 0xAE, 0xB6,
        ];
        let unescaped = unescape_from_bytes(&bytes[..], b"/").unwrap();
        assert_eq!(unescaped.into_owned(), String::from(r"/天下/一家"));
    }
}
365
366