use alloc::borrow::Cow; use core::result::Result;
use alloc::string::String;
/// Removes the escaping backslash in front of every escaped `delimiter` in `input`
/// and decodes the result as UTF-8.
///
/// A backslash counts as an escape only when it is immediately followed by the
/// complete `delimiter`; every other backslash is copied through unchanged.
///
/// # Returns
/// * `Cow::Borrowed` over `input` when no escaped delimiter was found.
/// * `Cow::Owned` holding the unescaped text otherwise.
///
/// # Errors
/// Returns a message starting with ``Err in `unescape_from_bytes` execution:``
/// followed by the UTF-8 decoding error when the (unescaped) bytes are not valid UTF-8.
pub fn unescape_from_bytes<'a>(input: &'a [u8], delimiter: &[u8]) -> Result<Cow<'a, str>, String> {
    const ERROR_PREFIX: &str = "Err in `unescape_from_bytes` execution:";

    let total = input.len();
    let delimiter_len = delimiter.len();
    // Stays `None` until the first escaped delimiter is met, so untouched input
    // never gets copied.
    let mut buffer = None;
    let mut cursor = 0;
    while cursor < total {
        let current = input[cursor];
        // The length test guarantees that the slice below is in bounds.
        let escapes_delimiter = current == b'\\'
            && cursor + delimiter_len < total
            && &input[cursor + 1..cursor + 1 + delimiter_len] == delimiter;
        if escapes_delimiter {
            // Drop the backslash, keep the delimiter; the first hit seeds the
            // buffer with everything scanned so far.
            buffer
                .get_or_insert_with(|| input[..cursor].to_vec())
                .extend_from_slice(delimiter);
            cursor += delimiter_len;
        } else if let Some(bytes) = buffer.as_mut() {
            bytes.push(current);
        }
        cursor += 1;
    }

    match buffer {
        Some(bytes) => String::from_utf8(bytes)
            .map(Cow::Owned)
            .map_err(|e| format!("{ERROR_PREFIX}{e}")),
        None => std::str::from_utf8(input)
            .map(Cow::Borrowed)
            .map_err(|e| format!("{ERROR_PREFIX}{e}")),
    }
}
/// String-slice front end for [`unescape_from_bytes`].
///
/// Passes the UTF-8 bytes of `input` and `delimiter` to `unescape_from_bytes`
/// and returns its result unchanged.
pub fn unescape<'a>(input: &'a str, delimiter: &str) -> Result<Cow<'a, str>, String> {
    let (raw_input, raw_delimiter) = (input.as_bytes(), delimiter.as_bytes());
    unescape_from_bytes(raw_input, raw_delimiter)
}
/// Strips one leading and one trailing `delimiter` from `input` and unescapes
/// the text in between with [`unescape`].
///
/// An input made of nothing but the two delimiters yields an empty string.
///
/// # Errors
/// * The input is shorter than two delimiters.
/// * The input does not both start and end with `delimiter`.
/// * Any error reported by `unescape` for the inner text.
pub fn undelimit(input: &str, delimiter: &str) -> Result<String, String> {
    // Checked first so that "too short" and "wrong delimiter" keep distinct messages.
    if input.len() < 2 * delimiter.len() {
        return Err(format!(
            "The input length is shorter than the length of a pair of `{delimiter}`."
        ));
    }
    // `strip_prefix`/`strip_suffix` compare whole substrings, which avoids slicing
    // `input` at byte offsets that may fall inside a multi-byte character.
    let inner = input
        .strip_prefix(delimiter)
        .and_then(|rest| rest.strip_suffix(delimiter));
    match inner {
        Some(escaped) => unescape(escaped, delimiter).map(|text| text.into_owned()),
        None => Err(format!(
            "The input is not delimitable by the delimiter {delimiter}."
        )),
    }
}
/// Puts a backslash in front of every occurrence of `delimiter_str` in `input`.
///
/// Occurrences are located from the right end of `input`, so with a
/// self-overlapping delimiter (e.g. `##` inside `###`) it is the rightmost
/// occurrence that gets escaped.
///
/// # Returns
/// * `Cow::Borrowed(input)` when `input` does not contain the delimiter.
/// * `Cow::Owned` with the escaped copy otherwise.
pub fn escape<'a>(input: &'a str, delimiter_str: &str) -> Cow<'a, str> {
    // One scan collects every insertion point (in descending order).
    let mut insert_points: Vec<usize> = input
        .rmatch_indices(delimiter_str)
        .map(|(at, _)| at)
        .collect();
    if insert_points.is_empty() {
        return Cow::Borrowed(input);
    }
    insert_points.reverse();

    // Build the result front to back in a single pass instead of repeatedly
    // inserting into the middle of a string, which shifts the tail every time.
    let mut escaped = String::with_capacity(input.len() + insert_points.len());
    let mut copied_up_to = 0;
    for at in insert_points {
        escaped.push_str(&input[copied_up_to..at]);
        escaped.push('\\');
        copied_up_to = at;
    }
    escaped.push_str(&input[copied_up_to..]);
    Cow::Owned(escaped)
}
/// Same as [`escape`], but hands the result back as bytes.
///
/// When `escape` had to allocate, the allocation is reused for the byte vector;
/// otherwise the bytes of `input` are borrowed.
pub fn escape_into_bytes<'a>(input: &'a str, delimiter_str: &str) -> Cow<'a, [u8]> {
    match escape(input, delimiter_str) {
        Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
        Cow::Borrowed(_) => Cow::Borrowed(input.as_bytes()),
    }
}
/// Escapes `input` with [`escape`] and wraps the result in a pair of
/// `delimiter_str`: delimiter, escaped text, delimiter.
pub fn delimit(input: &str, delimiter_str: &str) -> String {
    let body = escape(input, delimiter_str);
    [delimiter_str, &*body, delimiter_str].concat()
}
/// Applies [`delimit`] with the same `delimiter_str` to every entry of `inputs`,
/// keeping the order of the inputs.
pub fn delimit_many(inputs: &[&str], delimiter_str: &str) -> Vec<String> {
    let mut delimited = Vec::with_capacity(inputs.len());
    for &text in inputs {
        delimited.push(delimit(text, delimiter_str));
    }
    delimited
}
/// Skips the run of whitespace that begins at byte offset `start` of `reliteral`
/// and returns the offset of the first byte after it.
///
/// Whitespace is recognised by its UTF-8 encoding: U+0009–U+000D, U+0020,
/// U+0085, U+00A0, U+1680, U+2000–U+200A, U+2028, U+2029, U+202F, U+205F and
/// U+3000.
///
/// Returns `start` unchanged when no whitespace begins there or when `start`
/// is at or past the end of the slice. A multi-byte sequence cut off by the end
/// of the slice is treated as non-whitespace.
pub fn offset_ws(reliteral: &[u8], start: usize) -> usize {
    const ONE_BYTE_WS: [u8; 6] = [0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x20];
    const TWO_BYTE_WS: [[u8; 2]; 2] = [[0xc2, 0x85], [0xc2, 0xa0]];
    const THREE_BYTE_WS: [[u8; 3]; 17] = [
        [0xe1, 0x9a, 0x80],
        [0xe2, 0x80, 0x80],
        [0xe2, 0x80, 0x81],
        [0xe2, 0x80, 0x82],
        [0xe2, 0x80, 0x83],
        [0xe2, 0x80, 0x84],
        [0xe2, 0x80, 0x85],
        [0xe2, 0x80, 0x86],
        [0xe2, 0x80, 0x87],
        [0xe2, 0x80, 0x88],
        [0xe2, 0x80, 0x89],
        [0xe2, 0x80, 0x8a],
        [0xe2, 0x80, 0xa8],
        [0xe2, 0x80, 0xa9],
        [0xe2, 0x80, 0xaf],
        [0xe2, 0x81, 0x9f],
        [0xe3, 0x80, 0x80],
    ];

    let mut pos = start;
    while pos < reliteral.len() {
        let rest = &reliteral[pos..];
        let lead = rest[0];
        // The high nibble of the lead byte selects which table can match.
        // `starts_with` is simply false when fewer bytes remain than the
        // sequence needs, so a truncated tail cannot index out of bounds.
        let width = match lead >> 4 {
            0b0000..=0b0111 if ONE_BYTE_WS.contains(&lead) => 1,
            0b1100 if TWO_BYTE_WS.iter().any(|seq| rest.starts_with(seq)) => 2,
            0b1110 if THREE_BYTE_WS.iter().any(|seq| rest.starts_with(seq)) => 3,
            _ => break,
        };
        pos += width;
    }
    pos
}
/// Infers from the first byte of a UTF-8 encoded character how many bytes the
/// character occupies.
///
/// Returns `0` when `byte` is a continuation byte (`10xxxxxx`) and therefore
/// cannot start a character. Only the high nibble is inspected; the byte is not
/// otherwise validated.
pub fn infer_char_size(byte: u8) -> u8 {
    match byte >> 4 {
        0b0000..=0b0111 => 1,
        // Two-byte lead bytes are `110xxxxx`, i.e. 0xC0..=0xDF, which spans
        // both of these high nibbles.
        0b1100 | 0b1101 => 2,
        0b1110 => 3,
        0b1111 => 4,
        // 0b1000..=0b1011: continuation byte.
        _ => 0,
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Lead bytes of 1-, 2- and 3-byte characters report widths 1, 2 and 3.
    #[test]
    fn test_infer_char_size() {
        let samples = [('~', 1u8), ('ß', 2u8), ('中', 3u8)];
        for &(sample, expected_width) in samples.iter() {
            let encoded = sample.to_string();
            assert_eq!(infer_char_size(encoded.as_bytes()[0]), expected_width);
        }
    }

    /// Byte-level unescaping: borrow vs. owned result, one- and two-byte
    /// delimiters, and rejection of invalid UTF-8.
    #[test]
    fn test_unescape_from_bytes() {
        let slash: &[u8] = b"/";
        let double_hash: &[u8] = b"##";

        // Nothing is escaped, so the input comes back as a borrow.
        let date_literal = br"/\d{4}-\d{2}-\d{2}/";
        let date_pattern = unescape_from_bytes(&date_literal[1..18], slash).unwrap();
        assert!(matches!(date_pattern, Cow::Borrowed(_)));
        assert_eq!(&*date_pattern, r"\d{4}-\d{2}-\d{2}");

        // Escaped slashes force an owned, rewritten copy.
        let slashed_literal = b"/\\d{2}\\/\\d{2}\\/\\d{4}/";
        let slashed_pattern = unescape_from_bytes(&slashed_literal[1..20], slash).unwrap();
        assert!(matches!(slashed_pattern, Cow::Owned(_)));
        assert_eq!(&*slashed_pattern, r"\d{2}/\d{2}/\d{4}");

        // Two-byte delimiter, nothing escaped.
        let hashed_literal = b"##(?i)ab+c##";
        let hashed_pattern = unescape_from_bytes(&hashed_literal[2..10], double_hash).unwrap();
        assert_eq!(&*hashed_pattern, "(?i)ab+c");

        // Two-byte delimiter, one escaped occurrence.
        let hashed_escaped_literal = b"##(?i)\\##ab+c##";
        let hashed_escaped_pattern =
            unescape_from_bytes(&hashed_escaped_literal[2..13], double_hash).unwrap();
        assert_eq!(&*hashed_escaped_pattern, "(?i)##ab+c");

        // 159 (0x9F) is a continuation byte with no lead byte before it.
        let not_utf8 = [0u8, 159];
        assert!(unescape_from_bytes(&not_utf8, slash).is_err());
    }

    /// The `&str` front end produces the same owned result as the byte version.
    #[test]
    fn test_unescape() {
        let literal = "/\\d{2}\\/\\d{2}\\/\\d{4}/";
        let unescaped = unescape(&literal[1..20], "/").unwrap();
        assert!(matches!(unescaped, Cow::Owned(_)));
        assert_eq!(&*unescaped, r"\d{2}/\d{2}/\d{4}");
    }

    /// Delimiters are stripped and the inner text is unescaped.
    #[test]
    fn test_undelimit() {
        let undelimited = undelimit("/\\d{2}\\/\\d{2}\\/\\d{4}/", "/").unwrap();
        assert_eq!(undelimited, r"\d{2}/\d{2}/\d{4}");
    }

    /// Text without the delimiter is untouched; every delimiter gets a backslash.
    #[test]
    fn test_escape() {
        let dashed = r"\d{4}-\d{2}-\d{2}";
        assert_eq!(&*escape(dashed, "/"), dashed);

        let slashed = r"\d{2}/\d{2}/\d{4}";
        assert_eq!(&*escape(slashed, "/"), r"\d{2}\/\d{2}\/\d{4}");
    }

    /// Escaping straight into bytes matches the escaped text byte for byte.
    #[test]
    fn test_escape_into_bytes() {
        let escaped = escape_into_bytes(r"\d{2}/\d{2}/\d{4}", "/");
        assert_eq!(&escaped[..], &br"\d{2}\/\d{2}\/\d{4}"[..]);
    }

    /// Delimiting escapes only the delimiter that is actually in use.
    #[test]
    fn test_delimit() {
        let date = r"\d{2}/\d{2}/\d{4}";
        assert_eq!(delimit(date, "/"), r"/\d{2}\/\d{2}\/\d{4}/");
        assert_eq!(delimit(date, "#"), r"#\d{2}/\d{2}/\d{4}#");

        let tag_pattern = r"(?-u:#[\w+-\.]+)";
        assert_eq!(delimit(tag_pattern, "#"), r"#(?-u:\#[\w+-\.]+)#");
    }

    /// Multi-byte characters around escaped delimiters survive unescaping.
    #[test]
    fn test_unescape_from_bytes_chinese() {
        let escaped: &[u8] = &[
            0x5C, 0x2F, 0xE5, 0xA4, 0xA9, 0xE4, 0xB8, 0x8B, 0x5C, 0x2F, 0xE4, 0xB8, 0x80, 0xE5,
            0xAE, 0xB6,
        ];
        let unescaped = unescape_from_bytes(escaped, b"/").unwrap();
        assert_eq!(&*unescaped, r"/天下/一家");
    }
}