1use crate::core::{is_unique, RUMResult, RUMVec};
21use crate::types::RUMBuffer;
22use base64::prelude::*;
23use chardetng::EncodingDetector;
24pub use compact_str::{
25 format_compact as rumtk_format, CompactString, CompactStringExt, ToCompactString,
26};
27use encoding_rs::Encoding;
28use std::cmp::min;
29use unicode_segmentation::UnicodeSegmentation;
/// NOTE(review): presumably the width of an escape sequence window; it is not
/// referenced in the visible part of this file — confirm it is still needed.
const ESCAPED_STRING_WINDOW: usize = 6;
/// Character that introduces an escape sequence (a single backslash).
const ASCII_ESCAPE_CHAR: char = '\\';
/// Lower bound (inclusive) used by `is_printable_char`: the space character.
const MIN_ASCII_READABLE: char = ' ';
/// Upper bound (inclusive) used by `is_printable_char`: the tilde character.
const MAX_ASCII_READABLE: char = '~';
/// The empty string slice.
pub const EMPTY_STRING: &str = "";
/// A single dot.
pub const DOT_STR: &str = ".";
/// `Some("")`, i.e. an option that holds the empty string.
pub const EMPTY_STRING_OPTION: Option<&str> = Some("");
/// Every ASCII character from space through tilde, in code point order.
pub const READABLE_ASCII: &str = " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~";
39
/// Owned string type used throughout this module (an alias for `CompactString`).
pub type RUMString = CompactString;
/// A `(from, to)` replacement pair applied after escaping.
pub type EscapeException<'a> = (&'a str, &'a str);
/// A borrowed list of `EscapeException` pairs.
pub type EscapeExceptions<'a> = &'a [EscapeException<'a>];
/// Unsized slice of `(from, to)` string pairs; used behind a reference.
pub type StringReplacementPair<'a> = [(&'a str, &'a str)];
/// A single grapheme, borrowed from the source string.
pub type Grapheme<'a> = &'a str;
/// Owned vector of graphemes borrowed from one source string.
pub type GraphemeStringView<'a> = RUMVec<Grapheme<'a>>;
/// A sequence of graphemes to search for.
pub type GraphemePattern<'a> = &'a [Grapheme<'a>];
/// A borrowed slice of graphemes whose borrow (`'b`) may be shorter than the
/// lifetime of the underlying text (`'a`).
pub type GraphemeSlice<'b, 'a> = &'b [Grapheme<'a>];
/// A `(left, right)` pair of grapheme patterns.
pub type GraphemePatternPair<'a> = (GraphemePattern<'a>, GraphemePattern<'a>);
50
51#[derive(Default, Debug, PartialEq, Clone)]
56pub struct GraphemeStr<'a> {
57 view: GraphemeStringView<'a>,
58 start: usize,
59 end: usize,
60}
61
62impl<'a> GraphemeStr<'a> {
63 pub fn from(string: &'a str) -> Self {
64 let view = string.graphemes(true).collect::<GraphemeStringView>();
65 Self::from_view(view)
66 }
67
68 pub fn from_view(view: GraphemeStringView<'a>) -> Self {
69 let start = 0;
70 let end = view.len();
71 Self { view, start, end }
72 }
73
74 pub fn at(&self, index: usize) -> Grapheme<'a> {
75 self.view[index]
76 }
77
78 pub fn trim(&self, pattern: &GraphemePatternPair<'a>) -> Self {
79 let (left_pattern, right_pattern) = pattern;
80 self.trim_left(left_pattern).trim_right(right_pattern)
81 }
82
83 pub fn trim_left(&self, pattern: &GraphemePattern<'a>) -> Self {
84 let new_offset = self.find(pattern, self.start);
85 Self {
86 view: self.view.clone(),
87 start: new_offset,
88 end: self.end,
89 }
90 }
91
92 pub fn trim_right(&self, pattern: &GraphemePattern<'a>) -> Self {
93 let new_offset = self.rfind(pattern, self.end);
94 Self {
95 view: self.view.clone(),
96 start: self.start,
97 end: new_offset,
98 }
99 }
100
101 pub fn splice(&self, skip_pattern: &GraphemePatternPair<'a>) -> Self {
102 let (left_pattern, right_pattern) = skip_pattern;
103 let mut new_view = GraphemeStringView::with_capacity(self.end - self.start);
104 let mut offset = self.start;
105 let l_pattern_s = left_pattern.len();
106
107 while offset < self.end {
108 let target_s = self.find(left_pattern, offset) + l_pattern_s;
109 for i in offset..target_s {
110 new_view.push(self.view[i]);
111 }
112 offset = self.find(right_pattern, target_s);
113 }
114
115 GraphemeStr::from_view(new_view)
116 }
117
118 pub fn find(&self, pattern: &GraphemePattern<'a>, offset: usize) -> usize {
119 let pattern_s = pattern.len();
120 let mut new_offset = offset;
121 let mut pattern_end = new_offset + pattern_s;
122
123 while new_offset < self.end && pattern_end < self.end {
124 if self.view[new_offset..pattern_end] == **pattern {
125 break;
126 }
127
128 new_offset += 1;
129 pattern_end = new_offset + pattern_s;
130 }
131
132 new_offset
133 }
134
135 pub fn rfind(&self, pattern: &GraphemePattern<'a>, offset: usize) -> usize {
136 let pattern_s = pattern.len();
137 let mut new_offset = offset;
138 while new_offset > self.start {
139 if self.view[new_offset - pattern_s..new_offset] == **pattern {
140 break;
141 }
142
143 new_offset -= 1;
144 }
145
146 new_offset
147 }
148
149 pub fn len(&self) -> usize {
150 self.end - self.start
151 }
152
153 pub fn get_graphemes(&self) -> GraphemeSlice<'_, 'a> {
154 &self.view[self.start..self.end]
155 }
156
157 pub fn truncate(&self, size: usize) -> Self {
158 let end = min(size, self.end);
159 Self {
160 view: self.view.clone(),
161 start: self.start,
162 end,
163 }
164 }
165
166 pub fn is_unique(&self) -> bool {
167 is_unique(&self.view)
168 }
169}
170
171impl ToString for GraphemeStr<'_> {
172 fn to_string(&self) -> String {
173 let mut new_string = String::with_capacity(self.len());
174
175 for grapheme in self.view[self.start..self.end].iter() {
176 new_string.push_str(grapheme);
177 }
178
179 new_string
180 }
181}
182
// GraphemeStr uses the default conversion methods of RUMStringConversions,
// which are built on top of its ToString implementation.
impl RUMStringConversions for GraphemeStr<'_> {}
184
/// Minimal interface of a growable string buffer.
pub trait StringLike {
    /// Creates an empty value; `capacity` is the requested initial capacity.
    fn with_capacity(capacity: usize) -> Self;
    /// Appends `string` to the end of the buffer.
    fn push_str(&mut self, string: &str);
}
191
192pub trait AsStr {
193 fn as_str(&self) -> &str;
194 fn as_grapheme_str(&self) -> GraphemeStr {
195 GraphemeStr::from(self.as_str())
196 }
197}
198
199pub trait RUMStringConversions: ToString {
200 #[inline(always)]
201 fn to_rumstring(&self) -> RUMString {
202 RUMString::from(self.to_string())
203 }
204
205 #[inline(always)]
206 fn to_raw(&self) -> RUMVec<u8> {
207 self.to_string().as_bytes().to_vec()
208 }
209
210 #[inline(always)]
211 fn to_buffer(&self) -> RUMBuffer {
212 RUMBuffer::from(self.to_string())
213 }
214}
215
216pub trait StringUtils: AsStr + RUMStringConversions {
217 #[inline(always)]
218 fn duplicate(&self, count: usize) -> RUMString {
219 let mut duplicated = RUMString::with_capacity(count);
220 for i in 0..count {
221 duplicated += &self.as_str();
222 }
223 duplicated
224 }
225
226 fn truncate(&self, count: usize) -> RUMString {
227 self.as_grapheme_str().truncate(count).to_rumstring()
228 }
229}
230
231impl AsStr for String {
232 fn as_str(&self) -> &str {
233 self.as_str()
234 }
235}
236
237impl RUMStringConversions for RUMString {}
238impl AsStr for RUMString {
239 fn as_str(&self) -> &str {
240 self.as_str()
241 }
242}
243impl StringUtils for RUMString {}
244
// String slices use the default conversion methods.
impl RUMStringConversions for str {}

/// A string slice is already a `&str`, so it is returned unchanged.
impl AsStr for str {
    fn as_str(&self) -> &str {
        self
    }
}

// String slices use the default helper methods.
impl StringUtils for str {}
254
// Single characters use the default conversion methods.
impl RUMStringConversions for char {}
256
/// Conversion from byte containers into a `RUMString`.
pub trait RUMArrayConversions {
    /// Builds a `RUMString` from the bytes held by `self`.
    fn to_rumstring(&self) -> RUMString;
}
260
261impl RUMArrayConversions for Vec<u8> {
262 fn to_rumstring(&self) -> RUMString {
263 self.as_slice().to_rumstring()
264 }
265}
266
267impl RUMArrayConversions for &[u8] {
268 fn to_rumstring(&self) -> RUMString {
269 RUMString::from_utf8(&self).unwrap()
270 }
271}
272
273pub fn count_tokens_ignoring_pattern(vector: &Vec<&str>, string_token: &RUMString) -> usize {
276 let mut count: usize = 0;
277 for tok in vector.iter() {
278 if string_token != tok {
279 count += 1;
280 }
281 }
282 count
283}
284
285pub fn try_decode(src: &[u8]) -> RUMString {
292 let mut detector = EncodingDetector::new();
293 detector.feed(&src, true);
294 let encoding = detector.guess(None, true);
295 decode(src, encoding)
296}
297
298pub fn try_decode_with(src: &[u8], encoding_name: &str) -> RUMString {
304 let encoding = match Encoding::for_label(encoding_name.as_bytes()) {
305 Some(v) => v,
306 None => return RUMString::from(""),
307 };
308 decode(src, encoding)
309}
310
311fn decode(src: &[u8], encoding: &'static Encoding) -> RUMString {
317 match encoding.decode_without_bom_handling_and_without_replacement(&src) {
318 Some(res) => RUMString::from(res),
319 None => RUMString::from_utf8(src).unwrap(),
320 }
321}
322
323pub fn unescape_string(escaped_str: &str) -> RUMResult<RUMString> {
332 let graphemes = escaped_str.graphemes(true).collect::<Vec<&str>>();
333 let str_size = graphemes.len();
334 let mut result: Vec<u8> = Vec::with_capacity(escaped_str.len());
335 let mut i = 0;
336 while i < str_size {
337 let seq_start = graphemes[i];
338 match seq_start {
339 "\\" => {
340 let escape_seq = get_grapheme_string(&graphemes, " ", i);
341 let mut c = match unescape(&escape_seq) {
342 Ok(c) => c,
343 Err(_why) => Vec::from(escape_seq.as_bytes()),
344 };
345 result.append(&mut c);
346 i += &escape_seq.as_grapheme_str().len();
347 }
348 _ => {
349 result.append(&mut Vec::from(seq_start.as_bytes()));
350 i += 1;
351 }
352 }
353 }
354 Ok(try_decode(result.as_slice()))
355}
356
357pub fn get_grapheme_string<'a>(
361 graphemes: &Vec<&'a str>,
362 end_grapheme: &str,
363 start_index: usize,
364) -> RUMString {
365 get_grapheme_collection(graphemes, end_grapheme, start_index).join_compact("")
366}
367
/// Collects the graphemes from `start_index` up to (not including) the first
/// occurrence of `end_grapheme`, or up to the end of the list when it never occurs.
pub fn get_grapheme_collection<'a>(
    graphemes: &Vec<&'a str>,
    end_grapheme: &str,
    start_index: usize,
) -> Vec<&'a str> {
    graphemes
        .iter()
        .skip(start_index)
        .copied()
        .take_while(|item| *item != end_grapheme)
        .collect()
}
389
390pub fn unescape(escaped_str: &str) -> Result<Vec<u8>, RUMString> {
401 let lower_case = escaped_str.to_lowercase();
402 let mut bytes: Vec<u8> = Vec::with_capacity(3);
403 match &lower_case[0..2] {
404 "\\x" => {
406 let byte_str = number_to_char_unchecked(&hex_to_number(&lower_case[2..6])?);
407 bytes.append(&mut byte_str.as_bytes().to_vec());
408 }
409 "\\u" => {
411 let byte_str = number_to_char_unchecked(&hex_to_number(&lower_case[2..6])?);
412 bytes.append(&mut byte_str.as_bytes().to_vec());
413 }
414 "\\c" => {
416 let byte_str = number_to_char_unchecked(&hex_to_number(&lower_case[2..6])?);
417 bytes.append(&mut byte_str.as_bytes().to_vec());
418 }
419 "\\o" => {
421 let byte_str = number_to_char_unchecked(&octal_to_number(&lower_case[2..6])?);
422 bytes.append(&mut byte_str.as_bytes().to_vec());
423 }
424 "\\m" => match lower_case.as_grapheme_str().len() {
426 8 => {
427 bytes.push(hex_to_byte(&lower_case[2..4])?);
428 bytes.push(hex_to_byte(&lower_case[4..6])?);
429 bytes.push(hex_to_byte(&lower_case[6..8])?);
430 }
431 6 => {
432 bytes.push(hex_to_byte(&lower_case[2..4])?);
433 bytes.push(hex_to_byte(&lower_case[4..6])?);
434 }
435 _ => {
436 return Err(rumtk_format!(
437 "Unknown multibyte sequence. Cannot decode {}",
438 lower_case
439 ))
440 }
441 },
442 "\\z" => bytes.append(&mut lower_case.as_bytes().to_vec()),
444 _ => bytes.push(unescape_control_byte(&lower_case[0..2])?),
446 }
447 Ok(bytes)
448}
449
450fn unescape_control(escaped_str: &str) -> Result<char, RUMString> {
455 match escaped_str {
456 "\\t" => Ok('\t'),
458 "\\b" => Ok('\x08'),
459 "\\n" => Ok('\n'),
460 "\\r" => Ok('\r'),
461 "\\f" => Ok('\x14'),
462 "\\s" => Ok('\x20'),
463 "\\\\" => Ok(ASCII_ESCAPE_CHAR),
464 "\\'" => Ok('\''),
465 "\\\"" => Ok('"'),
466 "\\0" => Ok('\0'),
467 "\\v" => Ok('\x0B'),
468 "\\a" => Ok('\x07'),
469 _ => Err(rumtk_format!(
471 "Unknown escape sequence? Sequence: {}!",
472 escaped_str
473 )),
474 }
475}
476
477fn unescape_control_byte(escaped_str: &str) -> Result<u8, RUMString> {
482 match escaped_str {
483 "\\t" => Ok(9), "\\b" => Ok(8), "\\n" => Ok(10), "\\r" => Ok(13), "\\f" => Ok(12), "\\s" => Ok(32), "\\\\" => Ok(27), "\\'" => Ok(39), "\\\"" => Ok(34), "\\0" => Ok(0), "\\v" => Ok(11), "\\a" => Ok(7), _ => hex_to_byte(escaped_str),
499 }
500}
501
502fn hex_to_number(hex_str: &str) -> Result<u32, RUMString> {
506 match u32::from_str_radix(&hex_str, 16) {
507 Ok(result) => Ok(result),
508 Err(val) => Err(rumtk_format!(
509 "Failed to parse string with error {}! Input string {} \
510 is not hex string!",
511 val,
512 hex_str
513 )),
514 }
515}
516
517fn hex_to_byte(hex_str: &str) -> Result<u8, RUMString> {
521 match u8::from_str_radix(&hex_str, 16) {
522 Ok(result) => Ok(result),
523 Err(val) => Err(rumtk_format!(
524 "Failed to parse string with error {}! Input string {} \
525 is not hex string!",
526 val,
527 hex_str
528 )),
529 }
530}
531
532fn octal_to_number(hoctal_str: &str) -> Result<u32, RUMString> {
536 match u32::from_str_radix(&hoctal_str, 8) {
537 Ok(result) => Ok(result),
538 Err(val) => Err(rumtk_format!(
539 "Failed to parse string with error {}! Input string {} \
540 is not an octal string!",
541 val,
542 hoctal_str
543 )),
544 }
545}
546
547fn octal_to_byte(hoctal_str: &str) -> Result<u8, RUMString> {
551 match u8::from_str_radix(&hoctal_str, 8) {
552 Ok(result) => Ok(result),
553 Err(val) => Err(rumtk_format!(
554 "Failed to parse string with error {}! Input string {} \
555 is not an octal string!",
556 val,
557 hoctal_str
558 )),
559 }
560}
561
562fn number_to_char(num: &u32) -> Result<RUMString, RUMString> {
566 match char::from_u32(*num) {
567 Some(result) => Ok(result.to_rumstring()),
568 None => Err(rumtk_format!(
569 "Failed to cast number to character! Number {}",
570 num
571 )),
572 }
573}
574
575fn number_to_char_unchecked(num: &u32) -> RUMString {
581 unsafe { char::from_u32_unchecked(*num).to_rumstring() }
582}
583
584pub fn escape(unescaped_str: &str) -> RUMString {
596 basic_escape(unescaped_str, &vec![("{", ""), ("}", "")])
597}
598
599pub fn basic_escape(unescaped_str: &str, except: EscapeExceptions) -> RUMString {
610 let escaped = is_escaped_str(unescaped_str);
611 if !escaped {
612 let mut escaped_str = unescaped_str.escape_default().to_string();
613 for (from, to) in except {
614 escaped_str = escaped_str.replace(from, to);
615 }
616 return escaped_str.to_rumstring();
617 }
618 unescaped_str.to_rumstring()
619}
620
/// Returns `true` when every byte of `unescaped_str` is ASCII (an empty string
/// counts as ASCII).
pub fn is_ascii_str(unescaped_str: &str) -> bool {
    unescaped_str.bytes().all(|byte| byte.is_ascii())
}
630
631pub fn is_escaped_str(unescaped_str: &str) -> bool {
640 if !is_ascii_str(unescaped_str) {
641 return false;
642 }
643
644 for c in unescaped_str.chars() {
645 if !is_printable_char(&c) {
646 return false;
647 }
648 }
649 true
650}
651
652pub fn is_printable_char(c: &char) -> bool {
656 &MIN_ASCII_READABLE <= c && c <= &MAX_ASCII_READABLE
657}
658
659pub fn filter_ascii(unescaped_str: &str, closure: fn(char) -> bool) -> RUMString {
663 let mut filtered = unescaped_str.to_rumstring();
664 filtered.retain(closure);
665 filtered
666}
667
668pub fn filter_non_printable_ascii(unescaped_str: &str) -> RUMString {
672 filter_ascii(unescaped_str, |c: char| is_printable_char(&c))
673}
674
675pub fn string_to_buffer(data: &str) -> RUMBuffer {
691 RUMBuffer::copy_from_slice(data.as_bytes())
692}
693
694pub fn buffer_to_string(buffer: &RUMBuffer) -> RUMResult<RUMString> {
710 match RUMString::from_utf8(buffer.as_slice()) {
711 Ok(string) => Ok(string),
712 Err(e) => Err(rumtk_format!("Failure to parse incoming UTF-8 string: {}", e)),
713 }
714}
715
716pub fn string_format(input: &str, formatting: &StringReplacementPair) -> RUMString {
732 let mut output = String::from(input);
733
734 for item in formatting.iter() {
735 output = output.as_str().replace(item.0, item.1);
736 }
737
738 output.to_rumstring()
739}
740
741pub fn string_to_b64(data: &str) -> String {
750 BASE64_STANDARD.encode(data)
751}
752
753pub fn b64_to_string(data: &String) -> RUMResult<RUMVec<u8>> {
761 match BASE64_STANDARD.decode(data) {
762 Ok(result) => Ok(result),
763 Err(e) => Err(rumtk_format!("Failed to decode base64 string: {}", e)),
764 }
765}