1use crate::base::{is_unique, RUMResult, RUMVec};
21use crate::types::RUMBuffer;
22use base64::prelude::*;
23use chardetng::{EncodingDetector, Iso2022JpDetection, Utf8Detection};
24use encoding_rs::Encoding;
25use std::cmp::min;
26pub use std::format as rumtk_format;
27pub use std::primitive::str;
28use unicode_segmentation::UnicodeSegmentation;
/// Width of an escaped-sequence window.
/// NOTE(review): not referenced in the visible code; presumably mirrors the six-byte span
/// that `unescape` reads — confirm.
const ESCAPED_STRING_WINDOW: usize = 6;
/// Character that introduces an escape sequence (backslash).
const ASCII_ESCAPE_CHAR: char = '\\';
/// Lowest character treated as printable ASCII (space).
const MIN_ASCII_READABLE: char = ' ';
/// Highest character treated as printable ASCII (tilde).
const MAX_ASCII_READABLE: char = '~';
/// Empty string slice.
pub const EMPTY_STRING: &str = "";
/// Shared empty [`RUMString`].
///
/// Built with `String::new`, which is a `const fn`, so the initializer is valid in a `static`.
pub static EMPTY_RUMSTRING: RUMString = RUMString::new();
/// A single dot.
pub const DOT_STR: &str = ".";
/// `Some("")`, for call sites that need an optional empty string.
pub const EMPTY_STRING_OPTION: Option<&str> = Some("");
/// Every character from `MIN_ASCII_READABLE` to `MAX_ASCII_READABLE`, in code point order.
pub const READABLE_ASCII: &str = " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~";

/// Owned string type used by this module.
pub type RUMString = String;
/// A `(from, to)` text replacement pair.
pub type EscapeException<'a> = (&'a str, &'a str);
/// Optional borrowed list of [`EscapeException`] pairs.
pub type EscapeExceptions<'a> = Option<&'a [EscapeException<'a>]>;
/// Unsized list of `(from, to)` replacement pairs.
pub type StringReplacementPair<'a> = [EscapeException<'a>];
/// One grapheme, borrowed from the source text.
pub type Grapheme<'a> = &'a str;
/// Owned sequence of graphemes borrowed from one source text.
pub type GraphemeStringView<'a> = RUMVec<Grapheme<'a>>;
/// Borrowed run of graphemes whose borrow may be shorter than the text's lifetime.
pub type GraphemeSlice<'b, 'a> = &'b [Grapheme<'a>];
/// Grapheme sequence to search for (a slice whose two lifetimes coincide).
pub type GraphemePattern<'a> = GraphemeSlice<'a, 'a>;
/// A `(left, right)` pair of grapheme patterns.
pub type GraphemePatternPair<'a> = (GraphemePattern<'a>, GraphemePattern<'a>);
50
51#[derive(Default, Debug, PartialEq, Clone)]
56pub struct GraphemeStr<'a> {
57 view: GraphemeStringView<'a>,
58 start: usize,
59 end: usize,
60}
61
62impl<'a> GraphemeStr<'a> {
63 pub fn from(string: &'a str) -> Self {
64 let view = string.graphemes(true).collect::<GraphemeStringView>();
65 Self::from_view(view)
66 }
67
68 pub fn from_view(view: GraphemeStringView<'a>) -> Self {
69 let start = 0;
70 let end = view.len();
71 Self { view, start, end }
72 }
73
74 pub fn at(&self, index: usize) -> Grapheme<'a> {
75 self.view[index]
76 }
77
78 pub fn trim(&self, pattern: &GraphemePatternPair<'a>) -> Self {
79 let (left_pattern, right_pattern) = pattern;
80 self.trim_left(left_pattern).trim_right(right_pattern)
81 }
82
83 pub fn trim_left(&self, pattern: &GraphemePattern<'a>) -> Self {
84 let new_offset = self.find(pattern, self.start);
85 Self {
86 view: self.view.clone(),
87 start: new_offset,
88 end: self.end,
89 }
90 }
91
92 pub fn trim_right(&self, pattern: &GraphemePattern<'a>) -> Self {
93 let new_offset = self.rfind(pattern, self.end);
94 Self {
95 view: self.view.clone(),
96 start: self.start,
97 end: new_offset,
98 }
99 }
100
101 pub fn splice(&self, skip_pattern: &GraphemePatternPair<'a>) -> Self {
102 let (left_pattern, right_pattern) = skip_pattern;
103 let mut new_view = GraphemeStringView::with_capacity(self.end - self.start);
104 let mut offset = self.start;
105 let l_pattern_s = left_pattern.len();
106
107 while offset < self.end {
108 let target_s = self.find(left_pattern, offset) + l_pattern_s;
109 for i in offset..target_s {
110 new_view.push(self.view[i]);
111 }
112 offset = self.find(right_pattern, target_s);
113 }
114
115 GraphemeStr::from_view(new_view)
116 }
117
118 pub fn find(&self, pattern: &GraphemePattern<'a>, offset: usize) -> usize {
119 let pattern_s = pattern.len();
120 let mut new_offset = offset;
121 let mut pattern_end = new_offset + pattern_s;
122
123 while new_offset < self.end && pattern_end < self.end {
124 if self.view[new_offset..pattern_end] == **pattern {
125 break;
126 }
127
128 new_offset += 1;
129 pattern_end = new_offset + pattern_s;
130 }
131
132 new_offset
133 }
134
135 pub fn rfind(&self, pattern: &GraphemePattern<'a>, offset: usize) -> usize {
136 let pattern_s = pattern.len();
137 let mut new_offset = offset;
138 while new_offset > self.start {
139 if self.view[new_offset - pattern_s..new_offset] == **pattern {
140 break;
141 }
142
143 new_offset -= 1;
144 }
145
146 new_offset
147 }
148
149 pub fn len(&self) -> usize {
150 self.end - self.start
151 }
152
153 pub fn get_graphemes(&self) -> GraphemeSlice<'_, 'a> {
154 &self.view[self.start..self.end]
155 }
156
157 pub fn truncate(&self, size: usize) -> Self {
158 let end = min(size, self.end);
159 Self {
160 view: self.view.clone(),
161 start: self.start,
162 end,
163 }
164 }
165
166 pub fn is_unique(&self) -> bool {
167 is_unique(&self.view)
168 }
169}
170
171impl ToString for GraphemeStr<'_> {
172 fn to_string(&self) -> String {
173 let mut new_string = String::with_capacity(self.len());
174
175 for grapheme in self.view[self.start..self.end].iter() {
176 new_string.push_str(grapheme);
177 }
178
179 new_string
180 }
181}
182
183impl RUMStringConversions for GraphemeStr<'_> {}
184
/// Minimal interface of a growable string buffer.
///
/// NOTE(review): the method names mirror `String::with_capacity` / `String::push_str`;
/// implementors presumably follow the same semantics — confirm.
pub trait StringLike {
    /// Creates a value sized for `capacity`.
    fn with_capacity(capacity: usize) -> Self;

    /// Appends `string` to this value.
    fn push_str(&mut self, string: &str);
}
191
/// Conversion of a value into an owned [`RUMString`].
pub trait AsString {
    /// Returns the string form of `self`.
    fn as_string(&self) -> RUMString;
}
195
196pub trait AsStr {
197 fn as_str(&self) -> &str;
198 fn as_grapheme_str(&self) -> GraphemeStr {
199 GraphemeStr::from(self.as_str())
200 }
201}
202
203pub trait RUMStringConversions: ToString {
204 #[inline(always)]
205 fn to_raw(&self) -> RUMVec<u8> {
206 self.to_string().as_bytes().to_vec()
207 }
208
209 #[inline(always)]
210 fn to_buffer(&self) -> RUMBuffer {
211 string_to_buffer(self.to_string().as_str())
212 }
213}
214
215pub trait StringUtils: AsStr + RUMStringConversions {
216 #[inline(always)]
217 fn duplicate(&self, count: usize) -> RUMString {
218 let mut duplicated = RUMString::with_capacity(count);
219 for i in 0..count {
220 duplicated += &self.as_str();
221 }
222 duplicated
223 }
224
225 fn truncate(&self, count: usize) -> RUMString {
226 self.as_grapheme_str().truncate(count).to_string()
227 }
228}
229
230impl AsStr for String {
231 fn as_str(&self) -> &str {
232 self.as_str()
233 }
234}
235
236impl RUMStringConversions for RUMString {}
237impl StringUtils for RUMString {}
238
239impl RUMStringConversions for str {}
240
241impl AsStr for str {
242 fn as_str(&self) -> &str {
243 self
244 }
245}
246
247impl StringUtils for str {}
248
249impl RUMStringConversions for char {}
250
251pub trait RUMArrayConversions {
252 fn to_string(&self) -> RUMResult<RUMString>;
253}
254
255impl RUMArrayConversions for Vec<u8> {
256 #[inline(always)]
257 fn to_string(&self) -> RUMResult<RUMString> {
258 match RUMString::from_utf8(self.to_owned()) {
259 Ok(s) => Ok(s),
260 Err(e) => Err(rumtk_format!("Failure to parse incoming UTF-8 string: {}", e))
261 }
262 }
263}
264
265impl RUMArrayConversions for &[u8] {
266 #[inline(always)]
267 fn to_string(&self) -> RUMResult<RUMString> {
268 match RUMString::from_utf8(self.to_vec()) {
269 Ok(s) => Ok(s),
270 Err(e) => Err(rumtk_format!("Failure to parse incoming UTF-8 string: {}", e))
271 }
272 }
273}
274
275impl AsString for u8 {
276 fn as_string(&self) -> RUMString {
277 RUMString::from(char::from_u32((*self).into()).unwrap_or_default())
278 }
279}
280
/// Counts the entries of `vector` that differ from `string_token`.
pub fn count_tokens_ignoring_pattern(vector: &Vec<&str>, string_token: &RUMString) -> usize {
    let ignored = string_token.as_str();
    vector.iter().filter(|tok| **tok != ignored).count()
}
292
293pub fn try_decode(src: &[u8]) -> RUMResult<RUMString> {
300 let mut detector = EncodingDetector::new(Iso2022JpDetection::Allow);
301 detector.feed(&src, true);
302 let encoding = detector.guess(None, Utf8Detection::Allow);
303 decode(src, encoding)
304}
305
306pub fn try_decode_with(src: &[u8], encoding_name: &str) -> RUMResult<RUMString> {
312 let encoding = match Encoding::for_label(encoding_name.as_bytes()) {
313 Some(v) => v,
314 None => return Ok(EMPTY_RUMSTRING.clone()),
315 };
316 decode(src, encoding)
317}
318
319fn decode(src: &[u8], encoding: &'static Encoding) -> RUMResult<RUMString> {
325 Ok(match encoding.decode_without_bom_handling_and_without_replacement(&src) {
326 Some(res) => RUMString::from(res),
327 None => src.to_string()?,
328 })
329}
330
331pub fn unescape_string(escaped_str: &str) -> RUMResult<RUMString> {
340 let graphemes = escaped_str.graphemes(true).collect::<Vec<&str>>();
341 let str_size = graphemes.len();
342 let mut result: Vec<u8> = Vec::with_capacity(escaped_str.len());
343 let mut i = 0;
344 while i < str_size {
345 let seq_start = graphemes[i];
346 match seq_start {
347 "\\" => {
348 let escape_seq = get_grapheme_string(&graphemes, " ", i);
349 let mut c = match unescape(&escape_seq) {
350 Ok(c) => c,
351 Err(_why) => Vec::from(escape_seq.as_bytes()),
352 };
353 result.append(&mut c);
354 i += &escape_seq.as_grapheme_str().len();
355 }
356 _ => {
357 result.append(&mut Vec::from(seq_start.as_bytes()));
358 i += 1;
359 }
360 }
361 }
362 Ok(try_decode(result.as_slice())?)
363}
364
365pub fn get_grapheme_string<'a>(
369 graphemes: &Vec<&'a str>,
370 end_grapheme: &str,
371 start_index: usize,
372) -> RUMString {
373 get_grapheme_collection(graphemes, end_grapheme, start_index).join("")
374}
375
/// Collects the graphemes from `start_index` up to (not including) the first occurrence of
/// `end_grapheme`, or to the end of `graphemes` when it never occurs.
pub fn get_grapheme_collection<'a>(
    graphemes: &Vec<&'a str>,
    end_grapheme: &str,
    start_index: usize,
) -> Vec<&'a str> {
    graphemes
        .iter()
        .skip(start_index)
        .copied()
        .take_while(|grapheme| *grapheme != end_grapheme)
        .collect()
}
397
398pub fn unescape(escaped_str: &str) -> Result<Vec<u8>, RUMString> {
409 let lower_case = escaped_str.to_lowercase();
410 let mut bytes: Vec<u8> = Vec::with_capacity(3);
411 match &lower_case[0..2] {
412 "\\x" => {
414 let byte_str = number_to_char_unchecked(&hex_to_number(&lower_case[2..6])?);
415 bytes.append(&mut byte_str.as_bytes().to_vec());
416 }
417 "\\u" => {
419 let byte_str = number_to_char_unchecked(&hex_to_number(&lower_case[2..6])?);
420 bytes.append(&mut byte_str.as_bytes().to_vec());
421 }
422 "\\c" => {
424 let byte_str = number_to_char_unchecked(&hex_to_number(&lower_case[2..6])?);
425 bytes.append(&mut byte_str.as_bytes().to_vec());
426 }
427 "\\o" => {
429 let byte_str = number_to_char_unchecked(&octal_to_number(&lower_case[2..6])?);
430 bytes.append(&mut byte_str.as_bytes().to_vec());
431 }
432 "\\m" => match lower_case.as_grapheme_str().len() {
434 8 => {
435 bytes.push(hex_to_byte(&lower_case[2..4])?);
436 bytes.push(hex_to_byte(&lower_case[4..6])?);
437 bytes.push(hex_to_byte(&lower_case[6..8])?);
438 }
439 6 => {
440 bytes.push(hex_to_byte(&lower_case[2..4])?);
441 bytes.push(hex_to_byte(&lower_case[4..6])?);
442 }
443 _ => {
444 return Err(rumtk_format!(
445 "Unknown multibyte sequence. Cannot decode {}",
446 lower_case
447 ))
448 }
449 },
450 "\\z" => bytes.append(&mut lower_case.as_bytes().to_vec()),
452 _ => bytes.push(unescape_control_byte(&lower_case[0..2])?),
454 }
455 Ok(bytes)
456}
457
458fn unescape_control(escaped_str: &str) -> Result<char, RUMString> {
463 match escaped_str {
464 "\\t" => Ok('\t'),
466 "\\b" => Ok('\x08'),
467 "\\n" => Ok('\n'),
468 "\\r" => Ok('\r'),
469 "\\f" => Ok('\x14'),
470 "\\s" => Ok('\x20'),
471 "\\\\" => Ok(ASCII_ESCAPE_CHAR),
472 "\\'" => Ok('\''),
473 "\\\"" => Ok('"'),
474 "\\0" => Ok('\0'),
475 "\\v" => Ok('\x0B'),
476 "\\a" => Ok('\x07'),
477 _ => Err(rumtk_format!(
479 "Unknown escape sequence? Sequence: {}!",
480 escaped_str
481 )),
482 }
483}
484
485fn unescape_control_byte(escaped_str: &str) -> Result<u8, RUMString> {
490 match escaped_str {
491 "\\t" => Ok(9), "\\b" => Ok(8), "\\n" => Ok(10), "\\r" => Ok(13), "\\f" => Ok(12), "\\s" => Ok(32), "\\\\" => Ok(27), "\\'" => Ok(39), "\\\"" => Ok(34), "\\0" => Ok(0), "\\v" => Ok(11), "\\a" => Ok(7), _ => hex_to_byte(escaped_str),
507 }
508}
509
/// Parses `hex_str` as a base-16 `u32`.
///
/// # Errors
/// Returns a message containing the parse error and the offending input.
fn hex_to_number(hex_str: &str) -> Result<u32, RUMString> {
    u32::from_str_radix(hex_str, 16).map_err(|val| {
        rumtk_format!(
            "Failed to parse string with error {}! Input string {} is not hex string!",
            val,
            hex_str
        )
    })
}
524
/// Parses `hex_str` as a base-16 `u8`.
///
/// # Errors
/// Returns a message containing the parse error and the offending input.
fn hex_to_byte(hex_str: &str) -> Result<u8, RUMString> {
    u8::from_str_radix(hex_str, 16).map_err(|val| {
        rumtk_format!(
            "Failed to parse string with error {}! Input string {} is not hex string!",
            val,
            hex_str
        )
    })
}
539
/// Parses `hoctal_str` as a base-8 `u32`.
///
/// # Errors
/// Returns a message containing the parse error and the offending input.
fn octal_to_number(hoctal_str: &str) -> Result<u32, RUMString> {
    u32::from_str_radix(hoctal_str, 8).map_err(|val| {
        rumtk_format!(
            "Failed to parse string with error {}! Input string {} is not an octal string!",
            val,
            hoctal_str
        )
    })
}
554
/// Parses `hoctal_str` as a base-8 `u8`.
///
/// # Errors
/// Returns a message containing the parse error and the offending input.
fn octal_to_byte(hoctal_str: &str) -> Result<u8, RUMString> {
    u8::from_str_radix(hoctal_str, 8).map_err(|val| {
        rumtk_format!(
            "Failed to parse string with error {}! Input string {} is not an octal string!",
            val,
            hoctal_str
        )
    })
}
569
/// Converts the code point `num` into a one-character string.
///
/// # Errors
/// Returns a message naming `num` when it is not a valid `char`.
fn number_to_char(num: &u32) -> Result<RUMString, RUMString> {
    char::from_u32(*num)
        .map(|result| result.to_string())
        .ok_or_else(|| rumtk_format!("Failed to cast number to character! Number {}", num))
}
582
/// Converts the code point `num` into a one-character string without reporting failures.
///
/// A value that is not a valid `char` (a surrogate, or anything above `0x10FFFF`) yields
/// U+FFFD REPLACEMENT CHARACTER. The conversion is checked because this function is safe to
/// call with any `u32`, and an unchecked conversion of an invalid value is undefined behaviour.
fn number_to_char_unchecked(num: &u32) -> RUMString {
    char::from_u32(*num)
        .unwrap_or(char::REPLACEMENT_CHARACTER)
        .to_string()
}
591
592pub fn escape(unescaped_str: &str) -> RUMString {
604 basic_escape(unescaped_str, Some(&vec![("{", ""), ("}", "")]))
605}
606
607pub fn basic_escape(unescaped_str: &str, except: EscapeExceptions) -> RUMString {
618 let escaped = is_escaped_str(unescaped_str);
619
620 match except {
621 Some(exceptions) => {
622 if !escaped {
623 let mut escaped_str = unescaped_str.escape_default().to_string();
624 for (from, to) in exceptions {
625 escaped_str = escaped_str.replace(from, to);
626 }
627 return escaped_str.to_string();
628 }
629 },
630 None => {}
631 }
632
633 unescaped_str.to_string()
634}
635
/// Returns `true` when every byte of `unescaped_str` is ASCII (also for the empty string).
pub fn is_ascii_str(unescaped_str: &str) -> bool {
    unescaped_str.as_bytes().is_ascii()
}
645
646pub fn is_escaped_str(unescaped_str: &str) -> bool {
655 if !is_ascii_str(unescaped_str) {
656 return false;
657 }
658
659 for c in unescaped_str.chars() {
660 if !is_printable_char(&c) {
661 return false;
662 }
663 }
664 true
665}
666
667pub fn is_printable_char(c: &char) -> bool {
671 &MIN_ASCII_READABLE <= c && c <= &MAX_ASCII_READABLE
672}
673
/// Returns a copy of `unescaped_str` that keeps only the characters for which `closure`
/// returns `true`.
pub fn filter_ascii(unescaped_str: &str, closure: fn(char) -> bool) -> RUMString {
    unescaped_str.chars().filter(|c| closure(*c)).collect()
}
682
683pub fn filter_non_printable_ascii(unescaped_str: &str) -> RUMString {
687 filter_ascii(unescaped_str, |c: char| is_printable_char(&c))
688}
689
690pub fn string_to_buffer(data: &str) -> RUMBuffer {
707 RUMBuffer::copy_from_slice(data.as_bytes())
708}
709
710pub fn string_format(input: &str, formatting: &StringReplacementPair) -> RUMString {
726 let mut output = String::from(input);
727
728 for item in formatting.iter() {
729 output = output.as_str().replace(item.0, item.1);
730 }
731
732 output.to_string()
733}
734
735pub fn string_to_b64(data: &str) -> String {
744 BASE64_STANDARD.encode(data)
745}
746
747pub fn b64_to_string(data: &String) -> RUMResult<RUMVec<u8>> {
755 match BASE64_STANDARD.decode(data) {
756 Ok(result) => Ok(result),
757 Err(e) => Err(rumtk_format!("Failed to decode base64 string: {}", e)),
758 }
759}